Jason M. Bills | d1e4060 | 2019-05-09 11:43:51 -0700 | [diff] [blame] | 1 | #!/usr/bin/python3 |
| 2 | import sys |
| 3 | import requests |
| 4 | import re |
| 5 | import argparse |
| 6 | from bs4 import BeautifulSoup |
| 7 | |
| 8 | |
def parse_args(argv):
    """Turn the raw command-line tokens into an argparse namespace.

    ``argv`` is the list of arguments without the program name.  The
    returned namespace exposes ``http_proxy`` and ``https_proxy``; each is
    ``None`` when the corresponding option was not supplied.
    """
    cli = argparse.ArgumentParser(description='Get the PCI-SIG Vendor IDs')
    # Both options are plain optional string values, so register them
    # from a small table instead of repeating the call.
    proxy_options = (
        ('--http-proxy', "HTTP Proxy Address"),
        ('--https-proxy', "HTTPS Proxy Address"),
    )
    for flag, description in proxy_options:
        cli.add_argument(flag, action='store', help=description)
    return cli.parse_args(argv)
| 16 | |
| 17 | |
# Matches a cell shaped like "<word> (<word> hex)" and captures the word
# inside the parentheses, i.e. the hexadecimal form of the ID.
_HEX_VENDOR_ID = re.compile(r'\w+ \((\w+) hex\)', re.I)


def _extract_vendor_id(cell_text):
    """Return the vendor ID found in a table cell, or '' if unusable.

    Text that does not mention "hex" is returned stripped but otherwise
    unchanged.  Text that mentions "hex" must match the
    "<word> (<word> hex)" layout; the parenthesised word is returned, and
    anything else yields the empty string.
    """
    vendor_id = cell_text.strip()
    if 'hex' not in vendor_id.lower():
        return vendor_id
    match = _HEX_VENDOR_ID.match(vendor_id)
    return match.group(1) if match is not None else ''


def main(argv):
    """Go to the PCI-SIG members page and construct a
    dictionary of member companies to their Vendor IDs.

    Every page of the member list is fetched in turn by following the
    "Go to next page" link until no such link is present.  The collected
    entries are then printed, sorted case-insensitively by company name,
    one per line in the form ``{0x<id>, "<name>"},``.

    ``argv`` is the list of command-line arguments without the program
    name; it may carry ``--http-proxy`` and ``--https-proxy``.

    Raises ``requests.HTTPError`` when a page returns an error status and
    ``RuntimeError`` when a fetched page contains no table at all.
    """
    # Local import so this function does not depend on a file-level
    # import that the original script never had.
    from urllib.parse import urljoin

    args = parse_args(argv)

    proxy_dict = {
        "http": args.http_proxy,
        "https": args.https_proxy,
    }
    page_url = 'https://pcisig.com/membership/member-companies'
    pci_vendor_ids = {}
    while True:
        # A timeout keeps the script from hanging forever on a stalled
        # connection; the status check stops an error page from being
        # parsed as if it were the member list.
        response = requests.get(page_url, proxies=proxy_dict, timeout=30)
        response.raise_for_status()
        # Name the parser explicitly so the result does not depend on
        # which optional parsers happen to be installed.
        soup = BeautifulSoup(response.text, "html.parser")

        table = soup.table
        if table is None:
            raise RuntimeError(
                "No member table found at {}".format(page_url))
        # html.parser does not synthesise a <tbody>; fall back to the
        # table itself when the markup has none.
        body = table.tbody if table.tbody is not None else table

        for row in body.find_all("tr"):
            cells = row.find_all("td")
            if len(cells) < 2:
                # Header or spacer rows carry no name/ID pair.
                continue
            vendor_id = _extract_vendor_id(cells[1].text)
            if vendor_id:
                vendor_name = cells[0].text.replace('"', '').strip()
                pci_vendor_ids[vendor_name] = vendor_id

        next_link = soup.find("a", title="Go to next page")
        if next_link is None:
            break
        href = next_link.get("href")
        if not href:
            break
        # urljoin copes with absolute, root-relative and query-only hrefs.
        page_url = urljoin(page_url, href)

    for name, vid in sorted(pci_vendor_ids.items(),
                            key=lambda item: item[0].lower()):
        print("{{0x{}, \"{}\"}},".format(vid, name))
| 53 | |
| 54 | |
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    # Drop the program name; main() expects just the option tokens.
    cli_arguments = sys.argv[1:]
    main(cli_arguments)