Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 2 | # |
| 3 | # Determine dependencies of python scripts or available python modules in a search path. |
| 4 | # |
| 5 | # Given the -d argument and a filename/filenames, returns the modules imported by those files. |
| 6 | # Given the -d argument and a directory/directories, recurses to find all |
| 7 | # python packages and modules, returns the modules imported by these. |
| 8 | # Given the -p argument and a path or paths, scans that path for available python modules/packages. |
| 9 | |
| 10 | import argparse |
| 11 | import ast |
Brad Bishop | 1932369 | 2019-04-05 15:28:33 -0400 | [diff] [blame] | 12 | import importlib |
| 13 | from importlib import machinery |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 14 | import logging |
| 15 | import os.path |
| 16 | import sys |
| 17 | |
| 18 | |
# Module-level logger; main() reports dependency errors through it.
logger = logging.getLogger('pythondeps')

# Filename suffixes that the running interpreter's import machinery
# recognises as modules; used to map file names to module names.
suffixes = importlib.machinery.all_suffixes()
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 22 | |
class PythonDepError(Exception):
    """Common base class for the errors this module raises itself."""
| 25 | |
| 26 | |
class DependError(PythonDepError):
    """Raised when the dependencies of a file cannot be determined.

    ``path`` identifies the file being analysed and ``error`` is the
    underlying cause; both are stored as attributes and combined into the
    message returned by ``str()``.
    """

    def __init__(self, path, error):
        super().__init__(error)
        self.path = path
        self.error = error

    def __str__(self):
        template = "Failure determining dependencies of {}: {}"
        return template.format(self.path, self.error)
| 35 | |
| 36 | |
class ImportVisitor(ast.NodeVisitor):
    """AST visitor that records the import statements it encounters.

    ``imports`` is the set of dotted names taken from ``import x``
    statements.  ``importsfrom`` is a list with one
    ``(module, names, level)`` tuple per ``from x import y`` statement, in
    the order the statements are visited.
    """

    def __init__(self):
        self.importsfrom = []
        self.imports = set()

    def visit_Import(self, node):
        # One statement may name several modules: "import a, b.c".
        self.imports.update(alias.name for alias in node.names)

    def visit_ImportFrom(self, node):
        imported = [alias.name for alias in node.names]
        self.importsfrom.append((node.module, imported, node.level))
| 48 | |
| 49 | |
def walk_up(path):
    """Yield ``path`` and then each successively shorter parent prefix.

    After every step the last ``os.sep``-separated component is dropped;
    iteration ends as soon as the remaining prefix is the empty string, so
    nothing is yielded for an empty ``path``.
    """
    current = path
    while current:
        yield current
        # rpartition gives (head, sep, tail); an absent separator leaves
        # head empty, which terminates the loop.
        current = current.rpartition(os.sep)[0]
| 54 | |
| 55 | |
def get_provides(path):
    """Yield ``(dotted_name, filesystem_path)`` for what ``path`` provides.

    ``path`` is first resolved with ``os.path.realpath``.

    * For a file whose name ends in one of the module ``suffixes`` a single
      pair is yielded; any other file yields nothing.
    * For a directory, the directory itself is yielded when it contains
      ``__init__.py``, followed by every sub-package and module file
      beneath it.  Subdirectories without ``__init__.py`` are not
      descended into.

    Dotted names are relative to the nearest directory, walking upwards,
    that does not contain ``__init__.py``.
    """
    path = os.path.realpath(path)
    init_name = '__init__.py'

    def strip_suffix(filename):
        # The first suffix (in ``suffixes`` order) that matches wins;
        # None signals that the file is not a module.
        found = next((s for s in suffixes if filename.endswith(s)), None)
        if found is None:
            return None
        return filename[:-len(found)]

    isdir = os.path.isdir(path)
    if isdir:
        pkg_path = walk_path = path
    else:
        pkg_path = strip_suffix(path)
        if pkg_path is None:
            return
        walk_path = os.path.dirname(path)

    # Climb until a directory that is not itself a package is found; that
    # directory is the root the dotted names are relative to.
    libdir = ''
    for candidate in walk_up(walk_path):
        if not os.path.exists(os.path.join(candidate, init_name)):
            libdir = candidate
            break

    # Drop the root directory plus the separator that follows it.
    package = pkg_path[len(libdir) + 1:].replace(os.sep, '.')

    if not isdir:
        yield package, path
        return

    if os.path.exists(os.path.join(path, init_name)):
        yield package, path

    for dirpath, dirnames, filenames in os.walk(path):
        relpath = dirpath[len(path) + 1:]
        if not relpath:
            # The top directory itself.
            context = package
        elif init_name in filenames:
            context = relpath.replace(os.sep, '.')
            if package:
                context = package + '.' + context
            yield context, dirpath
        else:
            # Not a package: prune the walk below this directory.
            del dirnames[:]
            continue

        prefix = context + '.' if context else ''
        for fn in filenames:
            module_name = strip_suffix(fn)
            if not module_name or module_name == '__init__':
                continue
            yield prefix + module_name, os.path.join(dirpath, fn)
| 113 | |
| 114 | |
def get_code_depends(code_string, path=None, provide=None, ispkg=False):
    """Yield ``(module, path)`` for each module imported by ``code_string``.

    This is a generator, so parsing and every error below happen on first
    iteration, not at call time.

    Parameters:
        code_string: Python source to analyse.
        path: file the source came from.  It is echoed back as the second
            element of every yielded pair and used to look for sibling
            modules on disk.  May be None.
        provide: dotted module name that the source provides, if known.
            Required to resolve ``from . import x`` style imports.
        ispkg: True when the source is a package's ``__init__.py``.

    Names from plain ``import x`` statements that appear in
    ``sys.builtin_module_names`` are dropped.

    Raises:
        DependError: the source cannot be parsed, or a relative import is
            used without ``provide`` or reaches above the top of the
            provided package.
    """
    # compile() rejects a None filename, so fall back to the same
    # placeholder ast.parse uses by default.
    filename = path if path is not None else '<unknown>'
    try:
        code = ast.parse(code_string, filename)
    except (TypeError, SyntaxError, ValueError) as exc:
        # ValueError: some interpreter versions raise it for source that
        # contains NUL bytes.
        raise DependError(path, exc) from exc

    visitor = ImportVisitor()
    visitor.visit(code)
    visitor.imports.difference_update(sys.builtin_module_names)

    if provide:
        provide_elements = provide.split('.')
        if ispkg:
            # Placeholder leaf so that stripping one element from a
            # package name yields the package itself.
            provide_elements.append("__self__")
        context = '.'.join(provide_elements[:-1])
        package_path = os.path.dirname(path) if path else None
    else:
        provide_elements = []
        context = None
        package_path = None

    absolute = set(visitor.imports)
    absolute.update(module for module, _names, level in visitor.importsfrom
                    if level == 0)
    for module in absolute:
        if context and path:
            # NOTE(review): this treats a sibling file or directory with a
            # matching name as an implicit relative import, which Python 3
            # itself does not do; behaviour kept as-is, confirm it is
            # still wanted.
            module_basepath = os.path.join(package_path,
                                           module.replace('.', os.sep))
            if os.path.exists(module_basepath) or any(
                    os.path.exists(module_basepath + suffix)
                    for suffix in suffixes):
                # Implicit relative import
                yield context + '.' + module, path
                continue
        yield module, path

    for module, names, level in visitor.importsfrom:
        if level == 0:
            continue

        importfrom = (module, names, level)
        if not provide:
            raise DependError(path, "Error: ImportFrom non-zero level outside of a package: {0}".format(importfrom))
        if level > len(provide_elements):
            raise DependError(path, "Error: ImportFrom level exceeds package depth: {0}".format(importfrom))

        # Each level strips one trailing element of the provided name.
        context = '.'.join(provide_elements[:-level])
        if module:
            if context:
                yield context + '.' + module, path
            else:
                yield module, path
| 173 | |
| 174 | |
def get_file_depends(path):
    """Return the dependencies of the single Python source file ``path``.

    The file is read eagerly and its contents handed to
    ``get_code_depends``, whose result is returned unchanged.

    Raises:
        DependError: the file cannot be opened, read or decoded.
    """
    try:
        # The context manager closes the handle even when read() fails.
        with open(path, 'r') as source_file:
            code_string = source_file.read()
    except (OSError, IOError, UnicodeDecodeError) as exc:
        raise DependError(path, exc) from exc

    return get_code_depends(code_string, path)
| 182 | |
| 183 | |
def get_depends_recursive(directory):
    """Yield ``(module, imported_by)`` for all Python source in ``directory``.

    Everything reported by ``get_provides`` for the directory is analysed:
    packages through their ``__init__.py`` and modules through their
    ``.py`` file.  Provided files with any other suffix are skipped.

    Raises:
        DependError: a source file cannot be read or decoded, or
            ``get_code_depends`` fails on it.
    """
    directory = os.path.realpath(directory)

    # Map each provided file or directory to the dotted name it provides.
    provides = {filename: provide
                for provide, filename in get_provides(directory)}
    for filename, provide in provides.items():
        ispkg = os.path.isdir(filename)
        if ispkg:
            filename = os.path.join(filename, '__init__.py')
        elif not filename.endswith('.py'):
            continue

        try:
            with open(filename, 'r') as source_file:
                source = source_file.read()
        except (OSError, UnicodeDecodeError) as exc:
            # Report read failures the same way get_file_depends does.
            raise DependError(filename, exc) from exc

        yield from get_code_depends(source, filename, provide, ispkg)
| 203 | |
| 204 | |
def get_depends(path):
    """Return the dependencies of ``path``.

    Directories are handled by ``get_depends_recursive``; anything else is
    treated as a single file and handled by ``get_file_depends``.
    """
    finder = get_depends_recursive if os.path.isdir(path) else get_file_depends
    return finder(path)
| 210 | |
| 211 | |
def main():
    """Command-line entry point.

    With ``-p/--provides`` the sorted, de-duplicated module names provided
    by the given paths are printed, one per line.  With ``-d/--depends`` a
    ``module<TAB>imported-by`` line is printed for every dependency found.
    With neither option the help text is printed and the exit status is 2.
    A dependency error is logged and ends the process with exit status 1.
    """
    logging.basicConfig()

    parser = argparse.ArgumentParser(description='Determine dependencies and provided packages for python scripts/modules')
    parser.add_argument('path', nargs='+', help='full path to content to be processed')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-p', '--provides', action='store_true',
                       help='given a path, display the provided python modules')
    group.add_argument('-d', '--depends', action='store_true',
                       help='given a filename, display the imported python modules')

    args = parser.parse_args()
    if args.provides:
        modules = set()
        for path in args.path:
            modules.update(provide for provide, _fn in get_provides(path))

        for module in sorted(modules):
            print(module)
    elif args.depends:
        for path in args.path:
            try:
                # The dependency finders are generators, so their errors
                # only surface while iterating; consume the results here
                # so that the handler below actually sees them.
                modules = list(get_depends(path))
            except PythonDepError as exc:
                logger.error(str(exc))
                sys.exit(1)

            for module, imp_by in modules:
                print("{}\t{}".format(module, imp_by))
    else:
        parser.print_help()
        sys.exit(2)
| 245 | |
| 246 | |
# Run the command-line interface only when executed as a script, not when
# the file is imported as a module.
if __name__ == '__main__':
    main()