#!/usr/bin/env python3
#
# Determine the dependencies of Python scripts, or the Python modules available in a search path.
#
# Given the -d argument and one or more filenames, returns the modules imported by those files.
# Given the -d argument and one or more directories, recurses to find all
# Python packages and modules, and returns the modules imported by these.
# Given the -p argument and one or more paths, scans each path for available Python modules/packages.
| 9 | |
| 10 | import argparse |
| 11 | import ast |
| 12 | import imp |
| 13 | import logging |
| 14 | import os.path |
| 15 | import sys |
| 16 | |
| 17 | |
# importlib.machinery replaces the `imp` module for suffix discovery: `imp`
# has been deprecated since Python 3.4 and was removed in Python 3.12.
import importlib.machinery  # noqa: E402 -- imported next to its only use

# Logger shared by the whole script; main() reports failures through it.
logger = logging.getLogger('pythondeps')

# File-name suffixes the running interpreter accepts for importable modules
# (source, bytecode and extension modules).  The list order differs from
# imp.get_suffixes(), but the code in this file only uses the suffixes for
# endswith() matching and existence checks.
suffixes = list(importlib.machinery.all_suffixes())
| 23 | |
| 24 | |
class PythonDepError(Exception):
    """Common base class for this script's own exceptions."""
| 27 | |
| 28 | |
class DependError(PythonDepError):
    """Raised when the dependencies of a file cannot be determined.

    Attributes:
        path: the first constructor argument; rendered as the subject of
            the failure message.
        error: the second constructor argument; rendered as the reason and
            also passed to the base exception.
    """

    def __init__(self, path, error):
        super().__init__(error)
        self.path = path
        self.error = error

    def __str__(self):
        template = "Failure determining dependencies of {}: {}"
        return template.format(self.path, self.error)
| 37 | |
| 38 | |
class ImportVisitor(ast.NodeVisitor):
    """AST visitor that records the import statements it encounters.

    After visiting a tree:
      * ``imports`` is the set of dotted names taken from ``import x``
        statements;
      * ``importsfrom`` is a list of ``(module, names, level)`` tuples, one
        per ``from ... import ...`` statement, in visiting order.
    """

    def __init__(self):
        self.imports = set()
        self.importsfrom = []

    def visit_Import(self, node):
        # "import a, b.c as d" records "a" and "b.c"; the alias is ignored.
        self.imports.update(entry.name for entry in node.names)

    def visit_ImportFrom(self, node):
        imported_names = [entry.name for entry in node.names]
        record = (node.module, imported_names, node.level)
        self.importsfrom.append(record)
| 50 | |
| 51 | |
def walk_up(path):
    """Yield ``path`` and then each of its parents, nearest first.

    A parent is obtained by cutting the string at its last ``os.sep``;
    iteration ends as soon as that leaves an empty string.
    """
    current = path
    while current:
        yield current
        current = current.rpartition(os.sep)[0]
| 56 | |
| 57 | |
def get_provides(path):
    """Yield ``(dotted_name, filesystem_path)`` pairs provided by ``path``.

    For a file, a single pair is yielded if its name ends with one of the
    module-level ``suffixes``; otherwise nothing is yielded.

    For a directory, the directory itself is yielded when it contains an
    ``__init__.py``, followed by every module file and every sub-package
    found by walking it.  Sub-directories without ``__init__.py`` are not
    descended into.

    Dotted names are relative to the nearest ancestor directory that has no
    ``__init__.py`` (searching upwards from the file's directory, or from
    the directory itself).
    """
    path = os.path.realpath(path)

    def strip_suffix(name):
        # The first matching suffix wins; None when no suffix matches.
        return next(
            (name[:-len(ext)] for ext in suffixes if name.endswith(ext)),
            None,
        )

    isdir = os.path.isdir(path)
    if isdir:
        pkg_path = walk_path = path
    else:
        pkg_path = strip_suffix(path)
        if pkg_path is None:
            # Not a recognised module file: it provides nothing.
            return
        walk_path = os.path.dirname(path)

    # Library root: first directory on the way up that lacks __init__.py,
    # or '' when every level has one.
    libdir = next(
        (candidate for candidate in walk_up(walk_path)
         if not os.path.exists(os.path.join(candidate, '__init__.py'))),
        '',
    )

    # Drop the library root (and the separator after it), then turn the
    # remaining path into a dotted name.
    package = pkg_path[len(libdir) + 1:].replace(os.sep, '.')

    if not isdir:
        yield package, path
        return

    if os.path.exists(os.path.join(path, '__init__.py')):
        yield package, path

    prefix_len = len(path) + 1
    for dirpath, dirnames, filenames in os.walk(path):
        relpath = dirpath[prefix_len:]
        if not relpath:
            # Top of the walk: modules here belong directly to `package`.
            context = package
        elif '__init__.py' in filenames:
            context = relpath.replace(os.sep, '.')
            if package:
                context = package + '.' + context
            yield context, dirpath
        else:
            # Not a package: skip its files and prune the walk below it.
            del dirnames[:]
            continue

        for fn in filenames:
            modname = strip_suffix(fn)
            if not modname or modname == '__init__':
                continue

            fullfn = os.path.join(dirpath, fn)
            qualified = context + '.' + modname if context else modname
            yield qualified, fullfn
| 115 | |
| 116 | |
def get_code_depends(code_string, path=None, provide=None, ispkg=False):
    """Yield ``(module_name, path)`` for every module ``code_string`` imports.

    Args:
        code_string: Python source text to analyse.
        path: file the source came from.  Used as the filename for parsing,
            as the second element of every yielded pair, and to probe the
            file's directory for sibling modules.  May be None.
        provide: dotted module name the source is known as, or None when it
            is not part of a package.
        ispkg: True when the source is a package's ``__init__.py``.

    Yields:
        ``(dotted_module_name, path)`` tuples.  Names listed in
        ``sys.builtin_module_names`` are dropped from plain ``import``
        statements.  Relative imports are resolved against ``provide``.

    Raises:
        DependError: the source cannot be parsed, or it contains a relative
            import that cannot be resolved (no ``provide``, or the level
            exceeds the package depth).  Because this is a generator, the
            error surfaces when iteration starts, not at call time.
    """
    # ast.parse() rejects a None filename, so fall back to its own default.
    parse_name = '<unknown>' if path is None else path
    try:
        code = ast.parse(code_string, parse_name)
    except (TypeError, SyntaxError) as exc:
        raise DependError(path, exc) from exc

    visitor = ImportVisitor()
    visitor.visit(code)
    visitor.imports.difference_update(sys.builtin_module_names)

    if provide:
        provide_elements = provide.split('.')
        if ispkg:
            # Placeholder leaf so that stripping the last element of a
            # package name leaves the package itself as the context.
            provide_elements.append("__self__")
        context = '.'.join(provide_elements[:-1])
        # Only consulted when `path` is set (see the loop below).
        package_path = os.path.dirname(path) if path else None
    else:
        context = None
        package_path = None

    levelzero_importsfrom = {module for module, _, level in visitor.importsfrom
                             if level == 0}
    for module in visitor.imports | levelzero_importsfrom:
        if context and path:
            module_basepath = os.path.join(package_path,
                                           module.replace('.', os.sep))
            candidates = [module_basepath]
            candidates.extend(module_basepath + suffix for suffix in suffixes)
            if any(os.path.exists(candidate) for candidate in candidates):
                # Implicit relative import: the name resolves to a sibling
                # of the importing file, so qualify it with the package.
                yield context + '.' + module, path
                continue
        yield module, path

    for module, names, level in visitor.importsfrom:
        if level == 0:
            continue

        # DependError takes (path, error); the path must come first.
        if not provide:
            raise DependError(path, "Error: ImportFrom non-zero level outside of a package: {0}".format((module, names, level)))
        if level > len(provide_elements):
            raise DependError(path, "Error: ImportFrom level exceeds package depth: {0}".format((module, names, level)))

        context = '.'.join(provide_elements[:-level])
        if module:
            if context:
                yield context + '.' + module, path
            else:
                yield module, path
| 175 | |
| 176 | |
def get_file_depends(path):
    """Return the dependencies of the single Python file at ``path``.

    Args:
        path: file to read and analyse.

    Returns:
        The generator produced by ``get_code_depends`` for the file's
        contents.

    Raises:
        DependError: the file cannot be opened or read.
    """
    try:
        # Context manager so the handle is closed even if read() fails.
        with open(path, 'r') as source_file:
            code_string = source_file.read()
    except OSError as exc:  # IOError is an alias of OSError on Python 3
        raise DependError(path, exc) from exc

    return get_code_depends(code_string, path)
| 184 | |
| 185 | |
def get_depends_recursive(directory):
    """Yield the dependencies of every Python source found under ``directory``.

    Everything reported by ``get_provides`` is considered: a directory entry
    is analysed through its ``__init__.py`` as a package, a ``.py`` file as a
    plain module, and any other file is skipped.

    Yields:
        The ``(module_name, importing_file)`` pairs produced by
        ``get_code_depends`` for each analysed file.
    """
    directory = os.path.realpath(directory)

    # Key by filesystem path; the value is the dotted name it provides.
    provides = {location: name for name, location in get_provides(directory)}
    for filename, provide in provides.items():
        ispkg = os.path.isdir(filename)
        if ispkg:
            filename = os.path.join(filename, '__init__.py')
        elif not filename.endswith('.py'):
            continue

        with open(filename, 'r') as f:
            source = f.read()

        yield from get_code_depends(source, filename, provide, ispkg)
| 205 | |
| 206 | |
def get_depends(path):
    """Return the dependencies of ``path``.

    Directories are handled by ``get_depends_recursive``; anything else is
    handled by ``get_file_depends``.
    """
    handler = get_depends_recursive if os.path.isdir(path) else get_file_depends
    return handler(path)
| 212 | |
| 213 | |
def main():
    """Command-line entry point.

    With ``-p/--provides`` prints the sorted, de-duplicated module names
    provided by the given paths.  With ``-d/--depends`` prints one
    ``module<TAB>importing_file`` line per dependency of the given paths;
    a PythonDepError is logged and the process exits with status 1.  With
    neither option, prints the help text and exits with status 2.
    """
    logging.basicConfig()

    parser = argparse.ArgumentParser(description='Determine dependencies and provided packages for python scripts/modules')
    parser.add_argument('path', nargs='+', help='full path to content to be processed')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-p', '--provides', action='store_true',
                       help='given a path, display the provided python modules')
    group.add_argument('-d', '--depends', action='store_true',
                       help='given a filename, display the imported python modules')

    args = parser.parse_args()
    if args.provides:
        modules = set()
        for path in args.path:
            modules.update(provide for provide, _ in get_provides(path))

        for module in sorted(modules):
            print(module)
    elif args.depends:
        for path in args.path:
            try:
                # get_depends() returns a lazy generator, so parse errors are
                # only raised while iterating; the loop must therefore sit
                # inside the try block for them to be reported.
                for module, imp_by in get_depends(path):
                    print("{}\t{}".format(module, imp_by))
            except PythonDepError as exc:
                logger.error(str(exc))
                sys.exit(1)
    else:
        parser.print_help()
        sys.exit(2)
| 247 | |
| 248 | |
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()