| Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 1 | #!/usr/bin/env python3 | 
| Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 2 | # | 
 | 3 | # Determine dependencies of python scripts or available python modules in a search path. | 
 | 4 | # | 
 | 5 | # Given the -d argument and a filename/filenames, returns the modules imported by those files. | 
 | 6 | # Given the -d argument and a directory/directories, recurses to find all | 
 | 7 | # python packages and modules, returns the modules imported by these. | 
 | 8 | # Given the -p argument and a path or paths, scans that path for available python modules/packages. | 
 | 9 |  | 
import argparse
import ast
import imp
import importlib.machinery
import logging
import os.path
import sys
 | 16 |  | 
 | 17 |  | 
 | 18 | logger = logging.getLogger('pythondeps') | 
 | 19 |  | 
 | 20 | suffixes = [] | 
 | 21 | for triple in imp.get_suffixes(): | 
 | 22 |     suffixes.append(triple[0]) | 
 | 23 |  | 
 | 24 |  | 
 | 25 | class PythonDepError(Exception): | 
 | 26 |     pass | 
 | 27 |  | 
 | 28 |  | 
 | 29 | class DependError(PythonDepError): | 
 | 30 |     def __init__(self, path, error): | 
 | 31 |         self.path = path | 
 | 32 |         self.error = error | 
 | 33 |         PythonDepError.__init__(self, error) | 
 | 34 |  | 
 | 35 |     def __str__(self): | 
 | 36 |         return "Failure determining dependencies of {}: {}".format(self.path, self.error) | 
 | 37 |  | 
 | 38 |  | 
 | 39 | class ImportVisitor(ast.NodeVisitor): | 
 | 40 |     def __init__(self): | 
 | 41 |         self.imports = set() | 
 | 42 |         self.importsfrom = [] | 
 | 43 |  | 
 | 44 |     def visit_Import(self, node): | 
 | 45 |         for alias in node.names: | 
 | 46 |             self.imports.add(alias.name) | 
 | 47 |  | 
 | 48 |     def visit_ImportFrom(self, node): | 
 | 49 |         self.importsfrom.append((node.module, [a.name for a in node.names], node.level)) | 
 | 50 |  | 
 | 51 |  | 
 | 52 | def walk_up(path): | 
 | 53 |     while path: | 
 | 54 |         yield path | 
 | 55 |         path, _, _ = path.rpartition(os.sep) | 
 | 56 |  | 
 | 57 |  | 
 | 58 | def get_provides(path): | 
 | 59 |     path = os.path.realpath(path) | 
 | 60 |  | 
 | 61 |     def get_fn_name(fn): | 
 | 62 |         for suffix in suffixes: | 
 | 63 |             if fn.endswith(suffix): | 
 | 64 |                 return fn[:-len(suffix)] | 
 | 65 |  | 
 | 66 |     isdir = os.path.isdir(path) | 
 | 67 |     if isdir: | 
 | 68 |         pkg_path = path | 
 | 69 |         walk_path = path | 
 | 70 |     else: | 
 | 71 |         pkg_path = get_fn_name(path) | 
 | 72 |         if pkg_path is None: | 
 | 73 |             return | 
 | 74 |         walk_path = os.path.dirname(path) | 
 | 75 |  | 
 | 76 |     for curpath in walk_up(walk_path): | 
 | 77 |         if not os.path.exists(os.path.join(curpath, '__init__.py')): | 
 | 78 |             libdir = curpath | 
 | 79 |             break | 
 | 80 |     else: | 
 | 81 |         libdir = '' | 
 | 82 |  | 
 | 83 |     package_relpath = pkg_path[len(libdir)+1:] | 
 | 84 |     package = '.'.join(package_relpath.split(os.sep)) | 
 | 85 |     if not isdir: | 
 | 86 |         yield package, path | 
 | 87 |     else: | 
 | 88 |         if os.path.exists(os.path.join(path, '__init__.py')): | 
 | 89 |             yield package, path | 
 | 90 |  | 
 | 91 |         for dirpath, dirnames, filenames in os.walk(path): | 
 | 92 |             relpath = dirpath[len(path)+1:] | 
 | 93 |             if relpath: | 
 | 94 |                 if '__init__.py' not in filenames: | 
 | 95 |                     dirnames[:] = [] | 
 | 96 |                     continue | 
 | 97 |                 else: | 
 | 98 |                     context = '.'.join(relpath.split(os.sep)) | 
 | 99 |                     if package: | 
 | 100 |                         context = package + '.' + context | 
 | 101 |                     yield context, dirpath | 
 | 102 |             else: | 
 | 103 |                 context = package | 
 | 104 |  | 
 | 105 |             for fn in filenames: | 
 | 106 |                 adjusted_fn = get_fn_name(fn) | 
 | 107 |                 if not adjusted_fn or adjusted_fn == '__init__': | 
 | 108 |                     continue | 
 | 109 |  | 
 | 110 |                 fullfn = os.path.join(dirpath, fn) | 
 | 111 |                 if context: | 
 | 112 |                     yield context + '.' + adjusted_fn, fullfn | 
 | 113 |                 else: | 
 | 114 |                     yield adjusted_fn, fullfn | 
 | 115 |  | 
 | 116 |  | 
 | 117 | def get_code_depends(code_string, path=None, provide=None, ispkg=False): | 
 | 118 |     try: | 
 | 119 |         code = ast.parse(code_string, path) | 
 | 120 |     except TypeError as exc: | 
 | 121 |         raise DependError(path, exc) | 
 | 122 |     except SyntaxError as exc: | 
 | 123 |         raise DependError(path, exc) | 
 | 124 |  | 
 | 125 |     visitor = ImportVisitor() | 
 | 126 |     visitor.visit(code) | 
 | 127 |     for builtin_module in sys.builtin_module_names: | 
 | 128 |         if builtin_module in visitor.imports: | 
 | 129 |             visitor.imports.remove(builtin_module) | 
 | 130 |  | 
 | 131 |     if provide: | 
 | 132 |         provide_elements = provide.split('.') | 
 | 133 |         if ispkg: | 
 | 134 |             provide_elements.append("__self__") | 
 | 135 |         context = '.'.join(provide_elements[:-1]) | 
 | 136 |         package_path = os.path.dirname(path) | 
 | 137 |     else: | 
 | 138 |         context = None | 
 | 139 |         package_path = None | 
 | 140 |  | 
 | 141 |     levelzero_importsfrom = (module for module, names, level in visitor.importsfrom | 
 | 142 |                              if level == 0) | 
 | 143 |     for module in visitor.imports | set(levelzero_importsfrom): | 
 | 144 |         if context and path: | 
 | 145 |             module_basepath = os.path.join(package_path, module.replace('.', '/')) | 
 | 146 |             if os.path.exists(module_basepath): | 
 | 147 |                 # Implicit relative import | 
 | 148 |                 yield context + '.' + module, path | 
 | 149 |                 continue | 
 | 150 |  | 
 | 151 |             for suffix in suffixes: | 
 | 152 |                 if os.path.exists(module_basepath + suffix): | 
 | 153 |                     # Implicit relative import | 
 | 154 |                     yield context + '.' + module, path | 
 | 155 |                     break | 
 | 156 |             else: | 
 | 157 |                 yield module, path | 
 | 158 |         else: | 
 | 159 |             yield module, path | 
 | 160 |  | 
 | 161 |     for module, names, level in visitor.importsfrom: | 
 | 162 |         if level == 0: | 
 | 163 |             continue | 
 | 164 |         elif not provide: | 
 | 165 |             raise DependError("Error: ImportFrom non-zero level outside of a package: {0}".format((module, names, level)), path) | 
 | 166 |         elif level > len(provide_elements): | 
 | 167 |             raise DependError("Error: ImportFrom level exceeds package depth: {0}".format((module, names, level)), path) | 
 | 168 |         else: | 
 | 169 |             context = '.'.join(provide_elements[:-level]) | 
 | 170 |             if module: | 
 | 171 |                 if context: | 
 | 172 |                     yield context + '.' + module, path | 
 | 173 |                 else: | 
 | 174 |                     yield module, path | 
 | 175 |  | 
 | 176 |  | 
 | 177 | def get_file_depends(path): | 
 | 178 |     try: | 
 | 179 |         code_string = open(path, 'r').read() | 
 | 180 |     except (OSError, IOError) as exc: | 
 | 181 |         raise DependError(path, exc) | 
 | 182 |  | 
 | 183 |     return get_code_depends(code_string, path) | 
 | 184 |  | 
 | 185 |  | 
 | 186 | def get_depends_recursive(directory): | 
 | 187 |     directory = os.path.realpath(directory) | 
 | 188 |  | 
 | 189 |     provides = dict((v, k) for k, v in get_provides(directory)) | 
| Patrick Williams | c0f7c04 | 2017-02-23 20:41:17 -0600 | [diff] [blame] | 190 |     for filename, provide in provides.items(): | 
| Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 191 |         if os.path.isdir(filename): | 
 | 192 |             filename = os.path.join(filename, '__init__.py') | 
 | 193 |             ispkg = True | 
 | 194 |         elif not filename.endswith('.py'): | 
 | 195 |             continue | 
 | 196 |         else: | 
 | 197 |             ispkg = False | 
 | 198 |  | 
 | 199 |         with open(filename, 'r') as f: | 
 | 200 |             source = f.read() | 
 | 201 |  | 
 | 202 |         depends = get_code_depends(source, filename, provide, ispkg) | 
 | 203 |         for depend, by in depends: | 
 | 204 |             yield depend, by | 
 | 205 |  | 
 | 206 |  | 
 | 207 | def get_depends(path): | 
 | 208 |     if os.path.isdir(path): | 
 | 209 |         return get_depends_recursive(path) | 
 | 210 |     else: | 
 | 211 |         return get_file_depends(path) | 
 | 212 |  | 
 | 213 |  | 
 | 214 | def main(): | 
 | 215 |     logging.basicConfig() | 
 | 216 |  | 
 | 217 |     parser = argparse.ArgumentParser(description='Determine dependencies and provided packages for python scripts/modules') | 
 | 218 |     parser.add_argument('path', nargs='+', help='full path to content to be processed') | 
 | 219 |     group = parser.add_mutually_exclusive_group() | 
 | 220 |     group.add_argument('-p', '--provides', action='store_true', | 
 | 221 |                        help='given a path, display the provided python modules') | 
 | 222 |     group.add_argument('-d', '--depends', action='store_true', | 
 | 223 |                        help='given a filename, display the imported python modules') | 
 | 224 |  | 
 | 225 |     args = parser.parse_args() | 
 | 226 |     if args.provides: | 
 | 227 |         modules = set() | 
 | 228 |         for path in args.path: | 
 | 229 |             for provide, fn in get_provides(path): | 
 | 230 |                 modules.add(provide) | 
 | 231 |  | 
 | 232 |         for module in sorted(modules): | 
 | 233 |             print(module) | 
 | 234 |     elif args.depends: | 
 | 235 |         for path in args.path: | 
 | 236 |             try: | 
 | 237 |                 modules = get_depends(path) | 
 | 238 |             except PythonDepError as exc: | 
 | 239 |                 logger.error(str(exc)) | 
 | 240 |                 sys.exit(1) | 
 | 241 |  | 
 | 242 |             for module, imp_by in modules: | 
 | 243 |                 print("{}\t{}".format(module, imp_by)) | 
 | 244 |     else: | 
 | 245 |         parser.print_help() | 
 | 246 |         sys.exit(2) | 
 | 247 |  | 
 | 248 |  | 
 | 249 | if __name__ == '__main__': | 
 | 250 |     main() |