Brad Bishop | 316dfdd | 2018-06-25 12:45:53 -0400 | [diff] [blame] | 1 | # This script is used as a bitbake task to create a new python manifest |
| 2 | # $ bitbake python -c create_manifest |
| 3 | # |
# Our goal is to keep python-core as small as possible and add other python
# packages only when the user needs them, which is why we split upstream python
# into several packages.
| 7 | # |
| 8 | # In a very simplistic way what this does is: |
| 9 | # Launch python and see specifically what is required for it to run at a minimum |
| 10 | # |
| 11 | # Go through the python-manifest file and launch a separate task for every single |
| 12 | # one of the files on each package, this task will check what was required for that |
| 13 | # specific module to run, these modules will be called dependencies. |
| 14 | # The output of such task will be a list of the modules or dependencies that were |
| 15 | # found for that file. |
| 16 | # |
# Such output will be parsed by this script, we will look for each dependency on the
# manifest and if we find that another package already includes it, then we will add
# that package as an RDEPENDS to the package we are currently checking; in case we don't
# find the current dependency on any other package we will add it to the current package
# as part of FILES.
| 22 | # |
| 23 | # |
# This way we will create a new manifest from the data structure that was built during
# this process, on this new manifest each package will contain specifically only
# what it needs to run.
| 27 | # |
# There are some caveats which we try to deal with, such as repeated files on different
# packages, packages that include folders, wildcards, and special packages.
# It's also important to note that this method only works for python files, and shared
# libraries. Static libraries, header files and binaries need to be dealt with manually.
| 32 | # |
Brad Bishop | a5c52ff | 2018-11-23 10:55:50 +1300 | [diff] [blame^] | 33 | # Author: Alejandro Enedino Hernandez Samaniego "aehs29" <aehs29 at gmail dot com> |
Brad Bishop | 316dfdd | 2018-06-25 12:45:53 -0400 | [diff] [blame] | 34 | |
| 35 | |
| 36 | import sys |
| 37 | import subprocess |
| 38 | import json |
| 39 | import os |
Andrew Geissler | 730fed8 | 2018-09-19 09:22:27 -0700 | [diff] [blame] | 40 | import collections |
Brad Bishop | 316dfdd | 2018-06-25 12:45:53 -0400 | [diff] [blame] | 41 | |
# HACK: derive the native sysroot location from the interpreter search path.
# Every sys.path entry is examined and the last one containing the pivot wins;
# nativelibfolder is left undefined when no entry contains the pivot.
pivot = 'recipe-sysroot-native'
for search_path in sys.path:
    pivot_at = search_path.find(pivot)
    if pivot_at < 0:
        continue
    # Keep everything up to and including the pivot directory name
    nativelibfolder = search_path[:pivot_at + len(pivot)]

# Holds the whole manifest being built, in package insertion order
new_manifest = collections.OrderedDict()

# Bookkeeping for repeated files and wildcards
allfiles, repeated, wildcards = [], [], []

# Packages that list folders, and the folders themselves
hasfolders, allfolders = [], []
| 58 | |
def isFolder(value):
    """Tell whether a manifest entry resolves to a directory in the native sysroot.

    The '${libdir}' placeholder in value is expanded against the usr/lib,
    usr/lib64 and usr/lib32 directories below nativelibfolder, in that order.

    Returns True as soon as one expansion is an existing directory, and
    False when none of them is.
    """
    lib_roots = (
        nativelibfolder + '/usr/lib',
        nativelibfolder + '/usr/lib64',
        nativelibfolder + '/usr/lib32',
    )
    return any(os.path.isdir(value.replace('${libdir}', root)) for root in lib_roots)
| 64 | |
def prepend_comments(comments, json_manifest):
    """Insert a comment header at the very top of an existing manifest file.

    comments      -- text to place before the current file contents
    json_manifest -- path of the file to rewrite in place

    The file is opened for update, read in full, rewound, and then written
    again as the header followed by the previous contents.
    """
    with open(json_manifest, 'r+') as handle:
        previous = handle.read()
        # Rewind so the header lands before the existing text
        handle.seek(0)
        handle.write(''.join((comments, previous)))
| 70 | |
# Read existing JSON manifest
with open('python2-manifest.json') as manifest:
    manifest_str = manifest.read()

# The JSON format doesn't allow comments, so the file carries a comment header
# terminated by a '# EOC' marker line. Everything up to and including that
# line is kept aside in `comments`; only the remainder is parsed as JSON.
eoc_at = manifest_str.find('# EOC')
if eoc_at == -1:
    # No marker: str.find() returned -1, so there is no header to preserve
    # and the whole file is treated as JSON.
    json_start = 0
else:
    json_start = eoc_at + len('# EOC') + 1  # EOC + \n
comments = manifest_str[:json_start]
manifest_str = manifest_str[json_start:]

# OrderedDict keeps the packages in the order they appear in the file
old_manifest = json.loads(manifest_str, object_pairs_hook=collections.OrderedDict)
Brad Bishop | 316dfdd | 2018-06-25 12:45:53 -0400 | [diff] [blame] | 80 | |
# First pass to get core-package functionality, because we base everything on the fact that core is actually working
# Not exactly the same so it should not be a function
print ("Getting dependencies for core package:")

# The same list object is used later to check dependencies for other packages,
# so it is extended in place rather than copied.
core_files = old_manifest['core']['files']

# Special call to check for core package.
# universal_newlines=True makes check_output return text, so the entries
# compare correctly against the strings loaded from the JSON manifest.
output = subprocess.check_output([sys.executable, 'get_module_deps2.py', 'python-core-package'], universal_newlines=True)
for item in output.split():
    # We append it so it doesn't hurt what we currently have
    if item not in core_files:
        core_files.append(item)

# core_files grows while this loop runs; entries appended below are visited
# by the same loop later on.
# NOTE(review): the previous code tested isFolder(value) here, but the only
# action under it was guarded by a condition that can never be true (value
# always comes from this very list) and the entry was not skipped, so folder
# entries are traced like modules. Confirm whether a `continue` was intended.
for value in core_files:
    # Binaries and headers are not importable, and empty entries carry
    # nothing to trace; they are already in the list, so just move on.
    if '${bindir}' in value or '${includedir}' in value or value == '':
        continue

    # Get module name, shouldn't be affected by libdir/bindir
    module_name = os.path.splitext(os.path.basename(os.path.normpath(value)))[0]

    # Launch separate task for each module for deterministic behavior
    # Each module will only import what is necessary for it to work in specific
    print ('Getting dependencies for module: %s' % module_name)
    output = subprocess.check_output([sys.executable, 'get_module_deps2.py', module_name], universal_newlines=True)
    for item in output.split():
        # We append it so it doesn't hurt what we currently have
        if item not in core_files:
            core_files.append(item)
| 125 | |
# Record which packages list folders, and every distinct folder that is listed
for pkg_name in old_manifest:
    for entry in old_manifest[pkg_name]['files']:
        if not isFolder(entry):
            # Only folders are tracked here
            continue
        print ('%s is a folder' % entry)
        if pkg_name not in hasfolders:
            hasfolders.append(pkg_name)
        if entry not in allfolders:
            allfolders.append(entry)
| 136 | |
# Packages that are copied to the new manifest untouched: we assume that when
# they were manually added to the manifest we knew what we were doing.
special_packages=['misc', 'modules', 'tests', 'dev']

for key in old_manifest:
    # Use an empty dict as data structure to hold data for each package and fill it up
    new_manifest[key] = collections.OrderedDict()
    new_manifest[key]['summary'] = old_manifest[key]['summary']
    new_manifest[key]['rdepends'] = []
    new_manifest[key]['files'] = []

    # All packages should depend on core
    if key != 'core':
        new_manifest[key]['rdepends'].append('core')

    print ('Handling package %s' % key)
    if key in special_packages or 'staticdev' in key:
        print('Passing %s package directly' % key)
        # The old entry object itself is reused, not copied
        new_manifest[key] = old_manifest[key]
        continue

    if key == 'core':
        # We already handled core on the first pass, so its file list is
        # carried over as-is and no dependency tracing is done for it here.
        new_manifest[key]['files'].extend(old_manifest[key]['files'])
        continue

    pkg_files = new_manifest[key]['files']
    pkg_rdepends = new_manifest[key]['rdepends']

    for value in old_manifest[key]['files']:
        if isFolder(value):
            # Pass folders directly.
            # NOTE(review): the entry is not skipped afterwards, so its
            # basename is still traced as a module below - confirm intended.
            pkg_files.append(value)
        # Binaries and headers are not importable: pass them directly to the
        # new manifest data structure and move on.
        if '${bindir}' in value or '${includedir}' in value:
            if value not in pkg_files:
                pkg_files.append(value)
            continue
        # Ignore empty values
        if value == '':
            continue

        # Get module name, shouldn't be affected by libdir/bindir
        module_name = os.path.splitext(os.path.basename(os.path.normpath(value)))[0]

        # Launch separate task for each module for deterministic behavior
        # Each module will only import what is necessary for it to work in specific
        print ('Getting dependencies for module: %s' % module_name)
        # universal_newlines=True makes check_output return text, so entries
        # compare correctly against the strings loaded from the JSON manifest.
        output = subprocess.check_output([sys.executable, 'get_module_deps2.py', module_name], universal_newlines=True)

        # Output will have all dependencies
        for item in output.split():

            # One of the dependencies that was found could be inside one of the
            # folders included by another package; in that case the package
            # containing the folder becomes an RDEPENDS.
            # e.g. Folder encodings contained in codecs
            # We check whether the folder string is contained in the dependency
            # path (rather than stripping the filename and comparing), so that
            # files in nested sub-folders still match their top folder.
            inFolders = False
            for folder in allfolders:
                if folder not in item:
                    continue
                inFolders = True
                # Loop only through packages which contain folders, and stop
                # at the first package that lists this folder.
                for keyfolder in hasfolders:
                    if folder in old_manifest[keyfolder]['files']:
                        print ('%s found in %s' % (folder, keyfolder))
                        if keyfolder != key and keyfolder not in pkg_rdepends:
                            pkg_rdepends.append(keyfolder)
                        break

            # A folder was found so we're done with this item, we can go on
            if inFolders:
                continue

            # We might already have it since it could depend on a (previously checked) module
            if item in pkg_files:
                continue

            # Check if this dependency is already contained on another package, so we add
            # that package as an RDEPENDS; if it's not, it belongs to the current package,
            # so we add it to FILES.
            for newkey in old_manifest:
                # Since we're nesting, the current package itself does not count
                if newkey != key and item in old_manifest[newkey]['files']:
                    if newkey not in pkg_rdepends:
                        print('Adding %s to %s RDEPENDS, because it contains %s' % (newkey, key, item))
                        pkg_rdepends.append(newkey)
                    break
            else:
                # Since it wasn't found on another package, it's not an RDEP, so add it to FILES for this package
                print('Adding %s to %s FILES' % (item, key))
                pkg_files.append(item)
                if item.endswith('*'):
                    wildcards.append(item)
                # Check for repeated files
                if item not in allfiles:
                    allfiles.append(item)
                else:
                    repeated.append(item)
| 275 | |
# Report everything that deserves a manual look
print ('The following files are repeated (contained in more than one package), please check which package should get it:')
print (repeated)
print ('The following files contain wildcards, please check they are necessary')
print (wildcards)
print ('The following files contain folders, please check they are necessary')
print (hasfolders)

# Sort it just so it looks nice
for package in new_manifest.values():
    package['files'].sort()
    package['rdepends'].sort()

# Create the manifest from the data structure that was built, then put the
# comment header taken from the old manifest back on top of it
new_manifest_path = 'python2-manifest.json.new'
with open(new_manifest_path, 'w') as outfile:
    json.dump(new_manifest, outfile, indent=4)

prepend_comments(comments, new_manifest_path)