blob: fddb23cdc42bf656a8eae27bd54d6d10b7790816 [file] [log] [blame]
# This script is used as a bitbake task to create a new python manifest
# $ bitbake python -c create_manifest
#
# Our goal is to keep python-core as small as possible and add other python
# packages only when the user needs them, hence why we split upstream python
# into several packages.
#
# In a very simplistic way what this does is:
# Launch python and see specifically what is required for it to run at a minimum
#
# Go through the python-manifest file and launch a separate task for every single
# one of the files on each package, this task will check what was required for that
# specific module to run, these modules will be called dependencies.
# The output of such task will be a list of the modules or dependencies that were
# found for that file.
#
# Such output will be parsed by this script, we will look for each dependency on the
# manifest and if we find that another package already includes it, then we will add
# that package as an RDEPENDS to the package we are currently checking; in case we don't
# find the current dependency on any other package we will add it to the current package
# as part of FILES.
#
#
# This way we will create a new manifest from the data structure that was built during
# this process, on this new manifest each package will contain specifically only
# what it needs to run.
#
# There are some caveats which we try to deal with, such as repeated files on different
# packages, packages that include folders, wildcards, and special packages.
# It's also important to note that this method only works for python files, and shared
# libraries. Static libraries, header files and binaries need to be dealt with manually.
#
# This script differs from its python2 version mostly on how shared libraries are handled
# The manifest file for python3 has an extra field which contains the cached files for
# each package.
# The method to handle cached files does not work when a module includes a folder which
# itself contains the pycache folder, fortunately this is almost never the case.
#
# Author: Alejandro Enedino Hernandez Samaniego "aehs29" <aehs29@gmail.com>
40
41
42import sys
43import subprocess
44import json
45import os
Brad Bishop1a4b7ee2018-12-16 17:11:34 -080046import collections
47
# Get python version from ${PYTHON_MAJMIN}; it is passed as the first
# command-line argument of this script.
pyversion = str(sys.argv[1])

# Hack to get native python search path (for folders), not fond of it but it works for now.
# Every sys.path entry containing the pivot is cut right after the pivot;
# when several entries match, the last one wins.
pivot = 'recipe-sysroot-native'
nativelibfolder = None
for p in sys.path:
    if pivot in p:
        nativelibfolder = p[:p.find(pivot)+len(pivot)]

# isFolder() needs this prefix to resolve ${libdir}. Without this check a
# missing pivot would only show up later as a NameError inside isFolder().
if nativelibfolder is None:
    sys.exit("ERROR: could not find '%s' in sys.path, unable to locate the native sysroot" % pivot)

# Empty dict to hold the whole manifest
new_manifest = collections.OrderedDict()

# Check for repeated files, folders and wildcards
allfiles = []
repeated = []
wildcards = []

# Packages that list at least one folder, and every folder that was listed
hasfolders = []
allfolders = []
Brad Bishop316dfdd2018-06-25 12:45:53 -040067
def isFolder(value):
    """Return True when the manifest entry *value* is an existing directory.

    ``${PYTHON_MAJMIN}`` is replaced with the module-level ``pyversion``.
    ``${libdir}`` is then tried, in order, as ``/usr/lib``, ``/usr/lib64``
    and ``/usr/lib32`` below the module-level ``nativelibfolder``; the first
    candidate that is a directory makes the result True.
    """
    value = value.replace('${PYTHON_MAJMIN}', pyversion)
    return any(
        os.path.isdir(value.replace('${libdir}', nativelibfolder + '/usr/' + libname))
        for libname in ('lib', 'lib64', 'lib32')
    )
Brad Bishop316dfdd2018-06-25 12:45:53 -040074
75def isCached(item):
Brad Bishop1a4b7ee2018-12-16 17:11:34 -080076 if '__pycache__' in item:
77 return True
78 else:
79 return False
Brad Bishop316dfdd2018-06-25 12:45:53 -040080
# Load the existing JSON manifest, keeping the package order of the file.
with open('python3-manifest.json') as manifest:
    old_manifest = json.load(manifest, object_pairs_hook=collections.OrderedDict)

#
# First pass: core-package functionality. Everything else is based on the
# fact that core is actually working.
# This is not exactly the same as the later passes, so it is not a function.
#

print ('Getting dependencies for package: core')

# This special call gets the core dependencies and appends them to the old
# manifest, so whatever it currently holds is left untouched.
# This way, when other packages check for dependencies on the new core
# package, they will still find them even when checking old_manifest.
core_files = old_manifest['core']['files']
core_cached = old_manifest['core']['cached']

output = subprocess.check_output([sys.executable, 'get_module_deps3.py', 'python-core-package']).decode('utf8')
for coredep in output.split():
    # Store paths with the version placeholder instead of the real version.
    coredep = coredep.replace(pyversion, '${PYTHON_MAJMIN}')
    target = core_cached if isCached(coredep) else core_files
    if coredep not in target:
        target.append(coredep)
Brad Bishop316dfdd2018-06-25 12:45:53 -0400109
110
# Second step: walk the files the old manifest lists for the core package.
# Entries that cannot be imported (directories, binaries, headers, empty
# strings) were added manually, so they are assumed correct and left as they
# are; everything else is treated as a python module.
#
# NOTE(review): the list being iterated is the same list that receives the
# newly found dependencies below, so appended entries are visited as well.
# Presumably this is what pulls in dependencies of dependencies - confirm
# before restructuring.
core_section = old_manifest['core']

for filedep in core_section['files']:
    if isFolder(filedep):
        # A folder stays in 'files' (it is already there); a __pycache__
        # folder is additionally recorded under 'cached'.
        if isCached(filedep) and filedep not in core_section['cached']:
            core_section['cached'].append(filedep)
        continue

    # Binaries, empty entries and headers are already listed; nothing to import.
    if '${bindir}' in filedep or filedep == '' or '${includedir}' in filedep:
        continue

    # Get actual module name, shouldn't be affected by libdir/bindir, etc.
    filename = os.path.basename(os.path.normpath(filedep))
    pymodule, _extension = os.path.splitext(filename)

    # We now know that we are dealing with a python module, so it can be
    # imported to find out what its dependencies are.
    # A separate task is launched for each module for deterministic behavior:
    # each module only imports what it specifically needs, and the output of
    # each task lists that module's dependencies.
    print ('Getting dependencies for module: %s' % pymodule)
    output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % pymodule]).decode('utf8')
    print ('The following dependencies were found for module %s:\n' % pymodule)
    print (output)

    for pymodule_dep in output.split():
        pymodule_dep = pymodule_dep.replace(pyversion, '${PYTHON_MAJMIN}')

        destination = 'cached' if isCached(pymodule_dep) else 'files'
        if pymodule_dep not in core_section[destination]:
            core_section[destination].append(pymodule_dep)
162
163
# At this point we are done with the core package.
# The old_manifest dictionary is updated only for the core package because
# all others will use this as a base.


# To improve the script speed, record which packages list directories,
# since only those are looped through later, and which directories exist.
for pypkg in old_manifest:
    for filedep in old_manifest[pypkg]['files']:
        if not isFolder(filedep):
            continue
        print ('%s is a folder' % filedep)
        if pypkg not in hasfolders:
            hasfolders.append(pypkg)
        if filedep not in allfolders:
            allfolders.append(filedep)
179
180
181
# This is the main loop that will handle each package.
# It works in a similar fashion to the step before, but
# we will now be updating a new dictionary that will eventually
# become the new manifest.
#
# The following loops through all packages in the manifest,
# through all files on each of them, and checks whether or not
# they are modules and can be imported.
# If they can be imported, then it checks for dependencies for
# each of them by launching a separate task.
# The output of that task is then parsed and the manifest is updated
# accordingly, whether it should add the module on FILES for the current package
# or if that module already belongs to another package then the current one
# will RDEPEND on it

for pypkg in old_manifest:
    # Use an empty dict as data structure to hold data for each package and fill it up
    new_manifest[pypkg] = collections.OrderedDict()
    new_manifest[pypkg]['summary'] = old_manifest[pypkg]['summary']
    new_manifest[pypkg]['rdepends'] = []
    new_manifest[pypkg]['files'] = []
    # For core this is the very same list object as in old_manifest;
    # every other package starts with an empty list (see below).
    new_manifest[pypkg]['cached'] = old_manifest[pypkg]['cached']

    # All packages should depend on core
    if pypkg != 'core':
        new_manifest[pypkg]['rdepends'].append('core')
        new_manifest[pypkg]['cached'] = []

    print('\n')
    print('--------------------------')
    print ('Handling package %s' % pypkg)
    print('--------------------------')

    # Handle special cases, we assume that when they were manually added
    # to the manifest we knew what we were doing.
    special_packages = ['misc', 'modules', 'dev', 'tests']
    if pypkg in special_packages or 'staticdev' in pypkg:
        print('Passing %s package directly' % pypkg)
        new_manifest[pypkg] = old_manifest[pypkg]
        continue

    for filedep in old_manifest[pypkg]['files']:
        # We already handled core on the first pass, we can ignore it now
        if pypkg == 'core':
            if filedep not in new_manifest[pypkg]['files']:
                new_manifest[pypkg]['files'].append(filedep)
            continue

        # Handle/ignore what we cant import
        if isFolder(filedep):
            new_manifest[pypkg]['files'].append(filedep)
            # Asyncio (and others) are both the package and the folder name, we should not skip those...
            path, mod = os.path.split(filedep)
            if mod != pypkg:
                continue
        if '${bindir}' in filedep:
            if filedep not in new_manifest[pypkg]['files']:
                new_manifest[pypkg]['files'].append(filedep)
            continue
        if filedep == '':
            continue
        if '${includedir}' in filedep:
            if filedep not in new_manifest[pypkg]['files']:
                new_manifest[pypkg]['files'].append(filedep)
            continue

        # Get actual module name, shouldn't be affected by libdir/bindir, etc.
        # We need to check if the imported module comes from another (e.g. sqlite3.dump)
        path, pymodule = os.path.split(filedep)
        path = os.path.basename(path)
        pymodule = os.path.splitext(os.path.basename(pymodule))[0]

        # If this condition is met, it means we need to import it from another module
        # or it's the folder itself (e.g. unittest)
        if path == pypkg:
            if pymodule:
                pymodule = path + '.' + pymodule
            else:
                pymodule = path

        # We now know that we are dealing with a python module, so we can import it
        # and check what its dependencies are.
        # We launch a separate task for each module for deterministic behavior.
        # Each module will only import what is necessary for it to work in specific.
        # The output of each task will contain each module's dependencies

        print ('\nGetting dependencies for module: %s' % pymodule)
        output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % pymodule]).decode('utf8')
        print ('The following dependencies were found for module %s:\n' % pymodule)
        print (output)

        reportFILES = []
        reportRDEPS = []

        for pymodule_dep in output.split():

            # Warning: This first part is ugly
            # One of the dependencies that was found, could be inside of one of the folders included by another package
            # We need to check if this happens so we can add the package containing the folder as an rdependency
            # e.g. Folder encodings contained in codecs
            # This would be solved if no packages included any folders

            # This can be done in two ways:
            # 1 - We assume that if we take out the filename from the path we would get
            #   the folder string, then we would check if folder string is in the list of folders
            #   This would not work if a package contains a folder which contains another folder
            #   e.g. path/folder1/folder2/filename  folder_string= path/folder1/folder2
            #   folder_string would not match any value contained in the list of folders
            #
            # 2 - We do it the other way around, checking if the folder is contained in the path
            #   e.g. path/folder1/folder2/filename  folder_string= path/folder1/folder2
            #   is folder_string inside path/folder1/folder2/filename?,
            #   Yes, it works, but we waste a couple of milliseconds.

            pymodule_dep = pymodule_dep.replace(pyversion, '${PYTHON_MAJMIN}')
            inFolders = False
            for folder in allfolders:
                if folder in pymodule_dep:
                    inFolders = True  # Did we find a folder?
                    # Loop only through packages which contain folders and stop
                    # at the first package that lists this folder.
                    for pypkg_with_folder in hasfolders:
                        folderFound = False
                        # print('Checking folder %s on package %s' % (pymodule_dep,pypkg_with_folder))
                        # Scan FILES and then the cached entries. The former
                        # "for x in files or x in cached" expression never
                        # looked at 'cached' and raised when 'files' was empty.
                        for section in ('files', 'cached'):
                            for folder_dep in old_manifest[pypkg_with_folder][section]:
                                if folder_dep == folder:
                                    print ('%s folder found in %s' % (folder, pypkg_with_folder))
                                    folderFound = True
                                    if pypkg_with_folder not in new_manifest[pypkg]['rdepends'] and pypkg_with_folder != pypkg:
                                        new_manifest[pypkg]['rdepends'].append(pypkg_with_folder)
                        if folderFound:
                            break

            # A folder was found so we're done with this item, we can go on
            if inFolders:
                continue

            # No directories beyond this point
            # We might already have this module on the dictionary since it could depend on a (previously checked) module
            if pymodule_dep not in new_manifest[pypkg]['files'] and pymodule_dep not in new_manifest[pypkg]['cached']:
                # Handle core as a special package, we already did it so we pass it to NEW data structure directly
                # NOTE(review): core entries are passed through at the top of
                # the file loop, so this branch looks unreachable - confirm.
                if pypkg == 'core':
                    print('Adding %s to %s FILES' % (pymodule_dep, pypkg))
                    if pymodule_dep.endswith('*'):
                        wildcards.append(pymodule_dep)
                    if isCached(pymodule_dep):
                        new_manifest[pypkg]['cached'].append(pymodule_dep)
                    else:
                        new_manifest[pypkg]['files'].append(pymodule_dep)

                    # Check for repeated files
                    if pymodule_dep not in allfiles:
                        allfiles.append(pymodule_dep)
                    else:
                        if pymodule_dep not in repeated:
                            repeated.append(pymodule_dep)
                else:
                    # Last step: Figure out if this belongs to FILES or RDEPENDS
                    # We check if this module is already contained on another package, so we add that one
                    # as an RDEPENDS, or if it's not, it means it should be contained on the current
                    # package, and we should add it to FILES
                    for possible_rdep in old_manifest:
                        # Debug
                        # print('Checking %s ' % pymodule_dep + ' in %s' % possible_rdep)
                        if pymodule_dep in old_manifest[possible_rdep]['files'] or pymodule_dep in old_manifest[possible_rdep]['cached']:
                            # Since we are nesting, we need to check it's not the same pypkg
                            if possible_rdep != pypkg:
                                if possible_rdep not in new_manifest[pypkg]['rdepends']:
                                    # Add it to the new manifest data struct as RDEPENDS since it contains something this module needs
                                    reportRDEPS.append('Adding %s to %s RDEPENDS, because it contains %s\n' % (possible_rdep, pypkg, pymodule_dep))
                                    new_manifest[pypkg]['rdepends'].append(possible_rdep)
                                # Another package owns the file: stop searching
                                # so the for/else below does not claim it.
                                break
                    else:
                        # Only reached when the loop above did not break, i.e.
                        # this module wasn't found on another package: it is not an RDEP,
                        # so we add it to FILES for this package.
                        # A module shouldn't contain itself (${libdir}/python3/sqlite3 shouldn't be on sqlite3 files)
                        if os.path.basename(pymodule_dep) != pypkg:
                            reportFILES.append(('Adding %s to %s FILES\n' % (pymodule_dep, pypkg)))
                            if isCached(pymodule_dep):
                                new_manifest[pypkg]['cached'].append(pymodule_dep)
                            else:
                                new_manifest[pypkg]['files'].append(pymodule_dep)
                            if pymodule_dep.endswith('*'):
                                wildcards.append(pymodule_dep)
                            if pymodule_dep not in allfiles:
                                allfiles.append(pymodule_dep)
                            else:
                                if pymodule_dep not in repeated:
                                    repeated.append(pymodule_dep)

        print('\n')
        print('#################################')
        print('Summary for module %s' % pymodule)
        print('FILES found for module %s:' % pymodule)
        print(''.join(reportFILES))
        print('RDEPENDS found for module %s:' % pymodule)
        print(''.join(reportRDEPS))
        print('#################################')
387
# Report what may need a manual look before the manifest is written.
print('The following FILES contain wildcards, please check if they are necessary')
print(wildcards)
print('The following FILES contain folders, please check if they are necessary')
print(hasfolders)


# Sort every list in place, just so the result looks nicer.
for pypkg in new_manifest:
    package_entry = new_manifest[pypkg]
    for list_name in ('files', 'cached', 'rdepends'):
        package_entry[list_name].sort()

# Create the manifest from the data structure that was built
with open('python3-manifest.json.new','w') as outfile:
    json.dump(new_manifest, outfile, indent=4)
    outfile.write('\n')

# Files claimed by more than one package make the task fail, but only after
# the new manifest has been written.
if repeated:
    error_msg = ''.join([
        '\n\nERROR:\n',
        'The following files are repeated (contained in more than one package),\n',
        'this is likely to happen when new files are introduced after an upgrade,\n',
        'please check which package should get it,\n modify the manifest accordingly and re-run the create_manifest task:\n',
        '\n'.join(repeated),
        '\n',
    ])
    sys.exit(error_msg)
413