blob: 2f944f9b13e64b510d7c565c62ce9f3825e576cb [file] [log] [blame]
# This script is used as a bitbake task to create a new python manifest
# $ bitbake python -c create_manifest
#
# Our goal is to keep python-core as small as possible and add other python
# packages only when the user needs them, hence why we split upstream python
# into several packages.
7#
8# In a very simplistic way what this does is:
9# Launch python and see specifically what is required for it to run at a minimum
10#
11# Go through the python-manifest file and launch a separate task for every single
12# one of the files on each package, this task will check what was required for that
13# specific module to run, these modules will be called dependencies.
14# The output of such task will be a list of the modules or dependencies that were
15# found for that file.
16#
# Such output will be parsed by this script, we will look for each dependency on the
# manifest and if we find that another package already includes it, then we will add
# that package as an RDEPENDS to the package we are currently checking; in case we don't
# find the current dependency on any other package we will add it to the current package
# as part of FILES.
22#
23#
# This way we will create a new manifest from the data structure that was built during
# this process; on this new manifest each package will contain specifically only
# what it needs to run.
27#
# There are some caveats which we try to deal with, such as repeated files on different
# packages, packages that include folders, wildcards, and special packages.
# It's also important to note that this method only works for python files, and shared
# libraries. Static libraries, header files and binaries need to be dealt with manually.
32#
# This script differs from its python2 version mostly on how shared libraries are handled.
# The manifest file for python3 has an extra field which contains the cached files for
# each package.
# The method to handle cached files does not work when a module includes a folder which
# itself contains the pycache folder; fortunately this is almost never the case.
38#
39# Author: Alejandro Enedino Hernandez Samaniego "aehs29" <aehs29@gmail.com>
40
41
42import sys
43import subprocess
44import json
45import os
46
# Hack to get native python search path (for folders), not fond of it but it works for now
pivot = 'recipe-sysroot-native'


def findNativeSysroot(search_path, marker):
    """Return the part of a search-path entry that ends right after ``marker``.

    Every entry of ``search_path`` that contains ``marker`` is cut just after
    the marker; when several entries match, the last one wins.  Returns
    ``None`` when no entry contains ``marker``.
    """
    sysroot = None
    for entry in search_path:
        position = entry.find(marker)
        if position != -1:
            sysroot = entry[:position + len(marker)]
    return sysroot


nativelibfolder = findNativeSysroot(sys.path, pivot)
if nativelibfolder is None:
    # The name is always bound now; say up front why the folder lookups done
    # later are going to fail instead of dying on an unexplained NameError.
    print('WARNING: %s not found in sys.path, folder lookups will fail' % pivot, file=sys.stderr)
52
# Empty dict to hold the whole (regenerated) manifest, keyed by package name
new_manifest = {}

# Bookkeeping for the final report: every file added to a package, the files
# that were added more than once, and the entries ending in a wildcard
allfiles = []
repeated = []
wildcards = []

# Packages whose file list includes a folder, and the folder entries themselves
hasfolders = []
allfolders = []
63
def isFolder(value):
    """Return True when ``value`` is an existing directory.

    ``${libdir}`` inside ``value`` is expanded against each of the usr/lib,
    usr/lib64 and usr/lib32 directories below the module-level
    ``nativelibfolder``; one hit is enough.
    """
    libdirs = ('/usr/lib', '/usr/lib64', '/usr/lib32')
    return any(os.path.isdir(value.replace('${libdir}', nativelibfolder + libdir))
               for libdir in libdirs)
69
def isCached(item):
    """Return True when the string ``item`` contains '__pycache__'."""
    return '__pycache__' in item
75
# Read existing JSON manifest (decoded explicitly as UTF-8 rather than with
# whatever the locale default happens to be)
with open('python3-manifest.json', encoding='utf-8') as manifest:
    old_manifest = json.load(manifest)
79
80
# First pass to get core-package functionality, because we base everything on the fact that core is actually working
# Not exactly the same so it should not be a function
print ('Getting dependencies for package: core')

# Special call to check for core package
output = subprocess.check_output([sys.executable, 'get_module_deps3.py', 'python-core-package']).decode('utf8')
for item in output.split():
    # We append to the OLD manifest so it doesn't hurt what we currently have;
    # it is the data structure used to check dependencies for other packages
    target = old_manifest['core']['cached' if isCached(item) else 'files']
    if item not in target:
        target.append(item)
99
# Ask get_module_deps3.py for the dependencies of every module listed in core
# and merge them into the old manifest.
# NOTE: the list being iterated grows inside the loop, so entries appended here
# are examined in turn as well.
for value in old_manifest['core']['files']:
    # Ignore folders, since we don't import those, difficult to handle multilib
    if isFolder(value):
        # Only make sure a cached folder is also listed under 'cached'; the
        # entry itself is being read from 'files', so it is already there
        if isCached(value) and value not in old_manifest['core']['cached']:
            old_manifest['core']['cached'].append(value)
        continue
    # Ignore binaries and headers, since we don't import those, assume they
    # were added correctly (manually); also ignore empty values.  All of them
    # are already part of the list we are iterating.
    if '${bindir}' in value or value == '' or '${includedir}' in value:
        continue
    # Get module name, shouldn't be affected by libdir/bindir
    value = os.path.splitext(os.path.basename(os.path.normpath(value)))[0]

    # Launch separate task for each module for deterministic behavior
    # Each module will only import what is necessary for it to work in specific
    print ('Getting dependencies for module: %s' % value)
    output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % value]).decode('utf8')
    print ('The following dependencies were found for module %s:\n' % value)
    print (output)
    for item in output.split():
        # We append to the OLD manifest so it doesn't hurt what we currently
        # have; it is the data structure used to check dependencies for other
        # packages
        target = old_manifest['core']['cached' if isCached(item) else 'files']
        if item not in target:
            target.append(item)
145
146
# We check which packages include folders: remember both the package
# (hasfolders) and the folder entry itself (allfolders), each of them once
for key in old_manifest:
    for value in old_manifest[key]['files']:
        if not isFolder(value):
            continue
        print ('%s is a folder' % value)
        if key not in hasfolders:
            hasfolders.append(key)
        if value not in allfolders:
            allfolders.append(value)
157
# Build the new manifest: for every package resolve the dependencies of each of
# its files and record them either as FILES of the package or, when another
# package already ships them, as an RDEPENDS on that package.
for key in old_manifest:
    # Use an empty dict as data structure to hold data for each package and fill it up
    new_manifest[key] = {}
    new_manifest[key]['files'] = []

    new_manifest[key]['rdepends'] = []
    # All packages should depend on core
    if key != 'core':
        new_manifest[key]['rdepends'].append('core')
        new_manifest[key]['cached'] = []
    else:
        new_manifest[key]['cached'] = old_manifest[key]['cached']
    new_manifest[key]['summary'] = old_manifest[key]['summary']

    # Handle special cases, we assume that when they were manually added
    # to the manifest we knew what we were doing.
    print('\n')
    print('--------------------------')
    print ('Handling package %s' % key)
    print('--------------------------')
    special_packages = ['misc', 'modules', 'dev']
    if key in special_packages or 'staticdev' in key:
        print('Passing %s package directly' % key)
        new_manifest[key] = old_manifest[key]
        continue

    # We already handled core on the first pass, so its files are copied as
    # they are and no dependency lookup is launched for them
    if key == 'core':
        new_manifest[key]['files'].extend(old_manifest[key]['files'])
        continue

    for value in old_manifest[key]['files']:
        # Pass folders directly.  There is deliberately no 'continue' here: the
        # entry still goes through the module lookup below, which is how a
        # folder that is the package itself (e.g. unittest) gets imported.
        if isFolder(value):
            new_manifest[key]['files'].append(value)
        # Ignore binaries, since we don't import those
        if '${bindir}' in value:
            # Pass it directly to the new manifest data structure
            if value not in new_manifest[key]['files']:
                new_manifest[key]['files'].append(value)
            continue
        # Ignore empty values
        if value == '':
            continue
        if '${includedir}' in value:
            if value not in new_manifest[key]['files']:
                new_manifest[key]['files'].append(value)
            continue

        # Get module name, shouldn't be affected by libdir/bindir
        # We need to check if the imported module comes from another (e.g. sqlite3.dump)
        path, value = os.path.split(value)
        path = os.path.basename(path)
        value = os.path.splitext(os.path.basename(value))[0]

        # If this condition is met, it means we need to import it from another module
        # or it's the folder itself (e.g. unittest)
        if path == key:
            if value:
                value = path + '.' + value
            else:
                value = path

        # Launch separate task for each module for deterministic behavior
        # Each module will only import what is necessary for it to work in specific
        print ('\nGetting dependencies for module: %s' % value)
        output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % value]).decode('utf8')
        # We can print dependencies for debugging purposes
        print ('The following dependencies were found for module %s:\n' % value)
        print (output)
        # Output will have all dependencies

        reportFILES = []
        reportRDEPS = []

        for item in output.split():

            # One of the dependencies that was found could be inside one of the
            # folders included by another package; if so, the package holding
            # the folder becomes an rdependency (e.g. folder encodings
            # contained in codecs).  This would not be needed if no packages
            # included any folders.
            #
            # We check whether the folder string is contained in the dependency
            # path rather than stripping the filename from the path and
            # comparing, because the latter would miss nested folders
            # (path/folder1/folder2/filename never equals path/folder1).
            inFolders = False
            for folder in allfolders:
                if folder not in item:
                    continue
                inFolders = True
                # Loop only through packages which contain folders and stop at
                # the first one that lists this folder.  Both 'files' and
                # 'cached' are searched: the original header,
                # "for x in files or x in cached", only ever walked 'files'.
                for keyfolder in hasfolders:
                    listed = old_manifest[keyfolder]['files'] + old_manifest[keyfolder]['cached']
                    if folder in listed:
                        print ('%s folder found in %s' % (folder, keyfolder))
                        if keyfolder not in new_manifest[key]['rdepends'] and keyfolder != key:
                            new_manifest[key]['rdepends'].append(keyfolder)
                        break

            # A folder was found so we're done with this item, we can go on
            if inFolders:
                continue

            # We might already have it on the dictionary since it could depend on a (previously checked) module
            if item in new_manifest[key]['files'] or item in new_manifest[key]['cached']:
                continue

            # Check if this dependency is already contained on another package, so we add it
            # as an RDEPENDS, or if it's not, it means it should be contained on the current
            # package, so we should add it to FILES
            for newkey in old_manifest:
                # A hit on the package being processed does not count, keep
                # looking at the remaining packages
                if newkey == key:
                    continue
                if item in old_manifest[newkey]['files'] or item in old_manifest[newkey]['cached']:
                    if newkey not in new_manifest[key]['rdepends']:
                        # Add it to the new manifest data struct
                        reportRDEPS.append('Adding %s to %s RDEPENDS, because it contains %s\n' % (newkey, key, item))
                        new_manifest[key]['rdepends'].append(newkey)
                    break
            else:
                # Runs only when no other package ships the item (for/else).
                # A module shouldn't contain itself (${libdir}/python3/sqlite3 shouldn't be on sqlite3 files)
                if os.path.basename(item) != key:
                    reportFILES.append(('Adding %s to %s FILES\n' % (item, key)))
                    # Since it wasn't found on another package, it's not an RDEP, so add it to FILES for this package
                    if isCached(item):
                        new_manifest[key]['cached'].append(item)
                    else:
                        new_manifest[key]['files'].append(item)

                    if item.endswith('*'):
                        wildcards.append(item)
                    # Check for repeated files
                    if item not in allfiles:
                        allfiles.append(item)
                    else:
                        repeated.append(item)

        print('\n')
        print('#################################')
        print('Summary for module %s' % value)
        print('FILES found for module %s:' % value)
        print(''.join(reportFILES))
        print('RDEPENDS found for module %s:' % value)
        print(''.join(reportRDEPS))
        print('#################################')
337
# Final report of everything that needs a manual look
print ('The following files are repeated (contained in more than one package), please check which package should get it:')
print (repeated)
print('The following files contain wildcards, please check they are necessary')
print(wildcards)
# hasfolders holds the names of the packages whose file list includes a folder
print('The following files contain folders, please check they are necessary')
print(hasfolders)
344
# Sort it just so it looks nicer
for key in new_manifest:
    new_manifest[key]['files'].sort()
    new_manifest[key]['cached'].sort()
    new_manifest[key]['rdepends'].sort()

# Create the manifest from the data structure that was built
with open('python3-manifest.json.new', 'w') as outfile:
    json.dump(new_manifest, outfile, sort_keys=True, indent=4)
    # json.dump does not end the file with a newline, add it ourselves
    outfile.write('\n')