# Report significant differences in the buildhistory repository since a specific revision
#
# Copyright (C) 2012 Intel Corporation
# Author: Paul Eggleton <paul.eggleton@linux.intel.com>
#
# Note: requires GitPython 0.3.1+
#
# You can use this from the command line by running scripts/buildhistory-diff
#
10
import difflib
import os.path
import re
import sys

import git

import bb.utils
17
18
# How to display fields
# Fields whose value is a whitespace-separated list (order not significant)
list_fields = ['DEPENDS', 'RPROVIDES', 'RDEPENDS', 'RRECOMMENDS', 'RSUGGESTS', 'RREPLACES', 'RCONFLICTS', 'FILES', 'FILELIST', 'USER_CLASSES', 'IMAGE_CLASSES', 'IMAGE_FEATURES', 'IMAGE_LINGUAS', 'IMAGE_INSTALL', 'BAD_RECOMMENDATIONS', 'PACKAGE_EXCLUDE']
# Fields whose value is a whitespace-separated list where order is reported
list_order_fields = ['PACKAGES']
# Field name -> name of the field supplying its value when it is unset
defaultval_map = {'PKG': 'PKG', 'PKGE': 'PE', 'PKGV': 'PV', 'PKGR': 'PR'}
# Fields holding integer values
numeric_fields = ['PKGSIZE', 'IMAGESIZE']
# Fields to monitor
monitor_fields = ['RPROVIDES', 'RDEPENDS', 'RRECOMMENDS', 'RREPLACES', 'RCONFLICTS', 'PACKAGES', 'FILELIST', 'PKGSIZE', 'IMAGESIZE', 'PKG']
ver_monitor_fields = ['PKGE', 'PKGV', 'PKGR']
# Percentage change to alert for numeric fields
monitor_numeric_threshold = 10
# Image files to monitor (note that image-info.txt is handled separately)
img_monitor_files = ['installed-package-names.txt', 'files-in-image.txt']
# Related context fields for reporting (note: PE, PV & PR are always reported for monitored package fields)
related_fields = {
    'RDEPENDS': ['DEPENDS'],
    'RRECOMMENDS': ['DEPENDS'],
    'FILELIST': ['FILES'],
    'PKGSIZE': ['FILELIST'],
    'files-in-image.txt': ['installed-package-names.txt', 'USER_CLASSES', 'IMAGE_CLASSES', 'ROOTFS_POSTPROCESS_COMMAND', 'IMAGE_POSTPROCESS_COMMAND'],
    'installed-package-names.txt': ['IMAGE_FEATURES', 'IMAGE_LINGUAS', 'IMAGE_INSTALL', 'BAD_RECOMMENDATIONS', 'NO_RECOMMENDATIONS', 'PACKAGE_EXCLUDE'],
}
39
40
class ChangeRecord:
    """One reported difference for a single field or file at a buildhistory path.

    Attributes:
        path: path the change applies to; used to build the report prefix.
        fieldname: name of the changed field or file; selects the formatting
            applied by str().
        oldvalue: value before the change.
        newvalue: value after the change.
        monitored: flag stored for callers; not consulted when formatting.
        related: other ChangeRecord objects rendered as indented context lines.
        filechanges: optional list of objects rendered one per line in place of
            a textual diff (for image file/package lists).
    """

    def __init__(self, path, fieldname, oldvalue, newvalue, monitored):
        self.path = path
        self.fieldname = fieldname
        self.oldvalue = oldvalue
        self.newvalue = newvalue
        self.monitored = monitored
        self.related = []
        self.filechanges = None

    def __str__(self):
        return self._str_internal(True)

    def _str_internal(self, outer):
        """Render the change as text.

        outer is True for a top-level record (a path prefix is emitted) and
        False when rendering a record nested under another one's "related"
        list (no prefix, and nested PE/PV/PR records are skipped).
        """
        if not outer:
            prefix = ''
        elif '/image-files/' in self.path:
            prefix = '%s: ' % self.path.split('/image-files/')[0]
        else:
            prefix = '%s: ' % self.path

        def pkglist_combine(depver):
            # items() rather than the Python 2-only iteritems()
            return ["%s (%s)" % (k, v) if v else k for k, v in depver.items()]

        if self.fieldname in list_fields or self.fieldname in list_order_fields:
            if self.fieldname in ['RPROVIDES', 'RDEPENDS', 'RRECOMMENDS', 'RSUGGESTS', 'RREPLACES', 'RCONFLICTS']:
                (depvera, depverb) = compare_pkg_lists(self.oldvalue, self.newvalue)
                aitems = pkglist_combine(depvera)
                bitems = pkglist_combine(depverb)
            else:
                aitems = self.oldvalue.split()
                bitems = self.newvalue.split()
            # Sorted so the report does not depend on set iteration order
            removed = sorted(set(aitems) - set(bitems))
            added = sorted(set(bitems) - set(aitems))

            if removed and not bitems:
                out = '%s: removed all items "%s"' % (self.fieldname, ' '.join(removed))
            elif removed or added:
                removed_text = ' removed "%s"' % ' '.join(removed) if removed else ''
                added_text = ' added "%s"' % ' '.join(added) if added else ''
                out = '%s:%s%s' % (self.fieldname, removed_text, added_text)
            else:
                # Same set of items on both sides, so only the order differs
                out = '%s changed order' % self.fieldname
        elif self.fieldname in numeric_fields:
            aval = int(self.oldvalue or 0)
            bval = int(self.newvalue or 0)
            if aval != 0:
                percentchg = ((bval - aval) / float(aval)) * 100
            else:
                # No meaningful ratio against zero; report as +100%
                percentchg = 100
            out = '%s changed from %s to %s (%s%d%%)' % (self.fieldname, self.oldvalue or "''", self.newvalue or "''", '+' if percentchg > 0 else '', percentchg)
        elif self.fieldname in defaultval_map:
            out = '%s changed from %s to %s' % (self.fieldname, self.oldvalue, self.newvalue)
            if self.fieldname == 'PKG' and '[default]' in self.newvalue:
                out += ' - may indicate debian renaming failure'
        elif self.fieldname in ['pkg_preinst', 'pkg_postinst', 'pkg_prerm', 'pkg_postrm']:
            if self.newvalue and not self.oldvalue:
                action = 'added'
            elif self.oldvalue and not self.newvalue:
                action = 'cleared'
            else:
                # Both present, or both empty (previously left 'out' unbound)
                action = 'changed'
            out = '%s %s:\n ' % (self.fieldname, action)
            alines = (self.oldvalue or '').splitlines()
            blines = (self.newvalue or '').splitlines()
            diff = difflib.unified_diff(alines, blines, self.fieldname, self.fieldname, lineterm='')
            # Drop the two "---"/"+++" header lines of the unified diff
            out += '\n '.join(list(diff)[2:])
            out += '\n --'
        elif self.fieldname in img_monitor_files or '/image-files/' in self.path:
            fieldname = self.fieldname
            if '/image-files/' in self.path:
                fieldname = os.path.join('/' + self.path.split('/image-files/')[1], self.fieldname)
                out = 'Changes to %s:\n ' % fieldname
            else:
                if outer:
                    prefix = 'Changes to %s ' % self.path
                out = '(%s):\n ' % self.fieldname
            if self.filechanges:
                out += '\n '.join(['%s' % i for i in self.filechanges])
            else:
                alines = self.oldvalue.splitlines()
                blines = self.newvalue.splitlines()
                diff = difflib.unified_diff(alines, blines, fieldname, fieldname, lineterm='')
                out += '\n '.join(list(diff))
                out += '\n --'
        else:
            out = '%s changed from "%s" to "%s"' % (self.fieldname, self.oldvalue, self.newvalue)

        if self.related:
            for chg in self.related:
                if not outer and chg.fieldname in ['PE', 'PV', 'PR']:
                    continue
                for line in chg._str_internal(False).splitlines():
                    out += '\n * %s' % line

        return '%s%s' % (prefix, out)
142
class FileChange:
    """A single change to one path in a file (or package name) listing.

    changetype is one of the changetype_* codes below; oldvalue/newvalue carry
    the before/after detail for type, permission, owner/group and symlink
    changes and are unused for additions and removals.
    """

    changetype_add = 'A'
    changetype_remove = 'R'
    changetype_type = 'T'
    changetype_perms = 'P'
    changetype_ownergroup = 'O'
    changetype_link = 'L'

    # File type character -> description used in type-change messages
    _ftype_names = {
        '-': 'file',
        'd': 'directory',
        'l': 'symlink',
        'c': 'char device',
        'b': 'block device',
        'p': 'fifo',
        's': 'socket',
    }

    # Change types whose message reports oldvalue/newvalue verbatim
    _value_change_labels = {
        changetype_perms: 'permissions',
        changetype_ownergroup: 'owner/group',
        changetype_link: 'symlink target',
    }

    def __init__(self, path, changetype, oldvalue=None, newvalue=None):
        self.path = path
        self.changetype = changetype
        self.oldvalue = oldvalue
        self.newvalue = newvalue

    def _ftype_str(self, ftype):
        """Return a description for a file type character."""
        name = self._ftype_names.get(ftype)
        if name is None:
            return 'unknown (%s)' % ftype
        return name

    def __str__(self):
        if self.changetype == self.changetype_add:
            return '%s was added' % self.path
        if self.changetype == self.changetype_remove:
            return '%s was removed' % self.path
        if self.changetype == self.changetype_type:
            return '%s changed type from %s to %s' % (self.path, self._ftype_str(self.oldvalue), self._ftype_str(self.newvalue))
        label = self._value_change_labels.get(self.changetype)
        if label is not None:
            return '%s changed %s from %s to %s' % (self.path, label, self.oldvalue, self.newvalue)
        return '%s changed (unknown)' % self.path
190
191
def blob_to_dict(blob):
    """Parse the "KEY = value" lines of a blob into a dict.

    blob must provide data_stream.read(). Lines without an '=' are ignored;
    keys and values have surrounding whitespace stripped and only the first
    '=' on a line separates key from value.
    """
    data = blob.data_stream.read()
    if not isinstance(data, str):
        # On Python 3 the stream yields bytes; convert once so the str
        # operations below (and callers comparing against str) work.
        data = data.decode('utf-8', 'replace')
    adict = {}
    for line in data.splitlines():
        key, sep, value = line.partition('=')
        if sep:
            adict[key.strip()] = value.strip()
    return adict
200
201
def file_list_to_dict(lines):
    """Index file-listing lines by path.

    Each line is split on whitespace into five fields; the first three are
    kept as the entry (compare_file_lists reads them as mode string, owner,
    group), the fourth is ignored and the fifth is the path. The path's
    leading character (the '.') is dropped. For "path -> target" entries the
    target is appended as a fourth element of the entry.

    Lines with fewer than five fields (e.g. blank lines) are skipped.
    """
    adict = {}
    for line in lines:
        # Leave the last field intact so we handle file names containing spaces
        splitv = line.split(None, 4)
        if len(splitv) < 5:
            continue
        # Grab the path and remove the leading .
        path = splitv[4][1:].strip()
        # Handle symlinks; split on the first arrow only so the whole target
        # is kept
        if ' -> ' in path:
            path, _, target = path.partition(' -> ')
            adict[path] = splitv[0:3] + [target]
        else:
            adict[path] = splitv[0:3]
    return adict
217
218
def compare_file_lists(alines, blines):
    """Compare two file listings and return a list of FileChange objects.

    alines/blines are the old and new listing lines as accepted by
    file_list_to_dict(). Paths present in both are checked for type,
    permission, owner/group and (for new symlinks) link target changes;
    paths only in the old listing are reported as removed and paths only in
    the new listing as added.
    """
    adict = file_list_to_dict(alines)
    bdict = file_list_to_dict(blines)
    filechanges = []
    # items() rather than the Python 2-only iteritems()
    for path, splitv in adict.items():
        newsplitv = bdict.pop(path, None)
        if not newsplitv:
            filechanges.append(FileChange(path, FileChange.changetype_remove))
            continue

        oldmode = splitv[0]
        newmode = newsplitv[0]
        # Check type (first character of the mode string)
        if oldmode[0] != newmode[0]:
            filechanges.append(FileChange(path, FileChange.changetype_type, oldmode[0], newmode[0]))
        # Check permissions (remainder of the mode string)
        if oldmode[1:] != newmode[1:]:
            filechanges.append(FileChange(path, FileChange.changetype_perms, oldmode[1:], newmode[1:]))
        # Check owner/group
        oldowner = '%s/%s' % (splitv[1], splitv[2])
        newowner = '%s/%s' % (newsplitv[1], newsplitv[2])
        if oldowner != newowner:
            filechanges.append(FileChange(path, FileChange.changetype_ownergroup, oldowner, newowner))
        # Check symlink target; either side may lack one (not a symlink
        # before, or no "->" recorded), in which case None is used
        if newmode[0] == 'l':
            oldtarget = splitv[3] if len(splitv) > 3 else None
            newtarget = newsplitv[3] if len(newsplitv) > 3 else None
            if oldtarget != newtarget:
                filechanges.append(FileChange(path, FileChange.changetype_link, oldtarget, newtarget))

    # Whatever is left over has been added
    for path in bdict:
        filechanges.append(FileChange(path, FileChange.changetype_add))

    return filechanges
258
259
def compare_lists(alines, blines):
    """Compare two lists of names and return FileChange objects.

    Entries only in alines are reported as removed, entries only in blines as
    added; removals come first. Within each group entries are sorted so the
    result does not depend on set iteration order.
    """
    old_items = set(alines)
    new_items = set(blines)

    filechanges = [FileChange(pkg, FileChange.changetype_remove)
                   for pkg in sorted(old_items - new_items)]
    filechanges.extend(FileChange(pkg, FileChange.changetype_add)
                       for pkg in sorted(new_items - old_items))
    return filechanges
271
272
def compare_pkg_lists(astr, bstr):
    """Parse two dependency strings and drop entries not worth reporting.

    Both strings are parsed with bb.utils.explode_dep_versions2() (assumed to
    return a mapping of package name -> list of version constraint strings).
    A package present on both sides is removed from both results when its
    constraint lists are non-empty, equally long and every differing pair
    shares a '>=' or '=' prefix without bb.utils.vercmp(old, new) being
    positive, i.e. the version has not gone backwards.

    Returns the (old, new) mappings after that filtering. The constraint
    lists of compared packages are sorted in place.
    """
    depvera = bb.utils.explode_dep_versions2(astr)
    depverb = bb.utils.explode_dep_versions2(bstr)

    # Strip out changes where the version has increased
    remove = []
    for k in depvera:
        if k not in depverb:
            continue
        dva = depvera[k]
        dvb = depverb[k]
        if not (dva and dvb and len(dva) == len(dvb)):
            continue
        # Since length is the same, sort so that prefixes (e.g. >=) will line up
        dva.sort()
        dvb.sort()
        for dvai, dvbi in zip(dva, dvb):
            if dvai == dvbi:
                continue
            aiprefix = dvai.split(' ')[0]
            biprefix = dvbi.split(' ')[0]
            if aiprefix != biprefix or aiprefix not in ('>=', '='):
                # Different kind of constraint: keep for reporting
                break
            if bb.utils.vercmp(bb.utils.split_version(dvai), bb.utils.split_version(dvbi)) > 0:
                # Old version compares greater than new: keep for reporting
                break
        else:
            # No pair required reporting
            remove.append(k)

    for k in remove:
        depvera.pop(k)
        depverb.pop(k)

    return (depvera, depverb)
307
308
def compare_dict_blobs(path, ablob, bblob, report_all, report_ver):
    """Compare two "KEY = value" blobs and return ChangeRecord objects.

    path: directory of the compared file; its basename is used as the
        package name (default PKG value and self-dependency filtering).
    ablob, bblob: old and new blobs, parsed with blob_to_dict().
    report_all: if true, skip the filtering of insignificant numeric and
        list changes.
    report_ver: if true, mark version fields (ver_monitor_fields) as
        monitored even when neither side sets them.

    Keys are processed in sorted order so the result is deterministic.
    """
    adict = blob_to_dict(ablob)
    bdict = blob_to_dict(bblob)

    pkgname = os.path.basename(path)
    defaultvals = {'PKG': pkgname, 'PKGE': '0'}
    dep_fields = ['RPROVIDES', 'RDEPENDS', 'RRECOMMENDS', 'RSUGGESTS', 'RREPLACES', 'RCONFLICTS']

    changes = []
    for key in sorted(set(adict) | set(bdict) | set(defaultval_map)):
        astr = adict.get(key, '')
        bstr = bdict.get(key, '')
        # Decided on the raw values, before any default is substituted below
        if key in ver_monitor_fields:
            monitored = bool(report_ver or astr or bstr)
        else:
            monitored = key in monitor_fields
        mapped_key = defaultval_map.get(key, '')
        if mapped_key:
            if not astr:
                astr = '%s [default]' % adict.get(mapped_key, defaultvals.get(key, ''))
            if not bstr:
                bstr = '%s [default]' % bdict.get(mapped_key, defaultvals.get(key, ''))

        if astr == bstr:
            continue

        if not report_all:
            if key in numeric_fields:
                aval = int(astr or 0)
                bval = int(bstr or 0)
                if aval != 0:
                    percentchg = ((bval - aval) / float(aval)) * 100
                else:
                    percentchg = 100
                if abs(percentchg) < monitor_numeric_threshold:
                    continue
            elif key in list_fields:
                if key == "FILELIST" and path.endswith("-dbg") and bstr.strip() != '':
                    continue
                if key in dep_fields:
                    (depvera, depverb) = compare_pkg_lists(astr, bstr)
                    if depvera == depverb:
                        continue
                alist = sorted(astr.split())
                blist = sorted(bstr.split())
                # We don't care about the removal of self-dependencies
                if pkgname in alist and pkgname not in blist:
                    alist.remove(pkgname)
                if ' '.join(alist) == ' '.join(blist):
                    continue

        changes.append(ChangeRecord(path, key, astr, bstr, monitored))
    return changes
365
366
def _read_blob_text(blob):
    """Return a blob's content as str (bytes are decoded as UTF-8)."""
    data = blob.data_stream.read()
    if not isinstance(data, str):
        data = data.decode('utf-8', 'replace')
    return data


def process_changes(repopath, revision1, revision2='HEAD', report_all=False, report_ver=False):
    """Collect buildhistory changes between two revisions of a repository.

    repopath: path to a non-bare git repository.
    revision1, revision2: revisions to diff (revision1 against revision2).
    report_all: return every change rather than only monitored ones, and
        disable the significance filtering in compare_dict_blobs().
    report_ver: passed to compare_dict_blobs() to monitor version fields.

    Returns a list of ChangeRecord objects with their "related" lists filled.
    """
    scriptlet_prefix = 'latest.'

    repo = git.Repo(repopath)
    assert not repo.bare
    commit = repo.commit(revision1)
    diff = commit.diff(revision2)

    changes = []
    for d in diff.iter_change_type('M'):
        path = os.path.dirname(d.a_blob.path)
        filename = os.path.basename(d.a_blob.path)
        if path.startswith('packages/'):
            if filename == 'latest':
                changes.extend(compare_dict_blobs(path, d.a_blob, d.b_blob, report_all, report_ver))
            elif filename.startswith(scriptlet_prefix):
                # Strip the prefix, as for added/removed scriptlets below, so
                # the record is formatted as a scriptlet diff
                fieldname = filename[len(scriptlet_prefix):]
                changes.append(ChangeRecord(path, fieldname, _read_blob_text(d.a_blob), _read_blob_text(d.b_blob), True))
        elif path.startswith('images/'):
            if filename in img_monitor_files:
                if filename == 'files-in-image.txt':
                    comparer = compare_file_lists
                elif filename == 'installed-package-names.txt':
                    comparer = compare_lists
                else:
                    comparer = None
                if comparer is None:
                    changes.append(ChangeRecord(path, filename, _read_blob_text(d.a_blob), _read_blob_text(d.b_blob), True))
                else:
                    alines = _read_blob_text(d.a_blob).splitlines()
                    blines = _read_blob_text(d.b_blob).splitlines()
                    filechanges = comparer(alines, blines)
                    if filechanges:
                        chg = ChangeRecord(path, filename, None, None, True)
                        chg.filechanges = filechanges
                        changes.append(chg)
            elif filename == 'image-info.txt':
                changes.extend(compare_dict_blobs(path, d.a_blob, d.b_blob, report_all, report_ver))
            elif '/image-files/' in path:
                changes.append(ChangeRecord(path, filename, _read_blob_text(d.a_blob), _read_blob_text(d.b_blob), True))

    # Look for added preinst/postinst/prerm/postrm
    # (without reporting newly added recipes)
    addedpkgs = []
    addedchanges = []
    for d in diff.iter_change_type('A'):
        path = os.path.dirname(d.b_blob.path)
        if path.startswith('packages/'):
            filename = os.path.basename(d.b_blob.path)
            if filename == 'latest':
                addedpkgs.append(path)
            elif filename.startswith(scriptlet_prefix):
                fieldname = filename[len(scriptlet_prefix):]
                addedchanges.append(ChangeRecord(path, fieldname, '', _read_blob_text(d.b_blob), True))
    for chg in addedchanges:
        # Match whole path components so that a new "foo" does not hide
        # scriptlet changes in an existing "foo-bar"
        newly_added = any(chg.path == pkg or chg.path.startswith(pkg + '/')
                          for pkg in addedpkgs)
        if not newly_added:
            changes.append(chg)

    # Look for cleared preinst/postinst/prerm/postrm
    for d in diff.iter_change_type('D'):
        path = os.path.dirname(d.a_blob.path)
        if path.startswith('packages/'):
            filename = os.path.basename(d.a_blob.path)
            if filename.startswith(scriptlet_prefix):
                fieldname = filename[len(scriptlet_prefix):]
                changes.append(ChangeRecord(path, fieldname, _read_blob_text(d.a_blob), '', True))

    # Link related changes
    for chg in changes:
        if not chg.monitored:
            continue
        for chg2 in changes:
            # (Check dirname in the case of fields from recipe info files)
            if chg.path == chg2.path or os.path.dirname(chg.path) == chg2.path:
                if chg2.fieldname in related_fields.get(chg.fieldname, []):
                    chg.related.append(chg2)
                elif chg.path == chg2.path and chg.path.startswith('packages/') and chg2.fieldname in ['PE', 'PV', 'PR']:
                    chg.related.append(chg2)

    if report_all:
        return changes
    return [chg for chg in changes if chg.monitored]
456 return [chg for chg in changes if chg.monitored]