Patrick Williams | 92b42cb | 2022-09-03 06:53:57 -0500 | [diff] [blame^] | 1 | # |
| 2 | # Copyright OpenEmbedded Contributors |
| 3 | # |
| 4 | # SPDX-License-Identifier: MIT |
| 5 | # |
| 6 | |
| 7 | BUILDSTATS_BASE = "${TMPDIR}/buildstats/" |
| 8 | |
| 9 | ################################################################################ |
| 10 | # Build statistics gathering. |
| 11 | # |
| 12 | # The CPU and Time gathering/tracking functions and bbevent inspiration |
| 13 | # were written by Christopher Larson. |
| 14 | # |
| 15 | ################################################################################ |
| 16 | |
| 17 | def get_buildprocess_cputime(pid): |
| 18 | with open("/proc/%d/stat" % pid, "r") as f: |
| 19 | fields = f.readline().rstrip().split() |
| 20 | # 13: utime, 14: stime, 15: cutime, 16: cstime |
| 21 | return sum(int(field) for field in fields[13:16]) |
| 22 | |
| 23 | def get_process_cputime(pid): |
| 24 | import resource |
| 25 | with open("/proc/%d/stat" % pid, "r") as f: |
| 26 | fields = f.readline().rstrip().split() |
| 27 | stats = { |
| 28 | 'utime' : fields[13], |
| 29 | 'stime' : fields[14], |
| 30 | 'cutime' : fields[15], |
| 31 | 'cstime' : fields[16], |
| 32 | } |
| 33 | iostats = {} |
| 34 | if os.path.isfile("/proc/%d/io" % pid): |
| 35 | with open("/proc/%d/io" % pid, "r") as f: |
| 36 | while True: |
| 37 | i = f.readline().strip() |
| 38 | if not i: |
| 39 | break |
| 40 | if not ":" in i: |
| 41 | # one more extra line is appended (empty or containing "0") |
| 42 | # most probably due to race condition in kernel while |
| 43 | # updating IO stats |
| 44 | break |
| 45 | i = i.split(": ") |
| 46 | iostats[i[0]] = i[1] |
| 47 | resources = resource.getrusage(resource.RUSAGE_SELF) |
| 48 | childres = resource.getrusage(resource.RUSAGE_CHILDREN) |
| 49 | return stats, iostats, resources, childres |
| 50 | |
| 51 | def get_cputime(): |
| 52 | with open("/proc/stat", "r") as f: |
| 53 | fields = f.readline().rstrip().split()[1:] |
| 54 | return sum(int(field) for field in fields) |
| 55 | |
| 56 | def set_timedata(var, d, server_time): |
| 57 | d.setVar(var, server_time) |
| 58 | |
| 59 | def get_timedata(var, d, end_time): |
| 60 | oldtime = d.getVar(var, False) |
| 61 | if oldtime is None: |
| 62 | return |
| 63 | return end_time - oldtime |
| 64 | |
| 65 | def set_buildtimedata(var, d): |
| 66 | import time |
| 67 | time = time.time() |
| 68 | cputime = get_cputime() |
| 69 | proctime = get_buildprocess_cputime(os.getpid()) |
| 70 | d.setVar(var, (time, cputime, proctime)) |
| 71 | |
| 72 | def get_buildtimedata(var, d): |
| 73 | import time |
| 74 | timedata = d.getVar(var, False) |
| 75 | if timedata is None: |
| 76 | return |
| 77 | oldtime, oldcpu, oldproc = timedata |
| 78 | procdiff = get_buildprocess_cputime(os.getpid()) - oldproc |
| 79 | cpudiff = get_cputime() - oldcpu |
| 80 | end_time = time.time() |
| 81 | timediff = end_time - oldtime |
| 82 | if cpudiff > 0: |
| 83 | cpuperc = float(procdiff) * 100 / cpudiff |
| 84 | else: |
| 85 | cpuperc = None |
| 86 | return timediff, cpuperc |
| 87 | |
| 88 | def write_task_data(status, logfile, e, d): |
| 89 | with open(os.path.join(logfile), "a") as f: |
| 90 | elapsedtime = get_timedata("__timedata_task", d, e.time) |
| 91 | if elapsedtime: |
| 92 | f.write(d.expand("${PF}: %s\n" % e.task)) |
| 93 | f.write(d.expand("Elapsed time: %0.2f seconds\n" % elapsedtime)) |
| 94 | cpu, iostats, resources, childres = get_process_cputime(os.getpid()) |
| 95 | if cpu: |
| 96 | f.write("utime: %s\n" % cpu['utime']) |
| 97 | f.write("stime: %s\n" % cpu['stime']) |
| 98 | f.write("cutime: %s\n" % cpu['cutime']) |
| 99 | f.write("cstime: %s\n" % cpu['cstime']) |
| 100 | for i in iostats: |
| 101 | f.write("IO %s: %s\n" % (i, iostats[i])) |
| 102 | rusages = ["ru_utime", "ru_stime", "ru_maxrss", "ru_minflt", "ru_majflt", "ru_inblock", "ru_oublock", "ru_nvcsw", "ru_nivcsw"] |
| 103 | for i in rusages: |
| 104 | f.write("rusage %s: %s\n" % (i, getattr(resources, i))) |
| 105 | for i in rusages: |
| 106 | f.write("Child rusage %s: %s\n" % (i, getattr(childres, i))) |
| 107 | if status == "passed": |
| 108 | f.write("Status: PASSED \n") |
| 109 | else: |
| 110 | f.write("Status: FAILED \n") |
| 111 | f.write("Ended: %0.2f \n" % e.time) |
| 112 | |
| 113 | def write_host_data(logfile, e, d, type): |
| 114 | import subprocess, os, datetime |
| 115 | # minimum time allowed for each command to run, in seconds |
| 116 | time_threshold = 0.5 |
| 117 | limit = 10 |
| 118 | # the total number of commands |
| 119 | num_cmds = 0 |
| 120 | msg = "" |
| 121 | if type == "interval": |
| 122 | # interval at which data will be logged |
| 123 | interval = d.getVar("BB_HEARTBEAT_EVENT", False) |
| 124 | if interval is None: |
| 125 | bb.warn("buildstats: Collecting host data at intervals failed. Set BB_HEARTBEAT_EVENT=\"<interval>\" in conf/local.conf for the interval at which host data will be logged.") |
| 126 | d.setVar("BB_LOG_HOST_STAT_ON_INTERVAL", "0") |
| 127 | return |
| 128 | interval = int(interval) |
| 129 | cmds = d.getVar('BB_LOG_HOST_STAT_CMDS_INTERVAL') |
| 130 | msg = "Host Stats: Collecting data at %d second intervals.\n" % interval |
| 131 | if cmds is None: |
| 132 | d.setVar("BB_LOG_HOST_STAT_ON_INTERVAL", "0") |
| 133 | bb.warn("buildstats: Collecting host data at intervals failed. Set BB_LOG_HOST_STAT_CMDS_INTERVAL=\"command1 ; command2 ; ... \" in conf/local.conf.") |
| 134 | return |
| 135 | if type == "failure": |
| 136 | cmds = d.getVar('BB_LOG_HOST_STAT_CMDS_FAILURE') |
| 137 | msg = "Host Stats: Collecting data on failure.\n" |
| 138 | msg += "Failed at task: " + e.task + "\n" |
| 139 | if cmds is None: |
| 140 | d.setVar("BB_LOG_HOST_STAT_ON_FAILURE", "0") |
| 141 | bb.warn("buildstats: Collecting host data on failure failed. Set BB_LOG_HOST_STAT_CMDS_FAILURE=\"command1 ; command2 ; ... \" in conf/local.conf.") |
| 142 | return |
| 143 | c_san = [] |
| 144 | for cmd in cmds.split(";"): |
| 145 | if len(cmd) == 0: |
| 146 | continue |
| 147 | num_cmds += 1 |
| 148 | c_san.append(cmd) |
| 149 | if num_cmds == 0: |
| 150 | if type == "interval": |
| 151 | d.setVar("BB_LOG_HOST_STAT_ON_INTERVAL", "0") |
| 152 | if type == "failure": |
| 153 | d.setVar("BB_LOG_HOST_STAT_ON_FAILURE", "0") |
| 154 | return |
| 155 | |
| 156 | # return if the interval is not enough to run all commands within the specified BB_HEARTBEAT_EVENT interval |
| 157 | if type == "interval": |
| 158 | limit = interval / num_cmds |
| 159 | if limit <= time_threshold: |
| 160 | d.setVar("BB_LOG_HOST_STAT_ON_INTERVAL", "0") |
| 161 | bb.warn("buildstats: Collecting host data failed. BB_HEARTBEAT_EVENT interval not enough to run the specified commands. Increase value of BB_HEARTBEAT_EVENT in conf/local.conf.") |
| 162 | return |
| 163 | |
| 164 | # set the environment variables |
| 165 | path = d.getVar("PATH") |
| 166 | opath = d.getVar("BB_ORIGENV", False).getVar("PATH") |
| 167 | ospath = os.environ['PATH'] |
| 168 | os.environ['PATH'] = path + ":" + opath + ":" + ospath |
| 169 | with open(logfile, "a") as f: |
| 170 | f.write("Event Time: %f\nDate: %s\n" % (e.time, datetime.datetime.now())) |
| 171 | f.write("%s" % msg) |
| 172 | for c in c_san: |
| 173 | try: |
| 174 | output = subprocess.check_output(c.split(), stderr=subprocess.STDOUT, timeout=limit).decode('utf-8') |
| 175 | except (subprocess.CalledProcessError, subprocess.TimeoutExpired, FileNotFoundError) as err: |
| 176 | output = "Error running command: %s\n%s\n" % (c, err) |
| 177 | f.write("%s\n%s\n" % (c, output)) |
| 178 | # reset the environment |
| 179 | os.environ['PATH'] = ospath |
| 180 | |
| 181 | python run_buildstats () { |
| 182 | import bb.build |
| 183 | import bb.event |
| 184 | import time, subprocess, platform |
| 185 | |
| 186 | bn = d.getVar('BUILDNAME') |
| 187 | ######################################################################## |
| 188 | # bitbake fires HeartbeatEvent even before a build has been |
| 189 | # triggered, causing BUILDNAME to be None |
| 190 | ######################################################################## |
| 191 | if bn is not None: |
| 192 | bsdir = os.path.join(d.getVar('BUILDSTATS_BASE'), bn) |
| 193 | taskdir = os.path.join(bsdir, d.getVar('PF')) |
| 194 | if isinstance(e, bb.event.HeartbeatEvent) and bb.utils.to_boolean(d.getVar("BB_LOG_HOST_STAT_ON_INTERVAL")): |
| 195 | bb.utils.mkdirhier(bsdir) |
| 196 | write_host_data(os.path.join(bsdir, "host_stats_interval"), e, d, "interval") |
| 197 | |
| 198 | if isinstance(e, bb.event.BuildStarted): |
| 199 | ######################################################################## |
| 200 | # If the kernel was not configured to provide I/O statistics, issue |
| 201 | # a one time warning. |
| 202 | ######################################################################## |
| 203 | if not os.path.isfile("/proc/%d/io" % os.getpid()): |
| 204 | bb.warn("The Linux kernel on your build host was not configured to provide process I/O statistics. (CONFIG_TASK_IO_ACCOUNTING is not set)") |
| 205 | |
| 206 | ######################################################################## |
| 207 | # at first pass make the buildstats hierarchy and then |
| 208 | # set the buildname |
| 209 | ######################################################################## |
| 210 | bb.utils.mkdirhier(bsdir) |
| 211 | set_buildtimedata("__timedata_build", d) |
| 212 | build_time = os.path.join(bsdir, "build_stats") |
| 213 | # write start of build into build_time |
| 214 | with open(build_time, "a") as f: |
| 215 | host_info = platform.uname() |
| 216 | f.write("Host Info: ") |
| 217 | for x in host_info: |
| 218 | if x: |
| 219 | f.write(x + " ") |
| 220 | f.write("\n") |
| 221 | f.write("Build Started: %0.2f \n" % d.getVar('__timedata_build', False)[0]) |
| 222 | |
| 223 | elif isinstance(e, bb.event.BuildCompleted): |
| 224 | build_time = os.path.join(bsdir, "build_stats") |
| 225 | with open(build_time, "a") as f: |
| 226 | ######################################################################## |
| 227 | # Write build statistics for the build |
| 228 | ######################################################################## |
| 229 | timedata = get_buildtimedata("__timedata_build", d) |
| 230 | if timedata: |
| 231 | time, cpu = timedata |
| 232 | # write end of build and cpu used into build_time |
| 233 | f.write("Elapsed time: %0.2f seconds \n" % (time)) |
| 234 | if cpu: |
| 235 | f.write("CPU usage: %0.1f%% \n" % cpu) |
| 236 | |
| 237 | if isinstance(e, bb.build.TaskStarted): |
| 238 | set_timedata("__timedata_task", d, e.time) |
| 239 | bb.utils.mkdirhier(taskdir) |
| 240 | # write into the task event file the name and start time |
| 241 | with open(os.path.join(taskdir, e.task), "a") as f: |
| 242 | f.write("Event: %s \n" % bb.event.getName(e)) |
| 243 | f.write("Started: %0.2f \n" % e.time) |
| 244 | |
| 245 | elif isinstance(e, bb.build.TaskSucceeded): |
| 246 | write_task_data("passed", os.path.join(taskdir, e.task), e, d) |
| 247 | if e.task == "do_rootfs": |
| 248 | bs = os.path.join(bsdir, "build_stats") |
| 249 | with open(bs, "a") as f: |
| 250 | rootfs = d.getVar('IMAGE_ROOTFS') |
| 251 | if os.path.isdir(rootfs): |
| 252 | try: |
| 253 | rootfs_size = subprocess.check_output(["du", "-sh", rootfs], |
| 254 | stderr=subprocess.STDOUT).decode('utf-8') |
| 255 | f.write("Uncompressed Rootfs size: %s" % rootfs_size) |
| 256 | except subprocess.CalledProcessError as err: |
| 257 | bb.warn("Failed to get rootfs size: %s" % err.output.decode('utf-8')) |
| 258 | |
| 259 | elif isinstance(e, bb.build.TaskFailed): |
| 260 | # Can have a failure before TaskStarted so need to mkdir here too |
| 261 | bb.utils.mkdirhier(taskdir) |
| 262 | write_task_data("failed", os.path.join(taskdir, e.task), e, d) |
| 263 | ######################################################################## |
| 264 | # Lets make things easier and tell people where the build failed in |
| 265 | # build_status. We do this here because BuildCompleted triggers no |
| 266 | # matter what the status of the build actually is |
| 267 | ######################################################################## |
| 268 | build_status = os.path.join(bsdir, "build_stats") |
| 269 | with open(build_status, "a") as f: |
| 270 | f.write(d.expand("Failed at: ${PF} at task: %s \n" % e.task)) |
| 271 | if bb.utils.to_boolean(d.getVar("BB_LOG_HOST_STAT_ON_FAILURE")): |
| 272 | write_host_data(os.path.join(bsdir, "host_stats_%s_failure" % e.task), e, d, "failure") |
| 273 | } |
| 274 | |
| 275 | addhandler run_buildstats |
| 276 | run_buildstats[eventmask] = "bb.event.BuildStarted bb.event.BuildCompleted bb.event.HeartbeatEvent bb.build.TaskStarted bb.build.TaskSucceeded bb.build.TaskFailed" |
| 277 | |
| 278 | python runqueue_stats () { |
| 279 | import buildstats |
| 280 | from bb import event, runqueue |
| 281 | # We should not record any samples before the first task has started, |
| 282 | # because that's the first activity shown in the process chart. |
| 283 | # Besides, at that point we are sure that the build variables |
| 284 | # are available that we need to find the output directory. |
| 285 | # The persistent SystemStats is stored in the datastore and |
| 286 | # closed when the build is done. |
| 287 | system_stats = d.getVar('_buildstats_system_stats', False) |
| 288 | if not system_stats and isinstance(e, (bb.runqueue.sceneQueueTaskStarted, bb.runqueue.runQueueTaskStarted)): |
| 289 | system_stats = buildstats.SystemStats(d) |
| 290 | d.setVar('_buildstats_system_stats', system_stats) |
| 291 | if system_stats: |
| 292 | # Ensure that we sample at important events. |
| 293 | done = isinstance(e, bb.event.BuildCompleted) |
| 294 | if system_stats.sample(e, force=done): |
| 295 | d.setVar('_buildstats_system_stats', system_stats) |
| 296 | if done: |
| 297 | system_stats.close() |
| 298 | d.delVar('_buildstats_system_stats') |
| 299 | } |
| 300 | |
| 301 | addhandler runqueue_stats |
| 302 | runqueue_stats[eventmask] = "bb.runqueue.sceneQueueTaskStarted bb.runqueue.runQueueTaskStarted bb.event.HeartbeatEvent bb.event.BuildCompleted bb.event.MonitorDiskEvent" |