Brad Bishop | c342db3 | 2019-05-15 21:57:59 -0400 | [diff] [blame] | 1 | # |
| 2 | # SPDX-License-Identifier: GPL-2.0-only |
| 3 | # |
Brad Bishop | 6e60e8b | 2018-02-01 10:27:11 -0500 | [diff] [blame] | 4 | # Implements system state sampling. Called by buildstats.bbclass. |
| 5 | # Because it is a real Python module, it can hold persistent state, |
| 6 | # like open log files and the time of the last sampling. |
| 7 | |
import os
import re
import time

import bb.event
| 11 | |
class SystemStats:
    """
    Samples system state from /proc and records it in per-build log files.

    One instance keeps the output log files open and remembers the time and
    raw counters of the previous sample, so that each new sample can be
    reduced to rates/deltas relative to the previous one.

    Log files are created below ``${BUILDSTATS_BASE}/${BUILDNAME}``:

    * ``reduced_proc_<name>.log`` for each sampled ``/proc/<name>`` file
    * ``monitor_disk.log`` for disk usage reported by MonitorDiskEvent
    """

    def __init__(self, d):
        """
        Create the output directory and open all log files.

        d -- datastore providing the BUILDNAME and BUILDSTATS_BASE variables.
        """
        bn = d.getVar('BUILDNAME')
        bsdir = os.path.join(d.getVar('BUILDSTATS_BASE'), bn)
        bb.utils.mkdirhier(bsdir)
        file_handlers = [('diskstats', self._reduce_diskstats),
                         ('meminfo', self._reduce_meminfo),
                         ('stat', self._reduce_stat)]

        # Some hosts like openSUSE have readable /proc/pressure files
        # but throw errors when these files are opened. Catch these errors
        # and ensure that the reduced_proc_pressure directory is not created.
        if os.path.exists("/proc/pressure"):
            try:
                with open('/proc/pressure/cpu', 'rb') as source:
                    source.read()
                pressuredir = os.path.join(bsdir, 'reduced_proc_pressure')
                bb.utils.mkdirhier(pressuredir)
                file_handlers.extend([('pressure/cpu', self._reduce_pressure),
                                      ('pressure/io', self._reduce_pressure),
                                      ('pressure/memory', self._reduce_pressure)])
            except Exception:
                # Deliberately best-effort: pressure sampling is optional.
                pass

        # Last time that we sampled /proc data resp. recorded disk monitoring data.
        self.last_proc = 0
        self.last_disk_monitor = 0
        # Minimum number of seconds between recording a sample. This becomes relevant when we get
        # called very often while many short tasks get started. Sampling during quiet periods
        # depends on the heartbeat event, which fires less often.
        # By default, the Heartbeat events occur roughly once every second but the actual time
        # between these events deviates by a few milliseconds, in most cases. Hence
        # pick a somewhat arbitrary tolerance such that we sample a large majority
        # of the Heartbeat events. This ignores rare events that fall outside the minimum
        # and may lead to an extra sample in a given second every so often. However, it allows
        # for fairly consistent intervals between samples without missing many events.
        self.tolerance = 0.01
        self.min_seconds = 1.0 - self.tolerance

        self.meminfo_regex = re.compile(rb'^(MemTotal|MemFree|Buffers|Cached|SwapTotal|SwapFree):\s*(\d+)')
        self.diskstats_regex = re.compile(rb'^([hsv]d.|mtdblock\d|mmcblk\d|cciss/c\d+d\d+.*)$')
        self.diskstats_ltime = None
        self.diskstats_data = None
        self.stat_ltimes = None
        # Last 'total' value sampled from /proc/pressure. All resources are stored
        # in a single dict with the filename as key.
        self.last_pressure = {"pressure/cpu": None, "pressure/io": None, "pressure/memory": None}

        self.proc_files = []
        self.monitor_disk = None
        try:
            for filename, handler in file_handlers:
                # The corresponding /proc files might not exist on the host.
                # For example, /proc/diskstats is not available in virtualized
                # environments like Linux-VServer. Silently skip collecting
                # the data.
                if os.path.exists(os.path.join('/proc', filename)):
                    # In practice, this class gets instantiated only once in
                    # the bitbake cooker process. Therefore 'append' mode is
                    # not strictly necessary, but using it makes the class
                    # more robust should two processes ever write
                    # concurrently.
                    destfile = os.path.join(bsdir, '%sproc_%s.log' % ('reduced_' if handler else '', filename))
                    self.proc_files.append((filename, open(destfile, 'ab'), handler))
            self.monitor_disk = open(os.path.join(bsdir, 'monitor_disk.log'), 'ab')
        except Exception:
            # Do not leak the log files that were already opened.
            self.close()
            raise

    def close(self):
        """Close all log files opened by the constructor."""
        try:
            if self.monitor_disk is not None:
                self.monitor_disk.close()
        finally:
            # Close the /proc logs even if closing monitor_disk failed.
            for _, output, _ in self.proc_files:
                output.close()

    def _reduce_meminfo(self, time, data, filename):
        """
        Extracts 'MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree'
        and writes their values into a single line, in that order.

        Returns (time, line) or None when any of the six fields is missing.
        """
        values = {}
        for line in data.split(b'\n'):
            m = self.meminfo_regex.match(line)
            if m:
                values[m.group(1)] = m.group(2)
        if len(values) != 6:
            return None
        fields = (b'MemTotal', b'MemFree', b'Buffers', b'Cached', b'SwapTotal', b'SwapFree')
        return (time, b' '.join(values[x] for x in fields) + b'\n')

    def _diskstats_is_relevant_line(self, linetokens):
        """
        Return True if the tokenized /proc/diskstats line describes a whole
        disk that should be accounted for.

        The line must have at least 14 fields. Newer kernels append further
        fields (discard and flush statistics) after the first 14, so longer
        lines are accepted as well.
        """
        if len(linetokens) < 14:
            return False
        disk = linetokens[2]
        return self.diskstats_regex.match(disk) is not None

    def _reduce_diskstats(self, time, data, filename):
        """
        Sum sectors read, sectors written and I/O time over all relevant
        disks and convert the difference to the previous sample into
        (read throughput, write throughput, utilization).

        Returns (time of previous sample, (readTput, writeTput, util)) or
        None for the first sample or when no time has passed.
        """
        lines = (line.split() for line in data.split(b'\n'))
        diskdata = [0] * 3
        for tokens in lines:
            if not self._diskstats_is_relevant_line(tokens):
                continue
            # rsect
            diskdata[0] += int(tokens[5])
            # wsect
            diskdata[1] += int(tokens[9])
            # use
            diskdata[2] += int(tokens[12])

        reduced = None
        if self.diskstats_ltime:
            # We need to compute information about the time interval
            # since the last sampling and record the result as sample
            # for that point in the past.
            interval = time - self.diskstats_ltime
            if interval > 0:
                sums = [a - b for a, b in zip(diskdata, self.diskstats_data)]
                readTput = sums[0] / 2.0 * 100.0 / interval
                writeTput = sums[1] / 2.0 * 100.0 / interval
                util = float(sums[2]) / 10 / interval
                util = max(0.0, min(1.0, util))
                reduced = (self.diskstats_ltime, (readTput, writeTput, util))

        self.diskstats_ltime = time
        self.diskstats_data = diskdata
        return reduced

    def _reduce_nop(self, time, data, filename):
        """Pass the sampled data through unchanged."""
        return (time, data)

    def _reduce_stat(self, time, data, filename):
        """
        Convert the aggregate CPU line of /proc/stat into the fractions of
        time spent in user, system and iowait since the previous sample.

        Returns (time, (user, system, iowait)) or None for the first sample,
        for empty data, or when the first line has fewer than seven counters.
        """
        if not data:
            return None
        # CPU times {user, nice, system, idle, io_wait, irq, softirq} from first line
        tokens = data.split(b'\n', 1)[0].split()
        times = [int(token) for token in tokens[1:]]
        if len(times) < 7:
            # Malformed line: skip it and keep the previous sample as reference.
            return None
        reduced = None
        if self.stat_ltimes:
            last = self.stat_ltimes
            user = float((times[0] + times[1]) - (last[0] + last[1]))
            system = float((times[2] + times[5] + times[6]) - (last[2] + last[5] + last[6]))
            idle = float(times[3] - last[3])
            iowait = float(times[4] - last[4])

            # Avoid a division by zero when no counter changed.
            aSum = max(user + system + idle + iowait, 1)
            reduced = (time, (user/aSum, system/aSum, iowait/aSum))

        self.stat_ltimes = times
        return reduced

    def _reduce_pressure(self, time, data, filename):
        """
        Return reduced pressure: {avg10, avg60, avg300} and delta total compared to the previous sample
        for the cpu, io and memory resources. A common function is used for all 3 resources since the
        format of the /proc/pressure file is the same in each case.

        Returns None for empty data and for the first sample of a resource.
        """
        if not data:
            return None
        tokens = data.split(b'\n', 1)[0].split()
        avg10 = float(tokens[1].split(b'=')[1])
        avg60 = float(tokens[2].split(b'=')[1])
        avg300 = float(tokens[3].split(b'=')[1])
        total = int(tokens[4].split(b'=')[1])

        reduced = None
        previous = self.last_pressure.get(filename)
        # Compare against None explicitly: a previous total of 0 is a valid
        # sample (no stall recorded so far) and must not be mistaken for
        # "no previous sample".
        if previous is not None:
            delta = total - previous
            reduced = (time, (avg10, avg60, avg300, delta))
        self.last_pressure[filename] = total
        return reduced

    def sample(self, event, force):
        """
        Collect and log proc or disk_monitor stats periodically.
        Return True if a new sample is collected and hence the value last_proc or last_disk_monitor
        is changed.

        event -- the event that triggered the call; disk usage is only
                 recorded for bb.event.MonitorDiskEvent.
        force -- sample even if less than min_seconds passed since the
                 previous sample.
        """
        retval = False
        now = time.time()
        if (now - self.last_proc > self.min_seconds) or force:
            for filename, output, handler in self.proc_files:
                with open(os.path.join('/proc', filename), 'rb') as source:
                    data = source.read()
                if handler:
                    reduced = handler(now, data, filename)
                else:
                    reduced = (now, data)
                if reduced:
                    if isinstance(reduced[1], bytes):
                        # Use as it is.
                        data = reduced[1]
                    else:
                        # Convert to a single line.
                        data = (' '.join([str(x) for x in reduced[1]]) + '\n').encode('ascii')
                    # Unbuffered raw write, less overhead and useful
                    # in case that we end up with concurrent writes.
                    os.write(output.fileno(),
                             ('%.0f\n' % reduced[0]).encode('ascii') +
                             data +
                             b'\n')
            self.last_proc = now
            retval = True

        if isinstance(event, bb.event.MonitorDiskEvent) and \
           ((now - self.last_disk_monitor > self.min_seconds) or force):
            os.write(self.monitor_disk.fileno(),
                     ('%.0f\n' % now).encode('ascii') +
                     ''.join(['%s: %d\n' % (dev, sample.total_bytes - sample.free_bytes)
                              for dev, sample in event.disk_usage.items()]).encode('ascii') +
                     b'\n')
            self.last_disk_monitor = now
            retval = True
        return retval