Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | # -*- coding: utf-8 -*- |
| 3 | |
| 4 | # SPDX-License-Identifier: BSD-3-Clause |
| 5 | # Copyright (C) 2015 Enea Software AB |
| 6 | # Author: Thomas Lundström <thomas.lundstrom@enea.com> |
| 7 | |
| 8 | # The script measures interrupt latency together with different types of |
| 9 | # system load. This is done using the programs cyclictest and stress. |
| 10 | # |
| 11 | # The main output is: |
| 12 | # |
| 13 | # Best case (min) latency |
| 14 | # This has very limited value, but is presented since it can be done |
| 15 | # easily |
| 16 | # |
| 17 | # Average latency |
| 18 | # This value is of interrest for throughput oriented systems. Limited |
| 19 | # value for a real-time system. Also presented because it is easy to do. |
| 20 | # |
| 21 | # Worst case (max) latency |
| 22 | # This is the interesting number for a real-time system. The number |
| 23 | # presented is (of cource) the largest number observed. The challenge |
| 24 | # is to know how the observed worst case relates to the actual worst case. |
| 25 | # |
| 26 | # To get an indication of the confidence, the following method is used: |
| 27 | # 1) Instead of one long run, the measurement is made as a set of shorter |
| 28 | # runs. The number of runs must be a power of 2 for reasons that will |
| 29 | # shorlty be obvious |
| 30 | # |
| 31 | # 2) First, a list of the max values are created. |
| 32 | # |
| 33 | # 3) The smallest value in that list is recorded. |
| 34 | # |
| 35 | # 4) Then a new list is create by taking the max value of each pair of |
| 36 | # values in the original list. In this list the smallest value is |
| 37 | # recorded. |
| 38 | # |
| 39 | # 5) Step 3 is repeated until there is only one value in the list. See |
| 40 | # example below: |
| 41 | # |
| 42 | # Samples: |
| 43 | # | 44 | | | | | |
| 44 | # | 77 | 77 | | | | |
| 45 | # | 118 | | | | | |
| 46 | # | 119 | 119 | 119 | | | |
| 47 | # | 138 | | | | | |
| 48 | # | 57 | 138 | | | | |
| 49 | # | 175 | | | | | |
| 50 | # | 130 | 175 | 175 | 175 | | |
| 51 | # | 54 | | | | | |
| 52 | # | 150 | 150 | | | | |
| 53 | # | 47 | | | | | |
| 54 | # | 59 | 59 | 150 | | | |
| 55 | # | 199 | | | | | |
| 56 | # | 115 | 199 | | | | |
| 57 | # | 177 | | | | | |
| 58 | # | 129 | 177 | 199 | 199 | 199 | |
| 59 | # |
| 60 | # Smallest value: |
| 61 | # | 44 | 59 | 119 | 175 | 199 | |
| 62 | # |
| 63 | # 6) The generated list of smallest values is analyzed. In this case, it |
| 64 | # can be observed that the values are increasing significantly through |
| 65 | # the entire list, which leads to the conclusion that the number of |
| 66 | # samples is too small. |
| 67 | # If instead the list had been (167, 191, 196, 199, 199), there had |
| 68 | # been a very small, or no, increase at the end of the list. We might |
| 69 | # then suspect that the number of samples is probably large enough. |
| 70 | # There is however no guarantee for that. |
| 71 | # |
| 72 | # Steps 1-2 are done in run_cyclictest_suite |
| 73 | # Steps 3-5 are done in gen_minmax_list. |
| 74 | # Step 6 needs to be done manually since there is (yet) no well defined |
| 75 | # FAIL criterion and a theoretically solid PASS criterion may never be |
| 76 | # available. |
| 77 | |
| 78 | import multiprocessing |
| 79 | import os |
| 80 | import re |
| 81 | import signal |
| 82 | import subprocess |
| 83 | import time |
| 84 | import traceback |
| 85 | |
| 86 | # See comment on the function set_hung_tmo |
| 87 | has_hung_task_detection = True |
| 88 | |
| 89 | #------------------------------------------------------------------------------- |
| 90 | |
| 91 | class TestFail(Exception): |
| 92 | def __init__(self, msg): |
| 93 | self.msg = msg |
| 94 | |
| 95 | def __str__(self): |
| 96 | return "Test failure: (" + self.msg + ")" |
| 97 | |
| 98 | #------------------------------------------------------------------------------- |
| 99 | |
| 100 | def tc_name(sub_name): |
| 101 | return "rt_bmark.intlat." + sub_name |
| 102 | |
| 103 | #------------------------------------------------------------------------------- |
| 104 | # log() does the same job as print except that a '#' is added at the beginning |
| 105 | # of each line. This causes TEFEL to ignore it |
| 106 | |
| 107 | def log(*msg): |
| 108 | tmp = "".join(map(str, msg)) # 'map(str, ...' allows numbers |
| 109 | for line in tmp.split("\n"): |
| 110 | print "#", line |
| 111 | |
| 112 | #------------------------------------------------------------------------------- |
| 113 | # Like log(), but with a timestamp added |
| 114 | |
| 115 | def log_ts(*msg): |
| 116 | ts = time.localtime() |
| 117 | stamp = "%2d:%02d:%02d: " % (ts.tm_hour, ts.tm_min, ts.tm_sec) |
| 118 | log(stamp, *msg) |
| 119 | |
| 120 | #------------------------------------------------------------------------------- |
| 121 | |
| 122 | def log_test_header(seq_no, nr_of_tests, name): |
| 123 | log("=" * 78) |
| 124 | log() |
| 125 | log(" Test case (%d/%d): %s" % (seq_no, nr_of_tests, tc_name(name))) |
| 126 | log() |
| 127 | log("." * 78) |
| 128 | log() |
| 129 | |
| 130 | #------------------------------------------------------------------------------- |
| 131 | |
| 132 | def start_stress(*args): |
| 133 | stress_cmd = [ "stress" ] |
| 134 | added_stress_types = [] |
| 135 | req_stress_types = set(args) |
| 136 | cpu_cnt = str(multiprocessing.cpu_count()) |
| 137 | |
| 138 | # The function cond_add_stress appends the options to the stress |
| 139 | # command if the stress type is in the set of requested stress types |
| 140 | |
| 141 | def cond_add_stress(stress_type, options): |
| 142 | if stress_type in req_stress_types: |
| 143 | req_stress_types.remove(stress_type) |
| 144 | added_stress_types.append(stress_type) |
| 145 | stress_cmd.extend(options) |
| 146 | |
| 147 | #---------- |
| 148 | |
| 149 | cond_add_stress("io", ["-i", cpu_cnt]) |
| 150 | cond_add_stress("cpu", ["-c", cpu_cnt]) |
| 151 | cond_add_stress("hdd", ["-d", cpu_cnt, "--hdd-bytes", "20M"]) |
| 152 | cond_add_stress("vm", ["-m", cpu_cnt, "--vm-bytes", "10M"]) |
| 153 | |
| 154 | unknown = ", ".join(req_stress_types) |
| 155 | if unknown != "": |
| 156 | raise TestFail("Unknown stress type(s): %s" % unknown) |
| 157 | |
| 158 | if not added_stress_types: |
| 159 | log("No stress requested") |
| 160 | return None |
| 161 | |
| 162 | added = "+".join(added_stress_types) |
| 163 | stress_cmd_str = " ".join(stress_cmd) |
| 164 | |
| 165 | log("Starting stress(", added, ")") |
| 166 | log(" Command: '", stress_cmd_str, "'") |
| 167 | log() |
| 168 | |
| 169 | # preexec_fn=os.setsid causes stress to be executed in a separate |
| 170 | # session, => it gets a new process group (incl. children). It |
| 171 | # can then be terminated using os.killpg in end_stress without |
| 172 | # terminating this script. |
| 173 | |
| 174 | p = subprocess.Popen(stress_cmd, preexec_fn=os.setsid) |
| 175 | |
| 176 | return p |
| 177 | |
| 178 | #------------------------------------------------------------------------------- |
| 179 | |
| 180 | def end_stress(p): |
| 181 | if p is None: |
| 182 | # The value None indicates that no stress scenario was started |
| 183 | return |
| 184 | |
| 185 | if p.poll() is not None: |
| 186 | raise TestFail("stress prematurely terminated.") |
| 187 | |
| 188 | os.killpg(os.getpgid(p.pid), signal.SIGTERM) |
| 189 | log("Terminated stress") |
| 190 | |
| 191 | #------------------------------------------------------------------------------- |
| 192 | |
| 193 | def us2hms_str(us): |
| 194 | s = (us+500000) // 1000000 # Round microseconds to s |
| 195 | m = s//60 |
| 196 | s -= 60*m; |
| 197 | h = m//60 |
| 198 | m -= 60*h |
| 199 | |
| 200 | return "%d:%02d:%02d" % (h, m, s) |
| 201 | |
| 202 | #------------------------------------------------------------------------------- |
| 203 | # Sometime the hung task supervision is triggered during execution of |
| 204 | # cyclictest (cyclictest starves stress). To avoid that, the supervision |
| 205 | # is temporarily disabled |
| 206 | |
| 207 | def set_hung_tmo(new_tmo): |
| 208 | global has_hung_task_detection |
| 209 | |
| 210 | tmo_file = "/proc/sys/kernel/hung_task_timeout_secs" |
| 211 | |
| 212 | if not has_hung_task_detection: |
| 213 | return |
| 214 | |
| 215 | if not os.access(tmo_file, os.W_OK): |
| 216 | log("Hung task detection not supported") |
| 217 | log(" (File ", tmo_file, " not found)") |
| 218 | has_hung_task_detection = False |
| 219 | return |
| 220 | |
| 221 | orig_tmo = int(subprocess.check_output(["cat", tmo_file]).strip()) |
| 222 | if new_tmo != orig_tmo: |
| 223 | cmd = ( "echo " + str(new_tmo) + " > " + tmo_file ) |
| 224 | subprocess.check_output(cmd, shell=True) |
| 225 | log("Changed timeout for detection of hung tasks: ", |
| 226 | orig_tmo, " -> ", new_tmo) |
| 227 | |
| 228 | return orig_tmo |
| 229 | |
| 230 | #------------------------------------------------------------------------------- |
| 231 | |
| 232 | def gen_minmax_list(max_list): |
| 233 | res = [min(max_list)] |
| 234 | |
| 235 | while True: |
| 236 | tmp = max_list |
| 237 | max_list = [] |
| 238 | while tmp: |
| 239 | max_list.append(max(tmp.pop(0), tmp.pop(0))) |
| 240 | |
| 241 | res.append(min(max_list)) |
| 242 | |
| 243 | if len(max_list) < 2: |
| 244 | return res |
| 245 | |
| 246 | #------------------------------------------------------------------------------- |
| 247 | # Parameters for cyclictest: |
| 248 | # |
| 249 | # On the -S option (from cyclictest.c): |
| 250 | # -S implies options -a -t -n and same priority of all threads |
| 251 | # -a: One thread per core |
| 252 | # -n: use clock_nanosleep instead of posix interval timers |
| 253 | # -t: (without argument) Set number of threads to the number |
| 254 | # of cpus |
| 255 | |
| 256 | interval_core_0 = 100 # Timer interval on core 0 [us] |
| 257 | interval_delta = 20 # Interval increment for each core [us] |
| 258 | loop_count = 30000 # Number of loops (on core 0). |
| 259 | |
| 260 | cmd = ("cyclictest", |
| 261 | "-S", # Standard SMP testing. See below |
| 262 | "-p", "99", # RT priority 99 |
| 263 | "-q", # Quiet mode, i.e. print only a summary |
| 264 | "-i", str(interval_core_0), |
| 265 | "-d", str(interval_delta), |
| 266 | "-l", str(loop_count) |
| 267 | ) |
| 268 | rex = re.compile("C:\s*(\d+).*Min:\s*(\d+).*Avg:\s*(\d+).*Max:\s*(\d+)") |
| 269 | |
| 270 | def run_cyclictest_once(): |
| 271 | res = subprocess.check_output(cmd) |
| 272 | |
| 273 | # minlist and maxlist are lists with the extremes for each core |
| 274 | # avg_cnt is the sum of cycles for all cores |
| 275 | # avg_sum is the sum of (cycle count*average) for each core |
| 276 | # Since cyclictest runs different number of cycles on |
| 277 | # different cores, avg_sum/avg_cnt gives a more accurate |
| 278 | # value of the overall average than just taking the average |
| 279 | # of each core's averages |
| 280 | |
| 281 | minlist = [] |
| 282 | maxlist = [] |
| 283 | avg_sum = 0 |
| 284 | avg_cnt = 0 |
| 285 | |
| 286 | for line in res.split("\n"): |
| 287 | m = rex.search(line) |
| 288 | if m is not None: |
| 289 | minlist.append(int(m.group(2))) |
| 290 | maxlist.append(int(m.group(4))) |
| 291 | n = int(m.group(1)) |
| 292 | avg_sum += n * int(m.group(3)) |
| 293 | avg_cnt += n |
| 294 | |
| 295 | return min(minlist), [avg_sum, avg_cnt], max(maxlist) |
| 296 | |
| 297 | #------------------------------------------------------------------------------- |
| 298 | # A precondition for the tracking of min-max values is that |
| 299 | # the suite size os a power of 2. |
| 300 | |
| 301 | N = 5 |
| 302 | suite_size = 2**N |
| 303 | |
| 304 | est_exec_time_once = interval_core_0 * loop_count |
| 305 | est_exec_time_suite = suite_size * est_exec_time_once |
| 306 | |
| 307 | def run_cyclictest_suite(): |
| 308 | log("Starting cyclictest") |
| 309 | log(" Command : ", " ".join(cmd)) |
| 310 | log(" Number of cycles : ", loop_count*suite_size, |
| 311 | " (", suite_size, " sets of ", loop_count, " cycles)") |
| 312 | log(" Exec. time (est) : ", us2hms_str(est_exec_time_suite)) |
| 313 | log() |
| 314 | |
| 315 | orig_tmo = set_hung_tmo(0) # 0 => Disable |
| 316 | |
| 317 | # float('inf') emulates infinity. At least in the sense that it is |
| 318 | # guaranteed to be larger than any actual number. |
| 319 | ack_min = float('inf') |
| 320 | ack_avg = [0, 0] |
| 321 | |
| 322 | log() |
| 323 | log_ts("Start of execution") |
| 324 | t = time.time() |
| 325 | max_list = [] |
| 326 | |
| 327 | for i in xrange(0, suite_size): |
| 328 | tmp_min, tmp_avg, tmp_max = run_cyclictest_once() |
| 329 | |
| 330 | msg = "%2d/%2d:" % (i+1, suite_size) |
| 331 | msg += " min: %4d" % tmp_min |
| 332 | msg += " avg: %5.1f" % (float(tmp_avg[0])/tmp_avg[1]) |
| 333 | msg += " max: %4d" % tmp_max |
| 334 | log_ts(msg) |
| 335 | |
| 336 | # Track minimum value |
| 337 | if tmp_min < ack_min: |
| 338 | ack_min = tmp_min |
| 339 | |
| 340 | # Track smallest max value |
| 341 | max_list.append(tmp_max) |
| 342 | |
| 343 | ack_avg[0] += tmp_avg[0] |
| 344 | ack_avg[1] += tmp_avg[1] |
| 345 | |
| 346 | t = time.time()-t |
| 347 | log_ts("Cyclictest completed. Actual execution time:", |
| 348 | us2hms_str(t*1000000)) |
| 349 | log() |
| 350 | set_hung_tmo(orig_tmo) |
| 351 | |
| 352 | return ack_min, float(ack_avg[0])/ack_avg[1], gen_minmax_list(max_list) |
| 353 | |
| 354 | #------------------------------------------------------------------------------- |
| 355 | |
| 356 | class cyclictest_runner: |
| 357 | def run_test(self, seq_no, nr_of_tests, name, stressparams): |
| 358 | |
| 359 | try: |
| 360 | log_test_header(seq_no, nr_of_tests, name) |
| 361 | |
| 362 | p = start_stress(*stressparams) |
| 363 | |
| 364 | bm_min, bm_avg, bm_max_list = run_cyclictest_suite() |
| 365 | |
| 366 | end_stress(p) |
| 367 | |
| 368 | bm_max = bm_max_list[-1] |
| 369 | |
| 370 | log() |
| 371 | log("Min: %d us" % bm_min) |
| 372 | log("Avg: %.1f us" % bm_avg) |
| 373 | log("Max: %d us" % bm_max) |
| 374 | log() |
| 375 | log("Max list: ", bm_max_list) |
| 376 | log() |
| 377 | log("PASS") |
| 378 | |
| 379 | print |
| 380 | print tc_name(name), "[Min/us,Avg/us,Max/us]:", |
| 381 | print "%d,%.1f,%d" % (bm_min,bm_avg, bm_max) |
| 382 | print "PASS:", tc_name(name) |
| 383 | print |
| 384 | |
| 385 | except Exception: |
| 386 | log() |
| 387 | log("Exception!") |
| 388 | log() |
| 389 | log("Traceback:", traceback.format_exc()) |
| 390 | log() |
| 391 | log("WD: ", os.getcwd()) |
| 392 | log() |
| 393 | log("FAIL") |
| 394 | print |
| 395 | print "FAIL:", tc_name(name) |
| 396 | print |
| 397 | |
| 398 | #------------------------------------------------------------------------------- |
| 399 | |
| 400 | runner = cyclictest_runner() |
| 401 | |
| 402 | tests = (("no_stress", []), |
| 403 | ("cpu", ["cpu"]), |
| 404 | ("hdd", ["hdd"]), |
| 405 | ("io", ["io"]), |
| 406 | ("vm", ["vm"]), |
| 407 | ("full", ["io", "cpu", "hdd", "vm"]), |
| 408 | ) |
| 409 | |
| 410 | nr_of_tests = len(tests) |
| 411 | for seq_no, params in enumerate(tests, start=1): |
| 412 | runner.run_test(seq_no, nr_of_tests, *params) |