#!/usr/bin/env python3

# This tool runs on the host CPU, gathers all SST-related configuration from
# the BMC (via Redfish) and from the Linux driver, and compares them to catch
# any errors or disagreement. The only required arguments are the details
# needed to start a Redfish session.
#
# This was tested in a live Arch Linux ISO environment. Any Linux
# installation should work, but it is best to use the latest tools and kernel
# driver.
#
# Required dependencies:
# * DMTF's redfish Python library. This is available via pip.
# * The intel-speed-select tool from the kernel source tree
#   (tools/power/x86/intel-speed-select), available in the PATH.
| 15 | |
| 16 | import redfish |
| 17 | |
| 18 | import argparse |
| 19 | import json |
| 20 | import re |
| 21 | import subprocess |
| 22 | import sys |
| 23 | |
# Logical CPU number -> punit core number, as parsed from the
# intel-speed-select debug output by get_linux_output().
linux_cpu_map = {}

# Overall verdict; compare() clears it on the first mismatch and main() turns
# it into the process exit status.
success = True
| 26 | |
def get_linux_output():
    """Collect SST data from intel-speed-select and refresh the CPU map.

    Runs ``intel-speed-select perf-profile info`` (with ``--debug``) followed
    by ``perf-profile get-config-current-level``. The JSON document each
    command writes to stderr is parsed, and every top-level entry of the
    current-level document is merged into the same-named entry of the info
    document.

    Side effect: rebuilds the module-level ``linux_cpu_map`` from the
    ``logical_cpu:N ... punit_core:M`` lines found in the debug output on
    stdout of the first command.

    Returns:
        dict: the parsed ``perf-profile info`` JSON with the current-level
        data merged in.

    Raises:
        subprocess.CalledProcessError: if either command exits non-zero.
        json.JSONDecodeError: if a command's stderr is not valid JSON.
        KeyError: if the current-level output names an entry that the info
            output does not contain.
    """
    global linux_cpu_map

    tool = ["/usr/bin/env", "intel-speed-select"]

    info_cmd = tool + ["--debug", "--format", "json", "perf-profile", "info"]
    info_proc = subprocess.run(info_cmd, capture_output=True, text=True,
                               check=True)
    result = json.loads(info_proc.stderr)

    # Raw string: "\d" in a plain string literal is an invalid escape sequence.
    cpu_line = re.compile(r"logical_cpu:(\d+).*punit_core:(\d+)")
    cpu_map = {}
    for line in info_proc.stdout.split("\n"):
        match = cpu_line.search(line)
        if match:
            cpu_map[int(match.group(1))] = int(match.group(2))
    linux_cpu_map = cpu_map

    level_cmd = tool + ["--format", "json", "perf-profile",
                        "get-config-current-level"]
    # check=True mirrors the first command so that a failing tool is reported
    # as such instead of surfacing as a confusing JSON parse error.
    level_proc = subprocess.run(level_cmd, capture_output=True, text=True,
                                check=True)
    current_level = json.loads(level_proc.stderr)

    for proc, data in current_level.items():
        result[proc].update(data)

    return result
| 51 | |
| 52 | |
def compare(redfish_val, linux_val, description):
    """Print both values under *description* and record any disagreement.

    When the two values differ, the header line is tagged with
    ``!! MISMATCH !!`` and the module-level ``success`` flag is set to False.
    Nothing is returned.
    """
    global success

    mismatch = redfish_val != linux_val
    if mismatch:
        success = False
    tag = "!! MISMATCH !!" if mismatch else ""

    report = (
        f"{description}: {tag}",
        f" Redfish: {redfish_val}",
        f" Linux: {linux_val}",
    )
    for line in report:
        print(line)
| 62 | |
| 63 | |
def get_linux_package(linux_data, redfish_id):
    """Return the intel-speed-select entry that belongs to a Redfish CPU.

    The number N is taken from a Redfish processor Id of the form ``cpuN``
    and matched against the ``linux_data`` keys that start with
    ``package-N:``.

    Args:
        linux_data: mapping of intel-speed-select top-level keys to their
            data, as returned by get_linux_output().
        redfish_id: Redfish processor Id, e.g. ``cpu0``.

    Returns:
        The value stored under the single matching key.

    Raises:
        RuntimeError: if ``redfish_id`` does not start with ``cpu<digits>``,
            or if the number of matching keys is not exactly one.
    """
    # Raw string: "\d" in a plain string literal is an invalid escape sequence.
    match = re.match(r"cpu(\d+)", redfish_id)
    if not match:
        raise RuntimeError(f"Redfish CPU name is unexpected: {redfish_id}")

    # The trailing colon keeps "package-1:" from also matching "package-10:".
    prefix = f"package-{match.group(1)}:"
    matching_keys = [key for key in linux_data if key.startswith(prefix)]
    if len(matching_keys) != 1:
        raise RuntimeError(f"Unexpected number of matching linux objects for {redfish_id}")
    return linux_data[matching_keys[0]]
| 76 | |
| 77 | |
def _turbo_bucket_order(bucket_key):
    """Sort key that orders ``bucket-N`` names by the numeric value of N."""
    digits = re.search(r"(\d+)$", bucket_key)
    return (int(digits.group(1)) if digits else -1, bucket_key)


def compare_config(redfish_config, linux_config):
    """Compare one Redfish OperatingConfig against one perf-profile level.

    Each property is reported through compare(), so mismatches are printed
    and recorded in the module-level ``success`` flag rather than raised.

    Args:
        redfish_config: decoded JSON of a Redfish OperatingConfig resource.
        linux_config: the matching ``perf-profile-level-N`` dict from the
            intel-speed-select output.
    """
    print(f"--Checking {redfish_config['Id']}--")
    compare(redfish_config["BaseSpeedMHz"], int(linux_config["base-frequency(MHz)"]), "Base Speed")

    # Redfish side: the highest base speed (with its core list) is the
    # high-priority setting and the lowest is the low-priority one. Zero means
    # "nothing seen yet", and stays zero when there are no settings.
    actual_hp_p1 = actual_lp_p1 = 0
    actual_hp_cores = set()
    for bf in redfish_config["BaseSpeedPrioritySettings"]:
        if not actual_hp_p1 or bf["BaseSpeedMHz"] > actual_hp_p1:
            actual_hp_p1 = bf["BaseSpeedMHz"]
            actual_hp_cores = set(bf["CoreIDs"])
        if not actual_lp_p1 or bf["BaseSpeedMHz"] < actual_lp_p1:
            actual_lp_p1 = bf["BaseSpeedMHz"]

    # Linux side: the defaults match the "no settings" case above when the
    # level has no base-freq properties.
    exp_hp_p1 = exp_lp_p1 = 0
    exp_hp_cores = set()
    if "speed-select-base-freq-properties" in linux_config:
        exp_bf_props = linux_config["speed-select-base-freq-properties"]
        exp_hp_p1 = int(exp_bf_props["high-priority-base-frequency(MHz)"])
        # The tool lists logical CPUs; translate them through linux_cpu_map so
        # they can be compared with the Redfish CoreIDs.
        exp_hp_cores = {
            linux_cpu_map[int(cpu)]
            for cpu in exp_bf_props["high-priority-cpu-list"].split(",")
        }
        exp_lp_p1 = int(exp_bf_props["low-priority-base-frequency(MHz)"])

    compare(actual_hp_p1, exp_hp_p1, "SST-BF High Priority P1 Freq")
    compare(actual_hp_cores, exp_hp_cores, "SST-BF High Priority Core List")
    compare(actual_lp_p1, exp_lp_p1, "SST-BF Low Priority P1 Freq")

    linux_turbo = linux_config["turbo-ratio-limits-sse"]

    compare(redfish_config["MaxJunctionTemperatureCelsius"],
            int(linux_config["tjunction-max(C)"]),
            "Junction Temperature")
    compare(redfish_config["MaxSpeedMHz"],
            int(linux_turbo["bucket-0"]["max-turbo-frequency(MHz)"]),
            "SSE Max Turbo Speed")
    compare(redfish_config["TDPWatts"],
            int(linux_config["thermal-design-power(W)"]),
            "TDP")
    # NOTE(review): halving enable-cpu-count assumes two hardware threads per
    # core; confirm for systems running with SMT disabled.
    compare(redfish_config["TotalAvailableCoreCount"],
            int(linux_config["enable-cpu-count"]) // 2,
            "Enabled Core Count")

    actual_turbo = [(entry["ActiveCoreCount"], entry["MaxSpeedMHz"])
                    for entry in redfish_config["TurboProfile"]]
    # Order buckets numerically: a plain string sort would put "bucket-10"
    # ahead of "bucket-2".
    exp_turbo = [
        (int(linux_turbo[key]["core-count"]),
         int(linux_turbo[key]["max-turbo-frequency(MHz)"]))
        for key in sorted(linux_turbo, key=_turbo_bucket_order)
    ]
    compare(actual_turbo, exp_turbo, "SSE Turbo Profile")
| 125 | |
| 126 | |
def get_level_from_config_id(config_id):
    """Extract the level number from an OperatingConfig Id such as ``config2``.

    Args:
        config_id: Id string expected to start with ``config<digits>``.

    Returns:
        str: the digits following ``config``, kept as a string.

    Raises:
        RuntimeError: if ``config_id`` does not start with ``config<digits>``.
    """
    # Raw string: "\d" in a plain string literal is an invalid escape sequence.
    match = re.match(r"config(\d+)", config_id)
    if not match:
        raise RuntimeError(f"Invalid config name {config_id}")
    return match.group(1)
| 132 | |
| 133 | |
def _get_json(bmc, uri):
    """Fetch *uri* through the Redfish client and decode the JSON body."""
    return json.loads(bmc.get(uri).text)


def _check_processor(bmc, linux_data, proc_resp):
    """Compare one Redfish Processor resource with the Linux-side data."""
    proc_id = proc_resp["Id"]
    print()
    print(f"----Checking Processor {proc_id}----")

    if proc_resp["Status"]["State"] == "Absent":
        print("Not populated")
        return

    # Get subset of intel-speed-select data which applies to this CPU
    pkg_data = get_linux_package(linux_data, proc_id)

    # Check currently applied config
    applied_config = proc_resp["AppliedOperatingConfig"]["@odata.id"].split('/')[-1]
    current_level = get_level_from_config_id(applied_config)
    compare(current_level, pkg_data["get-config-current_level"], "Applied Config")

    exp_cur_level_data = pkg_data[f"perf-profile-level-{current_level}"]

    # Check whether SST-BF is enabled. The Linux tool may report
    # "unsupported", which is treated the same as "disabled" here.
    bf_enabled = proc_resp["BaseSpeedPriorityState"].lower()
    exp_bf_enabled = exp_cur_level_data["speed-select-base-freq"]
    if exp_bf_enabled == "unsupported":
        exp_bf_enabled = "disabled"
    compare(bf_enabled, exp_bf_enabled, "SST-BF Enabled?")

    # Check high speed core list
    hscores = set(proc_resp["HighSpeedCoreIDs"])
    exp_hscores = set()
    if "speed-select-base-freq-properties" in exp_cur_level_data:
        cpu_list = exp_cur_level_data["speed-select-base-freq-properties"]["high-priority-cpu-list"]
        exp_hscores = {linux_cpu_map[int(cpu)] for cpu in cpu_list.split(",")}
    compare(hscores, exp_hscores, "High Speed Core List")

    # Load the OperatingConfigCollection
    configs = _get_json(bmc, proc_resp["OperatingConfigs"]["@odata.id"])

    # Check number of available configs
    profile_keys = [key for key in pkg_data if key.startswith("perf-profile-level")]
    compare(configs["Members@odata.count"], len(profile_keys), "Number of profiles")

    for config_member in configs["Members"]:
        # Load each OperatingConfig and compare all its contents
        config_resp = _get_json(bmc, config_member["@odata.id"])
        level = get_level_from_config_id(config_resp["Id"])
        compare_config(config_resp, pkg_data[f"perf-profile-level-{level}"])


def main():
    """Compare the BMC's Redfish SST data with intel-speed-select output.

    Parses the command line (BMC hostname, optional username and password),
    gathers the Linux-side data, opens a Redfish session, and checks every
    member of the processor collection.

    Returns:
        int: 0 when every comparison matched, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="Compare Redfish SST properties against Linux tools")
    parser.add_argument("hostname")
    parser.add_argument("--username", "-u", default="root")
    parser.add_argument("--password", "-p", default="0penBmc")
    args = parser.parse_args()

    linux_data = get_linux_output()

    bmc = redfish.redfish_client(base_url=f"https://{args.hostname}",
                                 username=args.username, password=args.password)
    bmc.login(auth="session")
    try:
        # Load the ProcessorCollection
        processors = _get_json(bmc, "/redfish/v1/Systems/system/Processors")
        for proc_member in processors["Members"]:
            proc_resp = _get_json(bmc, proc_member["@odata.id"])
            _check_processor(bmc, linux_data, proc_resp)
    finally:
        # Always end the session, even when a check raises, so it does not
        # linger on the BMC.
        bmc.logout()

    print()
    if success:
        print("Everything matched! :)")
        return 0
    print("There were mismatches, please check output :(")
    return 1
| 205 | |
if __name__ == "__main__":
    # Hand main()'s result to the shell as the process exit status.
    exit_status = main()
    sys.exit(exit_status)