Ed Tanous | 2b314e4 | 2025-08-19 15:46:18 -0700 | [diff] [blame^] | 1 | // SPDX-License-Identifier: Apache-2.0 |
| 2 | // SPDX-FileCopyrightText: 2025 NVIDIA |
| 3 | |
#include <fcntl.h>
#include <systemd/sd-daemon.h>
#include <unistd.h>

#include <CLI/CLI.hpp>
#include <gpiod.hpp>

#include <algorithm>
#include <array>
#include <charconv>
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <filesystem>
#include <format>
#include <fstream>
#include <iostream>
#include <string>
#include <string_view>
#include <system_error>
#include <thread>
#include <unordered_map>
#include <utility>
| 20 | |
| 21 | using namespace std::chrono_literals; |
| 22 | |
| 23 | constexpr static const char* app_name = "platform_init"; |
| 24 | |
| 25 | // Map of GPIO name to line. Holds lines open for the duration of the program |
| 26 | static std::unordered_map<std::string, gpiod::line> io; |
| 27 | |
// Logs the requested delay to stderr, then blocks the calling thread for that
// long.
void sleep_milliseconds(std::chrono::milliseconds milliseconds)
{
    const auto delay_count = milliseconds.count();
    std::cerr << std::format("Sleeping for {} milliseconds\n", delay_count);
    std::this_thread::sleep_for(milliseconds);
}
| 34 | |
// Drives the named GPIO line to `value`.
//
// The first successful call for a name looks the line up, requests it as an
// output with `value` as its initial level, and leaves it cached in `io` so the
// request is held for the rest of the program. Later calls reuse the cached
// line and only update its value.
//
// Every failure is logged to stderr and otherwise ignored (best effort).
//
// line_name     Name of the GPIO line to look up.
// value         Level to drive.
// find_timeout  How long to keep retrying the lookup (one attempt every 10ms)
//               while the line cannot be found. The default of 0ms means a
//               single attempt with no waiting.
void set_gpio(const char* line_name, int value,
              std::chrono::milliseconds find_timeout = 0ms)
{
    std::cerr << std::format("{} Request to set to {}\n", line_name, value);
    constexpr std::chrono::milliseconds polling_time = 10ms;

    // operator[] creates an empty line on first use. The entry is kept empty
    // whenever lookup or request fails, so the next call starts over.
    gpiod::line& line = io[line_name];
    if (line)
    {
        std::cerr << std::format("{} Setting to {}\n", line_name, value);
        try
        {
            line.set_value(value);
        }
        catch (const std::system_error& e)
        {
            std::cerr << std::format("{} unable to set value {}\n", line_name,
                                     e.what());
        }
        return;
    }

    while (true)
    {
        line = gpiod::find_line(line_name);
        if (line)
        {
            break;
        }
        if (find_timeout <= 0ms)
        {
            // Out of time: do not sleep for a retry that will never happen
            std::cerr << std::format("{} Unable to find\n", line_name);
            return;
        }
        std::cerr << std::format("{} not found yet, waiting and retrying\n",
                                 line_name);
        sleep_milliseconds(polling_time);
        find_timeout -= polling_time;
    }

    try
    {
        line.request({app_name, gpiod::line_request::DIRECTION_OUTPUT, 0},
                     value);
    }
    catch (const std::system_error& e)
    {
        std::cerr << std::format("{} unable to set direction and value {}\n",
                                 line_name, e.what());
        // Drop the unrequested line so a later call retries the request
        // instead of calling set_value() on a line we do not own.
        line = gpiod::line();
        return;
    }
    // No separate set_value() needed: the request applied the initial value
    std::cerr << std::format("{} Set to {}\n", line_name, value);
}
| 79 | |
// Reads the current level of the named GPIO line.
//
// The line is looked up and requested as an input on every call; the request
// is not cached in `io`.
//
// Returns the value reported by the line, or -1 if the line cannot be found,
// requested, or read. Failures are logged to stderr.
int get_gpio(const char* line_name)
{
    std::cerr << std::format("{} Request to get\n", line_name);

    gpiod::line line = gpiod::find_line(line_name);
    if (!line)
    {
        std::cerr << std::format("{} Unable to find\n", line_name);
        return -1;
    }

    try
    {
        line.request({app_name, gpiod::line_request::DIRECTION_INPUT, 0});
        // Only read once the request succeeded; reading an unrequested line
        // throws.
        const int value = line.get_value();
        std::cerr << std::format("{} was {}\n", line_name, value);
        return value;
    }
    catch (const std::system_error& e)
    {
        std::cerr << std::format("{} unable to request or read {}\n",
                                 line_name, e.what());
        return -1;
    }
}
| 103 | |
// Outcome reported by GpioEvent::wait()
enum class GpioEventResult
{
    Error = 0,    // the line was never set up, so nothing could be waited on
    Asserted = 1, // the awaited event was seen
    Timeout = 2   // no event arrived before the wait expired
};
| 110 | |
// Watches a named GPIO line for a transition to a target level.
//
// Construct the object before triggering whatever makes the line change, so
// the edge is captured, then call wait().
struct GpioEvent
{
    // Looks up `line_name_in` and requests edge events on it: rising edge when
    // `value_in` is non-zero, falling edge otherwise. Failures are logged and
    // leave `line` empty, which makes wait() report Error.
    GpioEvent(const char* line_name_in, int value_in) :
        line_name(line_name_in), value(value_in)
    {
        line = gpiod::find_line(line_name);
        if (!line)
        {
            std::cerr << std::format("{} GpioEvent: Unable to find\n",
                                     line_name);
            return;
        }
        int edge = (value != 0) ? ::gpiod::line_request::EVENT_RISING_EDGE
                                : ::gpiod::line_request::EVENT_FALLING_EDGE;

        try
        {
            line.request({app_name, edge, 0});

            int val = line.get_value();
            if (val == value)
            {
                std::cerr << std::format("{} GpioEvent is already {}\n",
                                         line_name, val);
            }
            else
            {
                std::cerr << std::format("GpioEvent created for {}\n",
                                         line_name);
            }
        }
        catch (const std::system_error& e)
        {
            std::cerr << std::format("{} GpioEvent: unable to request {}\n",
                                     line_name, e.what());
            // Forget the line so wait() reports Error instead of throwing
            line = gpiod::line();
        }
    }

    // Waits up to 120 seconds for the line to reach the target level.
    //
    // Returns Asserted if the line is already at the target level or an edge
    // event arrives, Timeout if neither happens in time, and Error if the
    // line was never set up or the GPIO calls fail.
    GpioEventResult wait()
    {
        if (!line)
        {
            std::cerr << std::format("Line {} wasn't initialized\n", line_name);
            return GpioEventResult::Error;
        }
        std::cerr << std::format("{} Waiting to go to {}\n", line_name,
                                 (value != 0) ? "assert" : "deassert");
        try
        {
            // A line that already sits at the target level produces no edge,
            // so check the level first instead of waiting out the timeout.
            const bool at_target = (line.get_value() != 0) == (value != 0);
            if (at_target)
            {
                std::cerr << std::format("{} already at requested level\n",
                                         line_name);
                return GpioEventResult::Asserted;
            }

            if (!line.event_wait(std::chrono::seconds(120)))
            {
                std::cerr << std::format("{} Timeout\n", line_name);
                return GpioEventResult::Timeout;
            }
        }
        catch (const std::system_error& e)
        {
            std::cerr << std::format("{} unable to wait for event {}\n",
                                     line_name, e.what());
            return GpioEventResult::Error;
        }

        std::cerr << std::format("{} Asserted\n", line_name);

        return GpioEventResult::Asserted;
    }

    gpiod::line line;      // empty when lookup or request failed
    std::string line_name; // name used for lookup and in log messages
    int value;             // target level; non-zero selects the rising edge
};
| 164 | |
// Writes `value` to the sysfs attribute at `path`.
// Returns true only if the file opened and the data was flushed without
// error; failures are logged to stderr.
static bool write_sysfs_attribute(const std::string& path,
                                  const std::string& value)
{
    std::ofstream attribute(path);
    if (!attribute)
    {
        std::cerr << std::format("{} unable to open\n", path);
        return false;
    }
    attribute << value;
    // The stream is buffered, so the write only reaches the kernel on flush.
    // close() flushes and sets failbit if that fails.
    attribute.close();
    if (!attribute)
    {
        std::cerr << std::format("{} unable to write\n", path);
        return false;
    }
    return true;
}

// Unbinds and then re-binds the device "<number>.i2c" from the aspeed-i2c-bus
// platform driver through its sysfs unbind/bind attributes.
//
// A failed unbind is logged but does not stop the bind attempt, so a bus that
// was not bound to begin with still ends up bound.
//
// number  Device name without the ".i2c" suffix, e.g. "1e78a100".
void rebind_i2c(std::string number)
{
    const std::string driver_dir = "/sys/bus/platform/drivers/aspeed-i2c-bus/";
    const std::string device = std::format("{}.i2c\n", number);

    if (write_sysfs_attribute(driver_dir + "unbind", device))
    {
        std::cerr << std::format("{} unbound\n", number);
    }

    if (write_sysfs_attribute(driver_dir + "bind", device))
    {
        std::cerr << std::format("{} bound\n", number);
    }
}
| 206 | |
// Drives line `bit_num` of /dev/gpiochip<chip_num> to `value`.
//
// The first successful call for a (chip, line) pair requests the line as an
// output with `value` as its initial level and keeps it in a function-local
// cache; later calls for the same pair only update the value. Keeping the
// line requested matters because a released line is not guaranteed to hold
// the level that was driven.
//
// Errors are logged to stderr and otherwise ignored.
void set_gpio_raw(unsigned int chip_num, unsigned int bit_num, int value)
{
    // Lines requested through this function, keyed by "gpiochipN:bit"
    static std::unordered_map<std::string, gpiod::line> raw_lines;

    std::string syspath = std::format("gpiochip{}", chip_num);
    std::cerr << std::format("Setting gpiochip{} bit {} to {}\n", chip_num,
                             bit_num, value);
    try
    {
        gpiod::line& cached = raw_lines[std::format("{}:{}", syspath, bit_num)];
        if (cached)
        {
            cached.set_value(value);
        }
        else
        {
            gpiod::chip chip(syspath);
            gpiod::line line = chip.get_line(bit_num);
            line.request({app_name, gpiod::line_request::DIRECTION_OUTPUT, 0},
                         value);
            // Only cache once the request succeeded, so a failed request is
            // retried from scratch on the next call.
            cached = line;
        }
        std::cerr << std::format("gpiochip{} bit {} set to {}\n", chip_num,
                                 bit_num, value);
    }
    catch (const std::system_error& e)
    {
        std::cerr << std::format("Error setting gpiochip{} bit {}: {}\n",
                                 chip_num, bit_num, e.what());
    }
}
| 227 | |
// Asks the kernel to instantiate an I2C device by writing
// "<device_type> 0x<address>" to the bus's sysfs new_device attribute.
//
// bus          I2C bus number (selects /sys/bus/i2c/devices/i2c-<bus>).
// address      Device address, written as at least two hex digits.
// device_type  Device type name written in front of the address.
//
// Failures to open or write the attribute are logged to stderr; nothing is
// reported to the caller.
void new_device(unsigned int bus, unsigned int address,
                std::string_view device_type)
{
    std::string path =
        std::format("/sys/bus/i2c/devices/i2c-{}/new_device", bus);
    std::cerr << std::format("attempting to open {}\n", path);
    std::ofstream attribute(path);
    if (!attribute)
    {
        std::cerr << "Error: Unable to create I2C device\n";
        return;
    }
    attribute << std::format("{} 0x{:02x}", device_type, address);
    // The write is buffered; close() flushes it and sets failbit if the
    // kernel rejected it.
    attribute.close();
    if (!attribute)
    {
        std::cerr << std::format("Error: Unable to write to {}\n", path);
        return;
    }

    std::cerr << std::format("{} device created at bus {}\n", device_type,
                             bus);
}
| 245 | |
// Polls once per millisecond until `path` exists or `timeout` is used up.
//
// The path is always checked at least once, even with a zero timeout. Only the
// sleep time counts against `timeout`, so the real elapsed time is somewhat
// longer. Giving up is logged to stderr; the caller is not told which outcome
// occurred and must check the path itself if it matters.
void wait_for_path_to_exist(std::string_view path,
                            std::chrono::milliseconds timeout)
{
    constexpr std::chrono::milliseconds polling_time = 1ms;
    while (true)
    {
        // The error_code overload keeps a failing check from throwing; an
        // error simply counts as "not there yet".
        std::error_code ec;
        if (std::filesystem::exists(path, ec))
        {
            return;
        }
        if (timeout <= 0ms)
        {
            break;
        }
        sleep_milliseconds(polling_time);
        timeout -= polling_time;
    }
    std::cerr << std::format("Failed to wait for {} to exist\n", path);
}
| 262 | |
// Brings up the P2020 GPU card: instantiates its pca6408 GPIO expander on I2C
// bus 14 at address 0x20, finds the gpiochip number the kernel gave it, and
// drives the expander lines that put the MCU in recovery, reset it, and switch
// the mux to it.
//
// Any failure is logged to stderr and ends the sequence early.
void init_p2020_gpu_card()
{
    std::cerr << "Initializing GPU card...\n";

    // Init the P2020 gpio expander
    new_device(14, 0x20, "pca6408");

    // Wait for device to be created
    const auto* device_path = "/sys/bus/i2c/devices/14-0020";
    wait_for_path_to_exist(device_path, 1000ms);

    // Find the GPIO chip number: the device directory holds an entry named
    // "gpiochip<N>". The non-throwing iterator calls are used because the
    // directory may still be missing if the wait above gave up.
    constexpr std::string_view prefix = "gpiochip";
    std::string gpio_chip;
    std::error_code ec;
    std::filesystem::directory_iterator entry(device_path, ec);
    const std::filesystem::directory_iterator end;
    for (; !ec && entry != end; entry.increment(ec))
    {
        const std::string name = entry->path().filename().string();
        if (name.starts_with(prefix))
        {
            gpio_chip = name.substr(prefix.size());
            break;
        }
    }
    if (ec)
    {
        std::cerr << std::format("Error: Unable to list {}: {}\n", device_path,
                                 ec.message());
        return;
    }
    if (gpio_chip.empty())
    {
        std::cerr << "Error: Could not find GPIO chip number\n";
        return;
    }

    std::cerr << "Found GPIO chip: gpiochip" << gpio_chip << "\n";
    unsigned int gpiochipint = 0;
    const char* first = gpio_chip.data();
    const char* last = first + gpio_chip.size();
    std::from_chars_result r = std::from_chars(first, last, gpiochipint);
    // Require the whole suffix to be a number
    if (r.ec != std::errc{} || r.ptr != last)
    {
        std::cerr << "Failed to convert gpiochip\n";
        return;
    }

    // Set MCU in recovery
    set_gpio_raw(gpiochipint, 3, 1);

    // Reset MCU
    set_gpio_raw(gpiochipint, 4, 0);
    set_gpio_raw(gpiochipint, 4, 1);

    // Switch MUX to MCU
    set_gpio_raw(gpiochipint, 5, 1);
}
| 312 | |
// Reports whether the HMC is present, judged by whether the sysfs node
// /sys/bus/i2c/devices/9-0074 exists. A filesystem error counts as "not
// present". The result is also logged to stderr.
bool hmc_is_present()
{
    std::error_code ec;
    const bool found =
        std::filesystem::exists("/sys/bus/i2c/devices/9-0074", ec);
    const bool exists = found && !ec;

    std::cerr << (exists ? "HMC present in platform\n"
                         : "HMC not present in platform\n");
    return exists;
}
| 331 | |
| 332 | int init_nvidia_gb200(bool has_p2020) |
| 333 | { |
| 334 | // Reset USB hubs |
| 335 | set_gpio("USB_HUB_RESET_L-O", 0, 10000ms); |
| 336 | bool hmc_present = hmc_is_present(); |
| 337 | if (!hmc_present) |
| 338 | { |
| 339 | set_gpio("SEC_USB2_HUB_RST_L-O", 0, 10000ms); |
| 340 | } |
| 341 | |
| 342 | sleep_milliseconds(100ms); |
| 343 | if (!hmc_present) |
| 344 | { |
| 345 | set_gpio("SEC_USB2_HUB_RST_L-O", 1); |
| 346 | } |
| 347 | // Write SGPIO_BMC_EN-O=1 to correctly set mux to send SGPIO signals to |
| 348 | // FPGA |
| 349 | set_gpio("SGPIO_BMC_EN-O", 1); |
| 350 | |
| 351 | // Write the bit for BMC without HMC |
| 352 | set_gpio("HMC_BMC_DETECT-O", static_cast<int>(!hmc_present), 30000ms); |
| 353 | |
| 354 | // Set BMC_EROT_FPGA_SPI_MUX_SEL-O = 1 to enable FPGA to access its EROT |
| 355 | set_gpio("BMC_EROT_FPGA_SPI_MUX_SEL-O", 1); |
| 356 | |
| 357 | // Enable 12V |
| 358 | set_gpio("BMC_12V_CTRL-O", 1, 10000ms); |
| 359 | |
| 360 | set_gpio("PWR_BRAKE_L-O", 1); |
| 361 | set_gpio("SHDN_REQ_L-O", 1); |
| 362 | set_gpio("SHDN_FORCE_L-O", 1); |
| 363 | // Hold in reset (asserted) after standby power enabled |
| 364 | set_gpio("SYS_RST_IN_L-O", 0); |
| 365 | |
| 366 | GpioEvent fpga_ready_wait = GpioEvent("FPGA_READY_BMC-I", 1); |
| 367 | GpioEvent sec_erot_fpga_rst = GpioEvent("SEC_FPGA_READY_BMC-I", 1); |
| 368 | |
| 369 | // Release FPGA EROT from reset |
| 370 | set_gpio("EROT_FPGA_RST_L-O", 1); |
| 371 | set_gpio("SEC_EROT_FPGA_RST_L-O", 1); |
| 372 | |
| 373 | sleep_milliseconds(100ms); |
| 374 | |
| 375 | set_gpio("FPGA_RST_L-O", 1); |
| 376 | |
| 377 | if (fpga_ready_wait.wait() != GpioEventResult::Asserted) |
| 378 | { |
| 379 | std::cerr << "FPGA_READY_BMC-I failed to assert\n"; |
| 380 | // return EXIT_FAILURE; |
| 381 | } |
| 382 | |
| 383 | if (sec_erot_fpga_rst.wait() != GpioEventResult::Asserted) |
| 384 | { |
| 385 | std::cerr << "SEC_FPGA_READY_BMC-I failed to assert\n"; |
| 386 | // return EXIT_FAILURE; |
| 387 | } |
| 388 | |
| 389 | // ReInitialize the FPGA connected I2C buses to unstick them and let |
| 390 | // FruDevice know it can scan for FRUs I2c bus 1 |
| 391 | rebind_i2c("1e78a100"); |
| 392 | // I2c bus 2 |
| 393 | rebind_i2c("1e78a180"); |
| 394 | |
| 395 | // Set sgpio signals |
| 396 | set_gpio("RUN_POWER_EN-O", 1); |
| 397 | set_gpio("SYS_RST_IN_L-O", 1); |
| 398 | set_gpio("GLOBAL_WP_BMC-O", 0); |
| 399 | |
| 400 | set_gpio("BMC_READY-O", 1); |
| 401 | |
| 402 | if (has_p2020) |
| 403 | { |
| 404 | init_p2020_gpu_card(); |
| 405 | } |
| 406 | |
| 407 | set_gpio("USB_HUB_RESET_L-O", 1); |
| 408 | if (!hmc_present) |
| 409 | { |
| 410 | set_gpio("SEC_USB2_HUB_RST_L-O", 1); |
| 411 | } |
| 412 | |
| 413 | sd_notify(0, "READY=1"); |
| 414 | std::cerr << "Platform init complete\n"; |
| 415 | pause(); |
| 416 | std::cerr << "Releasing platform\n"; |
| 417 | |
| 418 | return EXIT_SUCCESS; |
| 419 | } |
| 420 | |
| 421 | int init_nvidia_gb200_base() |
| 422 | { |
| 423 | return init_nvidia_gb200(false); |
| 424 | } |
| 425 | |
| 426 | int init_nvidia_gb200_with_p2020() |
| 427 | { |
| 428 | return init_nvidia_gb200(true); |
| 429 | } |
| 430 | |
| 431 | constexpr std::array<std::pair<std::string_view, int (*)()>, 2> init_functions{ |
| 432 | {{"nvidia-gb200", init_nvidia_gb200_base}, |
| 433 | {"nvidia-gb200-with-p2020", init_nvidia_gb200_with_p2020}}}; |
| 434 | |
// Command-line entry point: `init <platform_name>` looks the platform up in
// `init_functions` and runs its init function.
//
// Returns the init function's result, or EXIT_FAILURE when the platform name
// is unknown or the init function throws. Argument errors are handled by
// CLI11_PARSE, which returns from main with CLI11's exit code.
int main(int argc, char** argv)
{
    CLI::App app("Platform init CLI");

    app.require_subcommand();

    CLI::App* init_sub =
        app.add_subcommand("init", "Initialize the platform and daemonize");
    std::string platform_name;
    init_sub
        ->add_option("platform_name", platform_name,
                     "Name of the platform to init")
        ->required();

    CLI11_PARSE(app, argc, argv)

    // Plain `auto`: std::array iterators are not guaranteed to be raw pointers
    const auto it = std::ranges::find_if(
        init_functions, [&platform_name](const auto& entry) {
            return entry.first == platform_name;
        });
    if (it == init_functions.end())
    {
        std::cerr << std::format("Unknown platform \"{}\". Supported:\n",
                                 platform_name);
        for (const auto& [name, init] : init_functions)
        {
            std::cerr << std::format("  {}\n", name);
        }
        std::cerr << init_sub->help() << "\n";
        return EXIT_FAILURE;
    }

    try
    {
        return it->second();
    }
    catch (const std::exception& e)
    {
        // Exit with a logged failure instead of std::terminate
        std::cerr << std::format("Platform init failed: {}\n", e.what());
        return EXIT_FAILURE;
    }
}