blob: 7a947cf215c78c5a48ff1eebedfba27699b41fd8 [file] [log] [blame]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: 2025 NVIDIA
3
#include <fcntl.h>
#include <systemd/sd-daemon.h>
#include <unistd.h>

#include <CLI/CLI.hpp>
#include <gpiod.hpp>

#include <algorithm>
#include <array>
#include <charconv>
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <format>
#include <fstream>
#include <iostream>
#include <string>
#include <string_view>
#include <system_error>
#include <thread>
#include <unordered_map>
#include <utility>
20
21using namespace std::chrono_literals;
22
23constexpr static const char* app_name = "platform_init";
24
25// Map of GPIO name to line. Holds lines open for the duration of the program
26static std::unordered_map<std::string, gpiod::line> io;
27
// Log the requested delay to stderr, then block the calling thread for that
// long.
void sleep_milliseconds(std::chrono::milliseconds milliseconds)
{
    const auto tick_count = milliseconds.count();
    std::cerr << "Sleeping for " << tick_count << " milliseconds\n";
    std::this_thread::sleep_for(milliseconds);
}
34
// Drive the named GPIO line to `value`.
//
// The first successful call for a given name looks the line up, requests it as
// an output with `value` as its initial level, and leaves the handle in the
// global `io` map so the line stays open. Later calls for the same name only
// change the level.
//
// line_name    - GPIO line name to look up.
// value        - level to drive.
// find_timeout - how long to keep retrying the lookup (polled every 10 ms)
//                when the line is not found. The default of 0 means a single
//                attempt with no waiting.
//
// Failures are logged to stderr and the function returns without throwing for
// request/set errors.
void set_gpio(const char* line_name, int value,
              std::chrono::milliseconds find_timeout = 0ms)
{
    std::cerr << std::format("{} Request to set to {}\n", line_name, value);
    constexpr std::chrono::milliseconds polling_time = 10ms;
    gpiod::line& line = io[line_name];

    if (line)
    {
        // Already requested by an earlier call: only the level changes.
        std::cerr << std::format("{} Setting to {}\n", line_name, value);
        try
        {
            line.set_value(value);
        }
        catch (const std::system_error& e)
        {
            std::cerr << std::format("{} unable to set value {}\n", line_name,
                                     e.what());
        }
        return;
    }

    // Look the line up, retrying until it appears or the timeout is used up.
    while (true)
    {
        line = gpiod::find_line(line_name);
        if (line)
        {
            break;
        }
        if (find_timeout <= 0ms)
        {
            std::cerr << std::format("{} Unable to find\n", line_name);
            return;
        }
        std::cerr << std::format("{} not found yet, waiting and retrying\n",
                                 line_name);
        sleep_milliseconds(polling_time);
        find_timeout -= polling_time;
    }

    try
    {
        // Requesting as an output with a default value also sets the level,
        // so no separate set_value() is needed on this path.
        line.request({app_name, gpiod::line_request::DIRECTION_OUTPUT, 0},
                     value);
    }
    catch (const std::system_error& e)
    {
        std::cerr << std::format("{} unable to set direction and value {}\n",
                                 line_name, e.what());
        // Drop the found-but-unrequested handle from the cache. Otherwise the
        // next call would skip the request and call set_value() on a line
        // that was never requested.
        line = gpiod::line();
        return;
    }
    std::cerr << std::format("{} Set to {}\n", line_name, value);
}
79
// Read the current level of the named GPIO line.
//
// The line is looked up and requested as an input through a local handle that
// is not stored in `io`.
//
// Returns the value reported by the line, or -1 if the line cannot be found,
// cannot be requested, or cannot be read. Every failure is logged to stderr.
int get_gpio(const char* line_name)
{
    std::cerr << std::format("{} Request to get\n", line_name);

    gpiod::line line = gpiod::find_line(line_name);
    if (!line)
    {
        std::cerr << std::format("{} Set unable to find\n", line_name);
        return -1;
    }
    try
    {
        line.request({app_name, gpiod::line_request::DIRECTION_INPUT, 0});
    }
    catch (const std::system_error& e)
    {
        std::cerr << std::format("{} unable to set {}\n", line_name, e.what());
        // Without a successful request there is nothing valid to read.
        return -1;
    }

    int value = -1;
    try
    {
        value = line.get_value();
    }
    catch (const std::system_error& e)
    {
        std::cerr << std::format("{} unable to read {}\n", line_name,
                                 e.what());
        return -1;
    }
    std::cerr << std::format("{} was {}\n", line_name, value);
    return value;
}
103
// Outcome reported by GpioEvent::wait().
enum class GpioEventResult
{
    // The line was never set up, so no wait could be performed.
    Error,
    // An event was seen on the line before the wait expired.
    Asserted,
    // The wait expired without an event.
    Timeout,
};
110
// Watches one GPIO input for a transition to a target level.
//
// The constructor finds the line and requests edge events on it (rising edge
// for a non-zero target, falling edge for zero). wait() then blocks until the
// line reaches the target level or 120 seconds pass.
struct GpioEvent
{
    // line_name_in - GPIO line name to look up.
    // value_in     - target level; only zero vs. non-zero is significant.
    //
    // On any failure the error is logged and `line` is left empty, which
    // makes wait() return GpioEventResult::Error.
    GpioEvent(const char* line_name_in, int value_in) :
        line_name(line_name_in), value(value_in)
    {
        line = gpiod::find_line(line_name);
        if (!line)
        {
            std::cerr << std::format("{} GpioEvent: Unable to find\n",
                                     line_name);
            return;
        }
        const int edge = (value != 0)
                             ? ::gpiod::line_request::EVENT_RISING_EDGE
                             : ::gpiod::line_request::EVENT_FALLING_EDGE;

        try
        {
            line.request({app_name, edge, 0});

            const int val = line.get_value();
            if ((val != 0) == (value != 0))
            {
                std::cerr << std::format("{} GpioEvent is already {}\n",
                                         line_name, val);
            }
            else
            {
                std::cerr << std::format("GpioEvent created for {}\n",
                                         line_name);
            }
        }
        catch (const std::system_error& e)
        {
            std::cerr << std::format("{} GpioEvent: unable to request {}\n",
                                     line_name, e.what());
            // Leave no half-initialised handle behind for wait() to use.
            line = gpiod::line();
        }
    }

    // Block until the line is at the target level.
    //
    // Returns Asserted if the line is already at the target level or an edge
    // event arrives within 120 seconds, Timeout if the wait expires, and
    // Error if the line was not set up or libgpiod reports a failure.
    GpioEventResult wait()
    {
        if (!line)
        {
            std::cerr << std::format("Line {} wasn't initialized\n", line_name);
            return GpioEventResult::Error;
        }
        try
        {
            // An edge that happened before the request was made never
            // produces an event, so check the level first rather than
            // sleeping for the whole timeout.
            if ((line.get_value() != 0) == (value != 0))
            {
                std::cerr << std::format("{} already at {}, not waiting\n",
                                         line_name, value);
                return GpioEventResult::Asserted;
            }

            std::cerr << std::format("{} Waiting to go to {}\n", line_name,
                                     (value != 0) ? "assert" : "deassert");
            const bool events = line.event_wait(std::chrono::seconds(120));
            if (!events)
            {
                std::cerr << std::format("{} Timeout\n", line_name);
                return GpioEventResult::Timeout;
            }
        }
        catch (const std::system_error& e)
        {
            std::cerr << std::format("{} wait failed {}\n", line_name,
                                     e.what());
            return GpioEventResult::Error;
        }

        std::cerr << std::format("{} Asserted\n", line_name);

        return GpioEventResult::Asserted;
    }

    // Handle for the watched line; empty when setup failed.
    gpiod::line line;
    // Name used for lookup and in log messages.
    std::string line_name;
    // Target level passed to the constructor.
    int value;
};
164
// Write "<number>.i2c\n" to one attribute file of the aspeed-i2c-bus platform
// driver. Logs and returns false if the file cannot be opened or the write
// does not go through.
static bool write_aspeed_i2c_driver_attr(const std::string& attr_path,
                                         const std::string& number)
{
    std::ofstream attr(attr_path);
    if (!attr)
    {
        std::cerr << std::format("{} unable to open\n", attr_path);
        return false;
    }
    attr << std::format("{}.i2c\n", number);
    // ofstream does not throw by default; flush so a rejected write shows up
    // in the stream state instead of being lost when the stream is destroyed.
    attr.flush();
    if (!attr)
    {
        std::cerr << std::format("{} unable to write\n", attr_path);
        return false;
    }
    return true;
}

// Unbind and then re-bind the device "<number>.i2c" from the aspeed-i2c-bus
// platform driver through its sysfs unbind/bind files.
//
// number - device name without the ".i2c" suffix, e.g. "1e78a100".
//
// A failed unbind is logged but the bind is still attempted, so a device that
// is currently unbound still gets bound.
void rebind_i2c(std::string number)
{
    const std::string unbind_path =
        "/sys/bus/platform/drivers/aspeed-i2c-bus/unbind";
    const std::string bind_path =
        "/sys/bus/platform/drivers/aspeed-i2c-bus/bind";

    if (write_aspeed_i2c_driver_attr(unbind_path, number))
    {
        std::cerr << std::format("{} unbound\n", number);
    }

    if (write_aspeed_i2c_driver_attr(bind_path, number))
    {
        std::cerr << std::format("{} bound\n", number);
    }
}
206
// Request line `bit_num` of "gpiochip<chip_num>" as an output with `value` as
// its initial level. The chip and line handles are local to this call.
// std::system_error from libgpiod is caught and logged.
void set_gpio_raw(unsigned int chip_num, unsigned int bit_num, int value)
{
    const std::string chip_name = std::format("gpiochip{}", chip_num);
    std::cerr << std::format("Setting gpiochip{} bit {} to {}\n", chip_num,
                             bit_num, value);
    try
    {
        gpiod::chip target_chip(chip_name);
        gpiod::line target_line = target_chip.get_line(bit_num);
        const gpiod::line_request output_request{
            app_name, gpiod::line_request::DIRECTION_OUTPUT, 0};
        target_line.request(output_request, value);
        std::cerr << std::format("gpiochip{} bit {} set to {}\n", chip_num,
                                 bit_num, value);
    }
    catch (const std::system_error& err)
    {
        std::cerr << std::format("Error setting gpiochip{} bit {}: {}\n",
                                 chip_num, bit_num, err.what());
    }
}
227
// Ask the kernel to instantiate an I2C device by writing
// "<device_type> 0x<address>" to the bus's sysfs new_device file.
//
// bus         - I2C bus number (selects /sys/bus/i2c/devices/i2c-<bus>).
// address     - device address, written as at least two hex digits.
// device_type - device type name written before the address.
//
// Failures to open or write the file are logged to stderr; nothing is thrown
// or returned.
void new_device(unsigned int bus, unsigned int address,
                std::string_view device_type)
{
    const std::string path =
        std::format("/sys/bus/i2c/devices/i2c-{}/new_device", bus);
    std::cerr << std::format("attempting to open {}\n", path);
    std::ofstream new_device_file(path);
    if (!new_device_file)
    {
        std::cerr << "Error: Unable to create I2C device\n";
        return;
    }
    new_device_file << std::format("{} 0x{:02x}", device_type, address);
    // close() flushes; a rejected write shows up as a failed stream state.
    new_device_file.close();
    if (new_device_file.fail())
    {
        std::cerr << std::format("Error: Unable to write {}\n", path);
        return;
    }

    std::cerr << std::format("{} device created at bus {}\n", device_type,
                             bus);
}
245
// Poll the filesystem until `path` exists or `timeout` has elapsed.
//
// path    - filesystem path to probe.
// timeout - upper bound on how long to keep polling; the path is always
//           checked at least once, even with a zero timeout.
//
// Returns as soon as the path exists. If the deadline passes first, a failure
// message is written to stderr and the function returns; callers get no
// return value and must re-check the path themselves if they need to know.
void wait_for_path_to_exist(std::string_view path,
                            std::chrono::milliseconds timeout)
{
    constexpr std::chrono::milliseconds poll_interval{1};
    const auto deadline = std::chrono::steady_clock::now() + timeout;

    while (true)
    {
        // The error_code overload keeps exists() from throwing; an error is
        // treated the same as "not there yet".
        std::error_code ec;
        if (std::filesystem::exists(path, ec))
        {
            return;
        }
        if (std::chrono::steady_clock::now() >= deadline)
        {
            break;
        }
        // Sleep directly instead of through the logging sleep helper so the
        // log is not flooded with one line per millisecond of polling.
        std::this_thread::sleep_for(poll_interval);
    }
    std::cerr << "Failed to wait for " << path << " to exist\n";
}
262
// Bring up the P2020 GPU card: instantiate its pca6408 GPIO expander on I2C
// bus 14 at address 0x20, find the gpiochip the kernel created for it, then
// drive expander bits 3, 4 and 5 in sequence.
//
// Every failure is logged to stderr and ends the function early; nothing is
// thrown for a missing device directory.
void init_p2020_gpu_card()
{
    std::cerr << "Initializing GPU card...\n";

    // Init the P2020 gpio expander
    new_device(14, 0x20, "pca6408");

    // Wait for device to be created
    const auto* device_path = "/sys/bus/i2c/devices/14-0020";
    wait_for_path_to_exist(device_path, 1000ms);

    // The wait above reports nothing back, so open the directory with the
    // non-throwing overload and bail out if it still is not there.
    std::error_code dir_ec;
    std::filesystem::directory_iterator entries(device_path, dir_ec);
    if (dir_ec)
    {
        std::cerr << std::format("Error: Unable to list {}: {}\n", device_path,
                                 dir_ec.message());
        return;
    }

    // Find the GPIO chip number: the text following "gpiochip" in the name of
    // the first matching directory entry.
    constexpr std::string_view chip_prefix = "gpiochip";
    std::string gpio_chip;
    for (const auto& entry : entries)
    {
        const std::string name = entry.path().filename().string();
        const std::size_t pos = name.find(chip_prefix);
        if (pos != std::string::npos)
        {
            gpio_chip = name.substr(pos + chip_prefix.size());
            break;
        }
    }
    if (gpio_chip.empty())
    {
        std::cerr << "Error: Could not find GPIO chip number\n";
        return;
    }

    std::cerr << "Found GPIO chip: gpiochip" << gpio_chip << "\n";
    unsigned int gpiochipint = 0;
    const char* const first = gpio_chip.data();
    const char* const last = first + gpio_chip.size();
    const std::from_chars_result r = std::from_chars(first, last, gpiochipint);
    // Require the whole suffix to be a number, not just a numeric prefix.
    if (r.ec != std::errc{} || r.ptr != last)
    {
        std::cerr << "Failed to convert gpiochip\n";
        return;
    }

    // Set MCU in recovery
    set_gpio_raw(gpiochipint, 3, 1);

    // Reset MCU
    set_gpio_raw(gpiochipint, 4, 0);
    set_gpio_raw(gpiochipint, 4, 1);

    // Switch MUX to MCU
    set_gpio_raw(gpiochipint, 5, 1);
}
312
// Report whether the sysfs entry /sys/bus/i2c/devices/9-0074 exists, which
// this file treats as "HMC present".
//
// Returns true only when the path exists and the existence check itself
// reported no error. The result is also logged to stderr as one complete
// line.
bool hmc_is_present()
{
    std::error_code ec;
    const bool found =
        std::filesystem::exists("/sys/bus/i2c/devices/9-0074", ec);
    // A failed check counts as "not present".
    const bool present = found && !ec;

    std::cerr << (present ? "HMC present in platform\n"
                          : "HMC not present in platform\n");
    return present;
}
331
332int init_nvidia_gb200(bool has_p2020)
333{
334 // Reset USB hubs
335 set_gpio("USB_HUB_RESET_L-O", 0, 10000ms);
336 bool hmc_present = hmc_is_present();
337 if (!hmc_present)
338 {
339 set_gpio("SEC_USB2_HUB_RST_L-O", 0, 10000ms);
340 }
341
342 sleep_milliseconds(100ms);
343 if (!hmc_present)
344 {
345 set_gpio("SEC_USB2_HUB_RST_L-O", 1);
346 }
347 // Write SGPIO_BMC_EN-O=1 to correctly set mux to send SGPIO signals to
348 // FPGA
349 set_gpio("SGPIO_BMC_EN-O", 1);
350
351 // Write the bit for BMC without HMC
352 set_gpio("HMC_BMC_DETECT-O", static_cast<int>(!hmc_present), 30000ms);
353
354 // Set BMC_EROT_FPGA_SPI_MUX_SEL-O = 1 to enable FPGA to access its EROT
355 set_gpio("BMC_EROT_FPGA_SPI_MUX_SEL-O", 1);
356
357 // Enable 12V
358 set_gpio("BMC_12V_CTRL-O", 1, 10000ms);
359
360 set_gpio("PWR_BRAKE_L-O", 1);
361 set_gpio("SHDN_REQ_L-O", 1);
362 set_gpio("SHDN_FORCE_L-O", 1);
363 // Hold in reset (asserted) after standby power enabled
364 set_gpio("SYS_RST_IN_L-O", 0);
365
366 GpioEvent fpga_ready_wait = GpioEvent("FPGA_READY_BMC-I", 1);
367 GpioEvent sec_erot_fpga_rst = GpioEvent("SEC_FPGA_READY_BMC-I", 1);
368
369 // Release FPGA EROT from reset
370 set_gpio("EROT_FPGA_RST_L-O", 1);
371 set_gpio("SEC_EROT_FPGA_RST_L-O", 1);
372
373 sleep_milliseconds(100ms);
374
375 set_gpio("FPGA_RST_L-O", 1);
376
377 if (fpga_ready_wait.wait() != GpioEventResult::Asserted)
378 {
379 std::cerr << "FPGA_READY_BMC-I failed to assert\n";
380 // return EXIT_FAILURE;
381 }
382
383 if (sec_erot_fpga_rst.wait() != GpioEventResult::Asserted)
384 {
385 std::cerr << "SEC_FPGA_READY_BMC-I failed to assert\n";
386 // return EXIT_FAILURE;
387 }
388
389 // ReInitialize the FPGA connected I2C buses to unstick them and let
390 // FruDevice know it can scan for FRUs I2c bus 1
391 rebind_i2c("1e78a100");
392 // I2c bus 2
393 rebind_i2c("1e78a180");
394
395 // Set sgpio signals
396 set_gpio("RUN_POWER_EN-O", 1);
397 set_gpio("SYS_RST_IN_L-O", 1);
398 set_gpio("GLOBAL_WP_BMC-O", 0);
399
400 set_gpio("BMC_READY-O", 1);
401
402 if (has_p2020)
403 {
404 init_p2020_gpu_card();
405 }
406
407 set_gpio("USB_HUB_RESET_L-O", 1);
408 if (!hmc_present)
409 {
410 set_gpio("SEC_USB2_HUB_RST_L-O", 1);
411 }
412
413 sd_notify(0, "READY=1");
414 std::cerr << "Platform init complete\n";
415 pause();
416 std::cerr << "Releasing platform\n";
417
418 return EXIT_SUCCESS;
419}
420
421int init_nvidia_gb200_base()
422{
423 return init_nvidia_gb200(false);
424}
425
426int init_nvidia_gb200_with_p2020()
427{
428 return init_nvidia_gb200(true);
429}
430
431constexpr std::array<std::pair<std::string_view, int (*)()>, 2> init_functions{
432 {{"nvidia-gb200", init_nvidia_gb200_base},
433 {"nvidia-gb200-with-p2020", init_nvidia_gb200_with_p2020}}};
434
// Command-line entry point.
//
// Usage: <program> init <platform_name>
//
// Parses the arguments with CLI11, looks `platform_name` up in
// `init_functions`, and returns whatever the matching init function returns.
// An unknown platform name prints an error plus the subcommand help and
// returns EXIT_FAILURE. Parse errors are handled by CLI11_PARSE, which
// returns from main itself.
int main(int argc, char** argv)
{
    CLI::App app("Platform init CLI");

    app.require_subcommand();

    CLI::App* init_sub =
        app.add_subcommand("init", "Initialize the platform and daemonize");
    std::string platform_name;
    init_sub
        ->add_option("platform_name", platform_name,
                     "Name of the platform to init")
        ->required();

    CLI11_PARSE(app, argc, argv)

    // Deduce the iterator type with plain `auto`: std::array iterators are
    // not guaranteed to be raw pointers.
    const auto it = std::ranges::find_if(
        init_functions, [&platform_name](const auto& entry) {
            return entry.first == platform_name;
        });
    if (it == init_functions.end())
    {
        std::cerr << std::format("Unknown platform {}\n", platform_name);
        std::cerr << init_sub->help() << "\n";
        return EXIT_FAILURE;
    }

    return it->second();
}