blob: 79e396f666d399912571cbd9053803034c083b89 [file] [log] [blame]
Marc Olberding5d50e522025-09-03 18:23:32 -07001#include "gpio.hpp"
2#include "i2c.hpp"
3#include "utilities.hpp"
4
5#include <systemd/sd-daemon.h>
6
Marc Olberdingc9c86122025-09-08 17:45:21 -07007#include <sdbusplus/asio/connection.hpp>
8
Marc Olberding5d50e522025-09-03 18:23:32 -07009#include <chrono>
Marc Olberding1b83d212025-10-03 17:42:58 -070010#include <expected>
Marc Olberding5d50e522025-09-03 18:23:32 -070011#include <filesystem>
12#include <fstream>
13#include <iostream>
14#include <thread>
Marc Olberdingc9c86122025-09-08 17:45:21 -070015#include <unordered_map>
Marc Olberding5d50e522025-09-03 18:23:32 -070016
// Value type of the string-keyed property map that populate_gpu() appends to
// the EntityManager "AddObject" method call.
// NOTE(review): the order of the alternatives presumably has to match what the
// receiving service expects - confirm before reordering.
using JsonVariantType =
    std::variant<uint8_t, std::vector<std::string>, std::vector<double>,
                 std::string, int64_t, uint64_t, double, int32_t, uint32_t,
                 int16_t, uint16_t, bool>;
Marc Olberding5d50e522025-09-03 18:23:32 -070021namespace nvidia
22{
23
24using steady_clock = std::chrono::steady_clock;
25using namespace std::chrono_literals;
26
/**
 * @brief Log a shell command to stderr, then run it through std::system.
 *
 * Execution is best effort: a non-zero status is reported on stderr but is
 * neither returned nor turned into an exception.
 *
 * @param cmd Command line handed to the shell. It does not have to be
 *            NUL-terminated.
 */
void logged_system(std::string_view cmd)
{
    std::cerr << std::format("calling {} \n", cmd);

    // std::system() requires a NUL-terminated C string, which a
    // std::string_view does not guarantee. Make an owning copy first.
    const std::string command{cmd};
    const int rc = std::system(command.c_str());
    if (rc != 0)
    {
        std::cerr << std::format("command '{}' returned status {}\n", command,
                                 rc);
    }
}
33
34void setup_devmem()
35{
36 logged_system("mknod /dev/mem c 1 1");
37}
38
/**
 * @brief Write the 32-bit register at 0x1e6e24bc using the devmem tool.
 *
 * @param enable When true the value 0x3f000000 is written, otherwise 0.
 */
void handle_passthrough_registers(bool enable)
{
    static constexpr uint32_t passthrough_reg = 0x1e6e24bc;

    const std::string devmem_cmd =
        enable ? std::format("devmem 0x{:x} 32 0x3f000000", passthrough_reg)
               : std::format("devmem 0x{:x} 32 0", passthrough_reg);

    logged_system(devmem_cmd);
}
53
/**
 * @brief Poll the HPM CPLD until it reports that i2c is ready.
 *
 * Register 0xf2 of the CPLD is read every 10 seconds for up to 20 minutes.
 * The function returns as soon as the register reads back 1.
 *
 * @throws std::runtime_error if a read from the CPLD fails, or if the
 *         register never reads 1 before the deadline.
 */
void wait_for_i2c_ready()
{
    static constexpr uint8_t ready_reg = 0xf2;

    // hpm cpld is at bus 4, address 0x17
    i2c::RawDevice hpm_cpld{4, 0x17};
    const auto deadline = steady_clock::now() + 20min;

    // The increment expression provides the 10 second pause between polls.
    for (; steady_clock::now() < deadline; std::this_thread::sleep_for(10s))
    {
        const auto status = hpm_cpld.read_byte(ready_reg);

        if (!status.has_value())
        {
            const std::string msg =
                std::format("Unable to communicate with cpld. rc: {}\n",
                            status.error().value());
            std::cerr << msg;
            throw std::runtime_error(msg);
        }

        if (*status == 1)
        {
            return;
        }
    }

    throw std::runtime_error("Waiting for host timed out!\n");
}
85
/**
 * @brief Instantiate an i2c device through the sysfs new_device interface.
 *
 * Waits for /sys/bus/i2c/devices/i2c-<bus>/new_device to appear, writes
 * "<dev_type> 0x<address>" to it, then waits for the resulting
 * /sys/bus/i2c/devices/<bus>-<address> node.
 *
 * Terminates the process with EXIT_FAILURE if new_device cannot be opened.
 * A failed write is only logged; it does not abort.
 *
 * @param bus      i2c bus number to create the device on.
 * @param address  7-bit i2c address of the device.
 * @param dev_type Driver/device name to write to new_device.
 */
void probe_dev(size_t bus, uint8_t address, std::string_view dev_type)
{
    std::string new_device_path =
        std::format("/sys/bus/i2c/devices/i2c-{}/new_device", bus);

    wait_for_path_to_exist(new_device_path, std::chrono::milliseconds{1000});

    std::ofstream new_device{new_device_path};
    if (!new_device.good())
    {
        std::cerr << std::format("Unable to open {}\n", new_device_path);
        std::exit(EXIT_FAILURE);
    }

    new_device << std::format("{} 0x{:02x}", dev_type, address);
    // The write only reaches sysfs when the stream is flushed, so the
    // stream state has to be checked after close().
    new_device.close();
    if (new_device.fail())
    {
        std::cerr << std::format("Failed to write \"{} 0x{:02x}\" to {}\n",
                                 dev_type, address, new_device_path);
    }

    std::string created_path =
        std::format("/sys/bus/i2c/devices/{}-{:04x}", bus, address);
    wait_for_path_to_exist(created_path, 10ms);
}
107
/**
 * @brief Probe an i2c mux and set its idle_state attribute to -2.
 *
 * @param bus      Parent i2c bus of the mux.
 * @param address  i2c address of the mux.
 * @param dev_type Device name passed on to probe_dev().
 *
 * @throws std::runtime_error if the idle_state sysfs file cannot be opened.
 */
void create_i2c_mux(size_t bus, uint8_t address, std::string_view dev_type)
{
    probe_dev(bus, address, dev_type);

    const std::string idle_state_path =
        std::format("/sys/bus/i2c/devices/{}-{:04x}/idle_state", bus, address);

    std::ofstream idle_state{idle_state_path};
    if (idle_state.good())
    {
        // -2 is idle-mux-disconnect
        idle_state << -2;
        idle_state.close();
        return;
    }

    const std::string msg =
        std::format("Unable to open {}\n", idle_state_path.c_str());
    std::cerr << msg;
    throw std::runtime_error(msg);
}
126
/**
 * @brief Look up the i2c bus number that sits behind one channel of a mux.
 *
 * Scans /sys/bus/i2c/devices/<parent_bus>-<address>/channel-<channel>/i2c-dev/
 * for entries named "i2c-<bus>" and returns the parsed <bus>. If several
 * entries are present, the last one iterated wins.
 *
 * @param parent_bus Bus the mux itself is attached to.
 * @param address    i2c address of the mux.
 * @param channel    Mux channel index.
 * @return The bus number of the channel.
 *
 * @throws std::runtime_error if an entry is not of the form "i2c-<number>",
 *         or if the directory cannot be read or holds no usable entry.
 */
size_t get_bus_from_channel(size_t parent_bus, uint8_t address, size_t channel)
{
    static constexpr std::string_view bus_prefix = "i2c-";

    const std::filesystem::path path =
        std::format("/sys/bus/i2c/devices/{}-{:04x}/channel-{}/i2c-dev/",
                    parent_bus, address, channel);
    int bus = -1;
    std::error_code ec{};
    for (const auto& entry : std::filesystem::directory_iterator(path, ec))
    {
        // we expect to see i2c-<bus>, trim and parse everything after the dash
        const std::string name = entry.path().filename().string();
        std::cerr << "Reading from " << name << "\n";

        // Check the prefix before skipping over it. Blindly adding 4 to
        // data() would point past the end of a shorter name.
        bool parsed = false;
        if (name.starts_with(bus_prefix))
        {
            const char* first = name.data() + bus_prefix.size();
            const char* last = name.data() + name.size();
            const auto result = std::from_chars(first, last, bus);
            parsed = (result.ec == std::errc{});
        }

        if (!parsed)
        {
            const std::string err_s = std::format("Failed to parse {}\n", name);
            std::cerr << err_s;
            throw std::runtime_error(err_s);
        }
    }

    if (bus < 0 || ec)
    {
        const std::string err_s =
            std::format("Failed to find a channel at {}\n", path.string());
        std::cerr << err_s;
        throw std::runtime_error(err_s);
    }
    return static_cast<size_t>(bus);
}
156
/**
 * @brief Probe the pca9555 at address 0x26 on @p bus and drive its pin 14
 *        high.
 *
 * Terminates the process with EXIT_FAILURE if no gpio chip is found for the
 * probed device.
 *
 * @param bus i2c bus the expander is on.
 */
void bringup_cx8_mcu(size_t bus)
{
    static constexpr uint8_t expander_addr = 0x26;
    // 14 is the reset pin on the MCU
    // reset pin is active low
    static constexpr int mcu_reset_pin = 14;

    probe_dev(bus, expander_addr, "pca9555");

    std::string expander_dir =
        std::format("/sys/bus/i2c/devices/{}-{:04x}/", bus, expander_addr);
    const int chip_idx = gpio::find_chip_idx_from_dir(expander_dir);
    if (chip_idx >= 0)
    {
        gpio::set_raw(chip_idx, mcu_reset_pin, 1);
        return;
    }

    std::cerr << std::format("Failed to find cx8 gpio at {}\n", expander_dir);
    std::exit(EXIT_FAILURE);
}
173
/**
 * @brief Probe the pca6408 at 0x20 behind one channel of the 0x72 mux and
 *        drive its pins 5 and 4 high.
 *
 * Terminates the process with EXIT_FAILURE if no gpio chip is found for the
 * probed device.
 *
 * @param bus     Bus the 0x72 mux is attached to.
 * @param channel Channel of that mux leading to the expander.
 */
void gringup_gpu_sma(size_t bus, size_t channel)
{
    static constexpr uint8_t mux_addr = 0x72;
    static constexpr uint8_t expander_addr = 0x20;
    // pin 4 is the reset pin, active low
    static constexpr int reset_pin = 4;
    // pin 5 engages the telemetry path from the SMA
    static constexpr int telemetry_pin = 5;

    const size_t channel_bus = get_bus_from_channel(bus, mux_addr, channel);
    probe_dev(channel_bus, expander_addr, "pca6408");

    std::string expander_dir = std::format("/sys/bus/i2c/devices/{}-{:04x}/",
                                           channel_bus, expander_addr);
    const int chip_idx = gpio::find_chip_idx_from_dir(expander_dir);
    if (chip_idx >= 0)
    {
        gpio::set_raw(chip_idx, telemetry_pin, 1);
        gpio::set_raw(chip_idx, reset_pin, 1);
        return;
    }

    std::cerr << std::format("Failed to find gpu gpio {}\n", expander_dir);
    std::exit(EXIT_FAILURE);
}
192
193void bringup_gpus_on_mcio(size_t bus)
194{
195 create_i2c_mux(bus, 0x72, "pca9546");
196
197 gringup_gpu_sma(bus, 2);
198 gringup_gpu_sma(bus, 3);
199}
200
201void bringup_cx8_mcio(size_t mux_addr, size_t channel, bool has_cx8)
202{
203 size_t bus = get_bus_from_channel(5, mux_addr, channel);
204 if (has_cx8)
205 {
206 bringup_cx8_mcu(bus);
207 }
208 bringup_gpus_on_mcio(bus);
209}
210
// D-Bus names used when talking to mctpd.
// Service (bus) name of mctpd.
const char* mctpd_service = "au.com.codeconstruct.MCTP1";
// Root object path of mctpd (note the trailing slash).
const char* mctp_obj = "/au/com/codeconstruct/mctp1/";
// Interface that provides AssignEndpoint (see enumerate_mctp).
const char* mctp_busowner = "au.com.codeconstruct.MCTP.BusOwner1";
// Interface that provides PoolStart/PoolEnd (see get_pool_start_and_size).
const char* mctp_bridge = "au.com.codeconstruct.MCTP.Bridge1";
Marc Olberding5d50e522025-09-03 18:23:32 -0700215
Marc Olberdingc9c86122025-09-08 17:45:21 -0700216template <typename PropertyType>
217PropertyType get_property(const char* service, const char* object,
218 const char* interface, const char* property_name)
219{
220 auto b = sdbusplus::bus::new_default_system();
221 auto m = b.new_method_call(service, object,
222 "org.freedesktop.DBus.Properties", "Get");
223 m.append(interface, property_name);
224
225 std::variant<PropertyType> t;
226 auto reply = b.call(m);
227
228 reply.read(t);
229 return std::get<PropertyType>(t);
Marc Olberding5d50e522025-09-03 18:23:32 -0700230}
231
/**
 * @brief Ask mctpd to enumerate the endpoint on one mctpusb interface.
 *
 * Calls AssignEndpoint with an empty address on
 * /au/com/codeconstruct/mctp1/interfaces/mctpusb<device_idx>.
 *
 * @param device_idx Index of the mctpusb net device.
 * @return The eid from the AssignEndpoint reply.
 */
uint8_t enumerate_mctp(uint8_t device_idx)
{
    const std::string iface_obj = std::format(
        "/au/com/codeconstruct/mctp1/interfaces/mctpusb{}", device_idx);

    std::cerr << "calling " << iface_obj << std::endl;

    auto bus = sdbusplus::bus::new_default_system();
    auto request = bus.new_method_call(mctpd_service, iface_obj.c_str(),
                                       mctp_busowner, "AssignEndpoint");

    std::vector<uint8_t> address{};
    request.append(address);

    auto response = bus.call(request);

    // Only the eid is used; the other fields are read to consume the reply.
    uint8_t eid{};
    int32_t net{};
    std::string intf;
    bool probed{};
    response.read(eid, net, intf, probed);

    return eid;
}
258
/**
 * @brief Read the eid pool of a bridge endpoint on network 1 from mctpd.
 *
 * Reads the PoolStart and PoolEnd properties of the endpoint object and
 * derives the size as PoolEnd - PoolStart + 1, truncated to 8 bits.
 *
 * @param eid Endpoint id of the bridge.
 * @return {pool start eid, pool size}.
 */
std::tuple<uint8_t, uint8_t> get_pool_start_and_size(uint8_t eid)
{
    const std::string endpoint_obj =
        std::format("/au/com/codeconstruct/mctp1/networks/1/endpoints/{}", eid);
    std::cerr << "calling " << endpoint_obj << std::endl;

    const auto read_bridge_prop = [&endpoint_obj](const char* prop) {
        return get_property<uint8_t>(mctpd_service, endpoint_obj.c_str(),
                                     mctp_bridge, prop);
    };

    const uint8_t first_eid = read_bridge_prop("PoolStart");
    const uint8_t last_eid = read_bridge_prop("PoolEnd");
    const uint8_t count = static_cast<uint8_t>(last_eid - first_eid + 1);

    std::cerr << std::format("eid {} has pool start {} and size {}", eid,
                             first_eid, count)
              << std::endl;
    return {first_eid, count};
}
278
/**
 * @brief Find the index of the mctpusb net device below a USB interface.
 *
 * Waits up to 20 seconds for <port_string>/net, then looks in it for an
 * entry named "mctpusb<N>" and returns N. For example
 * /sys/devices/platform/ahb/1e6a3000.usb/usb1/1-1/1-1.2/1-1.2.3/1-1.2.3:1.0/net/mctpusb7
 * yields 7.
 *
 * A missing or unreadable directory is reported on stderr and results in -1
 * rather than an exception, so the caller can skip the device.
 *
 * @param port_string sysfs path of the USB interface.
 * @return The device index, or -1 if none was found.
 */
int get_device_from_port_string(std::string_view port_string)
{
    static constexpr std::string_view netdev_prefix = "mctpusb";

    std::filesystem::path path = port_string;
    path /= "net";
    auto p = path.native();
    wait_for_path_to_exist(p, std::chrono::milliseconds{20000});

    int dev_index = -1;
    std::error_code ec{};
    // Non-throwing iteration: the directory is absent when the device is
    // not populated, and that must not abort bring-up of the other devices.
    for (std::filesystem::directory_iterator it{path, ec}, last{};
         !ec && it != last; it.increment(ec))
    {
        std::cerr << "Looking at " << it->path().native() << std::endl;

        const std::string f_name = it->path().filename().native();
        if (!f_name.starts_with(netdev_prefix))
        {
            continue;
        }

        int parsed = -1;
        const auto result =
            std::from_chars(f_name.data() + netdev_prefix.size(),
                            f_name.data() + f_name.size(), parsed);
        if (result.ec == std::errc{} && parsed >= 0)
        {
            dev_index = parsed;
            break;
        }
    }

    if (ec)
    {
        std::cerr << std::format("Unable to read {}: {}\n", path.native(),
                                 ec.message());
    }

    if (dev_index == -1)
    {
        std::cerr << std::format("Unable to find an mctpusb net device at {}\n",
                                 path.native());
        return dev_index;
    }

    std::cerr << "found mctp device index " << dev_index << std::endl;
    return dev_index;
}
312
/**
 * @brief Check whether an inventory object already has a StaticEid.
 *
 * Reads the StaticEid property of the NvidiaMctpVdm configuration interface
 * on /xyz/openbmc_project/inventory/system/board/<board>/<name> from
 * EntityManager. Any exception from that read is treated as "not populated".
 *
 * @param board Board name component of the object path.
 * @param name  Object name below the board.
 * @return true if the property could be read, false otherwise.
 */
bool is_populated(std::string board, std::string name)
{
    const std::string inventory_obj = std::format(
        "/xyz/openbmc_project/inventory/system/board/{}/{}", board, name);
    std::cerr << "inspecting " << inventory_obj << std::endl;

    bool found = false;
    try
    {
        // Only whether the read succeeds matters; the value is discarded.
        static_cast<void>(get_property<uint8_t>(
            "xyz.openbmc_project.EntityManager", inventory_obj.c_str(),
            "xyz.openbmc_project.Configuration.NvidiaMctpVdm", "StaticEid"));
        found = true;
    }
    catch (...)
    {
        found = false;
    }
    return found;
}
331
332void force_rescan()
333{
334 auto b = sdbusplus::bus::new_default_system();
335 auto m = b.new_method_call("xyz.openbmc_project.EntityManager",
336 "/xyz/openbmc_project/EntityManager",
337 "xyz.openbmc_project.EntityManager", "ReScan");
338 b.call(m);
339}
340
/**
 * @brief Add an NvidiaMctpVdm object with a static eid under a board.
 *
 * Does nothing if is_populated() already reports the object. Otherwise calls
 * AddObject on /xyz/openbmc_project/inventory/system/board/<board> with Name,
 * StaticEid and Type set. A failing call is retried every 10 seconds until
 * 3 minutes have passed; after that a timeout is logged and the function
 * returns without adding the object.
 *
 * @param board Board name component of the inventory path.
 * @param eid   Endpoint id stored as StaticEid.
 * @param name  Name of the object to add.
 */
void populate_gpu(std::string board, uint8_t eid, std::string name)
{
    if (is_populated(board, name))
    {
        std::cerr << name << " already exists" << std::endl;
        return;
    }

    const std::string board_obj =
        std::format("/xyz/openbmc_project/inventory/system/board/{}", board);

    std::cerr << "calling with " << board_obj << std::endl;

    const auto deadline =
        std::chrono::steady_clock::now() + std::chrono::minutes{3};

    auto bus = sdbusplus::bus::new_default_system();
    auto add_call = bus.new_method_call(
        "xyz.openbmc_project.EntityManager", board_obj.c_str(),
        "xyz.openbmc_project.AddObject", "AddObject");

    std::unordered_map<std::string, JsonVariantType> properties;
    properties["Name"] = name;
    properties["StaticEid"] = eid;
    properties["Type"] = "NvidiaMctpVdm";

    add_call.append(properties);

    // The deadline is checked before every attempt, including the first.
    while (std::chrono::steady_clock::now() < deadline)
    {
        try
        {
            bus.call(add_call);
            return;
        }
        catch (...)
        {
            std::cerr << "Failed to find " << board_obj << " trying again"
                      << std::endl;
            std::this_thread::sleep_for(std::chrono::seconds{10});
        }
    }

    std::cerr << "Timeout: Failed to add " << board_obj << std::endl;
}
389
// One entry of the device table in bringup_devices().
// Member order matters: the table uses designated initializers and the loop
// unpacks entries with a structured binding in this order.
struct bridge_device
{
    // USB path of the device, relative to the usb_prefix in bringup_devices()
    std::string usb_path;
    // Device name; its "GPU" / "CX8" prefix selects how it is handled
    std::string name;
    // Board name passed to populate_gpu()
    std::string board_name;
};
396
/**
 * @brief Enumerate the known MCTP bridge devices and add their inventory
 *        nodes.
 *
 * For every table entry the mctpusb net device is located from its USB path,
 * the bridge is enumerated through mctpd and its eid pool is queried. For
 * "GPU" entries two nodes are added: "<name>SMA" with the bridge eid, and
 * "<name>" with the first eid of the pool. "CX8" entries are skipped for now.
 * Entries whose net device cannot be found are logged and skipped.
 */
void bringup_devices()
{
    // There's a lot of hackery going on here.
    // This handles bridged endpoints, which are unsupported as of today.
    // The MCUs on this platform act as MCTP bridges. We know their absolute
    // USB path through the platform hub, and that path is symlinked to an
    // mctp net device, so we start there. We also know that the devices a
    // given USB device bridges to always have the same relative ordering
    // inside a given pool. That is not true in general, but it is true for
    // our MCUs.
    // So we put each bridge and its downstream devices through enumeration
    // with mctpd. The response gives us the bridge's eid, and we can then ask
    // mctpd for the pool size and start eid of the bridge pool. From there we
    // infer the eid of each bridged device behind it and call AddObject on
    // EntityManager for each board to bring up the requisite nodes beneath
    // it, which lets the rest of the system start behaving as expected.
    // Once we have real support for bridged eids, we can and should delete
    // this mess.
    static constexpr const char* usb_prefix =
        "/sys/devices/platform/ahb/1e6a3000.usb/usb1/1-1/";
    const std::array<bridge_device, 10> device_name_map = {
        {{.usb_path = "1-1.2/1-1.2.1/1-1.2.1:1.0",
          .name = "GPU_0",
          .board_name = "Nvidia_RTX_PRO_6000_Blackwell_1"},
         {.usb_path = "1-1.1/1-1.1.2/1-1.1.2.1/1-1.1.2.1:1.0",
          .name = "GPU_1",
          .board_name = "Nvidia_RTX_PRO_6000_Blackwell_2"},
         {.usb_path = "1-1.4/1-1.4.1/1-1.4.1:1.0",
          .name = "GPU_2",
          .board_name = "Nvidia_RTX_PRO_6000_Blackwell_3"},
         {.usb_path = "1-1.2/1-1.2.2/1-1.2.2:1.0",
          .name = "GPU_3",
          .board_name = "Nvidia_RTX_PRO_6000_Blackwell_4"},
         {.usb_path = "1-1.1/1-1.1.4/1-1.1.4.1/1-1.1.4.1:1.0",
          .name = "GPU_4",
          .board_name = "Nvidia_RTX_PRO_6000_Blackwell_5"},
         {.usb_path = "1-1.1/1-1.1.2/1-1.1.2.2/1-1.1.2.2:1.0",
          .name = "GPU_5",
          .board_name = "Nvidia_RTX_PRO_6000_Blackwell_6"},
         {.usb_path = "1-1.4/1-1.4.2/1-1.4.2:1.0",
          .name = "GPU_6",
          .board_name = "Nvidia_RTX_PRO_6000_Blackwell_7"},
         {.usb_path = "1-1.2/1-1.2.3/1-1.2.3:1.0",
          .name = "CX8_0",
          .board_name = "NVIDIA_Alon_cx8_Fru"},
         {.usb_path = "1-1.1/1-1.1.4/1-1.1.4.2/1-1.1.4.2:1.0",
          .name = "GPU_7",
          .board_name = "Nvidia_RTX_PRO_6000_Blackwell_8"},
         {.usb_path = "1-1.1/1-1.1.2/1-1.1.2.3/1-1.1.2.3:1.0",
          .name = "CX8_1",
          .board_name = "NVIDIA_Alon_cx8_Fru"}}};

    for (const bridge_device& dev : device_name_map)
    {
        std::cerr << "looking at device " << dev.name << std::endl;

        const std::string sysfs_path =
            std::format("{}/{}", usb_prefix, dev.usb_path);
        const int net_index = get_device_from_port_string(sysfs_path);
        if (net_index < 0)
        {
            std::cerr << std::format(
                "Unable to bring up {} because it doesn't seem to exist\n",
                dev.name);
            continue;
        }

        // enumerate the bridge device
        const uint8_t bridge_eid =
            enumerate_mctp(static_cast<uint8_t>(net_index));

        const auto [first_pool_eid, pool_len] =
            get_pool_start_and_size(bridge_eid);

        std::this_thread::sleep_for(std::chrono::milliseconds{500});

        // yes this sucks, no I don't like it but we know we'll only have two
        // types of bridged endpoints on this platform and it's 9PM the night
        // before it needs to work so we're going to do it *to* it
        const bool is_gpu = dev.name.starts_with("GPU");
        const bool is_cx8 = !is_gpu && dev.name.starts_with("CX8");

        if (is_gpu)
        {
            // each GPU has an SMA, as well as a GPU, they both talk over vdm
            // so add both as separate nodes
            std::cerr << "Adding SMA\n";
            populate_gpu(dev.board_name, bridge_eid, dev.name + "SMA");
            std::cerr << "Adding GPU\n";
            populate_gpu(dev.board_name, first_pool_eid, dev.name);
        }
        else if (is_cx8)
        {
            // TODO: deal with this
            std::cerr << "Skipping CX8's for now\n";
        }
        else
        {
            std::cerr << std::format(
                "Something awful happened with path: {}, name {}\n",
                sysfs_path, dev.name);
        }
    }
}
495
496void wait_for_frus_to_probe()
497{
498 std::string path = "/sys/bus/i2c/devices/17-0056";
499 wait_for_path_to_exist(path, std::chrono::milliseconds{30 * 1000});
500
501 std::this_thread::sleep_for(std::chrono::seconds{30});
Marc Olberding5d50e522025-09-03 18:23:32 -0700502}
503
504int init_nvl32()
505{
506 setup_devmem();
Marc Olberding08ab98d2025-11-20 14:43:28 -0800507 gpio::set("BMC_INIT_DONE", 1);
Marc Olberding5d50e522025-09-03 18:23:32 -0700508 handle_passthrough_registers(false);
509 sd_notify(0, "READY=1");
510
511 wait_for_i2c_ready();
Marc Olberdingc9c86122025-09-08 17:45:21 -0700512 // we suspect that the CPLD tells us we're ready before
513 // we actually are. This sleep stabilizes this discrepency
514 std::this_thread::sleep_for(std::chrono::seconds{1});
Marc Olberding5d50e522025-09-03 18:23:32 -0700515
516 create_i2c_mux(5, 0x70, "pca9548");
517 create_i2c_mux(5, 0x71, "pca9548");
518 create_i2c_mux(5, 0x73, "pca9548");
519 create_i2c_mux(5, 0x75, "pca9548");
520
521 bringup_cx8_mcio(0x70, 1, true);
522 bringup_cx8_mcio(0x70, 5, false);
523 bringup_cx8_mcio(0x73, 3, true);
524 bringup_cx8_mcio(0x73, 7, false);
525
Marc Olberdingc9c86122025-09-08 17:45:21 -0700526 // there's a weird bug in EntityManager
527 // Where Fru devices don't probe automatically
528 // We'll wait for the drivers to be probed
529 // and then force a rescan
530 // we'll follow up with a proper fix
531 wait_for_frus_to_probe();
532
533 force_rescan();
534 // allow for things to settle
535 std::this_thread::sleep_for(std::chrono::seconds{1});
536
537 bringup_devices();
Marc Olberding5d50e522025-09-03 18:23:32 -0700538 std::cerr << "platform init complete\n";
539 pause();
540 std::cerr << "Releasing platform\n";
541
542 return EXIT_SUCCESS;
543}
544
545} // namespace nvidia