| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 1 | /* | 
|  | 2 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & | 
|  | 3 | * AFFILIATES. All rights reserved. | 
|  | 4 | * SPDX-License-Identifier: Apache-2.0 | 
|  | 5 | */ | 
|  | 6 |  | 
|  | 7 | #include "NvidiaGpuThresholds.hpp" | 
|  | 8 |  | 
|  | 9 | #include <MctpRequester.hpp> | 
|  | 10 | #include <NvidiaGpuMctpVdm.hpp> | 
|  | 11 | #include <OcpMctpVdm.hpp> | 
|  | 12 | #include <phosphor-logging/lg2.hpp> | 
|  | 13 |  | 
|  | 14 | #include <array> | 
|  | 15 | #include <cerrno> | 
|  | 16 | #include <cstddef> | 
|  | 17 | #include <cstdint> | 
|  | 18 | #include <functional> | 
|  | 19 | #include <memory> | 
|  | 20 | #include <span> | 
| Marc Olberding | d0125c9 | 2025-10-08 14:37:19 -0700 | [diff] [blame^] | 21 | #include <system_error> | 
| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 22 | #include <vector> | 
|  | 23 |  | 
|  | 24 | void processReadThermalParameterResponse( | 
|  | 25 | const std::function<void(uint8_t, int32_t)>& callback, | 
| Marc Olberding | d0125c9 | 2025-10-08 14:37:19 -0700 | [diff] [blame^] | 26 | const std::error_code& ec, std::span<const uint8_t> respMsg) | 
| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 27 | { | 
| Marc Olberding | d0125c9 | 2025-10-08 14:37:19 -0700 | [diff] [blame^] | 28 | if (ec) | 
| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 29 | { | 
|  | 30 | lg2::error( | 
|  | 31 | "Error reading thermal parameter: sending message over MCTP failed, rc={RC}", | 
| Marc Olberding | d0125c9 | 2025-10-08 14:37:19 -0700 | [diff] [blame^] | 32 | "RC", ec.message()); | 
| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 33 | callback(EPROTO, 0); | 
|  | 34 | return; | 
|  | 35 | } | 
|  | 36 |  | 
|  | 37 | ocp::accelerator_management::CompletionCode cc{}; | 
|  | 38 | uint16_t reasonCode = 0; | 
|  | 39 | int32_t threshold = 0; | 
|  | 40 |  | 
|  | 41 | auto rc = gpu::decodeReadThermalParametersResponse(respMsg, cc, reasonCode, | 
|  | 42 | threshold); | 
|  | 43 |  | 
|  | 44 | if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS) | 
|  | 45 | { | 
|  | 46 | lg2::error( | 
|  | 47 | "Error reading thermal parameter: decode failed, rc={RC}, cc={CC}, reasonCode={RESC}", | 
|  | 48 | "RC", rc, "CC", cc, "RESC", reasonCode); | 
|  | 49 | callback(EPROTO, 0); | 
|  | 50 | return; | 
|  | 51 | } | 
|  | 52 |  | 
|  | 53 | callback(0, threshold); | 
|  | 54 | }; | 
|  | 55 |  | 
|  | 56 | void readThermalParameter(uint8_t eid, uint8_t id, | 
|  | 57 | mctp::MctpRequester& mctpRequester, | 
|  | 58 | const std::function<void(uint8_t, int32_t)>& callback) | 
|  | 59 | { | 
|  | 60 | auto reqMsg = std::make_shared< | 
|  | 61 | std::array<uint8_t, sizeof(gpu::ReadThermalParametersRequest)>>(); | 
|  | 62 |  | 
| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 63 | auto rc = gpu::encodeReadThermalParametersRequest(0, id, *reqMsg); | 
|  | 64 | if (rc != 0) | 
|  | 65 | { | 
|  | 66 | lg2::error( | 
|  | 67 | "Error reading thermal parameter for eid {EID} and parameter id {PID} : encode failed. rc={RC}", | 
|  | 68 | "EID", eid, "PID", id, "RC", rc); | 
|  | 69 | callback(rc, 0); | 
|  | 70 | return; | 
|  | 71 | } | 
|  | 72 |  | 
|  | 73 | mctpRequester.sendRecvMsg( | 
| Marc Olberding | d0125c9 | 2025-10-08 14:37:19 -0700 | [diff] [blame^] | 74 | eid, *reqMsg, | 
|  | 75 | [reqMsg, | 
|  | 76 | callback](const std::error_code& ec, std::span<const uint8_t> buff) { | 
|  | 77 | processReadThermalParameterResponse(callback, ec, buff); | 
| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 78 | }); | 
|  | 79 | } | 
|  | 80 |  | 
|  | 81 | void readThermalParameterCallback( | 
|  | 82 | uint8_t eid, const std::shared_ptr<std::vector<uint8_t>>& ids, | 
|  | 83 | mctp::MctpRequester& mctpRequester, | 
|  | 84 | const std::function<void(uint8_t, std::vector<int32_t>)>& callback, | 
|  | 85 | size_t index, const std::shared_ptr<std::vector<int32_t>>& thresholds, | 
|  | 86 | uint8_t rc, int32_t threshold) | 
|  | 87 | { | 
|  | 88 | if (rc != 0) | 
|  | 89 | { | 
|  | 90 | lg2::error( | 
|  | 91 | "Error reading thermal parameter for eid {EID} and parameter id {PID}. rc={RC}", | 
|  | 92 | "EID", eid, "PID", (*ids)[index], "RC", rc); | 
|  | 93 | callback(rc, *thresholds); | 
|  | 94 | return; | 
|  | 95 | } | 
|  | 96 |  | 
|  | 97 | thresholds->push_back(threshold); | 
|  | 98 |  | 
|  | 99 | ++index; | 
|  | 100 | if (index == ids->size()) | 
|  | 101 | { | 
|  | 102 | callback(rc, *thresholds); | 
|  | 103 | } | 
|  | 104 | else | 
|  | 105 | { | 
|  | 106 | readThermalParameter(eid, (*ids)[index], mctpRequester, | 
|  | 107 | std::bind_front(readThermalParameterCallback, eid, | 
|  | 108 | ids, std::ref(mctpRequester), | 
|  | 109 | callback, index, thresholds)); | 
|  | 110 | } | 
|  | 111 | } | 
|  | 112 |  | 
|  | 113 | void readThermalParameters( | 
|  | 114 | uint8_t eid, const std::vector<uint8_t>& ids, | 
|  | 115 | mctp::MctpRequester& mctpRequester, | 
|  | 116 | const std::function<void(uint8_t, std::vector<int32_t>)>& callback) | 
|  | 117 | { | 
|  | 118 | auto thresholds = std::make_shared<std::vector<int32_t>>(); | 
|  | 119 | size_t index = 0; | 
|  | 120 |  | 
|  | 121 | readThermalParameter( | 
|  | 122 | eid, ids[index], mctpRequester, | 
|  | 123 | std::bind_front(readThermalParameterCallback, eid, | 
|  | 124 | std::make_shared<std::vector<uint8_t>>(ids), | 
|  | 125 | std::ref(mctpRequester), callback, index, thresholds)); | 
|  | 126 | } |