| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 1 | /* | 
 | 2 |  * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & | 
 | 3 |  * AFFILIATES. All rights reserved. | 
 | 4 |  * SPDX-License-Identifier: Apache-2.0 | 
 | 5 |  */ | 
 | 6 |  | 
 | 7 | #include "NvidiaGpuThresholds.hpp" | 
 | 8 |  | 
 | 9 | #include <MctpRequester.hpp> | 
 | 10 | #include <NvidiaGpuMctpVdm.hpp> | 
 | 11 | #include <OcpMctpVdm.hpp> | 
 | 12 | #include <phosphor-logging/lg2.hpp> | 
 | 13 |  | 
 | 14 | #include <array> | 
 | 15 | #include <cerrno> | 
 | 16 | #include <cstddef> | 
 | 17 | #include <cstdint> | 
 | 18 | #include <functional> | 
 | 19 | #include <memory> | 
 | 20 | #include <span> | 
 | 21 | #include <vector> | 
 | 22 |  | 
 | 23 | void processReadThermalParameterResponse( | 
 | 24 |     const std::function<void(uint8_t, int32_t)>& callback, | 
 | 25 |     const std::span<const uint8_t> respMsg, int sendRecvMsgResult) | 
 | 26 | { | 
 | 27 |     if (sendRecvMsgResult != 0) | 
 | 28 |     { | 
 | 29 |         lg2::error( | 
 | 30 |             "Error reading thermal parameter: sending message over MCTP failed, rc={RC}", | 
 | 31 |             "RC", sendRecvMsgResult); | 
 | 32 |         callback(EPROTO, 0); | 
 | 33 |         return; | 
 | 34 |     } | 
 | 35 |  | 
 | 36 |     ocp::accelerator_management::CompletionCode cc{}; | 
 | 37 |     uint16_t reasonCode = 0; | 
 | 38 |     int32_t threshold = 0; | 
 | 39 |  | 
 | 40 |     auto rc = gpu::decodeReadThermalParametersResponse(respMsg, cc, reasonCode, | 
 | 41 |                                                        threshold); | 
 | 42 |  | 
 | 43 |     if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS) | 
 | 44 |     { | 
 | 45 |         lg2::error( | 
 | 46 |             "Error reading thermal parameter: decode failed, rc={RC}, cc={CC}, reasonCode={RESC}", | 
 | 47 |             "RC", rc, "CC", cc, "RESC", reasonCode); | 
 | 48 |         callback(EPROTO, 0); | 
 | 49 |         return; | 
 | 50 |     } | 
 | 51 |  | 
 | 52 |     callback(0, threshold); | 
 | 53 | }; | 
 | 54 |  | 
 | 55 | void readThermalParameter(uint8_t eid, uint8_t id, | 
 | 56 |                           mctp::MctpRequester& mctpRequester, | 
 | 57 |                           const std::function<void(uint8_t, int32_t)>& callback) | 
 | 58 | { | 
 | 59 |     auto reqMsg = std::make_shared< | 
 | 60 |         std::array<uint8_t, sizeof(gpu::ReadThermalParametersRequest)>>(); | 
 | 61 |  | 
 | 62 |     auto respMsg = std::make_shared< | 
 | 63 |         std::array<uint8_t, sizeof(gpu::ReadThermalParametersResponse)>>(); | 
 | 64 |  | 
 | 65 |     auto rc = gpu::encodeReadThermalParametersRequest(0, id, *reqMsg); | 
 | 66 |     if (rc != 0) | 
 | 67 |     { | 
 | 68 |         lg2::error( | 
 | 69 |             "Error reading thermal parameter for eid {EID} and parameter id {PID} : encode failed. rc={RC}", | 
 | 70 |             "EID", eid, "PID", id, "RC", rc); | 
 | 71 |         callback(rc, 0); | 
 | 72 |         return; | 
 | 73 |     } | 
 | 74 |  | 
 | 75 |     mctpRequester.sendRecvMsg( | 
 | 76 |         eid, *reqMsg, *respMsg, | 
 | 77 |         [reqMsg, respMsg, callback](int sendRecvMsgResult) { | 
 | 78 |             processReadThermalParameterResponse(callback, *respMsg, | 
 | 79 |                                                 sendRecvMsgResult); | 
 | 80 |         }); | 
 | 81 | } | 
 | 82 |  | 
 | 83 | void readThermalParameterCallback( | 
 | 84 |     uint8_t eid, const std::shared_ptr<std::vector<uint8_t>>& ids, | 
 | 85 |     mctp::MctpRequester& mctpRequester, | 
 | 86 |     const std::function<void(uint8_t, std::vector<int32_t>)>& callback, | 
 | 87 |     size_t index, const std::shared_ptr<std::vector<int32_t>>& thresholds, | 
 | 88 |     uint8_t rc, int32_t threshold) | 
 | 89 | { | 
 | 90 |     if (rc != 0) | 
 | 91 |     { | 
 | 92 |         lg2::error( | 
 | 93 |             "Error reading thermal parameter for eid {EID} and parameter id {PID}. rc={RC}", | 
 | 94 |             "EID", eid, "PID", (*ids)[index], "RC", rc); | 
 | 95 |         callback(rc, *thresholds); | 
 | 96 |         return; | 
 | 97 |     } | 
 | 98 |  | 
 | 99 |     thresholds->push_back(threshold); | 
 | 100 |  | 
 | 101 |     ++index; | 
 | 102 |     if (index == ids->size()) | 
 | 103 |     { | 
 | 104 |         callback(rc, *thresholds); | 
 | 105 |     } | 
 | 106 |     else | 
 | 107 |     { | 
 | 108 |         readThermalParameter(eid, (*ids)[index], mctpRequester, | 
 | 109 |                              std::bind_front(readThermalParameterCallback, eid, | 
 | 110 |                                              ids, std::ref(mctpRequester), | 
 | 111 |                                              callback, index, thresholds)); | 
 | 112 |     } | 
 | 113 | } | 
 | 114 |  | 
 | 115 | void readThermalParameters( | 
 | 116 |     uint8_t eid, const std::vector<uint8_t>& ids, | 
 | 117 |     mctp::MctpRequester& mctpRequester, | 
 | 118 |     const std::function<void(uint8_t, std::vector<int32_t>)>& callback) | 
 | 119 | { | 
 | 120 |     auto thresholds = std::make_shared<std::vector<int32_t>>(); | 
 | 121 |     size_t index = 0; | 
 | 122 |  | 
 | 123 |     readThermalParameter( | 
 | 124 |         eid, ids[index], mctpRequester, | 
 | 125 |         std::bind_front(readThermalParameterCallback, eid, | 
 | 126 |                         std::make_shared<std::vector<uint8_t>>(ids), | 
 | 127 |                         std::ref(mctpRequester), callback, index, thresholds)); | 
 | 128 | } |