| Harshit Aghera | 5e7decc | 2025-05-07 16:20:16 +0530 | [diff] [blame] | 1 | /* | 
|  | 2 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & | 
|  | 3 | * AFFILIATES. All rights reserved. | 
|  | 4 | * SPDX-License-Identifier: Apache-2.0 | 
|  | 5 | */ | 
|  | 6 |  | 
|  | 7 | #include "NvidiaGpuThresholds.hpp" | 
|  | 8 |  | 
|  | 9 | #include <MctpRequester.hpp> | 
|  | 10 | #include <NvidiaGpuMctpVdm.hpp> | 
|  | 11 | #include <OcpMctpVdm.hpp> | 
|  | 12 | #include <phosphor-logging/lg2.hpp> | 
|  | 13 |  | 
|  | 14 | #include <array> | 
|  | 15 | #include <cerrno> | 
|  | 16 | #include <cstddef> | 
|  | 17 | #include <cstdint> | 
|  | 18 | #include <functional> | 
|  | 19 | #include <memory> | 
|  | 20 | #include <span> | 
|  | 21 | #include <vector> | 
|  | 22 |  | 
|  | 23 | void processReadThermalParameterResponse( | 
|  | 24 | const std::function<void(uint8_t, int32_t)>& callback, | 
|  | 25 | const std::span<const uint8_t> respMsg, int sendRecvMsgResult) | 
|  | 26 | { | 
|  | 27 | if (sendRecvMsgResult != 0) | 
|  | 28 | { | 
|  | 29 | lg2::error( | 
|  | 30 | "Error reading thermal parameter: sending message over MCTP failed, rc={RC}", | 
|  | 31 | "RC", sendRecvMsgResult); | 
|  | 32 | callback(EPROTO, 0); | 
|  | 33 | return; | 
|  | 34 | } | 
|  | 35 |  | 
|  | 36 | ocp::accelerator_management::CompletionCode cc{}; | 
|  | 37 | uint16_t reasonCode = 0; | 
|  | 38 | int32_t threshold = 0; | 
|  | 39 |  | 
|  | 40 | auto rc = gpu::decodeReadThermalParametersResponse(respMsg, cc, reasonCode, | 
|  | 41 | threshold); | 
|  | 42 |  | 
|  | 43 | if (rc != 0 || cc != ocp::accelerator_management::CompletionCode::SUCCESS) | 
|  | 44 | { | 
|  | 45 | lg2::error( | 
|  | 46 | "Error reading thermal parameter: decode failed, rc={RC}, cc={CC}, reasonCode={RESC}", | 
|  | 47 | "RC", rc, "CC", cc, "RESC", reasonCode); | 
|  | 48 | callback(EPROTO, 0); | 
|  | 49 | return; | 
|  | 50 | } | 
|  | 51 |  | 
|  | 52 | callback(0, threshold); | 
|  | 53 | }; | 
|  | 54 |  | 
|  | 55 | void readThermalParameter(uint8_t eid, uint8_t id, | 
|  | 56 | mctp::MctpRequester& mctpRequester, | 
|  | 57 | const std::function<void(uint8_t, int32_t)>& callback) | 
|  | 58 | { | 
|  | 59 | auto reqMsg = std::make_shared< | 
|  | 60 | std::array<uint8_t, sizeof(gpu::ReadThermalParametersRequest)>>(); | 
|  | 61 |  | 
|  | 62 | auto respMsg = std::make_shared< | 
|  | 63 | std::array<uint8_t, sizeof(gpu::ReadThermalParametersResponse)>>(); | 
|  | 64 |  | 
|  | 65 | auto rc = gpu::encodeReadThermalParametersRequest(0, id, *reqMsg); | 
|  | 66 | if (rc != 0) | 
|  | 67 | { | 
|  | 68 | lg2::error( | 
|  | 69 | "Error reading thermal parameter for eid {EID} and parameter id {PID} : encode failed. rc={RC}", | 
|  | 70 | "EID", eid, "PID", id, "RC", rc); | 
|  | 71 | callback(rc, 0); | 
|  | 72 | return; | 
|  | 73 | } | 
|  | 74 |  | 
|  | 75 | mctpRequester.sendRecvMsg( | 
|  | 76 | eid, *reqMsg, *respMsg, | 
|  | 77 | [reqMsg, respMsg, callback](int sendRecvMsgResult) { | 
|  | 78 | processReadThermalParameterResponse(callback, *respMsg, | 
|  | 79 | sendRecvMsgResult); | 
|  | 80 | }); | 
|  | 81 | } | 
|  | 82 |  | 
|  | 83 | void readThermalParameterCallback( | 
|  | 84 | uint8_t eid, const std::shared_ptr<std::vector<uint8_t>>& ids, | 
|  | 85 | mctp::MctpRequester& mctpRequester, | 
|  | 86 | const std::function<void(uint8_t, std::vector<int32_t>)>& callback, | 
|  | 87 | size_t index, const std::shared_ptr<std::vector<int32_t>>& thresholds, | 
|  | 88 | uint8_t rc, int32_t threshold) | 
|  | 89 | { | 
|  | 90 | if (rc != 0) | 
|  | 91 | { | 
|  | 92 | lg2::error( | 
|  | 93 | "Error reading thermal parameter for eid {EID} and parameter id {PID}. rc={RC}", | 
|  | 94 | "EID", eid, "PID", (*ids)[index], "RC", rc); | 
|  | 95 | callback(rc, *thresholds); | 
|  | 96 | return; | 
|  | 97 | } | 
|  | 98 |  | 
|  | 99 | thresholds->push_back(threshold); | 
|  | 100 |  | 
|  | 101 | ++index; | 
|  | 102 | if (index == ids->size()) | 
|  | 103 | { | 
|  | 104 | callback(rc, *thresholds); | 
|  | 105 | } | 
|  | 106 | else | 
|  | 107 | { | 
|  | 108 | readThermalParameter(eid, (*ids)[index], mctpRequester, | 
|  | 109 | std::bind_front(readThermalParameterCallback, eid, | 
|  | 110 | ids, std::ref(mctpRequester), | 
|  | 111 | callback, index, thresholds)); | 
|  | 112 | } | 
|  | 113 | } | 
|  | 114 |  | 
|  | 115 | void readThermalParameters( | 
|  | 116 | uint8_t eid, const std::vector<uint8_t>& ids, | 
|  | 117 | mctp::MctpRequester& mctpRequester, | 
|  | 118 | const std::function<void(uint8_t, std::vector<int32_t>)>& callback) | 
|  | 119 | { | 
|  | 120 | auto thresholds = std::make_shared<std::vector<int32_t>>(); | 
|  | 121 | size_t index = 0; | 
|  | 122 |  | 
|  | 123 | readThermalParameter( | 
|  | 124 | eid, ids[index], mctpRequester, | 
|  | 125 | std::bind_front(readThermalParameterCallback, eid, | 
|  | 126 | std::make_shared<std::vector<uint8_t>>(ids), | 
|  | 127 | std::ref(mctpRequester), callback, index, thresholds)); | 
|  | 128 | } |