blob: 1c783541259f8946e2e702a28453b0bfe0018fe5 [file] [log] [blame]
/*
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
16
#include <crc32c.h>
#include <libmctp-smbus.h>
#include <unistd.h>

#include <NVMeDevice.hpp>
#include <NVMeSensor.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/asio/ip/tcp.hpp>

#include <algorithm>
#include <array>
#include <cmath>
#include <exception>
#include <iostream>
#include <limits>
#include <stdexcept>
26
// Upper and lower bounds handed to the Sensor base class for every NVMe
// temperature sensor created in this file.
static constexpr double maxReading = 127;
static constexpr double minReading = 0;

// Compile-time switch for the verbose std::cout tracing in this file.
static constexpr bool debug = false;

// Receive callback registered with libmctp by nvmeMCTP::init(); defined
// further down in this file.
void rxMessage(uint8_t eid, void* data, void* msg, size_t len);
33
34namespace nvmeMCTP
35{
36struct mctp_binding_smbus* smbus = mctp_smbus_init();
37struct mctp* mctp = mctp_init();
38
39static boost::container::flat_map<int, int> inFds;
40static boost::container::flat_map<int, int> outFds;
41
42int getInFd(int rootBus)
43{
44 auto findBus = inFds.find(rootBus);
45 if (findBus != inFds.end())
46 {
47 return findBus->second;
48 }
49 int fd = mctp_smbus_open_in_bus(smbus, rootBus);
50 if (fd < 0)
51 {
52 std::cerr << "Error opening IN Bus " << rootBus << "\n";
53 }
54 inFds[rootBus] = fd;
55 return fd;
56}
57
58int getOutFd(int bus)
59{
60 auto findBus = outFds.find(bus);
61 if (findBus != outFds.end())
62 {
63 return findBus->second;
64 }
65 int fd = mctp_smbus_open_out_bus(smbus, bus);
66 if (fd < 0)
67 {
68 std::cerr << "Error opening Out Bus " << bus << "\n";
69 }
70 outFds[bus] = fd;
71 return fd;
72}
73
74// we don't close the outFd as multiple sensors could be sharing the fd, we need
75// to close the inFd as it can only be used on 1 socket at a time
76void closeInFd(int rootBus)
77{
78 auto findFd = inFds.find(rootBus);
79 if (findFd == inFds.end())
80 {
81 return;
82 }
83 close(findFd->second);
84 inFds.erase(rootBus);
85}
86
87int getRootBus(int inFd)
88{
89 // we assume that we won't have too many FDs, so looping is OK
90 for (const auto [root, fd] : inFds)
91 {
92 if (fd == inFd)
93 {
94 return root;
95 }
96 }
97
98 return -1;
99}
100
101void init()
102{
103 if (mctp == nullptr || smbus == nullptr)
104 {
105 throw std::runtime_error("Unable to init mctp");
106 }
107 mctp_smbus_register_bus(smbus, nvmeMCTP::mctp, 0);
108 mctp_set_rx_all(mctp, rxMessage, nullptr);
109}
110
111} // namespace nvmeMCTP
112
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700113void readResponse(const std::shared_ptr<NVMeContext>& nvmeDevice)
114{
115 nvmeDevice->nvmeSlaveSocket.async_wait(
116 boost::asio::ip::tcp::socket::wait_error,
117 [nvmeDevice](const boost::system::error_code errorCode) {
118 if (errorCode)
119 {
120 return;
121 }
122
123 mctp_smbus_set_in_fd(nvmeMCTP::smbus,
124 nvmeMCTP::getInFd(nvmeDevice->rootBus));
125
126 // through libmctp this will invoke rxMessage
127 mctp_smbus_read(nvmeMCTP::smbus);
128 });
129}
130
131int nvmeMessageTransmit(mctp& mctp, nvme_mi_msg_request& req)
132{
133 std::array<uint8_t, NVME_MI_MSG_BUFFER_SIZE> messageBuf = {};
134
135 req.header.flags |= NVME_MI_HDR_MESSAGE_TYPE_MI_COMMAND
136 << NVME_MI_HDR_FLAG_MSG_TYPE_SHIFT;
137 req.header.message_type =
138 NVME_MI_MESSAGE_TYPE | NVME_MI_MCTP_INTEGRITY_CHECK;
139
140 uint32_t integrity = 0;
141 size_t msgSize = NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len +
142 sizeof(integrity);
143
144 if (sizeof(messageBuf) < msgSize)
145 {
146 return EXIT_FAILURE;
147 }
148
149 messageBuf[0] = req.header.message_type;
150 messageBuf[1] = req.header.flags;
151 // Reserved bytes 2-3
152
153 messageBuf[4] = req.header.opcode;
154 // reserved bytes 5-7
155 messageBuf[8] = req.header.dword0 & 0xff;
156 messageBuf[9] = (req.header.dword0 >> 8) & 0xff;
157 messageBuf[10] = (req.header.dword0 >> 16) & 0xff;
158 messageBuf[11] = (req.header.dword0 >> 24) & 0xff;
159
160 messageBuf[12] = req.header.dword1 & 0xff;
161 messageBuf[13] = (req.header.dword1 >> 8) & 0xff;
162 messageBuf[14] = (req.header.dword1 >> 16) & 0xff;
163 messageBuf[15] = (req.header.dword1 >> 24) & 0xff;
164
165 std::copy_n(req.request_data, req.request_data_len,
166 messageBuf.data() +
167 static_cast<uint8_t>(NVME_MI_MSG_REQUEST_HEADER_SIZE));
168
169 msgSize = NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len;
170 integrity = crc32c(messageBuf.data(),
171 NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len);
172 messageBuf[msgSize] = integrity & 0xff;
173 messageBuf[msgSize + 1] = (integrity >> 8) & 0xff;
174 messageBuf[msgSize + 2] = (integrity >> 16) & 0xff;
175 messageBuf[msgSize + 3] = (integrity >> 24) & 0xff;
176 msgSize += sizeof(integrity);
177
178 return mctp_message_tx(&mctp, 0, messageBuf.data(), msgSize);
179}
180
181int verifyIntegrity(uint8_t* msg, size_t len)
182{
183 uint32_t msgIntegrity = {0};
184 if (len < NVME_MI_MSG_RESPONSE_HEADER_SIZE + sizeof(msgIntegrity))
185 {
186 std::cerr << "Not enough bytes for nvme header and trailer\n";
187 return -1;
188 }
189
190 msgIntegrity = (msg[len - 4]) + (msg[len - 3] << 8) + (msg[len - 2] << 16) +
191 (msg[len - 1] << 24);
192
193 uint32_t calculateIntegrity = crc32c(msg, len - sizeof(msgIntegrity));
194 if (msgIntegrity != calculateIntegrity)
195 {
196 std::cerr << "CRC mismatch. Got=" << msgIntegrity
197 << " Expected=" << calculateIntegrity << "\n";
198 return -1;
199 }
200 return 0;
201}
202
203void readAndProcessNVMeSensor(const std::shared_ptr<NVMeContext>& nvmeDevice)
204{
205 struct nvme_mi_msg_request requestMsg = {};
206 requestMsg.header.opcode = NVME_MI_OPCODE_HEALTH_STATUS_POLL;
207 requestMsg.header.dword0 = 0;
208 requestMsg.header.dword1 = 0;
209
210 int mctpResponseTimeout = 1;
211
212 if (nvmeDevice->sensors.empty())
213 {
214 return;
215 }
216
217 std::shared_ptr<NVMeSensor>& sensor = nvmeDevice->sensors.front();
218
219 // setup the timeout timer
220 nvmeDevice->mctpResponseTimer.expires_from_now(
221 boost::posix_time::seconds(mctpResponseTimeout));
222
223 nvmeDevice->mctpResponseTimer.async_wait(
224 [sensor, nvmeDevice](const boost::system::error_code errorCode) {
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700225 if (errorCode)
226 {
James Feist961bf092020-07-01 16:38:12 -0700227 // timer cancelled successfully
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700228 return;
229 }
James Feist961bf092020-07-01 16:38:12 -0700230
231 sensor->incrementError();
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700232
233 // cycle it back
234 nvmeDevice->sensors.pop_front();
235 nvmeDevice->sensors.emplace_back(sensor);
236
237 nvmeDevice->nvmeSlaveSocket.cancel();
238 });
239
240 readResponse(nvmeDevice);
241
Ed Tanous8a57ec02020-10-09 12:46:52 -0700242 if (debug)
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700243 {
244 std::cout << "Sending message to read data from Drive on bus: "
245 << sensor->bus << " , rootBus: " << nvmeDevice->rootBus
246 << " device: " << sensor->name << "\n";
247 }
248
249 mctp_smbus_set_out_fd(nvmeMCTP::smbus, nvmeMCTP::getOutFd(sensor->bus));
250 int rc = nvmeMessageTransmit(*nvmeMCTP::mctp, requestMsg);
251
252 if (rc != 0)
253 {
254 std::cerr << "Error sending request message to NVMe device\n";
255 }
256}
257
// Converts the raw temperature byte of a health status poll response into a
// sensor value.
//
// Returns NaN for the two "no valid reading" codes (0x80 and 0x81); every
// other value is returned unchanged as a signed number.
static double getTemperatureReading(int8_t reading)
{
    // 0x80 = No temperature data, or temperature data is more than 5 s old
    // 0x81 = Temperature sensor failure
    if (reading == static_cast<int8_t>(0x80) ||
        reading == static_cast<int8_t>(0x81))
    {
        return std::numeric_limits<double>::quiet_NaN();
    }

    return reading;
}
271
272void rxMessage(uint8_t eid, void*, void* msg, size_t len)
273{
274 struct nvme_mi_msg_response_header header
James Feist38fb5982020-05-28 10:09:54 -0700275 {};
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700276
277 int inFd = mctp_smbus_get_in_fd(nvmeMCTP::smbus);
278 int rootBus = nvmeMCTP::getRootBus(inFd);
279
280 NVMEMap& nvmeMap = getNVMEMap();
281 auto findMap = nvmeMap.find(rootBus);
282 if (findMap == nvmeMap.end())
283 {
284 std::cerr << "Unable to lookup root bus " << rootBus << "\n";
285 return;
286 }
287 std::shared_ptr<NVMeContext>& self = findMap->second;
288
289 if (msg == nullptr)
290 {
291 std::cerr << "Bad message received\n";
292 return;
293 }
294
295 if (len <= 0)
296 {
297 std::cerr << "Received message not long enough\n";
298 return;
299 }
300
Ed Tanous8a57ec02020-10-09 12:46:52 -0700301 if (debug)
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700302 {
303 std::cout << "Eid from the received messaged: " << eid << "\n";
304 }
305
306 uint8_t* messageData = static_cast<uint8_t*>(msg);
307
308 if ((*messageData & NVME_MI_MESSAGE_TYPE_MASK) != NVME_MI_MESSAGE_TYPE)
309 {
310 std::cerr << "Got unknown type message_type="
311 << (*messageData & NVME_MI_MESSAGE_TYPE_MASK) << "\n";
312 return;
313 }
314
315 if (len < NVME_MI_MSG_RESPONSE_HEADER_SIZE + sizeof(uint32_t))
316 {
317 std::cerr << "Not enough bytes for NVMe header and trailer\n";
318 return;
319 }
320
321 if (verifyIntegrity(messageData, len) != 0)
322 {
323 std::cerr << "Verification of message integrity failed\n";
324 return;
325 }
326
327 header.message_type = messageData[0];
328 header.flags = messageData[1];
329 header.status = messageData[4];
330
331 if (header.status == NVME_MI_HDR_STATUS_MORE_PROCESSING_REQUIRED)
332 {
333 return;
334 }
335
336 if (header.status != NVME_MI_HDR_STATUS_SUCCESS)
337 {
338 std::cerr << "Command failed with status= " << header.status << "\n";
339 return;
340 }
341
342 messageData += NVME_MI_MSG_RESPONSE_HEADER_SIZE;
343 size_t messageLength =
344 len - NVME_MI_MSG_RESPONSE_HEADER_SIZE - sizeof(uint32_t);
345 if (((header.flags >> NVME_MI_HDR_FLAG_MSG_TYPE_SHIFT) &
346 NVME_MI_HDR_FLAG_MSG_TYPE_MASK) != NVME_MI_HDR_MESSAGE_TYPE_MI_COMMAND)
347 {
348 std::cerr << "Not MI type comamnd\n";
349 return;
350 }
351
352 if (messageLength < NVME_MI_HEALTH_STATUS_POLL_MSG_MIN)
353 {
354 std::cerr << "Got improperly sized health status poll\n";
355 return;
356 }
357
358 std::shared_ptr<NVMeSensor> sensorInfo = self->sensors.front();
Ed Tanous8a57ec02020-10-09 12:46:52 -0700359 if (debug)
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700360 {
361 std::cout << "Temperature Reading: "
362 << getTemperatureReading(messageData[5])
363 << " Celsius for device " << sensorInfo->name << "\n";
364 }
365
Zhikui Renda274232020-11-11 07:41:03 -0800366 double value = getTemperatureReading(messageData[5]);
367 if (!std::isfinite(value))
368 {
369 sensorInfo->markAvailable(false);
370 sensorInfo->incrementError();
371 }
372 else
373 {
374 sensorInfo->updateValue(value);
375 }
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700376
Ed Tanous8a57ec02020-10-09 12:46:52 -0700377 if (debug)
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700378 {
379 std::cout << "Cancelling the timer now\n";
380 }
381
382 // move to back of scan queue
383 self->sensors.pop_front();
384 self->sensors.emplace_back(sensorInfo);
385
386 self->mctpResponseTimer.cancel();
387}
388
389NVMeContext::NVMeContext(boost::asio::io_service& io, int rootBus) :
390 rootBus(rootBus), scanTimer(io), nvmeSlaveSocket(io), mctpResponseTimer(io)
391{
392 nvmeSlaveSocket.assign(boost::asio::ip::tcp::v4(),
393 nvmeMCTP::getInFd(rootBus));
394}
395
396void NVMeContext::pollNVMeDevices()
397{
398 scanTimer.expires_from_now(boost::posix_time::seconds(1));
399 scanTimer.async_wait(
400 [self{shared_from_this()}](const boost::system::error_code errorCode) {
401 if (errorCode == boost::asio::error::operation_aborted)
402 {
403 return; // we're being canceled
404 }
405 else if (errorCode)
406 {
407 std::cerr << "Error:" << errorCode.message() << "\n";
408 return;
409 }
410 else
411 {
412 readAndProcessNVMeSensor(self);
413 }
414
415 self->pollNVMeDevices();
416 });
417}
418
James Feist375ade22020-07-16 16:32:10 -0700419void NVMeContext::close()
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700420{
421 scanTimer.cancel();
422 mctpResponseTimer.cancel();
423 nvmeSlaveSocket.cancel();
424 nvmeMCTP::closeInFd(rootBus);
425}
426
James Feist375ade22020-07-16 16:32:10 -0700427NVMeContext::~NVMeContext()
428{
429 close();
430}
431
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700432NVMeSensor::NVMeSensor(sdbusplus::asio::object_server& objectServer,
James Feist7d7579f2020-09-02 14:13:08 -0700433 boost::asio::io_service&,
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700434 std::shared_ptr<sdbusplus::asio::connection>& conn,
435 const std::string& sensorName,
Jeff Lin7b7a9de2021-02-22 11:16:27 +0800436 std::vector<thresholds::Threshold>&& thresholdsIn,
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700437 const std::string& sensorConfiguration,
438 const int busNumber) :
Jeff Lin7b7a9de2021-02-22 11:16:27 +0800439 Sensor(boost::replace_all_copy(sensorName, " ", "_"),
440 std::move(thresholdsIn), sensorConfiguration,
441 "xyz.openbmc_project.Configuration.NVMe", maxReading, minReading,
442 conn, PowerState::on),
James Feist961bf092020-07-01 16:38:12 -0700443 objServer(objectServer), bus(busNumber)
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700444{
445 sensorInterface = objectServer.add_interface(
446 "/xyz/openbmc_project/sensors/temperature/" + name,
447 "xyz.openbmc_project.Sensor.Value");
448
449 if (thresholds::hasWarningInterface(thresholds))
450 {
451 thresholdInterfaceWarning = objectServer.add_interface(
452 "/xyz/openbmc_project/sensors/temperature/" + name,
453 "xyz.openbmc_project.Sensor.Threshold.Warning");
454 }
455 if (thresholds::hasCriticalInterface(thresholds))
456 {
457 thresholdInterfaceCritical = objectServer.add_interface(
458 "/xyz/openbmc_project/sensors/temperature/" + name,
459 "xyz.openbmc_project.Sensor.Threshold.Critical");
460 }
461 association = objectServer.add_interface(
462 "/xyz/openbmc_project/sensors/temperature/" + name,
463 association::interface);
464
465 setInitialProperties(conn);
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700466}
467
468NVMeSensor::~NVMeSensor()
469{
470 // close the input dev to cancel async operations
471 objServer.remove_interface(thresholdInterfaceWarning);
472 objServer.remove_interface(thresholdInterfaceCritical);
473 objServer.remove_interface(sensorInterface);
474 objServer.remove_interface(association);
475}
476
477void NVMeSensor::checkThresholds(void)
478{
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700479 thresholds::checkThresholds(this);
480}