blob: 8cdf744c7aa1bdee9e3a89d783cc9e72474f1adc [file] [log] [blame]
/*
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
16
17#include "NVMeSensor.hpp"
18
19#include "NVMeDevice.hpp"
20
21#include <crc32c.h>
22#include <libmctp-smbus.h>
23
24#include <boost/algorithm/string/replace.hpp>
25#include <boost/asio/ip/tcp.hpp>
James Feist38fb5982020-05-28 10:09:54 -070026
Nikhil Potadeb669b6b2019-03-13 10:52:21 -070027#include <iostream>
28
// Upper and lower bounds handed to the Sensor base class by the NVMeSensor
// constructor.
static constexpr double maxReading{127.0};
static constexpr double minReading{0.0};

// When true, extra diagnostics are written to std::cout.
static constexpr bool DEBUG{false};

// Receive callback, defined further down in this file; nvmeMCTP::init()
// registers it with libmctp through mctp_set_rx_all().
void rxMessage(uint8_t eid, void* data, void* msg, size_t len);
35
36namespace nvmeMCTP
37{
38struct mctp_binding_smbus* smbus = mctp_smbus_init();
39struct mctp* mctp = mctp_init();
40
41static boost::container::flat_map<int, int> inFds;
42static boost::container::flat_map<int, int> outFds;
43
44int getInFd(int rootBus)
45{
46 auto findBus = inFds.find(rootBus);
47 if (findBus != inFds.end())
48 {
49 return findBus->second;
50 }
51 int fd = mctp_smbus_open_in_bus(smbus, rootBus);
52 if (fd < 0)
53 {
54 std::cerr << "Error opening IN Bus " << rootBus << "\n";
55 }
56 inFds[rootBus] = fd;
57 return fd;
58}
59
60int getOutFd(int bus)
61{
62 auto findBus = outFds.find(bus);
63 if (findBus != outFds.end())
64 {
65 return findBus->second;
66 }
67 int fd = mctp_smbus_open_out_bus(smbus, bus);
68 if (fd < 0)
69 {
70 std::cerr << "Error opening Out Bus " << bus << "\n";
71 }
72 outFds[bus] = fd;
73 return fd;
74}
75
76// we don't close the outFd as multiple sensors could be sharing the fd, we need
77// to close the inFd as it can only be used on 1 socket at a time
78void closeInFd(int rootBus)
79{
80 auto findFd = inFds.find(rootBus);
81 if (findFd == inFds.end())
82 {
83 return;
84 }
85 close(findFd->second);
86 inFds.erase(rootBus);
87}
88
89int getRootBus(int inFd)
90{
91 // we assume that we won't have too many FDs, so looping is OK
92 for (const auto [root, fd] : inFds)
93 {
94 if (fd == inFd)
95 {
96 return root;
97 }
98 }
99
100 return -1;
101}
102
103void init()
104{
105 if (mctp == nullptr || smbus == nullptr)
106 {
107 throw std::runtime_error("Unable to init mctp");
108 }
109 mctp_smbus_register_bus(smbus, nvmeMCTP::mctp, 0);
110 mctp_set_rx_all(mctp, rxMessage, nullptr);
111}
112
113} // namespace nvmeMCTP
114
// NOTE(review): not referenced anywhere in the visible part of this file.
static int lastQueriedDeviceIndex{-1};
116
117void readResponse(const std::shared_ptr<NVMeContext>& nvmeDevice)
118{
119 nvmeDevice->nvmeSlaveSocket.async_wait(
120 boost::asio::ip::tcp::socket::wait_error,
121 [nvmeDevice](const boost::system::error_code errorCode) {
122 if (errorCode)
123 {
124 return;
125 }
126
127 mctp_smbus_set_in_fd(nvmeMCTP::smbus,
128 nvmeMCTP::getInFd(nvmeDevice->rootBus));
129
130 // through libmctp this will invoke rxMessage
131 mctp_smbus_read(nvmeMCTP::smbus);
132 });
133}
134
135int nvmeMessageTransmit(mctp& mctp, nvme_mi_msg_request& req)
136{
137 std::array<uint8_t, NVME_MI_MSG_BUFFER_SIZE> messageBuf = {};
138
139 req.header.flags |= NVME_MI_HDR_MESSAGE_TYPE_MI_COMMAND
140 << NVME_MI_HDR_FLAG_MSG_TYPE_SHIFT;
141 req.header.message_type =
142 NVME_MI_MESSAGE_TYPE | NVME_MI_MCTP_INTEGRITY_CHECK;
143
144 uint32_t integrity = 0;
145 size_t msgSize = NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len +
146 sizeof(integrity);
147
148 if (sizeof(messageBuf) < msgSize)
149 {
150 return EXIT_FAILURE;
151 }
152
153 messageBuf[0] = req.header.message_type;
154 messageBuf[1] = req.header.flags;
155 // Reserved bytes 2-3
156
157 messageBuf[4] = req.header.opcode;
158 // reserved bytes 5-7
159 messageBuf[8] = req.header.dword0 & 0xff;
160 messageBuf[9] = (req.header.dword0 >> 8) & 0xff;
161 messageBuf[10] = (req.header.dword0 >> 16) & 0xff;
162 messageBuf[11] = (req.header.dword0 >> 24) & 0xff;
163
164 messageBuf[12] = req.header.dword1 & 0xff;
165 messageBuf[13] = (req.header.dword1 >> 8) & 0xff;
166 messageBuf[14] = (req.header.dword1 >> 16) & 0xff;
167 messageBuf[15] = (req.header.dword1 >> 24) & 0xff;
168
169 std::copy_n(req.request_data, req.request_data_len,
170 messageBuf.data() +
171 static_cast<uint8_t>(NVME_MI_MSG_REQUEST_HEADER_SIZE));
172
173 msgSize = NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len;
174 integrity = crc32c(messageBuf.data(),
175 NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len);
176 messageBuf[msgSize] = integrity & 0xff;
177 messageBuf[msgSize + 1] = (integrity >> 8) & 0xff;
178 messageBuf[msgSize + 2] = (integrity >> 16) & 0xff;
179 messageBuf[msgSize + 3] = (integrity >> 24) & 0xff;
180 msgSize += sizeof(integrity);
181
182 return mctp_message_tx(&mctp, 0, messageBuf.data(), msgSize);
183}
184
185int verifyIntegrity(uint8_t* msg, size_t len)
186{
187 uint32_t msgIntegrity = {0};
188 if (len < NVME_MI_MSG_RESPONSE_HEADER_SIZE + sizeof(msgIntegrity))
189 {
190 std::cerr << "Not enough bytes for nvme header and trailer\n";
191 return -1;
192 }
193
194 msgIntegrity = (msg[len - 4]) + (msg[len - 3] << 8) + (msg[len - 2] << 16) +
195 (msg[len - 1] << 24);
196
197 uint32_t calculateIntegrity = crc32c(msg, len - sizeof(msgIntegrity));
198 if (msgIntegrity != calculateIntegrity)
199 {
200 std::cerr << "CRC mismatch. Got=" << msgIntegrity
201 << " Expected=" << calculateIntegrity << "\n";
202 return -1;
203 }
204 return 0;
205}
206
207void readAndProcessNVMeSensor(const std::shared_ptr<NVMeContext>& nvmeDevice)
208{
209 struct nvme_mi_msg_request requestMsg = {};
210 requestMsg.header.opcode = NVME_MI_OPCODE_HEALTH_STATUS_POLL;
211 requestMsg.header.dword0 = 0;
212 requestMsg.header.dword1 = 0;
213
214 int mctpResponseTimeout = 1;
215
216 if (nvmeDevice->sensors.empty())
217 {
218 return;
219 }
220
221 std::shared_ptr<NVMeSensor>& sensor = nvmeDevice->sensors.front();
222
223 // setup the timeout timer
224 nvmeDevice->mctpResponseTimer.expires_from_now(
225 boost::posix_time::seconds(mctpResponseTimeout));
226
227 nvmeDevice->mctpResponseTimer.async_wait(
228 [sensor, nvmeDevice](const boost::system::error_code errorCode) {
229 constexpr const size_t errorThreshold = 5;
230 if (errorCode)
231 {
James Feistd3ac4f92020-01-29 15:38:55 -0800232 sensor->errorCount = 0;
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700233 return;
234 }
James Feistd3ac4f92020-01-29 15:38:55 -0800235 if (!isPowerOn())
236 {
237 sensor->errorCount = 0;
238 sensor->updateValue(std::numeric_limits<double>::quiet_NaN());
239 }
240 else if (sensor->errorCount < errorThreshold)
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700241 {
242 std::cerr << "MCTP timeout device " << sensor->name << "\n";
243 sensor->errorCount++;
244 }
245 else
246 {
247 sensor->updateValue(0);
248 }
249
250 // cycle it back
251 nvmeDevice->sensors.pop_front();
252 nvmeDevice->sensors.emplace_back(sensor);
253
254 nvmeDevice->nvmeSlaveSocket.cancel();
255 });
256
257 readResponse(nvmeDevice);
258
259 if (DEBUG)
260 {
261 std::cout << "Sending message to read data from Drive on bus: "
262 << sensor->bus << " , rootBus: " << nvmeDevice->rootBus
263 << " device: " << sensor->name << "\n";
264 }
265
266 mctp_smbus_set_out_fd(nvmeMCTP::smbus, nvmeMCTP::getOutFd(sensor->bus));
267 int rc = nvmeMessageTransmit(*nvmeMCTP::mctp, requestMsg);
268
269 if (rc != 0)
270 {
271 std::cerr << "Error sending request message to NVMe device\n";
272 }
273}
274
275static double getTemperatureReading(int8_t reading)
276{
277
278 if (reading == static_cast<int8_t>(0x80) ||
279 reading == static_cast<int8_t>(0x81))
280 {
281 // 0x80 = No temperature data or temperature data is more the 5 s
282 // old 0x81 = Temperature sensor failure
283 return maxReading;
284 }
285
286 return reading;
287}
288
289void rxMessage(uint8_t eid, void*, void* msg, size_t len)
290{
291 struct nvme_mi_msg_response_header header
James Feist38fb5982020-05-28 10:09:54 -0700292 {};
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700293
294 int inFd = mctp_smbus_get_in_fd(nvmeMCTP::smbus);
295 int rootBus = nvmeMCTP::getRootBus(inFd);
296
297 NVMEMap& nvmeMap = getNVMEMap();
298 auto findMap = nvmeMap.find(rootBus);
299 if (findMap == nvmeMap.end())
300 {
301 std::cerr << "Unable to lookup root bus " << rootBus << "\n";
302 return;
303 }
304 std::shared_ptr<NVMeContext>& self = findMap->second;
305
306 if (msg == nullptr)
307 {
308 std::cerr << "Bad message received\n";
309 return;
310 }
311
312 if (len <= 0)
313 {
314 std::cerr << "Received message not long enough\n";
315 return;
316 }
317
318 if (DEBUG)
319 {
320 std::cout << "Eid from the received messaged: " << eid << "\n";
321 }
322
323 uint8_t* messageData = static_cast<uint8_t*>(msg);
324
325 if ((*messageData & NVME_MI_MESSAGE_TYPE_MASK) != NVME_MI_MESSAGE_TYPE)
326 {
327 std::cerr << "Got unknown type message_type="
328 << (*messageData & NVME_MI_MESSAGE_TYPE_MASK) << "\n";
329 return;
330 }
331
332 if (len < NVME_MI_MSG_RESPONSE_HEADER_SIZE + sizeof(uint32_t))
333 {
334 std::cerr << "Not enough bytes for NVMe header and trailer\n";
335 return;
336 }
337
338 if (verifyIntegrity(messageData, len) != 0)
339 {
340 std::cerr << "Verification of message integrity failed\n";
341 return;
342 }
343
344 header.message_type = messageData[0];
345 header.flags = messageData[1];
346 header.status = messageData[4];
347
348 if (header.status == NVME_MI_HDR_STATUS_MORE_PROCESSING_REQUIRED)
349 {
350 return;
351 }
352
353 if (header.status != NVME_MI_HDR_STATUS_SUCCESS)
354 {
355 std::cerr << "Command failed with status= " << header.status << "\n";
356 return;
357 }
358
359 messageData += NVME_MI_MSG_RESPONSE_HEADER_SIZE;
360 size_t messageLength =
361 len - NVME_MI_MSG_RESPONSE_HEADER_SIZE - sizeof(uint32_t);
362 if (((header.flags >> NVME_MI_HDR_FLAG_MSG_TYPE_SHIFT) &
363 NVME_MI_HDR_FLAG_MSG_TYPE_MASK) != NVME_MI_HDR_MESSAGE_TYPE_MI_COMMAND)
364 {
365 std::cerr << "Not MI type comamnd\n";
366 return;
367 }
368
369 if (messageLength < NVME_MI_HEALTH_STATUS_POLL_MSG_MIN)
370 {
371 std::cerr << "Got improperly sized health status poll\n";
372 return;
373 }
374
375 std::shared_ptr<NVMeSensor> sensorInfo = self->sensors.front();
376 if (DEBUG)
377 {
378 std::cout << "Temperature Reading: "
379 << getTemperatureReading(messageData[5])
380 << " Celsius for device " << sensorInfo->name << "\n";
381 }
382
383 sensorInfo->updateValue(getTemperatureReading(messageData[5]));
384
385 if (DEBUG)
386 {
387 std::cout << "Cancelling the timer now\n";
388 }
389
390 // move to back of scan queue
391 self->sensors.pop_front();
392 self->sensors.emplace_back(sensorInfo);
393
394 self->mctpResponseTimer.cancel();
395}
396
397NVMeContext::NVMeContext(boost::asio::io_service& io, int rootBus) :
398 rootBus(rootBus), scanTimer(io), nvmeSlaveSocket(io), mctpResponseTimer(io)
399{
400 nvmeSlaveSocket.assign(boost::asio::ip::tcp::v4(),
401 nvmeMCTP::getInFd(rootBus));
402}
403
404void NVMeContext::pollNVMeDevices()
405{
406 scanTimer.expires_from_now(boost::posix_time::seconds(1));
407 scanTimer.async_wait(
408 [self{shared_from_this()}](const boost::system::error_code errorCode) {
409 if (errorCode == boost::asio::error::operation_aborted)
410 {
411 return; // we're being canceled
412 }
413 else if (errorCode)
414 {
415 std::cerr << "Error:" << errorCode.message() << "\n";
416 return;
417 }
418 else
419 {
420 readAndProcessNVMeSensor(self);
421 }
422
423 self->pollNVMeDevices();
424 });
425}
426
427NVMeContext::~NVMeContext()
428{
429 scanTimer.cancel();
430 mctpResponseTimer.cancel();
431 nvmeSlaveSocket.cancel();
432 nvmeMCTP::closeInFd(rootBus);
433}
434
435NVMeSensor::NVMeSensor(sdbusplus::asio::object_server& objectServer,
436 boost::asio::io_service& io,
437 std::shared_ptr<sdbusplus::asio::connection>& conn,
438 const std::string& sensorName,
439 std::vector<thresholds::Threshold>&& _thresholds,
440 const std::string& sensorConfiguration,
441 const int busNumber) :
442 Sensor(boost::replace_all_copy(sensorName, " ", "_"),
443 std::move(_thresholds), sensorConfiguration,
444 "xyz.openbmc_project.Configuration.NVMe", maxReading, minReading),
445 objServer(objectServer), errorCount(0), bus(busNumber)
446{
447 sensorInterface = objectServer.add_interface(
448 "/xyz/openbmc_project/sensors/temperature/" + name,
449 "xyz.openbmc_project.Sensor.Value");
450
451 if (thresholds::hasWarningInterface(thresholds))
452 {
453 thresholdInterfaceWarning = objectServer.add_interface(
454 "/xyz/openbmc_project/sensors/temperature/" + name,
455 "xyz.openbmc_project.Sensor.Threshold.Warning");
456 }
457 if (thresholds::hasCriticalInterface(thresholds))
458 {
459 thresholdInterfaceCritical = objectServer.add_interface(
460 "/xyz/openbmc_project/sensors/temperature/" + name,
461 "xyz.openbmc_project.Sensor.Threshold.Critical");
462 }
463 association = objectServer.add_interface(
464 "/xyz/openbmc_project/sensors/temperature/" + name,
465 association::interface);
466
467 setInitialProperties(conn);
468 // setup match
469 setupPowerMatch(conn);
470}
471
472NVMeSensor::~NVMeSensor()
473{
474 // close the input dev to cancel async operations
475 objServer.remove_interface(thresholdInterfaceWarning);
476 objServer.remove_interface(thresholdInterfaceCritical);
477 objServer.remove_interface(sensorInterface);
478 objServer.remove_interface(association);
479}
480
481void NVMeSensor::checkThresholds(void)
482{
James Feistd3ac4f92020-01-29 15:38:55 -0800483 if (!isPowerOn())
484 {
485 return;
486 }
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700487 thresholds::checkThresholds(this);
488}