blob: e993833b90fbac5180327aaf1858eb8348f79819 [file] [log] [blame]
/*
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
16
17#include "NVMeSensor.hpp"
18
19#include "NVMeDevice.hpp"
20
21#include <crc32c.h>
22#include <libmctp-smbus.h>
23
24#include <boost/algorithm/string/replace.hpp>
25#include <boost/asio/ip/tcp.hpp>
26#include <iostream>
27
// Bounds of the reportable temperature range; they are handed to the Sensor
// base class by the NVMeSensor constructor, and maxReading doubles as the
// value reported when a drive has no usable temperature data.
static constexpr double maxReading = 127.0;
static constexpr double minReading = 0.0;

// Compile-time switch for the extra trace output written to stdout.
static constexpr bool DEBUG = false;

// Receive callback handed to libmctp by nvmeMCTP::init(); defined further down.
void rxMessage(uint8_t eid, void* data, void* msg, size_t len);
34
35namespace nvmeMCTP
36{
37struct mctp_binding_smbus* smbus = mctp_smbus_init();
38struct mctp* mctp = mctp_init();
39
40static boost::container::flat_map<int, int> inFds;
41static boost::container::flat_map<int, int> outFds;
42
43int getInFd(int rootBus)
44{
45 auto findBus = inFds.find(rootBus);
46 if (findBus != inFds.end())
47 {
48 return findBus->second;
49 }
50 int fd = mctp_smbus_open_in_bus(smbus, rootBus);
51 if (fd < 0)
52 {
53 std::cerr << "Error opening IN Bus " << rootBus << "\n";
54 }
55 inFds[rootBus] = fd;
56 return fd;
57}
58
59int getOutFd(int bus)
60{
61 auto findBus = outFds.find(bus);
62 if (findBus != outFds.end())
63 {
64 return findBus->second;
65 }
66 int fd = mctp_smbus_open_out_bus(smbus, bus);
67 if (fd < 0)
68 {
69 std::cerr << "Error opening Out Bus " << bus << "\n";
70 }
71 outFds[bus] = fd;
72 return fd;
73}
74
75// we don't close the outFd as multiple sensors could be sharing the fd, we need
76// to close the inFd as it can only be used on 1 socket at a time
77void closeInFd(int rootBus)
78{
79 auto findFd = inFds.find(rootBus);
80 if (findFd == inFds.end())
81 {
82 return;
83 }
84 close(findFd->second);
85 inFds.erase(rootBus);
86}
87
88int getRootBus(int inFd)
89{
90 // we assume that we won't have too many FDs, so looping is OK
91 for (const auto [root, fd] : inFds)
92 {
93 if (fd == inFd)
94 {
95 return root;
96 }
97 }
98
99 return -1;
100}
101
102void init()
103{
104 if (mctp == nullptr || smbus == nullptr)
105 {
106 throw std::runtime_error("Unable to init mctp");
107 }
108 mctp_smbus_register_bus(smbus, nvmeMCTP::mctp, 0);
109 mctp_set_rx_all(mctp, rxMessage, nullptr);
110}
111
112} // namespace nvmeMCTP
113
114static int lastQueriedDeviceIndex = -1;
115
116void readResponse(const std::shared_ptr<NVMeContext>& nvmeDevice)
117{
118 nvmeDevice->nvmeSlaveSocket.async_wait(
119 boost::asio::ip::tcp::socket::wait_error,
120 [nvmeDevice](const boost::system::error_code errorCode) {
121 if (errorCode)
122 {
123 return;
124 }
125
126 mctp_smbus_set_in_fd(nvmeMCTP::smbus,
127 nvmeMCTP::getInFd(nvmeDevice->rootBus));
128
129 // through libmctp this will invoke rxMessage
130 mctp_smbus_read(nvmeMCTP::smbus);
131 });
132}
133
134int nvmeMessageTransmit(mctp& mctp, nvme_mi_msg_request& req)
135{
136 std::array<uint8_t, NVME_MI_MSG_BUFFER_SIZE> messageBuf = {};
137
138 req.header.flags |= NVME_MI_HDR_MESSAGE_TYPE_MI_COMMAND
139 << NVME_MI_HDR_FLAG_MSG_TYPE_SHIFT;
140 req.header.message_type =
141 NVME_MI_MESSAGE_TYPE | NVME_MI_MCTP_INTEGRITY_CHECK;
142
143 uint32_t integrity = 0;
144 size_t msgSize = NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len +
145 sizeof(integrity);
146
147 if (sizeof(messageBuf) < msgSize)
148 {
149 return EXIT_FAILURE;
150 }
151
152 messageBuf[0] = req.header.message_type;
153 messageBuf[1] = req.header.flags;
154 // Reserved bytes 2-3
155
156 messageBuf[4] = req.header.opcode;
157 // reserved bytes 5-7
158 messageBuf[8] = req.header.dword0 & 0xff;
159 messageBuf[9] = (req.header.dword0 >> 8) & 0xff;
160 messageBuf[10] = (req.header.dword0 >> 16) & 0xff;
161 messageBuf[11] = (req.header.dword0 >> 24) & 0xff;
162
163 messageBuf[12] = req.header.dword1 & 0xff;
164 messageBuf[13] = (req.header.dword1 >> 8) & 0xff;
165 messageBuf[14] = (req.header.dword1 >> 16) & 0xff;
166 messageBuf[15] = (req.header.dword1 >> 24) & 0xff;
167
168 std::copy_n(req.request_data, req.request_data_len,
169 messageBuf.data() +
170 static_cast<uint8_t>(NVME_MI_MSG_REQUEST_HEADER_SIZE));
171
172 msgSize = NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len;
173 integrity = crc32c(messageBuf.data(),
174 NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len);
175 messageBuf[msgSize] = integrity & 0xff;
176 messageBuf[msgSize + 1] = (integrity >> 8) & 0xff;
177 messageBuf[msgSize + 2] = (integrity >> 16) & 0xff;
178 messageBuf[msgSize + 3] = (integrity >> 24) & 0xff;
179 msgSize += sizeof(integrity);
180
181 return mctp_message_tx(&mctp, 0, messageBuf.data(), msgSize);
182}
183
184int verifyIntegrity(uint8_t* msg, size_t len)
185{
186 uint32_t msgIntegrity = {0};
187 if (len < NVME_MI_MSG_RESPONSE_HEADER_SIZE + sizeof(msgIntegrity))
188 {
189 std::cerr << "Not enough bytes for nvme header and trailer\n";
190 return -1;
191 }
192
193 msgIntegrity = (msg[len - 4]) + (msg[len - 3] << 8) + (msg[len - 2] << 16) +
194 (msg[len - 1] << 24);
195
196 uint32_t calculateIntegrity = crc32c(msg, len - sizeof(msgIntegrity));
197 if (msgIntegrity != calculateIntegrity)
198 {
199 std::cerr << "CRC mismatch. Got=" << msgIntegrity
200 << " Expected=" << calculateIntegrity << "\n";
201 return -1;
202 }
203 return 0;
204}
205
206void readAndProcessNVMeSensor(const std::shared_ptr<NVMeContext>& nvmeDevice)
207{
208 struct nvme_mi_msg_request requestMsg = {};
209 requestMsg.header.opcode = NVME_MI_OPCODE_HEALTH_STATUS_POLL;
210 requestMsg.header.dword0 = 0;
211 requestMsg.header.dword1 = 0;
212
213 int mctpResponseTimeout = 1;
214
215 if (nvmeDevice->sensors.empty())
216 {
217 return;
218 }
219
220 std::shared_ptr<NVMeSensor>& sensor = nvmeDevice->sensors.front();
221
222 // setup the timeout timer
223 nvmeDevice->mctpResponseTimer.expires_from_now(
224 boost::posix_time::seconds(mctpResponseTimeout));
225
226 nvmeDevice->mctpResponseTimer.async_wait(
227 [sensor, nvmeDevice](const boost::system::error_code errorCode) {
228 constexpr const size_t errorThreshold = 5;
229 if (errorCode)
230 {
James Feistd3ac4f92020-01-29 15:38:55 -0800231 sensor->errorCount = 0;
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700232 return;
233 }
James Feistd3ac4f92020-01-29 15:38:55 -0800234 if (!isPowerOn())
235 {
236 sensor->errorCount = 0;
237 sensor->updateValue(std::numeric_limits<double>::quiet_NaN());
238 }
239 else if (sensor->errorCount < errorThreshold)
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700240 {
241 std::cerr << "MCTP timeout device " << sensor->name << "\n";
242 sensor->errorCount++;
243 }
244 else
245 {
246 sensor->updateValue(0);
247 }
248
249 // cycle it back
250 nvmeDevice->sensors.pop_front();
251 nvmeDevice->sensors.emplace_back(sensor);
252
253 nvmeDevice->nvmeSlaveSocket.cancel();
254 });
255
256 readResponse(nvmeDevice);
257
258 if (DEBUG)
259 {
260 std::cout << "Sending message to read data from Drive on bus: "
261 << sensor->bus << " , rootBus: " << nvmeDevice->rootBus
262 << " device: " << sensor->name << "\n";
263 }
264
265 mctp_smbus_set_out_fd(nvmeMCTP::smbus, nvmeMCTP::getOutFd(sensor->bus));
266 int rc = nvmeMessageTransmit(*nvmeMCTP::mctp, requestMsg);
267
268 if (rc != 0)
269 {
270 std::cerr << "Error sending request message to NVMe device\n";
271 }
272}
273
274static double getTemperatureReading(int8_t reading)
275{
276
277 if (reading == static_cast<int8_t>(0x80) ||
278 reading == static_cast<int8_t>(0x81))
279 {
280 // 0x80 = No temperature data or temperature data is more the 5 s
281 // old 0x81 = Temperature sensor failure
282 return maxReading;
283 }
284
285 return reading;
286}
287
288void rxMessage(uint8_t eid, void*, void* msg, size_t len)
289{
290 struct nvme_mi_msg_response_header header
291 {
292 };
293
294 int inFd = mctp_smbus_get_in_fd(nvmeMCTP::smbus);
295 int rootBus = nvmeMCTP::getRootBus(inFd);
296
297 NVMEMap& nvmeMap = getNVMEMap();
298 auto findMap = nvmeMap.find(rootBus);
299 if (findMap == nvmeMap.end())
300 {
301 std::cerr << "Unable to lookup root bus " << rootBus << "\n";
302 return;
303 }
304 std::shared_ptr<NVMeContext>& self = findMap->second;
305
306 if (msg == nullptr)
307 {
308 std::cerr << "Bad message received\n";
309 return;
310 }
311
312 if (len <= 0)
313 {
314 std::cerr << "Received message not long enough\n";
315 return;
316 }
317
318 if (DEBUG)
319 {
320 std::cout << "Eid from the received messaged: " << eid << "\n";
321 }
322
323 uint8_t* messageData = static_cast<uint8_t*>(msg);
324
325 if ((*messageData & NVME_MI_MESSAGE_TYPE_MASK) != NVME_MI_MESSAGE_TYPE)
326 {
327 std::cerr << "Got unknown type message_type="
328 << (*messageData & NVME_MI_MESSAGE_TYPE_MASK) << "\n";
329 return;
330 }
331
332 if (len < NVME_MI_MSG_RESPONSE_HEADER_SIZE + sizeof(uint32_t))
333 {
334 std::cerr << "Not enough bytes for NVMe header and trailer\n";
335 return;
336 }
337
338 if (verifyIntegrity(messageData, len) != 0)
339 {
340 std::cerr << "Verification of message integrity failed\n";
341 return;
342 }
343
344 header.message_type = messageData[0];
345 header.flags = messageData[1];
346 header.status = messageData[4];
347
348 if (header.status == NVME_MI_HDR_STATUS_MORE_PROCESSING_REQUIRED)
349 {
350 return;
351 }
352
353 if (header.status != NVME_MI_HDR_STATUS_SUCCESS)
354 {
355 std::cerr << "Command failed with status= " << header.status << "\n";
356 return;
357 }
358
359 messageData += NVME_MI_MSG_RESPONSE_HEADER_SIZE;
360 size_t messageLength =
361 len - NVME_MI_MSG_RESPONSE_HEADER_SIZE - sizeof(uint32_t);
362 if (((header.flags >> NVME_MI_HDR_FLAG_MSG_TYPE_SHIFT) &
363 NVME_MI_HDR_FLAG_MSG_TYPE_MASK) != NVME_MI_HDR_MESSAGE_TYPE_MI_COMMAND)
364 {
365 std::cerr << "Not MI type comamnd\n";
366 return;
367 }
368
369 if (messageLength < NVME_MI_HEALTH_STATUS_POLL_MSG_MIN)
370 {
371 std::cerr << "Got improperly sized health status poll\n";
372 return;
373 }
374
375 std::shared_ptr<NVMeSensor> sensorInfo = self->sensors.front();
376 if (DEBUG)
377 {
378 std::cout << "Temperature Reading: "
379 << getTemperatureReading(messageData[5])
380 << " Celsius for device " << sensorInfo->name << "\n";
381 }
382
383 sensorInfo->updateValue(getTemperatureReading(messageData[5]));
384
385 if (DEBUG)
386 {
387 std::cout << "Cancelling the timer now\n";
388 }
389
390 // move to back of scan queue
391 self->sensors.pop_front();
392 self->sensors.emplace_back(sensorInfo);
393
394 self->mctpResponseTimer.cancel();
395}
396
397NVMeContext::NVMeContext(boost::asio::io_service& io, int rootBus) :
398 rootBus(rootBus), scanTimer(io), nvmeSlaveSocket(io), mctpResponseTimer(io)
399{
400 nvmeSlaveSocket.assign(boost::asio::ip::tcp::v4(),
401 nvmeMCTP::getInFd(rootBus));
402}
403
404void NVMeContext::pollNVMeDevices()
405{
406 scanTimer.expires_from_now(boost::posix_time::seconds(1));
407 scanTimer.async_wait(
408 [self{shared_from_this()}](const boost::system::error_code errorCode) {
409 if (errorCode == boost::asio::error::operation_aborted)
410 {
411 return; // we're being canceled
412 }
413 else if (errorCode)
414 {
415 std::cerr << "Error:" << errorCode.message() << "\n";
416 return;
417 }
418 else
419 {
420 readAndProcessNVMeSensor(self);
421 }
422
423 self->pollNVMeDevices();
424 });
425}
426
427NVMeContext::~NVMeContext()
428{
429 scanTimer.cancel();
430 mctpResponseTimer.cancel();
431 nvmeSlaveSocket.cancel();
432 nvmeMCTP::closeInFd(rootBus);
433}
434
435NVMeSensor::NVMeSensor(sdbusplus::asio::object_server& objectServer,
436 boost::asio::io_service& io,
437 std::shared_ptr<sdbusplus::asio::connection>& conn,
438 const std::string& sensorName,
439 std::vector<thresholds::Threshold>&& _thresholds,
440 const std::string& sensorConfiguration,
441 const int busNumber) :
442 Sensor(boost::replace_all_copy(sensorName, " ", "_"),
443 std::move(_thresholds), sensorConfiguration,
444 "xyz.openbmc_project.Configuration.NVMe", maxReading, minReading),
445 objServer(objectServer), errorCount(0), bus(busNumber)
446{
447 sensorInterface = objectServer.add_interface(
448 "/xyz/openbmc_project/sensors/temperature/" + name,
449 "xyz.openbmc_project.Sensor.Value");
450
451 if (thresholds::hasWarningInterface(thresholds))
452 {
453 thresholdInterfaceWarning = objectServer.add_interface(
454 "/xyz/openbmc_project/sensors/temperature/" + name,
455 "xyz.openbmc_project.Sensor.Threshold.Warning");
456 }
457 if (thresholds::hasCriticalInterface(thresholds))
458 {
459 thresholdInterfaceCritical = objectServer.add_interface(
460 "/xyz/openbmc_project/sensors/temperature/" + name,
461 "xyz.openbmc_project.Sensor.Threshold.Critical");
462 }
463 association = objectServer.add_interface(
464 "/xyz/openbmc_project/sensors/temperature/" + name,
465 association::interface);
466
467 setInitialProperties(conn);
468 // setup match
469 setupPowerMatch(conn);
470}
471
472NVMeSensor::~NVMeSensor()
473{
474 // close the input dev to cancel async operations
475 objServer.remove_interface(thresholdInterfaceWarning);
476 objServer.remove_interface(thresholdInterfaceCritical);
477 objServer.remove_interface(sensorInterface);
478 objServer.remove_interface(association);
479}
480
481void NVMeSensor::checkThresholds(void)
482{
James Feistd3ac4f92020-01-29 15:38:55 -0800483 if (!isPowerOn())
484 {
485 return;
486 }
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700487 thresholds::checkThresholds(this);
488}