/*
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
16
17#include "NVMeSensor.hpp"
18
19#include "NVMeDevice.hpp"
20
21#include <crc32c.h>
22#include <libmctp-smbus.h>
23
24#include <boost/algorithm/string/replace.hpp>
25#include <boost/asio/ip/tcp.hpp>
James Feist38fb5982020-05-28 10:09:54 -070026
Nikhil Potadeb669b6b2019-03-13 10:52:21 -070027#include <iostream>
28
// Upper and lower bounds handed to the Sensor base class for every NVMe
// temperature sensor created in this file.
static constexpr double maxReading = 127.0;
static constexpr double minReading = 0.0;

// Compile-time switch for the verbose std::cout tracing in this file.
static constexpr bool DEBUG = false;

// Receive callback registered with libmctp by nvmeMCTP::init(); it is defined
// further down in this file.
void rxMessage(uint8_t eid, void* data, void* msg, size_t len);
35
36namespace nvmeMCTP
37{
38struct mctp_binding_smbus* smbus = mctp_smbus_init();
39struct mctp* mctp = mctp_init();
40
41static boost::container::flat_map<int, int> inFds;
42static boost::container::flat_map<int, int> outFds;
43
44int getInFd(int rootBus)
45{
46 auto findBus = inFds.find(rootBus);
47 if (findBus != inFds.end())
48 {
49 return findBus->second;
50 }
51 int fd = mctp_smbus_open_in_bus(smbus, rootBus);
52 if (fd < 0)
53 {
54 std::cerr << "Error opening IN Bus " << rootBus << "\n";
55 }
56 inFds[rootBus] = fd;
57 return fd;
58}
59
60int getOutFd(int bus)
61{
62 auto findBus = outFds.find(bus);
63 if (findBus != outFds.end())
64 {
65 return findBus->second;
66 }
67 int fd = mctp_smbus_open_out_bus(smbus, bus);
68 if (fd < 0)
69 {
70 std::cerr << "Error opening Out Bus " << bus << "\n";
71 }
72 outFds[bus] = fd;
73 return fd;
74}
75
76// we don't close the outFd as multiple sensors could be sharing the fd, we need
77// to close the inFd as it can only be used on 1 socket at a time
78void closeInFd(int rootBus)
79{
80 auto findFd = inFds.find(rootBus);
81 if (findFd == inFds.end())
82 {
83 return;
84 }
85 close(findFd->second);
86 inFds.erase(rootBus);
87}
88
89int getRootBus(int inFd)
90{
91 // we assume that we won't have too many FDs, so looping is OK
92 for (const auto [root, fd] : inFds)
93 {
94 if (fd == inFd)
95 {
96 return root;
97 }
98 }
99
100 return -1;
101}
102
103void init()
104{
105 if (mctp == nullptr || smbus == nullptr)
106 {
107 throw std::runtime_error("Unable to init mctp");
108 }
109 mctp_smbus_register_bus(smbus, nvmeMCTP::mctp, 0);
110 mctp_set_rx_all(mctp, rxMessage, nullptr);
111}
112
113} // namespace nvmeMCTP
114
// NOTE(review): not referenced anywhere in the visible part of this file;
// kept because other code may still rely on it.
static int lastQueriedDeviceIndex{-1};
116
117void readResponse(const std::shared_ptr<NVMeContext>& nvmeDevice)
118{
119 nvmeDevice->nvmeSlaveSocket.async_wait(
120 boost::asio::ip::tcp::socket::wait_error,
121 [nvmeDevice](const boost::system::error_code errorCode) {
122 if (errorCode)
123 {
124 return;
125 }
126
127 mctp_smbus_set_in_fd(nvmeMCTP::smbus,
128 nvmeMCTP::getInFd(nvmeDevice->rootBus));
129
130 // through libmctp this will invoke rxMessage
131 mctp_smbus_read(nvmeMCTP::smbus);
132 });
133}
134
135int nvmeMessageTransmit(mctp& mctp, nvme_mi_msg_request& req)
136{
137 std::array<uint8_t, NVME_MI_MSG_BUFFER_SIZE> messageBuf = {};
138
139 req.header.flags |= NVME_MI_HDR_MESSAGE_TYPE_MI_COMMAND
140 << NVME_MI_HDR_FLAG_MSG_TYPE_SHIFT;
141 req.header.message_type =
142 NVME_MI_MESSAGE_TYPE | NVME_MI_MCTP_INTEGRITY_CHECK;
143
144 uint32_t integrity = 0;
145 size_t msgSize = NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len +
146 sizeof(integrity);
147
148 if (sizeof(messageBuf) < msgSize)
149 {
150 return EXIT_FAILURE;
151 }
152
153 messageBuf[0] = req.header.message_type;
154 messageBuf[1] = req.header.flags;
155 // Reserved bytes 2-3
156
157 messageBuf[4] = req.header.opcode;
158 // reserved bytes 5-7
159 messageBuf[8] = req.header.dword0 & 0xff;
160 messageBuf[9] = (req.header.dword0 >> 8) & 0xff;
161 messageBuf[10] = (req.header.dword0 >> 16) & 0xff;
162 messageBuf[11] = (req.header.dword0 >> 24) & 0xff;
163
164 messageBuf[12] = req.header.dword1 & 0xff;
165 messageBuf[13] = (req.header.dword1 >> 8) & 0xff;
166 messageBuf[14] = (req.header.dword1 >> 16) & 0xff;
167 messageBuf[15] = (req.header.dword1 >> 24) & 0xff;
168
169 std::copy_n(req.request_data, req.request_data_len,
170 messageBuf.data() +
171 static_cast<uint8_t>(NVME_MI_MSG_REQUEST_HEADER_SIZE));
172
173 msgSize = NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len;
174 integrity = crc32c(messageBuf.data(),
175 NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len);
176 messageBuf[msgSize] = integrity & 0xff;
177 messageBuf[msgSize + 1] = (integrity >> 8) & 0xff;
178 messageBuf[msgSize + 2] = (integrity >> 16) & 0xff;
179 messageBuf[msgSize + 3] = (integrity >> 24) & 0xff;
180 msgSize += sizeof(integrity);
181
182 return mctp_message_tx(&mctp, 0, messageBuf.data(), msgSize);
183}
184
185int verifyIntegrity(uint8_t* msg, size_t len)
186{
187 uint32_t msgIntegrity = {0};
188 if (len < NVME_MI_MSG_RESPONSE_HEADER_SIZE + sizeof(msgIntegrity))
189 {
190 std::cerr << "Not enough bytes for nvme header and trailer\n";
191 return -1;
192 }
193
194 msgIntegrity = (msg[len - 4]) + (msg[len - 3] << 8) + (msg[len - 2] << 16) +
195 (msg[len - 1] << 24);
196
197 uint32_t calculateIntegrity = crc32c(msg, len - sizeof(msgIntegrity));
198 if (msgIntegrity != calculateIntegrity)
199 {
200 std::cerr << "CRC mismatch. Got=" << msgIntegrity
201 << " Expected=" << calculateIntegrity << "\n";
202 return -1;
203 }
204 return 0;
205}
206
207void readAndProcessNVMeSensor(const std::shared_ptr<NVMeContext>& nvmeDevice)
208{
209 struct nvme_mi_msg_request requestMsg = {};
210 requestMsg.header.opcode = NVME_MI_OPCODE_HEALTH_STATUS_POLL;
211 requestMsg.header.dword0 = 0;
212 requestMsg.header.dword1 = 0;
213
214 int mctpResponseTimeout = 1;
215
216 if (nvmeDevice->sensors.empty())
217 {
218 return;
219 }
220
221 std::shared_ptr<NVMeSensor>& sensor = nvmeDevice->sensors.front();
222
223 // setup the timeout timer
224 nvmeDevice->mctpResponseTimer.expires_from_now(
225 boost::posix_time::seconds(mctpResponseTimeout));
226
227 nvmeDevice->mctpResponseTimer.async_wait(
228 [sensor, nvmeDevice](const boost::system::error_code errorCode) {
229 constexpr const size_t errorThreshold = 5;
230 if (errorCode)
231 {
James Feist961bf092020-07-01 16:38:12 -0700232 // timer cancelled successfully
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700233 return;
234 }
James Feist961bf092020-07-01 16:38:12 -0700235
236 sensor->incrementError();
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700237
238 // cycle it back
239 nvmeDevice->sensors.pop_front();
240 nvmeDevice->sensors.emplace_back(sensor);
241
242 nvmeDevice->nvmeSlaveSocket.cancel();
243 });
244
245 readResponse(nvmeDevice);
246
247 if (DEBUG)
248 {
249 std::cout << "Sending message to read data from Drive on bus: "
250 << sensor->bus << " , rootBus: " << nvmeDevice->rootBus
251 << " device: " << sensor->name << "\n";
252 }
253
254 mctp_smbus_set_out_fd(nvmeMCTP::smbus, nvmeMCTP::getOutFd(sensor->bus));
255 int rc = nvmeMessageTransmit(*nvmeMCTP::mctp, requestMsg);
256
257 if (rc != 0)
258 {
259 std::cerr << "Error sending request message to NVMe device\n";
260 }
261}
262
263static double getTemperatureReading(int8_t reading)
264{
265
266 if (reading == static_cast<int8_t>(0x80) ||
267 reading == static_cast<int8_t>(0x81))
268 {
269 // 0x80 = No temperature data or temperature data is more the 5 s
270 // old 0x81 = Temperature sensor failure
271 return maxReading;
272 }
273
274 return reading;
275}
276
277void rxMessage(uint8_t eid, void*, void* msg, size_t len)
278{
279 struct nvme_mi_msg_response_header header
James Feist38fb5982020-05-28 10:09:54 -0700280 {};
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700281
282 int inFd = mctp_smbus_get_in_fd(nvmeMCTP::smbus);
283 int rootBus = nvmeMCTP::getRootBus(inFd);
284
285 NVMEMap& nvmeMap = getNVMEMap();
286 auto findMap = nvmeMap.find(rootBus);
287 if (findMap == nvmeMap.end())
288 {
289 std::cerr << "Unable to lookup root bus " << rootBus << "\n";
290 return;
291 }
292 std::shared_ptr<NVMeContext>& self = findMap->second;
293
294 if (msg == nullptr)
295 {
296 std::cerr << "Bad message received\n";
297 return;
298 }
299
300 if (len <= 0)
301 {
302 std::cerr << "Received message not long enough\n";
303 return;
304 }
305
306 if (DEBUG)
307 {
308 std::cout << "Eid from the received messaged: " << eid << "\n";
309 }
310
311 uint8_t* messageData = static_cast<uint8_t*>(msg);
312
313 if ((*messageData & NVME_MI_MESSAGE_TYPE_MASK) != NVME_MI_MESSAGE_TYPE)
314 {
315 std::cerr << "Got unknown type message_type="
316 << (*messageData & NVME_MI_MESSAGE_TYPE_MASK) << "\n";
317 return;
318 }
319
320 if (len < NVME_MI_MSG_RESPONSE_HEADER_SIZE + sizeof(uint32_t))
321 {
322 std::cerr << "Not enough bytes for NVMe header and trailer\n";
323 return;
324 }
325
326 if (verifyIntegrity(messageData, len) != 0)
327 {
328 std::cerr << "Verification of message integrity failed\n";
329 return;
330 }
331
332 header.message_type = messageData[0];
333 header.flags = messageData[1];
334 header.status = messageData[4];
335
336 if (header.status == NVME_MI_HDR_STATUS_MORE_PROCESSING_REQUIRED)
337 {
338 return;
339 }
340
341 if (header.status != NVME_MI_HDR_STATUS_SUCCESS)
342 {
343 std::cerr << "Command failed with status= " << header.status << "\n";
344 return;
345 }
346
347 messageData += NVME_MI_MSG_RESPONSE_HEADER_SIZE;
348 size_t messageLength =
349 len - NVME_MI_MSG_RESPONSE_HEADER_SIZE - sizeof(uint32_t);
350 if (((header.flags >> NVME_MI_HDR_FLAG_MSG_TYPE_SHIFT) &
351 NVME_MI_HDR_FLAG_MSG_TYPE_MASK) != NVME_MI_HDR_MESSAGE_TYPE_MI_COMMAND)
352 {
353 std::cerr << "Not MI type comamnd\n";
354 return;
355 }
356
357 if (messageLength < NVME_MI_HEALTH_STATUS_POLL_MSG_MIN)
358 {
359 std::cerr << "Got improperly sized health status poll\n";
360 return;
361 }
362
363 std::shared_ptr<NVMeSensor> sensorInfo = self->sensors.front();
364 if (DEBUG)
365 {
366 std::cout << "Temperature Reading: "
367 << getTemperatureReading(messageData[5])
368 << " Celsius for device " << sensorInfo->name << "\n";
369 }
370
371 sensorInfo->updateValue(getTemperatureReading(messageData[5]));
372
373 if (DEBUG)
374 {
375 std::cout << "Cancelling the timer now\n";
376 }
377
378 // move to back of scan queue
379 self->sensors.pop_front();
380 self->sensors.emplace_back(sensorInfo);
381
382 self->mctpResponseTimer.cancel();
383}
384
385NVMeContext::NVMeContext(boost::asio::io_service& io, int rootBus) :
386 rootBus(rootBus), scanTimer(io), nvmeSlaveSocket(io), mctpResponseTimer(io)
387{
388 nvmeSlaveSocket.assign(boost::asio::ip::tcp::v4(),
389 nvmeMCTP::getInFd(rootBus));
390}
391
392void NVMeContext::pollNVMeDevices()
393{
394 scanTimer.expires_from_now(boost::posix_time::seconds(1));
395 scanTimer.async_wait(
396 [self{shared_from_this()}](const boost::system::error_code errorCode) {
397 if (errorCode == boost::asio::error::operation_aborted)
398 {
399 return; // we're being canceled
400 }
401 else if (errorCode)
402 {
403 std::cerr << "Error:" << errorCode.message() << "\n";
404 return;
405 }
406 else
407 {
408 readAndProcessNVMeSensor(self);
409 }
410
411 self->pollNVMeDevices();
412 });
413}
414
415NVMeContext::~NVMeContext()
416{
417 scanTimer.cancel();
418 mctpResponseTimer.cancel();
419 nvmeSlaveSocket.cancel();
420 nvmeMCTP::closeInFd(rootBus);
421}
422
423NVMeSensor::NVMeSensor(sdbusplus::asio::object_server& objectServer,
424 boost::asio::io_service& io,
425 std::shared_ptr<sdbusplus::asio::connection>& conn,
426 const std::string& sensorName,
427 std::vector<thresholds::Threshold>&& _thresholds,
428 const std::string& sensorConfiguration,
429 const int busNumber) :
430 Sensor(boost::replace_all_copy(sensorName, " ", "_"),
431 std::move(_thresholds), sensorConfiguration,
James Feist961bf092020-07-01 16:38:12 -0700432 "xyz.openbmc_project.Configuration.NVMe", maxReading, minReading,
433 PowerState::on),
434 objServer(objectServer), bus(busNumber)
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700435{
436 sensorInterface = objectServer.add_interface(
437 "/xyz/openbmc_project/sensors/temperature/" + name,
438 "xyz.openbmc_project.Sensor.Value");
439
440 if (thresholds::hasWarningInterface(thresholds))
441 {
442 thresholdInterfaceWarning = objectServer.add_interface(
443 "/xyz/openbmc_project/sensors/temperature/" + name,
444 "xyz.openbmc_project.Sensor.Threshold.Warning");
445 }
446 if (thresholds::hasCriticalInterface(thresholds))
447 {
448 thresholdInterfaceCritical = objectServer.add_interface(
449 "/xyz/openbmc_project/sensors/temperature/" + name,
450 "xyz.openbmc_project.Sensor.Threshold.Critical");
451 }
452 association = objectServer.add_interface(
453 "/xyz/openbmc_project/sensors/temperature/" + name,
454 association::interface);
455
456 setInitialProperties(conn);
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700457}
458
459NVMeSensor::~NVMeSensor()
460{
461 // close the input dev to cancel async operations
462 objServer.remove_interface(thresholdInterfaceWarning);
463 objServer.remove_interface(thresholdInterfaceCritical);
464 objServer.remove_interface(sensorInterface);
465 objServer.remove_interface(association);
466}
467
468void NVMeSensor::checkThresholds(void)
469{
Nikhil Potadeb669b6b2019-03-13 10:52:21 -0700470 thresholds::checkThresholds(this);
471}