blob: c421d1fd9c59f5a570988c1c7e35a301d19d0d7a [file] [log] [blame]
Andrew Jefferydae6e182021-05-21 16:23:07 +09301/*
2// Copyright (c) 2019 Intel Corporation
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15*/
16
17#include "NVMeContext.hpp"
18
19#include "NVMeDevice.hpp"
20
21#include <crc32c.h>
22#include <libmctp-smbus.h>
23#include <libmctp.h>
24
25#include <boost/container/flat_map.hpp>
26
// Compile-time switch for the verbose std::cout tracing used in this file.
static constexpr bool debug{false};

// Receive callback registered with libmctp by nvmeMCTP::init(); the
// definition appears further down in this file.
static void rxMessage(uint8_t eid, void* data, void* msg, size_t len);
30
31namespace nvmeMCTP
32{
33struct mctp_binding_smbus* smbus = mctp_smbus_init();
34struct mctp* mctp = mctp_init();
35
36static boost::container::flat_map<int, int> inFds;
37static boost::container::flat_map<int, int> outFds;
38
39int getInFd(int rootBus)
40{
41 auto findBus = inFds.find(rootBus);
42 if (findBus != inFds.end())
43 {
44 return findBus->second;
45 }
46 int fd = mctp_smbus_open_in_bus(smbus, rootBus);
47 if (fd < 0)
48 {
49 std::cerr << "Error opening IN Bus " << rootBus << "\n";
50 }
51 inFds[rootBus] = fd;
52 return fd;
53}
54
55int getOutFd(int bus)
56{
57 auto findBus = outFds.find(bus);
58 if (findBus != outFds.end())
59 {
60 return findBus->second;
61 }
62 int fd = mctp_smbus_open_out_bus(smbus, bus);
63 if (fd < 0)
64 {
65 std::cerr << "Error opening Out Bus " << bus << "\n";
66 }
67 outFds[bus] = fd;
68 return fd;
69}
70
71// we don't close the outFd as multiple sensors could be sharing the fd, we need
72// to close the inFd as it can only be used on 1 socket at a time
73void closeInFd(int rootBus)
74{
75 auto findFd = inFds.find(rootBus);
76 if (findFd == inFds.end())
77 {
78 return;
79 }
80 close(findFd->second);
81 inFds.erase(rootBus);
82}
83
84int getRootBus(int inFd)
85{
86 // we assume that we won't have too many FDs, so looping is OK
87 for (const auto [root, fd] : inFds)
88 {
89 if (fd == inFd)
90 {
91 return root;
92 }
93 }
94
95 return -1;
96}
97
98void init()
99{
100 if (mctp == nullptr || smbus == nullptr)
101 {
102 throw std::runtime_error("Unable to init mctp");
103 }
104 mctp_smbus_register_bus(smbus, nvmeMCTP::mctp, 0);
105 mctp_set_rx_all(mctp, rxMessage, nullptr);
106}
107
108} // namespace nvmeMCTP
109
110static int verifyIntegrity(uint8_t* msg, size_t len)
111{
112 uint32_t msgIntegrity = {0};
113 if (len < NVME_MI_MSG_RESPONSE_HEADER_SIZE + sizeof(msgIntegrity))
114 {
115 std::cerr << "Not enough bytes for nvme header and trailer\n";
116 return -1;
117 }
118
119 msgIntegrity = (msg[len - 4]) + (msg[len - 3] << 8) + (msg[len - 2] << 16) +
120 (msg[len - 1] << 24);
121
122 uint32_t calculateIntegrity = crc32c(msg, len - sizeof(msgIntegrity));
123 if (msgIntegrity != calculateIntegrity)
124 {
125 std::cerr << "CRC mismatch. Got=" << msgIntegrity
126 << " Expected=" << calculateIntegrity << "\n";
127 return -1;
128 }
129 return 0;
130}
131
/**
 * @brief Convert a raw temperature byte into a sensor value.
 *
 * @param reading signed raw byte taken from the response
 * @return the byte as a double, or quiet NaN for the two sentinel encodings
 *         0x80 and 0x81
 */
static double getTemperatureReading(int8_t reading)
{
    switch (static_cast<uint8_t>(reading))
    {
        case 0x80: // no temperature data, or the data is more than 5 s old
        case 0x81: // temperature sensor failure
            return std::numeric_limits<double>::quiet_NaN();
        default:
            return static_cast<double>(reading);
    }
}
145
146static void rxMessage(uint8_t eid, void*, void* msg, size_t len)
147{
148 struct nvme_mi_msg_response_header header
149 {};
150
151 int inFd = mctp_smbus_get_in_fd(nvmeMCTP::smbus);
152 int rootBus = nvmeMCTP::getRootBus(inFd);
153
154 NVMEMap& nvmeMap = getNVMEMap();
155 auto findMap = nvmeMap.find(rootBus);
156 if (findMap == nvmeMap.end())
157 {
158 std::cerr << "Unable to lookup root bus " << rootBus << "\n";
159 return;
160 }
161 std::shared_ptr<NVMeContext>& self = findMap->second;
162
163 if (msg == nullptr)
164 {
165 std::cerr << "Bad message received\n";
166 return;
167 }
168
169 if (len <= 0)
170 {
171 std::cerr << "Received message not long enough\n";
172 return;
173 }
174
175 if (debug)
176 {
177 std::cout << "Eid from the received messaged: " << eid << "\n";
178 }
179
180 uint8_t* messageData = static_cast<uint8_t*>(msg);
181
182 if ((*messageData & NVME_MI_MESSAGE_TYPE_MASK) != NVME_MI_MESSAGE_TYPE)
183 {
184 std::cerr << "Got unknown type message_type="
185 << (*messageData & NVME_MI_MESSAGE_TYPE_MASK) << "\n";
186 return;
187 }
188
189 if (len < NVME_MI_MSG_RESPONSE_HEADER_SIZE + sizeof(uint32_t))
190 {
191 std::cerr << "Not enough bytes for NVMe header and trailer\n";
192 return;
193 }
194
195 if (verifyIntegrity(messageData, len) != 0)
196 {
197 std::cerr << "Verification of message integrity failed\n";
198 return;
199 }
200
201 header.message_type = messageData[0];
202 header.flags = messageData[1];
203 header.status = messageData[4];
204
205 if (header.status == NVME_MI_HDR_STATUS_MORE_PROCESSING_REQUIRED)
206 {
207 return;
208 }
209
210 if (header.status != NVME_MI_HDR_STATUS_SUCCESS)
211 {
212 std::cerr << "Command failed with status= " << header.status << "\n";
213 return;
214 }
215
216 messageData += NVME_MI_MSG_RESPONSE_HEADER_SIZE;
217 size_t messageLength =
218 len - NVME_MI_MSG_RESPONSE_HEADER_SIZE - sizeof(uint32_t);
219 if (((header.flags >> NVME_MI_HDR_FLAG_MSG_TYPE_SHIFT) &
220 NVME_MI_HDR_FLAG_MSG_TYPE_MASK) != NVME_MI_HDR_MESSAGE_TYPE_MI_COMMAND)
221 {
222 std::cerr << "Not MI type comamnd\n";
223 return;
224 }
225
226 if (messageLength < NVME_MI_HEALTH_STATUS_POLL_MSG_MIN)
227 {
228 std::cerr << "Got improperly sized health status poll\n";
229 return;
230 }
231
232 std::shared_ptr<NVMeSensor> sensorInfo = self->sensors.front();
233 if (debug)
234 {
235 std::cout << "Temperature Reading: "
236 << getTemperatureReading(messageData[5])
237 << " Celsius for device " << sensorInfo->name << "\n";
238 }
239
240 double value = getTemperatureReading(messageData[5]);
241 if (!std::isfinite(value))
242 {
243 sensorInfo->markAvailable(false);
244 sensorInfo->incrementError();
245 }
246 else
247 {
248 sensorInfo->updateValue(value);
249 }
250
251 if (debug)
252 {
253 std::cout << "Cancelling the timer now\n";
254 }
255
256 // move to back of scan queue
257 self->sensors.pop_front();
258 self->sensors.emplace_back(sensorInfo);
259
260 self->mctpResponseTimer.cancel();
261}
262
263static int nvmeMessageTransmit(mctp& mctp, nvme_mi_msg_request& req)
264{
265 std::array<uint8_t, NVME_MI_MSG_BUFFER_SIZE> messageBuf = {};
266
267 req.header.flags |= NVME_MI_HDR_MESSAGE_TYPE_MI_COMMAND
268 << NVME_MI_HDR_FLAG_MSG_TYPE_SHIFT;
269 req.header.message_type =
270 NVME_MI_MESSAGE_TYPE | NVME_MI_MCTP_INTEGRITY_CHECK;
271
272 uint32_t integrity = 0;
273 size_t msgSize = NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len +
274 sizeof(integrity);
275
276 if (sizeof(messageBuf) < msgSize)
277 {
278 return EXIT_FAILURE;
279 }
280
281 messageBuf[0] = req.header.message_type;
282 messageBuf[1] = req.header.flags;
283 // Reserved bytes 2-3
284
285 messageBuf[4] = req.header.opcode;
286 // reserved bytes 5-7
287 messageBuf[8] = req.header.dword0 & 0xff;
288 messageBuf[9] = (req.header.dword0 >> 8) & 0xff;
289 messageBuf[10] = (req.header.dword0 >> 16) & 0xff;
290 messageBuf[11] = (req.header.dword0 >> 24) & 0xff;
291
292 messageBuf[12] = req.header.dword1 & 0xff;
293 messageBuf[13] = (req.header.dword1 >> 8) & 0xff;
294 messageBuf[14] = (req.header.dword1 >> 16) & 0xff;
295 messageBuf[15] = (req.header.dword1 >> 24) & 0xff;
296
297 std::copy_n(req.request_data, req.request_data_len,
298 messageBuf.data() +
299 static_cast<uint8_t>(NVME_MI_MSG_REQUEST_HEADER_SIZE));
300
301 msgSize = NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len;
302 integrity = crc32c(messageBuf.data(),
303 NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len);
304 messageBuf[msgSize] = integrity & 0xff;
305 messageBuf[msgSize + 1] = (integrity >> 8) & 0xff;
306 messageBuf[msgSize + 2] = (integrity >> 16) & 0xff;
307 messageBuf[msgSize + 3] = (integrity >> 24) & 0xff;
308 msgSize += sizeof(integrity);
309
310 return mctp_message_tx(&mctp, 0, messageBuf.data(), msgSize);
311}
312
313static void readResponse(const std::shared_ptr<NVMeContext>& nvmeDevice)
314{
315 nvmeDevice->nvmeSlaveSocket.async_wait(
316 boost::asio::ip::tcp::socket::wait_error,
317 [nvmeDevice](const boost::system::error_code errorCode) {
318 if (errorCode)
319 {
320 return;
321 }
322
323 mctp_smbus_set_in_fd(nvmeMCTP::smbus,
324 nvmeMCTP::getInFd(nvmeDevice->rootBus));
325
326 // through libmctp this will invoke rxMessage
327 mctp_smbus_read(nvmeMCTP::smbus);
328 });
329}
330
331static void
332 readAndProcessNVMeSensor(const std::shared_ptr<NVMeContext>& nvmeDevice)
333{
334 struct nvme_mi_msg_request requestMsg = {};
335 requestMsg.header.opcode = NVME_MI_OPCODE_HEALTH_STATUS_POLL;
336 requestMsg.header.dword0 = 0;
337 requestMsg.header.dword1 = 0;
338
339 int mctpResponseTimeout = 1;
340
341 if (nvmeDevice->sensors.empty())
342 {
343 return;
344 }
345
346 std::shared_ptr<NVMeSensor>& sensor = nvmeDevice->sensors.front();
347
348 // setup the timeout timer
349 nvmeDevice->mctpResponseTimer.expires_from_now(
350 boost::posix_time::seconds(mctpResponseTimeout));
351
352 nvmeDevice->mctpResponseTimer.async_wait(
353 [sensor, nvmeDevice](const boost::system::error_code errorCode) {
354 if (errorCode)
355 {
356 // timer cancelled successfully
357 return;
358 }
359
360 sensor->incrementError();
361
362 // cycle it back
363 nvmeDevice->sensors.pop_front();
364 nvmeDevice->sensors.emplace_back(sensor);
365
366 nvmeDevice->nvmeSlaveSocket.cancel();
367 });
368
369 readResponse(nvmeDevice);
370
371 if (debug)
372 {
373 std::cout << "Sending message to read data from Drive on bus: "
374 << sensor->bus << " , rootBus: " << nvmeDevice->rootBus
375 << " device: " << sensor->name << "\n";
376 }
377
378 mctp_smbus_set_out_fd(nvmeMCTP::smbus, nvmeMCTP::getOutFd(sensor->bus));
379 int rc = nvmeMessageTransmit(*nvmeMCTP::mctp, requestMsg);
380
381 if (rc != 0)
382 {
383 std::cerr << "Error sending request message to NVMe device\n";
384 }
385}
386
387NVMeContext::NVMeContext(boost::asio::io_service& io, int rootBus) :
388 scanTimer(io), rootBus(rootBus), mctpResponseTimer(io), nvmeSlaveSocket(io)
389{
390 nvmeSlaveSocket.assign(boost::asio::ip::tcp::v4(),
391 nvmeMCTP::getInFd(rootBus));
392}
393
394void NVMeContext::pollNVMeDevices()
395{
396 scanTimer.expires_from_now(boost::posix_time::seconds(1));
397 scanTimer.async_wait(
398 [self{shared_from_this()}](const boost::system::error_code errorCode) {
399 if (errorCode == boost::asio::error::operation_aborted)
400 {
401 return; // we're being canceled
402 }
403 else if (errorCode)
404 {
405 std::cerr << "Error:" << errorCode.message() << "\n";
406 return;
407 }
408 else
409 {
410 readAndProcessNVMeSensor(self);
411 }
412
413 self->pollNVMeDevices();
414 });
415}
416
417void NVMeContext::close()
418{
419 scanTimer.cancel();
420 mctpResponseTimer.cancel();
421 nvmeSlaveSocket.cancel();
422 nvmeMCTP::closeInFd(rootBus);
423}
424
425NVMeContext::~NVMeContext()
426{
427 close();
428}