blob: eb5ecd944dcd3b644e1dd766ac69436700ac12b1 [file] [log] [blame]
Andrew Jefferye3e3c972021-05-26 14:37:07 +09301#include "NVMeBasicContext.hpp"
2
Ed Tanouseacbfdd2024-04-04 12:00:24 -07003#include "NVMeContext.hpp"
4#include "NVMeSensor.hpp"
5
Andrew Jefferye3e3c972021-05-26 14:37:07 +09306#include <endian.h>
7#include <sys/ioctl.h>
8#include <unistd.h>
9
Ed Tanous73030632022-01-14 10:09:47 -080010#include <FileHandle.hpp>
Ed Tanouseacbfdd2024-04-04 12:00:24 -070011#include <boost/asio/buffer.hpp>
12#include <boost/asio/error.hpp>
13#include <boost/asio/io_context.hpp>
Andrew Jefferye3e3c972021-05-26 14:37:07 +093014#include <boost/asio/read.hpp>
15#include <boost/asio/streambuf.hpp>
16#include <boost/asio/write.hpp>
17
Ed Tanouseacbfdd2024-04-04 12:00:24 -070018#include <array>
Andrew Jefferye3e3c972021-05-26 14:37:07 +093019#include <cerrno>
Ed Tanouseacbfdd2024-04-04 12:00:24 -070020#include <chrono>
21#include <cmath>
22#include <cstdint>
Andrew Jefferye3e3c972021-05-26 14:37:07 +093023#include <cstdio>
24#include <cstring>
Ed Tanouseacbfdd2024-04-04 12:00:24 -070025#include <filesystem>
26#include <ios>
27#include <iostream>
28#include <iterator>
29#include <limits>
30#include <memory>
31#include <stdexcept>
32#include <string>
Andrew Jefferye3e3c972021-05-26 14:37:07 +093033#include <system_error>
Ed Tanouseacbfdd2024-04-04 12:00:24 -070034#include <thread>
35#include <utility>
36#include <vector>
Andrew Jefferye3e3c972021-05-26 14:37:07 +093037
38extern "C"
39{
40#include <i2c/smbus.h>
41#include <linux/i2c-dev.h>
42}
43
44/*
45 * NVMe-MI Basic Management Command
46 *
47 * https://nvmexpress.org/wp-content/uploads/NVMe_Management_-_Technical_Note_on_Basic_Management_Command.pdf
48 */
49
/*
 * Serialise a basic query into the six byte packet exchanged with the worker
 * thread.
 *
 * Layout: bytes 0-3 carry the bus number as a little-endian 32-bit value,
 * byte 4 carries the device address and byte 5 the command offset.
 *
 * Throws std::domain_error when bus is negative.
 */
static std::shared_ptr<std::array<uint8_t, 6>>
    encodeBasicQuery(int bus, uint8_t device, uint8_t offset)
{
    if (bus < 0)
    {
        throw std::domain_error("Invalid bus argument");
    }

    /* bus + address + command */
    constexpr std::size_t busBytes = sizeof(uint32_t);
    const uint32_t wireBus = htole32(static_cast<uint32_t>(bus));

    auto packet = std::make_shared<std::array<uint8_t, busBytes + 1 + 1>>();
    std::memcpy(packet->data(), &wireBus, busBytes);

    /* The two single-byte fields follow directly after the bus number */
    uint8_t* tail = packet->data() + busBytes;
    tail[0] = device;
    tail[1] = offset;

    return packet;
}
68
/*
 * Unpack a six byte basic query packet.
 *
 * Bytes 0-3 are read as a little-endian 32-bit bus number, byte 4 as the
 * device address and byte 5 as the command offset. The results are written
 * to the bus, device and offset output references.
 */
static void decodeBasicQuery(const std::array<uint8_t, 6>& req, int& bus,
                             uint8_t& device, uint8_t& offset)
{
    constexpr std::size_t busBytes = sizeof(uint32_t);

    uint32_t wireBus = 0;
    std::memcpy(&wireBus, req.data(), busBytes);

    bus = le32toh(wireBus);
    device = req[busBytes];
    offset = req[busBytes + 1];
}
79
Andrew Jeffery1a143022022-07-19 14:18:24 +093080static void execBasicQuery(int bus, uint8_t addr, uint8_t cmd,
81 std::vector<uint8_t>& resp)
Andrew Jefferye3e3c972021-05-26 14:37:07 +093082{
Ed Tanousa771f6a2022-01-14 09:36:51 -080083 int32_t size = 0;
Ed Tanous73030632022-01-14 10:09:47 -080084 std::filesystem::path devpath = "/dev/i2c-" + std::to_string(bus);
Andrew Jefferye3e3c972021-05-26 14:37:07 +093085
chaul.ampere0fe02292022-07-21 06:37:39 +000086 try
Andrew Jefferye3e3c972021-05-26 14:37:07 +093087 {
chaul.ampere0fe02292022-07-21 06:37:39 +000088 FileHandle fileHandle(devpath);
89
90 /* Select the target device */
91 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
92 if (::ioctl(fileHandle.handle(), I2C_SLAVE, addr) == -1)
93 {
94 std::cerr << "Failed to configure device address 0x" << std::hex
95 << (int)addr << " for bus " << std::dec << bus << ": "
96 << strerror(errno) << "\n";
97 resp.resize(0);
98 return;
99 }
100
101 resp.resize(UINT8_MAX + 1);
102
103 /* Issue the NVMe MI basic command */
104 size = i2c_smbus_read_block_data(fileHandle.handle(), cmd, resp.data());
105 if (size < 0)
106 {
107 std::cerr << "Failed to read block data from device 0x" << std::hex
108 << (int)addr << " on bus " << std::dec << bus << ": "
109 << strerror(errno) << "\n";
110 resp.resize(0);
111 }
112 else if (size > UINT8_MAX + 1)
113 {
114 std::cerr << "Unexpected message length from device 0x" << std::hex
115 << (int)addr << " on bus " << std::dec << bus << ": "
116 << size << " (" << UINT8_MAX << ")\n";
117 resp.resize(0);
118 }
119 else
120 {
121 resp.resize(size);
122 }
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930123 }
chaul.ampere0fe02292022-07-21 06:37:39 +0000124 catch (const std::out_of_range& e)
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930125 {
chaul.ampere0fe02292022-07-21 06:37:39 +0000126 std::cerr << "Failed to create file handle for bus " << std::dec << bus
127 << ": " << e.what() << "\n";
Andrew Jeffery1a143022022-07-19 14:18:24 +0930128 resp.resize(0);
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930129 }
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930130}
131
Ed Tanous73030632022-01-14 10:09:47 -0800132static ssize_t processBasicQueryStream(FileHandle& in, FileHandle& out)
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930133{
134 std::vector<uint8_t> resp{};
Ed Tanousa771f6a2022-01-14 09:36:51 -0800135 ssize_t rc = 0;
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930136
137 while (true)
138 {
Ed Tanousa771f6a2022-01-14 09:36:51 -0800139 uint8_t device = 0;
140 uint8_t offset = 0;
141 uint8_t len = 0;
142 int bus = 0;
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930143
144 /* bus + address + command */
145 std::array<uint8_t, sizeof(uint32_t) + 1 + 1> req{};
146
147 /* Read the command parameters */
Ed Tanous73030632022-01-14 10:09:47 -0800148 ssize_t rc = ::read(in.handle(), req.data(), req.size());
149 if (rc != static_cast<ssize_t>(req.size()))
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930150 {
Ed Tanous73030632022-01-14 10:09:47 -0800151 std::cerr << "Failed to read request from in descriptor "
152 << strerror(errno) << "\n";
Ed Tanous2049bd22022-07-09 07:20:26 -0700153 if (rc != 0)
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930154 {
Ed Tanous73030632022-01-14 10:09:47 -0800155 return -errno;
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930156 }
Ed Tanous73030632022-01-14 10:09:47 -0800157 return -EIO;
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930158 }
159
160 decodeBasicQuery(req, bus, device, offset);
161
162 /* Execute the query */
Andrew Jeffery1a143022022-07-19 14:18:24 +0930163 execBasicQuery(bus, device, offset, resp);
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930164
165 /* Write out the response length */
Andrew Jeffery1a143022022-07-19 14:18:24 +0930166 len = resp.size();
Ed Tanous73030632022-01-14 10:09:47 -0800167 rc = ::write(out.handle(), &len, sizeof(len));
168 if (rc != sizeof(len))
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930169 {
Andrew Jeffery9ca98ec2021-11-02 09:50:47 +1030170 std::cerr << "Failed to write block (" << std::dec << len
Ed Tanous73030632022-01-14 10:09:47 -0800171 << ") length to out descriptor: "
172 << strerror(static_cast<int>(-rc)) << "\n";
Ed Tanous2049bd22022-07-09 07:20:26 -0700173 if (rc != 0)
Ed Tanous73030632022-01-14 10:09:47 -0800174 {
175 return -errno;
176 }
177 return -EIO;
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930178 }
179
180 /* Write out the response data */
Ed Tanous73030632022-01-14 10:09:47 -0800181 std::vector<uint8_t>::iterator cursor = resp.begin();
182 while (cursor != resp.end())
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930183 {
Ed Tanous73030632022-01-14 10:09:47 -0800184 size_t lenRemaining = std::distance(cursor, resp.end());
185 ssize_t egress = ::write(out.handle(), &(*cursor), lenRemaining);
Ed Tanousa771f6a2022-01-14 09:36:51 -0800186 if (egress == -1)
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930187 {
Andrew Jeffery9ca98ec2021-11-02 09:50:47 +1030188 std::cerr << "Failed to write block data of length " << std::dec
Ed Tanous73030632022-01-14 10:09:47 -0800189 << lenRemaining << " to out pipe: " << strerror(errno)
190 << "\n";
Ed Tanous2049bd22022-07-09 07:20:26 -0700191 if (rc != 0)
Ed Tanous73030632022-01-14 10:09:47 -0800192 {
193 return -errno;
194 }
195 return -EIO;
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930196 }
197
198 cursor += egress;
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930199 }
200 }
201
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930202 return rc;
203}
204
205/* Throws std::error_code on failure */
206/* FIXME: Probably shouldn't do fallible stuff in a constructor */
Ed Tanous1f978632023-02-28 18:16:39 -0800207NVMeBasicContext::NVMeBasicContext(boost::asio::io_context& io, int rootBus) :
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930208 NVMeContext::NVMeContext(io, rootBus), io(io), reqStream(io), respStream(io)
209{
Ed Tanousa771f6a2022-01-14 09:36:51 -0800210 std::array<int, 2> responsePipe{};
211 std::array<int, 2> requestPipe{};
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930212
213 /* Set up inter-thread communication */
214 if (::pipe(requestPipe.data()) == -1)
215 {
216 std::cerr << "Failed to create request pipe: " << strerror(errno)
217 << "\n";
218 throw std::error_code(errno, std::system_category());
219 }
220
221 if (::pipe(responsePipe.data()) == -1)
222 {
223 std::cerr << "Failed to create response pipe: " << strerror(errno)
224 << "\n";
225
226 if (::close(requestPipe[0]) == -1)
227 {
228 std::cerr << "Failed to close write fd of request pipe: "
229 << strerror(errno) << "\n";
230 }
231
232 if (::close(requestPipe[1]) == -1)
233 {
234 std::cerr << "Failed to close read fd of request pipe: "
235 << strerror(errno) << "\n";
236 }
237
238 throw std::error_code(errno, std::system_category());
239 }
240
241 reqStream.assign(requestPipe[1]);
Ed Tanous73030632022-01-14 10:09:47 -0800242 FileHandle streamIn(requestPipe[0]);
243 FileHandle streamOut(responsePipe[1]);
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930244 respStream.assign(responsePipe[0]);
245
Andrew Jeffery3cbd5a12022-07-18 16:32:11 +0930246 thread = std::jthread([streamIn{std::move(streamIn)},
247 streamOut{std::move(streamOut)}]() mutable {
Ed Tanous72b39112024-04-03 18:35:24 -0700248 ssize_t rc = processBasicQueryStream(streamIn, streamOut);
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930249
Ed Tanous72b39112024-04-03 18:35:24 -0700250 if (rc < 0)
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930251 {
252 std::cerr << "Failure while processing query stream: "
Ed Tanousa771f6a2022-01-14 09:36:51 -0800253 << strerror(static_cast<int>(-rc)) << "\n";
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930254 }
255
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930256 std::cerr << "Terminating basic query thread\n";
257 });
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930258}
259
Andrew Jefferyb5d7a7f2022-05-02 11:57:03 +0930260void NVMeBasicContext::readAndProcessNVMeSensor()
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930261{
Andrew Jefferyb5d7a7f2022-05-02 11:57:03 +0930262 if (pollCursor == sensors.end())
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930263 {
Andrew Jeffery8c7074e2022-03-21 14:58:13 +1030264 this->pollNVMeDevices();
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930265 return;
266 }
267
Andrew Jefferyb5d7a7f2022-05-02 11:57:03 +0930268 std::shared_ptr<NVMeSensor> sensor = *pollCursor++;
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930269
Andrew Jeffery3859c7f2021-10-29 15:51:16 +1030270 if (!sensor->readingStateGood())
271 {
272 sensor->markAvailable(false);
273 sensor->updateValue(std::numeric_limits<double>::quiet_NaN());
Andrew Jefferyb5d7a7f2022-05-02 11:57:03 +0930274 readAndProcessNVMeSensor();
Andrew Jeffery3859c7f2021-10-29 15:51:16 +1030275 return;
276 }
277
Andrew Jeffery14108bb2022-03-21 15:00:16 +1030278 /* Potentially defer sampling the sensor sensor if it is in error */
279 if (!sensor->sample())
280 {
Andrew Jefferyb5d7a7f2022-05-02 11:57:03 +0930281 readAndProcessNVMeSensor();
Andrew Jeffery14108bb2022-03-21 15:00:16 +1030282 return;
283 }
284
Nnamdi Ajah06cd9882023-02-15 13:21:32 +0100285 auto command = encodeBasicQuery(sensor->bus, sensor->address, 0x00);
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930286
287 /* Issue the request */
288 boost::asio::async_write(
289 reqStream, boost::asio::buffer(command->data(), command->size()),
Ed Tanous76b2bc72022-02-18 09:48:16 -0800290 [command](boost::system::error_code ec, std::size_t) {
Ed Tanousbb679322022-05-16 16:10:00 -0700291 if (ec)
292 {
293 std::cerr << "Got error writing basic query: " << ec << "\n";
294 }
Patrick Williams597e8422023-10-20 11:19:01 -0500295 });
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930296
Andrew Jeffery84c16872022-03-15 21:50:59 +1030297 auto response = std::make_shared<boost::asio::streambuf>();
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930298 response->prepare(1);
299
300 /* Gather the response and dispatch for parsing */
301 boost::asio::async_read(
302 respStream, *response,
303 [response](const boost::system::error_code& ec, std::size_t n) {
Ed Tanousbb679322022-05-16 16:10:00 -0700304 if (ec)
305 {
306 std::cerr << "Got error completing basic query: " << ec << "\n";
307 return static_cast<std::size_t>(0);
308 }
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930309
Ed Tanousbb679322022-05-16 16:10:00 -0700310 if (n == 0)
311 {
312 return static_cast<std::size_t>(1);
313 }
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930314
Ed Tanousbb679322022-05-16 16:10:00 -0700315 std::istream is(response.get());
316 size_t len = static_cast<std::size_t>(is.peek());
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930317
Ed Tanousbb679322022-05-16 16:10:00 -0700318 if (n > len + 1)
319 {
320 std::cerr << "Query stream has become unsynchronised: "
321 << "n: " << n << ", "
322 << "len: " << len << "\n";
323 return static_cast<std::size_t>(0);
324 }
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930325
Ed Tanousbb679322022-05-16 16:10:00 -0700326 if (n == len + 1)
327 {
328 return static_cast<std::size_t>(0);
329 }
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930330
Ed Tanousbb679322022-05-16 16:10:00 -0700331 if (n > 1)
332 {
333 return len + 1 - n;
334 }
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930335
Ed Tanousbb679322022-05-16 16:10:00 -0700336 response->prepare(len);
337 return len;
Patrick Williams597e8422023-10-20 11:19:01 -0500338 },
Andrew Jeffery3cbd5a12022-07-18 16:32:11 +0930339 [weakSelf{weak_from_this()}, sensor, response](
Andrew Jeffery8c7074e2022-03-21 14:58:13 +1030340 const boost::system::error_code& ec, std::size_t length) mutable {
Ed Tanousbb679322022-05-16 16:10:00 -0700341 if (ec)
342 {
343 std::cerr << "Got error reading basic query: " << ec << "\n";
344 return;
345 }
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930346
Ed Tanousbb679322022-05-16 16:10:00 -0700347 if (length == 0)
348 {
349 std::cerr << "Invalid message length: " << length << "\n";
350 return;
351 }
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930352
Andrew Jeffery3cbd5a12022-07-18 16:32:11 +0930353 if (auto self = weakSelf.lock())
354 {
355 /* Deserialise the response */
356 response->consume(1); /* Drop the length byte */
357 std::istream is(response.get());
358 std::vector<char> data(response->size());
359 is.read(data.data(), response->size());
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930360
Andrew Jeffery3cbd5a12022-07-18 16:32:11 +0930361 /* Update the sensor */
362 self->processResponse(sensor, data.data(), data.size());
Andrew Jeffery8c7074e2022-03-21 14:58:13 +1030363
Andrew Jeffery3cbd5a12022-07-18 16:32:11 +0930364 /* Enqueue processing of the next sensor */
365 self->readAndProcessNVMeSensor();
366 }
Ed Tanousbb679322022-05-16 16:10:00 -0700367 });
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930368}
369
370void NVMeBasicContext::pollNVMeDevices()
371{
Andrew Jefferyb5d7a7f2022-05-02 11:57:03 +0930372 pollCursor = sensors.begin();
Andrew Jeffery8c7074e2022-03-21 14:58:13 +1030373
Ed Tanous83db50c2023-03-01 10:20:24 -0800374 scanTimer.expires_after(std::chrono::seconds(1));
Andrew Jeffery3cbd5a12022-07-18 16:32:11 +0930375 scanTimer.async_wait([weakSelf{weak_from_this()}](
376 const boost::system::error_code errorCode) {
Ed Tanousbb679322022-05-16 16:10:00 -0700377 if (errorCode == boost::asio::error::operation_aborted)
378 {
379 return;
380 }
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930381
Ed Tanousbb679322022-05-16 16:10:00 -0700382 if (errorCode)
383 {
384 std::cerr << errorCode.message() << "\n";
385 return;
386 }
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930387
Andrew Jeffery3cbd5a12022-07-18 16:32:11 +0930388 if (auto self = weakSelf.lock())
389 {
390 self->readAndProcessNVMeSensor();
391 }
Ed Tanousbb679322022-05-16 16:10:00 -0700392 });
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930393}
394
/*
 * Convert the raw temperature byte of a basic query response to a double.
 *
 * Two encodings carry no temperature and yield a quiet NaN:
 *   0x80 = No temperature data, or temperature data is more than 5 s old
 *   0x81 = Temperature sensor failure
 * Every other value is returned as the signed 8-bit reading.
 */
static double getTemperatureReading(int8_t reading)
{
    switch (static_cast<uint8_t>(reading))
    {
        case 0x80:
        case 0x81:
            return std::numeric_limits<double>::quiet_NaN();
        default:
            return reading;
    }
}
407
Andrew Jeffery8c7074e2022-03-21 14:58:13 +1030408void NVMeBasicContext::processResponse(std::shared_ptr<NVMeSensor>& sensor,
409 void* msg, size_t len)
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930410{
Andrew Jeffery14108bb2022-03-21 15:00:16 +1030411 if (msg == nullptr || len < 6)
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930412 {
Andrew Jeffery14108bb2022-03-21 15:00:16 +1030413 sensor->incrementError();
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930414 return;
415 }
416
417 uint8_t* messageData = static_cast<uint8_t*>(msg);
Andrew Jeffery7aeb1a52022-03-15 22:49:04 +1030418
419 uint8_t status = messageData[0];
Ed Tanous2049bd22022-07-09 07:20:26 -0700420 if (((status & NVME_MI_BASIC_SFLGS_DRIVE_NOT_READY) != 0) ||
421 ((status & NVME_MI_BASIC_SFLGS_DRIVE_FUNCTIONAL) == 0))
Andrew Jeffery7aeb1a52022-03-15 22:49:04 +1030422 {
423 sensor->markFunctional(false);
424 return;
425 }
426
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930427 double value = getTemperatureReading(messageData[2]);
Andrew Jeffery14108bb2022-03-21 15:00:16 +1030428 if (!std::isfinite(value))
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930429 {
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930430 sensor->incrementError();
Andrew Jeffery14108bb2022-03-21 15:00:16 +1030431 return;
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930432 }
Andrew Jeffery14108bb2022-03-21 15:00:16 +1030433
434 sensor->updateValue(value);
Andrew Jefferye3e3c972021-05-26 14:37:07 +0930435}