Enabling NVMe sensor support

This commit introduces the support for NVMe drives for sensors.

All the NVMe drives which are detected by the FRU manager and are
present in the inventory are scanned at regular interval for reading
the temperature values of the NVMe devices.

Tested:

NAME                                          TYPE      SIGNATURE RESULT/VALUE
org.freedesktop.DBus.Introspectable           interface -         -
.Introspect                                   method    -         s
org.freedesktop.DBus.Peer                     interface -         -
.GetMachineId                                 method    -         s
.Ping                                         method    -         -
org.freedesktop.DBus.Properties               interface -         -
.Get                                          method    ss        v
.GetAll                                       method    s         a{sv}
.Set                                          method    ssv       -
.PropertiesChanged                            signal    sa{sv}as  -
xyz.openbmc_project.Association.Definitions   interface -         -
.Associations                                 property  a(sss)    1 "chassis" "all_sensors" "/xyz/openb...
xyz.openbmc_project.Sensor.Threshold.Critical interface -         -
.CriticalAlarmHigh                            property  b         false
.CriticalAlarmLow                             property  b         false
.CriticalHigh                                 property  d         115
.CriticalLow                                  property  d         0
xyz.openbmc_project.Sensor.Threshold.Warning  interface -         -
.WarningAlarmHigh                             property  b         false
.WarningAlarmLow                              property  b         false
.WarningHigh                                  property  d         110
.WarningLow                                   property  d         5
xyz.openbmc_project.Sensor.Value              interface -         -
.MaxValue                                     property  d         127
.MinValue                                     property  d         -60
.Value                                        property  d         22

Change-Id: Icb119b424234d548c8ff5cda9c7a9517ec9696bb
Signed-off-by: Nikhil Potade <nikhil.potade@linux.intel.com>
Signed-off-by: James Feist <james.feist@linux.intel.com>
diff --git a/src/NVMeSensor.cpp b/src/NVMeSensor.cpp
new file mode 100644
index 0000000..be99f58
--- /dev/null
+++ b/src/NVMeSensor.cpp
@@ -0,0 +1,478 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "NVMeSensor.hpp"
+
+#include "NVMeDevice.hpp"
+
+#include <crc32c.h>
+#include <libmctp-smbus.h>
+
+#include <boost/algorithm/string/replace.hpp>
+#include <boost/asio/ip/tcp.hpp>
+#include <iostream>
+
+static constexpr double maxReading = 127;
+static constexpr double minReading = 0;
+
+static constexpr bool DEBUG = false;
+
+void rxMessage(uint8_t eid, void* data, void* msg, size_t len);
+
+namespace nvmeMCTP
+{
+struct mctp_binding_smbus* smbus = mctp_smbus_init();
+struct mctp* mctp = mctp_init();
+
+static boost::container::flat_map<int, int> inFds;
+static boost::container::flat_map<int, int> outFds;
+
+int getInFd(int rootBus)
+{
+    auto findBus = inFds.find(rootBus);
+    if (findBus != inFds.end())
+    {
+        return findBus->second;
+    }
+    int fd = mctp_smbus_open_in_bus(smbus, rootBus);
+    if (fd < 0)
+    {
+        std::cerr << "Error opening IN Bus " << rootBus << "\n";
+    }
+    inFds[rootBus] = fd;
+    return fd;
+}
+
+int getOutFd(int bus)
+{
+    auto findBus = outFds.find(bus);
+    if (findBus != outFds.end())
+    {
+        return findBus->second;
+    }
+    int fd = mctp_smbus_open_out_bus(smbus, bus);
+    if (fd < 0)
+    {
+        std::cerr << "Error opening Out Bus " << bus << "\n";
+    }
+    outFds[bus] = fd;
+    return fd;
+}
+
+// we don't close the outFd as multiple sensors could be sharing the fd, we need
+// to close the inFd as it can only be used on 1 socket at a time
+void closeInFd(int rootBus)
+{
+    auto findFd = inFds.find(rootBus);
+    if (findFd == inFds.end())
+    {
+        return;
+    }
+    close(findFd->second);
+    inFds.erase(rootBus);
+}
+
+int getRootBus(int inFd)
+{
+    // we assume that we won't have too many FDs, so looping is OK
+    for (const auto [root, fd] : inFds)
+    {
+        if (fd == inFd)
+        {
+            return root;
+        }
+    }
+
+    return -1;
+}
+
+void init()
+{
+    if (mctp == nullptr || smbus == nullptr)
+    {
+        throw std::runtime_error("Unable to init mctp");
+    }
+    mctp_smbus_register_bus(smbus, nvmeMCTP::mctp, 0);
+    mctp_set_rx_all(mctp, rxMessage, nullptr);
+}
+
+} // namespace nvmeMCTP
+
+static int lastQueriedDeviceIndex = -1;
+
+void readResponse(const std::shared_ptr<NVMeContext>& nvmeDevice)
+{
+    nvmeDevice->nvmeSlaveSocket.async_wait(
+        boost::asio::ip::tcp::socket::wait_error,
+        [nvmeDevice](const boost::system::error_code errorCode) {
+            if (errorCode)
+            {
+                return;
+            }
+
+            mctp_smbus_set_in_fd(nvmeMCTP::smbus,
+                                 nvmeMCTP::getInFd(nvmeDevice->rootBus));
+
+            // through libmctp this will invoke rxMessage
+            mctp_smbus_read(nvmeMCTP::smbus);
+        });
+}
+
+int nvmeMessageTransmit(mctp& mctp, nvme_mi_msg_request& req)
+{
+    std::array<uint8_t, NVME_MI_MSG_BUFFER_SIZE> messageBuf = {};
+
+    req.header.flags |= NVME_MI_HDR_MESSAGE_TYPE_MI_COMMAND
+                        << NVME_MI_HDR_FLAG_MSG_TYPE_SHIFT;
+    req.header.message_type =
+        NVME_MI_MESSAGE_TYPE | NVME_MI_MCTP_INTEGRITY_CHECK;
+
+    uint32_t integrity = 0;
+    size_t msgSize = NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len +
+                     sizeof(integrity);
+
+    if (sizeof(messageBuf) < msgSize)
+    {
+        return EXIT_FAILURE;
+    }
+
+    messageBuf[0] = req.header.message_type;
+    messageBuf[1] = req.header.flags;
+    // Reserved bytes 2-3
+
+    messageBuf[4] = req.header.opcode;
+    // reserved bytes 5-7
+    messageBuf[8] = req.header.dword0 & 0xff;
+    messageBuf[9] = (req.header.dword0 >> 8) & 0xff;
+    messageBuf[10] = (req.header.dword0 >> 16) & 0xff;
+    messageBuf[11] = (req.header.dword0 >> 24) & 0xff;
+
+    messageBuf[12] = req.header.dword1 & 0xff;
+    messageBuf[13] = (req.header.dword1 >> 8) & 0xff;
+    messageBuf[14] = (req.header.dword1 >> 16) & 0xff;
+    messageBuf[15] = (req.header.dword1 >> 24) & 0xff;
+
+    std::copy_n(req.request_data, req.request_data_len,
+                messageBuf.data() +
+                    static_cast<uint8_t>(NVME_MI_MSG_REQUEST_HEADER_SIZE));
+
+    msgSize = NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len;
+    integrity = crc32c(messageBuf.data(),
+                       NVME_MI_MSG_REQUEST_HEADER_SIZE + req.request_data_len);
+    messageBuf[msgSize] = integrity & 0xff;
+    messageBuf[msgSize + 1] = (integrity >> 8) & 0xff;
+    messageBuf[msgSize + 2] = (integrity >> 16) & 0xff;
+    messageBuf[msgSize + 3] = (integrity >> 24) & 0xff;
+    msgSize += sizeof(integrity);
+
+    return mctp_message_tx(&mctp, 0, messageBuf.data(), msgSize);
+}
+
+int verifyIntegrity(uint8_t* msg, size_t len)
+{
+    uint32_t msgIntegrity = {0};
+    if (len < NVME_MI_MSG_RESPONSE_HEADER_SIZE + sizeof(msgIntegrity))
+    {
+        std::cerr << "Not enough bytes for nvme header and trailer\n";
+        return -1;
+    }
+
+    msgIntegrity = (msg[len - 4]) + (msg[len - 3] << 8) + (msg[len - 2] << 16) +
+                   (msg[len - 1] << 24);
+
+    uint32_t calculateIntegrity = crc32c(msg, len - sizeof(msgIntegrity));
+    if (msgIntegrity != calculateIntegrity)
+    {
+        std::cerr << "CRC mismatch. Got=" << msgIntegrity
+                  << " Expected=" << calculateIntegrity << "\n";
+        return -1;
+    }
+    return 0;
+}
+
+void readAndProcessNVMeSensor(const std::shared_ptr<NVMeContext>& nvmeDevice)
+{
+    struct nvme_mi_msg_request requestMsg = {};
+    requestMsg.header.opcode = NVME_MI_OPCODE_HEALTH_STATUS_POLL;
+    requestMsg.header.dword0 = 0;
+    requestMsg.header.dword1 = 0;
+
+    int mctpResponseTimeout = 1;
+
+    if (nvmeDevice->sensors.empty())
+    {
+        return;
+    }
+
+    std::shared_ptr<NVMeSensor>& sensor = nvmeDevice->sensors.front();
+
+    // setup the timeout timer
+    nvmeDevice->mctpResponseTimer.expires_from_now(
+        boost::posix_time::seconds(mctpResponseTimeout));
+
+    nvmeDevice->mctpResponseTimer.async_wait(
+        [sensor, nvmeDevice](const boost::system::error_code errorCode) {
+            constexpr const size_t errorThreshold = 5;
+            if (errorCode)
+            {
+                return;
+            }
+            if (sensor->errorCount < errorThreshold)
+            {
+                std::cerr << "MCTP timeout device " << sensor->name << "\n";
+                sensor->errorCount++;
+            }
+            else
+            {
+                sensor->updateValue(0);
+            }
+
+            // cycle it back
+            nvmeDevice->sensors.pop_front();
+            nvmeDevice->sensors.emplace_back(sensor);
+
+            nvmeDevice->nvmeSlaveSocket.cancel();
+        });
+
+    readResponse(nvmeDevice);
+
+    if (DEBUG)
+    {
+        std::cout << "Sending message to read data from Drive on bus: "
+                  << sensor->bus << " , rootBus: " << nvmeDevice->rootBus
+                  << " device: " << sensor->name << "\n";
+    }
+
+    mctp_smbus_set_out_fd(nvmeMCTP::smbus, nvmeMCTP::getOutFd(sensor->bus));
+    int rc = nvmeMessageTransmit(*nvmeMCTP::mctp, requestMsg);
+
+    if (rc != 0)
+    {
+        std::cerr << "Error sending request message to NVMe device\n";
+    }
+}
+
+static double getTemperatureReading(int8_t reading)
+{
+
+    if (reading == static_cast<int8_t>(0x80) ||
+        reading == static_cast<int8_t>(0x81))
+    {
+        // 0x80 = No temperature data or temperature data is more the 5 s
+        // old 0x81 = Temperature sensor failure
+        return maxReading;
+    }
+
+    return reading;
+}
+
+void rxMessage(uint8_t eid, void*, void* msg, size_t len)
+{
+    struct nvme_mi_msg_response_header header
+    {
+    };
+
+    int inFd = mctp_smbus_get_in_fd(nvmeMCTP::smbus);
+    int rootBus = nvmeMCTP::getRootBus(inFd);
+
+    NVMEMap& nvmeMap = getNVMEMap();
+    auto findMap = nvmeMap.find(rootBus);
+    if (findMap == nvmeMap.end())
+    {
+        std::cerr << "Unable to lookup root bus " << rootBus << "\n";
+        return;
+    }
+    std::shared_ptr<NVMeContext>& self = findMap->second;
+
+    if (msg == nullptr)
+    {
+        std::cerr << "Bad message received\n";
+        return;
+    }
+
+    if (len <= 0)
+    {
+        std::cerr << "Received message not long enough\n";
+        return;
+    }
+
+    if (DEBUG)
+    {
+        std::cout << "Eid from the received messaged: " << eid << "\n";
+    }
+
+    uint8_t* messageData = static_cast<uint8_t*>(msg);
+
+    if ((*messageData & NVME_MI_MESSAGE_TYPE_MASK) != NVME_MI_MESSAGE_TYPE)
+    {
+        std::cerr << "Got unknown type message_type="
+                  << (*messageData & NVME_MI_MESSAGE_TYPE_MASK) << "\n";
+        return;
+    }
+
+    if (len < NVME_MI_MSG_RESPONSE_HEADER_SIZE + sizeof(uint32_t))
+    {
+        std::cerr << "Not enough bytes for NVMe header and trailer\n";
+        return;
+    }
+
+    if (verifyIntegrity(messageData, len) != 0)
+    {
+        std::cerr << "Verification of message integrity failed\n";
+        return;
+    }
+
+    header.message_type = messageData[0];
+    header.flags = messageData[1];
+    header.status = messageData[4];
+
+    if (header.status == NVME_MI_HDR_STATUS_MORE_PROCESSING_REQUIRED)
+    {
+        return;
+    }
+
+    if (header.status != NVME_MI_HDR_STATUS_SUCCESS)
+    {
+        std::cerr << "Command failed with status= " << header.status << "\n";
+        return;
+    }
+
+    messageData += NVME_MI_MSG_RESPONSE_HEADER_SIZE;
+    size_t messageLength =
+        len - NVME_MI_MSG_RESPONSE_HEADER_SIZE - sizeof(uint32_t);
+    if (((header.flags >> NVME_MI_HDR_FLAG_MSG_TYPE_SHIFT) &
+         NVME_MI_HDR_FLAG_MSG_TYPE_MASK) != NVME_MI_HDR_MESSAGE_TYPE_MI_COMMAND)
+    {
+        std::cerr << "Not MI type comamnd\n";
+        return;
+    }
+
+    if (messageLength < NVME_MI_HEALTH_STATUS_POLL_MSG_MIN)
+    {
+        std::cerr << "Got improperly sized health status poll\n";
+        return;
+    }
+
+    std::shared_ptr<NVMeSensor> sensorInfo = self->sensors.front();
+    if (DEBUG)
+    {
+        std::cout << "Temperature Reading: "
+                  << getTemperatureReading(messageData[5])
+                  << " Celsius for device " << sensorInfo->name << "\n";
+    }
+
+    sensorInfo->updateValue(getTemperatureReading(messageData[5]));
+
+    if (DEBUG)
+    {
+        std::cout << "Cancelling the timer now\n";
+    }
+
+    // move to back of scan queue
+    self->sensors.pop_front();
+    self->sensors.emplace_back(sensorInfo);
+
+    self->mctpResponseTimer.cancel();
+}
+
+NVMeContext::NVMeContext(boost::asio::io_service& io, int rootBus) :
+    rootBus(rootBus), scanTimer(io), nvmeSlaveSocket(io), mctpResponseTimer(io)
+{
+    nvmeSlaveSocket.assign(boost::asio::ip::tcp::v4(),
+                           nvmeMCTP::getInFd(rootBus));
+}
+
+void NVMeContext::pollNVMeDevices()
+{
+    scanTimer.expires_from_now(boost::posix_time::seconds(1));
+    scanTimer.async_wait(
+        [self{shared_from_this()}](const boost::system::error_code errorCode) {
+            if (errorCode == boost::asio::error::operation_aborted)
+            {
+                return; // we're being canceled
+            }
+            else if (errorCode)
+            {
+                std::cerr << "Error:" << errorCode.message() << "\n";
+                return;
+            }
+            else
+            {
+                readAndProcessNVMeSensor(self);
+            }
+
+            self->pollNVMeDevices();
+        });
+}
+
+NVMeContext::~NVMeContext()
+{
+    scanTimer.cancel();
+    mctpResponseTimer.cancel();
+    nvmeSlaveSocket.cancel();
+    nvmeMCTP::closeInFd(rootBus);
+}
+
+NVMeSensor::NVMeSensor(sdbusplus::asio::object_server& objectServer,
+                       boost::asio::io_service& io,
+                       std::shared_ptr<sdbusplus::asio::connection>& conn,
+                       const std::string& sensorName,
+                       std::vector<thresholds::Threshold>&& _thresholds,
+                       const std::string& sensorConfiguration,
+                       const int busNumber) :
+    Sensor(boost::replace_all_copy(sensorName, " ", "_"),
+           std::move(_thresholds), sensorConfiguration,
+           "xyz.openbmc_project.Configuration.NVMe", maxReading, minReading),
+    objServer(objectServer), errorCount(0), bus(busNumber)
+{
+    sensorInterface = objectServer.add_interface(
+        "/xyz/openbmc_project/sensors/temperature/" + name,
+        "xyz.openbmc_project.Sensor.Value");
+
+    if (thresholds::hasWarningInterface(thresholds))
+    {
+        thresholdInterfaceWarning = objectServer.add_interface(
+            "/xyz/openbmc_project/sensors/temperature/" + name,
+            "xyz.openbmc_project.Sensor.Threshold.Warning");
+    }
+    if (thresholds::hasCriticalInterface(thresholds))
+    {
+        thresholdInterfaceCritical = objectServer.add_interface(
+            "/xyz/openbmc_project/sensors/temperature/" + name,
+            "xyz.openbmc_project.Sensor.Threshold.Critical");
+    }
+    association = objectServer.add_interface(
+        "/xyz/openbmc_project/sensors/temperature/" + name,
+        association::interface);
+
+    setInitialProperties(conn);
+    // setup match
+    setupPowerMatch(conn);
+}
+
+NVMeSensor::~NVMeSensor()
+{
+    // close the input dev to cancel async operations
+    objServer.remove_interface(thresholdInterfaceWarning);
+    objServer.remove_interface(thresholdInterfaceCritical);
+    objServer.remove_interface(sensorInterface);
+    objServer.remove_interface(association);
+}
+
+void NVMeSensor::checkThresholds(void)
+{
+    thresholds::checkThresholds(this);
+}