Add failsafe logger for zones

Tested:
...
Nov 23 21:40:06 tmddp10-nfd01.prod.google.com swampd[4893]:
Zone `0` is in failsafe mode.
With update at `fleeting0`: The sensor has bad readings.
Nov 23 21:40:06 tmddp10-nfd01.prod.google.com swampd[4893]:
Zone `1` is in failsafe mode.
With update at `fleeting1`: The sensor has bad readings.
Nov 23 21:40:06 tmddp10-nfd01.prod.google.com swampd[4893]:
Zone `1` leaves failsafe mode.
With update at `hotswap_in_Input_Power`: The sensor has recovered.
Nov 23 21:40:06 tmddp10-nfd01.prod.google.com swampd[4893]:
Zone `0` leaves failsafe mode.
With update at `hotswap_in_Input_Power`: The sensor has recovered.
...

Change-Id: I2c296addb7ad117c03c04a27de91204796cda036
Signed-off-by: James Zheng <alphetis@google.com>
diff --git a/failsafeloggers/builder.cpp b/failsafeloggers/builder.cpp
new file mode 100644
index 0000000..376e644
--- /dev/null
+++ b/failsafeloggers/builder.cpp
@@ -0,0 +1,67 @@
+/**
+ * Copyright 2017 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "failsafeloggers/builder.hpp"
+
+#include "conf.hpp"
+#include "failsafeloggers/failsafe_logger.hpp"
+#include "failsafeloggers/failsafe_logger_utility.hpp"
+
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace pid_control
+{
+
+/** Reset the global failsafe-logger state and rebuild it from `zones`:
+ * one FailsafeLogger per zone, plus the sensor-name -> zone-IDs map.
+ */
+void buildFailsafeLoggers(
+    const std::unordered_map<int64_t, std::shared_ptr<ZoneInterface>>& zones,
+    const size_t logMaxCountPerSecond /* = 20 */)
+{
+    // Drop whatever a previous build left behind.
+    zoneIdToFailsafeLogger.clear();
+    sensorNameToZoneId.clear();
+    for (const auto& [zoneId, zone] : zones)
+    {
+        // Each zone gets its own logger, seeded with the zone's current mode.
+        zoneIdToFailsafeLogger[zoneId] = std::make_shared<FailsafeLogger>(
+            logMaxCountPerSecond, zone->getFailSafeMode());
+
+        // Register the zone under each of its sensors, without duplicates.
+        const std::vector<std::string> names = zone->getSensorNames();
+        for (const std::string& name : names)
+        {
+            std::vector<int64_t>& zoneIds = sensorNameToZoneId[name];
+            if (std::find(zoneIds.begin(), zoneIds.end(), zoneId) ==
+                zoneIds.end())
+            {
+                zoneIds.push_back(zoneId);
+            }
+        }
+
+        std::cerr << "Build failsafe logger for Zone " << zoneId
+                  << " with initial "
+                  << "failsafe mode: " << zone->getFailSafeMode() << "\n";
+    }
+}
+
+} // namespace pid_control
diff --git a/failsafeloggers/builder.hpp b/failsafeloggers/builder.hpp
new file mode 100644
index 0000000..3c59820
--- /dev/null
+++ b/failsafeloggers/builder.hpp
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "conf.hpp"
+#include "failsafeloggers/failsafe_logger.hpp"
+#include "pid/zone_interface.hpp"
+#include "sensors/manager.hpp"
+
+#include <memory>
+#include <unordered_map>
+
+namespace pid_control
+{
+
+void buildFailsafeLoggers( // (re)builds the global logger and sensor maps
+    const std::unordered_map<int64_t, std::shared_ptr<ZoneInterface>>& zones,
+    const size_t logMaxCountPerSecond = 20); // handed to every FailsafeLogger
+
+} // namespace pid_control
diff --git a/failsafeloggers/failsafe_logger.cpp b/failsafeloggers/failsafe_logger.cpp
new file mode 100644
index 0000000..91416e0
--- /dev/null
+++ b/failsafeloggers/failsafe_logger.cpp
@@ -0,0 +1,61 @@
+#include "failsafe_logger.hpp"
+
+#include <chrono>
+#include <iostream>
+
+namespace pid_control
+{
+
+/** Log a zone's failsafe status for one update, subject to two filters:
+ * each location/reason pair is reported once per state, and at most
+ * _logMaxCountPerSecond lines are printed within any 1-second window.
+ */
+void FailsafeLogger::outputFailsafeLog(
+    const int64_t zoneId, const bool newFailsafeState,
+    const std::string location, const std::string reason)
+{
+    // Use a monotonic clock: the window below measures an interval and must
+    // not be disturbed by wall-clock adjustments.
+    const uint64_t nowMs =
+        std::chrono::duration_cast<std::chrono::milliseconds>(
+            std::chrono::steady_clock::now().time_since_epoch())
+            .count();
+    // Forget the lines that were printed 1 second ago or earlier.
+    constexpr uint64_t secondInMS = 1000; // 1 second in milliseconds
+    while (!_logTimestamps.empty() &&
+           nowMs - _logTimestamps.front() >= secondInMS)
+    {
+        _logTimestamps.pop_front();
+    }
+
+    // A state change starts a fresh set of already-reported messages.
+    const bool originFailsafeState = _currentFailsafeState;
+    if (newFailsafeState != originFailsafeState)
+    {
+        _logsInCurrentState.clear();
+        _currentFailsafeState = newFailsafeState;
+    }
+    // Do not output the log if the capacity is reached, or if the log is
+    // already encountered in the current state.
+    const std::string locationReason = location + " @ " + reason;
+    if (_logTimestamps.size() >= _logMaxCountPerSecond ||
+        _logsInCurrentState.contains(locationReason))
+    {
+        return;
+    }
+    _logsInCurrentState.insert(locationReason);
+
+    // Nothing to report while the zone simply stays out of failsafe mode;
+    // such calls must not consume the rate-limit budget either.
+    if (!newFailsafeState && !originFailsafeState)
+    {
+        return;
+    }
+    std::cerr << "Zone `" << zoneId
+              << (newFailsafeState ? "` is in failsafe mode."
+                                   : "` leaves failsafe mode.")
+              << "\t\tWith update at `" << location << "`: " << reason << "\n";
+    _logTimestamps.push_back(nowMs);
+}
+
+} // namespace pid_control
diff --git a/failsafeloggers/failsafe_logger.hpp b/failsafeloggers/failsafe_logger.hpp
new file mode 100644
index 0000000..0e13919
--- /dev/null
+++ b/failsafeloggers/failsafe_logger.hpp
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <deque>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+
+namespace pid_control
+{
+
+/**
+ * Log the reason for a zone to enter and leave the failsafe mode.
+ *
+ * Particularly, for entering the failsafe mode:
+ *   1. A sensor is specified in thermal config as an input but missed in DBus
+ *   2. A sensor has null readings in DBus
+ *   3. A sensor is abnormal in DBus (not functional, not enabled, etc)
+ *   4. A sensor's reading is above upper critical (UC) limit
+ *
+ * Among the above reasons:
+ *   1 excludes 2, 3, 4.
+ *   2 excludes 1, 4.
+ *   3 excludes 1.
+ *   4 excludes 1, 2.
+ *
+ * Note that this log is at the zone level, not the sensor level.
+ */
+class FailsafeLogger
+{
+  public:
+    FailsafeLogger(size_t logMaxCountPerSecond = 20,
+                   bool currentFailsafeState = false) :
+        _logMaxCountPerSecond(logMaxCountPerSecond),
+        _currentFailsafeState(currentFailsafeState)
+    {}
+    ~FailsafeLogger() = default;
+
+    /** Attempt to output an entering/leaving-failsafe-mode log. */
+    void outputFailsafeLog(int64_t zoneId, bool newFailsafeState,
+                           const std::string location,
+                           const std::string reason);
+
+  private:
+    // The maximum number of log entries to be output within 1 second.
+    size_t _logMaxCountPerSecond;
+    // Whether the zone is currently in the failsafe mode.
+    bool _currentFailsafeState;
+    // Millisecond timestamps of recent log entries. Kept 64-bit: a 32-bit
+    // size_t would truncate the uint64_t values outputFailsafeLog() stores.
+    std::deque<uint64_t> _logTimestamps;
+    // The logs already encountered in the current state.
+    std::unordered_set<std::string> _logsInCurrentState;
+};
+
+} // namespace pid_control
diff --git a/failsafeloggers/failsafe_logger_utility.cpp b/failsafeloggers/failsafe_logger_utility.cpp
new file mode 100644
index 0000000..40d0506
--- /dev/null
+++ b/failsafeloggers/failsafe_logger_utility.cpp
@@ -0,0 +1,11 @@
+#include "failsafe_logger_utility.hpp"
+
+#include <string>
+
+// Global state of the failsafe-logger module; both maps start out empty.
+// Zone ID -> the failsafe logger of that zone.
+std::unordered_map<int64_t, std::shared_ptr<pid_control::FailsafeLogger>>
+    zoneIdToFailsafeLogger;
+
+// Sensor name -> IDs of the zones associated with that sensor.
+std::unordered_map<std::string, std::vector<int64_t>> sensorNameToZoneId;
diff --git a/failsafeloggers/failsafe_logger_utility.hpp b/failsafeloggers/failsafe_logger_utility.hpp
new file mode 100644
index 0000000..05ed3ea
--- /dev/null
+++ b/failsafeloggers/failsafe_logger_utility.hpp
@@ -0,0 +1,53 @@
+#pragma once
+
+#include "conf.hpp"
+#include "failsafeloggers/failsafe_logger.hpp"
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+/** Map of the zone ID to its failsafe logger. Declared at global scope;
+ * the definition lives in failsafe_logger_utility.cpp. */
+extern std::unordered_map<int64_t, std::shared_ptr<pid_control::FailsafeLogger>>
+    zoneIdToFailsafeLogger;
+
+/** Map of the sensor name/ID to its corresponding zone IDs; a sensor may be
+ * listed under several zones, hence the vector. */
+extern std::unordered_map<std::string, std::vector<int64_t>> sensorNameToZoneId;
+
+namespace pid_control
+{
+
+/** Given a zone ID, attempt to output an entering/leaving-failsafe-mode log
+ * for that zone. A zone ID without a registered logger is ignored. */
+inline void outputFailsafeLogWithZone(
+    const int64_t zoneId, const bool newFailsafeState,
+    const std::string& location, const std::string& reason)
+{
+    const auto logger = zoneIdToFailsafeLogger.find(zoneId);
+    if (logger != zoneIdToFailsafeLogger.end())
+    {
+        logger->second->outputFailsafeLog(zoneId, newFailsafeState, location,
+                                          reason);
+    }
+}
+
+/** Given a sensor name, attempt to output entering/leaving-failsafe-mode
+ * logs for its zones; an unknown sensor is ignored (and never inserted). */
+inline void outputFailsafeLogWithSensor(
+    const std::string& sensorName, const bool newFailsafeState,
+    const std::string& location, const std::string& reason)
+{
+    const auto zoneIds = sensorNameToZoneId.find(sensorName);
+    if (zoneIds == sensorNameToZoneId.end())
+    {
+        return;
+    }
+    for (const int64_t zoneId : zoneIds->second)
+    {
+        outputFailsafeLogWithZone(zoneId, newFailsafeState, location, reason);
+    }
+}
+} // namespace pid_control