Add failsafe logger for zones
Tested:
...
Nov 23 21:40:06 tmddp10-nfd01.prod.google.com swampd[4893]:
Zone `0` is in failsafe mode.
With update at `fleeting0`: The sensor has bad readings.
Nov 23 21:40:06 tmddp10-nfd01.prod.google.com swampd[4893]:
Zone `1` is in failsafe mode.
With update at `fleeting1`: The sensor has bad readings.
Nov 23 21:40:06 tmddp10-nfd01.prod.google.com swampd[4893]:
Zone `1` leaves failsafe mode.
With update at `hotswap_in_Input_Power`: The sensor has recovered.
Nov 23 21:40:06 tmddp10-nfd01.prod.google.com swampd[4893]:
Zone `0` leaves failsafe mode.
With update at `hotswap_in_Input_Power`: The sensor has recovered.
...
Change-Id: I2c296addb7ad117c03c04a27de91204796cda036
Signed-off-by: James Zheng <alphetis@google.com>
diff --git a/failsafeloggers/builder.cpp b/failsafeloggers/builder.cpp
new file mode 100644
index 0000000..376e644
--- /dev/null
+++ b/failsafeloggers/builder.cpp
@@ -0,0 +1,67 @@
+/**
+ * Copyright 2017 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "failsafeloggers/builder.hpp"
+
+#include "conf.hpp"
+#include "failsafeloggers/failsafe_logger.hpp"
+#include "failsafeloggers/failsafe_logger_utility.hpp"
+
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace pid_control
+{
+
+/**
+ * Rebuild the global failsafe-logging state from the given zones.
+ *
+ * Both `zoneIdToFailsafeLogger` and `sensorNameToZoneId` are emptied first.
+ * Then, for every zone, a FailsafeLogger seeded with the zone's current
+ * failsafe mode is registered, each of the zone's sensor names is mapped to
+ * the zone ID (at most once per sensor/zone pair), and one summary line is
+ * written to std::cerr.
+ *
+ * @param[in] zones - map of zone ID to zone object.
+ * @param[in] logMaxCountPerSecond - per-logger limit on log entries output
+ *            within one second.
+ */
+void buildFailsafeLoggers(
+    const std::unordered_map<int64_t, std::shared_ptr<ZoneInterface>>& zones,
+    const size_t logMaxCountPerSecond /* = 20 */)
+{
+    // Discard whatever a previous build left behind.
+    zoneIdToFailsafeLogger.clear();
+    sensorNameToZoneId.clear();
+
+    for (const auto& [zoneId, zone] : zones)
+    {
+        // One logger per zone, starting from the zone's present state.
+        zoneIdToFailsafeLogger[zoneId] = std::make_shared<FailsafeLogger>(
+            logMaxCountPerSecond, zone->getFailSafeMode());
+
+        // Record which zones each sensor feeds, without duplicates.
+        const std::vector<std::string> sensorNames = zone->getSensorNames();
+        for (const std::string& sensorName : sensorNames)
+        {
+            std::vector<int64_t>& zoneIds = sensorNameToZoneId[sensorName];
+            const bool alreadyMapped =
+                std::find(zoneIds.begin(), zoneIds.end(), zoneId) !=
+                zoneIds.end();
+            if (!alreadyMapped)
+            {
+                zoneIds.push_back(zoneId);
+            }
+        }
+
+        std::cerr << "Build failsafe logger for Zone " << zoneId
+                  << " with initial failsafe mode: "
+                  << zone->getFailSafeMode() << "\n";
+    }
+}
+
+} // namespace pid_control
diff --git a/failsafeloggers/builder.hpp b/failsafeloggers/builder.hpp
new file mode 100644
index 0000000..3c59820
--- /dev/null
+++ b/failsafeloggers/builder.hpp
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "conf.hpp"
+#include "failsafeloggers/failsafe_logger.hpp"
+#include "pid/zone_interface.hpp"
+#include "sensors/manager.hpp"
+
+#include <memory>
+#include <unordered_map>
+
+namespace pid_control
+{
+
+/** Build one failsafe logger per zone and the sensor-to-zone lookup map.
+ *
+ * Resets the global `zoneIdToFailsafeLogger` and `sensorNameToZoneId` maps
+ * and repopulates them from `zones`. Each logger is created with the zone's
+ * current failsafe mode as its initial state. A summary line per zone is
+ * written to std::cerr.
+ *
+ * @param[in] zones - map of zone ID to zone object.
+ * @param[in] logMaxCountPerSecond - maximum number of log entries each
+ *            logger may output within one second.
+ */
+void buildFailsafeLoggers(
+ const std::unordered_map<int64_t, std::shared_ptr<ZoneInterface>>& zones,
+ const size_t logMaxCountPerSecond = 20);
+
+} // namespace pid_control
diff --git a/failsafeloggers/failsafe_logger.cpp b/failsafeloggers/failsafe_logger.cpp
new file mode 100644
index 0000000..91416e0
--- /dev/null
+++ b/failsafeloggers/failsafe_logger.cpp
@@ -0,0 +1,61 @@
+#include "failsafe_logger.hpp"
+
+#include <chrono>
+#include <iostream>
+
+namespace pid_control
+{
+
+/**
+ * Attempt to write one failsafe log line for a zone to std::cerr.
+ *
+ * A line is written only when the zone is in failsafe mode after this update
+ * or has just left it; nothing is written while the zone stays out of
+ * failsafe mode. Within one failsafe state each distinct location/reason
+ * pair is logged at most once, and at most `_logMaxCountPerSecond` lines are
+ * written within any one-second window.
+ *
+ * @param[in] zoneId - ID of the zone; used in the message text.
+ * @param[in] newFailsafeState - failsafe state of the zone after the update.
+ * @param[in] location - where the update came from (e.g. a sensor name).
+ * @param[in] reason - human-readable reason for the update.
+ */
+void FailsafeLogger::outputFailsafeLog(
+    const int64_t zoneId, const bool newFailsafeState,
+    const std::string location, const std::string reason)
+{
+    // Drop timestamps that have fallen out of the one-second window. A
+    // monotonic clock is used so that wall-clock adjustments cannot distort
+    // the window.
+    const auto now = std::chrono::steady_clock::now();
+    const uint64_t nowMs =
+        std::chrono::duration_cast<std::chrono::milliseconds>(
+            now.time_since_epoch())
+            .count();
+    constexpr uint64_t secondInMS = 1000; // 1 second in milliseconds
+    while (!_logTimestamps.empty() &&
+           nowMs - _logTimestamps.front() >= secondInMS)
+    {
+        _logTimestamps.pop_front();
+    }
+
+    // On a failsafe state change, forget what was logged in the old state.
+    const bool originFailsafeState = _currentFailsafeState;
+    if (newFailsafeState != originFailsafeState)
+    {
+        _logsInCurrentState.clear();
+        _currentFailsafeState = newFailsafeState;
+    }
+
+    // The zone stays out of failsafe mode: there is nothing to report, so
+    // return before consuming any of the rate-limit budget.
+    if (!newFailsafeState && !originFailsafeState)
+    {
+        return;
+    }
+
+    // Do not output the log if the capacity is reached.
+    if (_logTimestamps.size() >= _logMaxCountPerSecond)
+    {
+        return;
+    }
+    // Do not output the log if it was already encountered in the current
+    // state; insert() reports whether the entry is new.
+    if (!_logsInCurrentState.insert(location + " @ " + reason).second)
+    {
+        return;
+    }
+
+    if (newFailsafeState)
+    {
+        // The zone entered, or is still in, failsafe mode.
+        std::cerr << "Zone `" << zoneId
+                  << "` is in failsafe mode.\t\tWith update at `" << location
+                  << "`: " << reason << "\n";
+    }
+    else
+    {
+        // Only reachable when the zone has just left failsafe mode.
+        std::cerr << "Zone `" << zoneId
+                  << "` leaves failsafe mode.\t\tWith update at `" << location
+                  << "`: " << reason << "\n";
+    }
+
+    // Count this line against the per-second limit.
+    _logTimestamps.push_back(nowMs);
+}
+
+} // namespace pid_control
diff --git a/failsafeloggers/failsafe_logger.hpp b/failsafeloggers/failsafe_logger.hpp
new file mode 100644
index 0000000..0e13919
--- /dev/null
+++ b/failsafeloggers/failsafe_logger.hpp
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <cstdint>
+#include <deque>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+
+namespace pid_control
+{
+
+/**
+ * Log the reason for a zone to enter and leave the failsafe mode.
+ *
+ * Particularly, for entering the failsafe mode:
+ * 1. A sensor is specified in thermal config as an input but missed in DBus
+ * 2. A sensor has null readings in DBus
+ * 3. A sensor is abnormal in DBus (not functional, not enabled, etc)
+ * 4. A sensor's reading is above upper critical (UC) limit
+ *
+ * Among the above reasons:
+ * 1 excludes 2, 3, 4.
+ * 2 excludes 1, 4.
+ * 3 excludes 1.
+ * 4 excludes 1, 2.
+ *
+ * Note that this log is at the zone level, not the sensor level.
+ */
+class FailsafeLogger
+{
+  public:
+    /** Create a logger for one zone.
+     *
+     * @param[in] logMaxCountPerSecond - maximum number of log entries to be
+     *            output within 1 second.
+     * @param[in] currentFailsafeState - whether the zone is in the failsafe
+     *            mode at construction time.
+     */
+    FailsafeLogger(size_t logMaxCountPerSecond = 20,
+                   bool currentFailsafeState = false) :
+        _logMaxCountPerSecond(logMaxCountPerSecond),
+        _currentFailsafeState(currentFailsafeState)
+    {}
+    ~FailsafeLogger() = default;
+
+    /** Attempt to output an entering/leaving-failsafe-mode log.
+     *
+     * The call may produce no output: output is rate-limited to
+     * `logMaxCountPerSecond` entries per second, and nothing is reported
+     * while the zone stays out of the failsafe mode.
+     *
+     * @param[in] zoneId - ID of the zone the log is about.
+     * @param[in] newFailsafeState - failsafe state of the zone after the
+     *            update.
+     * @param[in] location - where the update came from (e.g. a sensor name).
+     * @param[in] reason - human-readable reason for the update.
+     */
+    void outputFailsafeLog(int64_t zoneId, bool newFailsafeState,
+                           const std::string location,
+                           const std::string reason);
+
+  private:
+    // The maximum number of log entries to be output within 1 second.
+    size_t _logMaxCountPerSecond;
+    // Whether the zone is currently in the failsafe mode.
+    bool _currentFailsafeState;
+    // The timestamps of the log entries, in milliseconds. Stored as 64-bit
+    // values to match how they are computed; a 32-bit size_t would truncate
+    // them and break the one-second window comparison.
+    std::deque<uint64_t> _logTimestamps;
+    // The logs already encountered in the current state.
+    std::unordered_set<std::string> _logsInCurrentState;
+};
+
+} // namespace pid_control
diff --git a/failsafeloggers/failsafe_logger_utility.cpp b/failsafeloggers/failsafe_logger_utility.cpp
new file mode 100644
index 0000000..40d0506
--- /dev/null
+++ b/failsafeloggers/failsafe_logger_utility.cpp
@@ -0,0 +1,11 @@
+#include "failsafe_logger_utility.hpp"
+
+#include <string>
+
+// Definitions of the global lookup tables declared in
+// failsafe_logger_utility.hpp. Both start out empty.
+std::unordered_map<int64_t, std::shared_ptr<pid_control::FailsafeLogger>>
+    zoneIdToFailsafeLogger{};
+
+std::unordered_map<std::string, std::vector<int64_t>> sensorNameToZoneId{};
diff --git a/failsafeloggers/failsafe_logger_utility.hpp b/failsafeloggers/failsafe_logger_utility.hpp
new file mode 100644
index 0000000..05ed3ea
--- /dev/null
+++ b/failsafeloggers/failsafe_logger_utility.hpp
@@ -0,0 +1,53 @@
+#pragma once
+
+#include "conf.hpp"
+#include "failsafeloggers/failsafe_logger.hpp"
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+/** Map of the zone ID to its failsafe logger.
+ *
+ * Defined in failsafe_logger_utility.cpp; reset and repopulated by
+ * pid_control::buildFailsafeLoggers().
+ */
+extern std::unordered_map<int64_t, std::shared_ptr<pid_control::FailsafeLogger>>
+ zoneIdToFailsafeLogger;
+
+/** Map of the sensor name/ID to its corresponding zone IDs.
+ *
+ * Defined in failsafe_logger_utility.cpp; reset and repopulated by
+ * pid_control::buildFailsafeLoggers(), which lists each zone ID at most once
+ * per sensor name.
+ */
+extern std::unordered_map<std::string, std::vector<int64_t>> sensorNameToZoneId;
+
+namespace pid_control
+{
+
+/** Given a sensor name, attempt to output entering/leaving-failsafe-mode
+ * logs for its corresponding zones.
+ *
+ * Sensor names that are not present in `sensorNameToZoneId`, and zone IDs
+ * without an entry in `zoneIdToFailsafeLogger`, are ignored.
+ *
+ * @param[in] sensorName - name of the sensor whose zones should be logged.
+ * @param[in] newFailsafeState - failsafe state to report for those zones.
+ * @param[in] location - where the update came from; passed to the logger.
+ * @param[in] reason - human-readable reason; passed to the logger.
+ */
+inline void outputFailsafeLogWithSensor(
+    const std::string sensorName, const bool newFailsafeState,
+    const std::string location, const std::string reason)
+{
+    // Look up with find() rather than operator[]: operator[] would insert an
+    // empty entry into the global map for every unknown sensor name.
+    const auto zoneIdsIt = sensorNameToZoneId.find(sensorName);
+    if (zoneIdsIt == sensorNameToZoneId.end())
+    {
+        return;
+    }
+
+    for (const int64_t zoneId : zoneIdsIt->second)
+    {
+        const auto loggerIt = zoneIdToFailsafeLogger.find(zoneId);
+        if (loggerIt != zoneIdToFailsafeLogger.end())
+        {
+            loggerIt->second->outputFailsafeLog(zoneId, newFailsafeState,
+                                                location, reason);
+        }
+    }
+}
+
+/** Given a zone ID, attempt to output an entering/leaving-failsafe-mode log
+ * for that zone.
+ *
+ * Does nothing when no logger is registered for `zoneId` in
+ * `zoneIdToFailsafeLogger`.
+ *
+ * @param[in] zoneId - ID of the zone to log for.
+ * @param[in] newFailsafeState - failsafe state to report for the zone.
+ * @param[in] location - where the update came from; passed to the logger.
+ * @param[in] reason - human-readable reason; passed to the logger.
+ */
+inline void outputFailsafeLogWithZone(
+    const int64_t zoneId, const bool newFailsafeState,
+    const std::string location, const std::string reason)
+{
+    const auto entry = zoneIdToFailsafeLogger.find(zoneId);
+    if (entry == zoneIdToFailsafeLogger.end())
+    {
+        return;
+    }
+    entry->second->outputFailsafeLog(zoneId, newFailsafeState, location,
+                                     reason);
+}
+} // namespace pid_control