sensor-mon: Persist timer start times
It's possible that while a shutdown timer is running, this app or the
whole BMC may restart. In that case, we want to restart the timer with
only the remaining time, assuming the alarm is still active.
To do this, create a new AlarmTimestamps class to hold any running timer
start times and persist these times using the cereal library. If the
application is restarted with timestamps written out, the alarm is
still active, and power is on, then the shutdown timer delay is
calculated as:
delay = shutdownDelay - (now - original time)
Note that cereal doesn't know how to handle the AlarmType or
ShutdownType types so they are converted to ints before writing.
Signed-off-by: Matt Spinler <spinler@us.ibm.com>
Change-Id: If07abe91c0e6951fcd74bbc49eb511a38f7b4006
diff --git a/sensor-monitor/alarm_timestamps.hpp b/sensor-monitor/alarm_timestamps.hpp
new file mode 100644
index 0000000..0c7d148
--- /dev/null
+++ b/sensor-monitor/alarm_timestamps.hpp
@@ -0,0 +1,246 @@
+/**
+ * Copyright © 2021 IBM Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+#include "config.h"
+
+#include "types.hpp"
+
+#include <cereal/archives/json.hpp>
+#include <cereal/types/string.hpp>
+#include <cereal/types/tuple.hpp>
+#include <cereal/types/vector.hpp>
+#include <sdeventplus/clock.hpp>
+#include <sdeventplus/utility/timer.hpp>
+
+#include <cstdint>
+#include <exception>
+#include <filesystem>
+#include <fstream>
+#include <map>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <tuple>
+#include <vector>
+
+namespace sensor::monitor
+{
+
+/**
+ * @class AlarmTimestamps
+ *
+ * This class keeps track of the timestamps at which the shutdown
+ * timers are started in case the process or whole BMC restarts
+ * while a timer is running. In the case where the process starts
+ * when a timer was previously running and an alarm is still active,
+ * a new timer can be started with just the remaining time.
+ *
+ * The map is written with cereal, as JSON, to a file under
+ * SENSOR_MONITOR_PERSIST_ROOT_PATH every time its contents change.
+ */
+class AlarmTimestamps
+{
+  public:
+    ~AlarmTimestamps() = default;
+    AlarmTimestamps(const AlarmTimestamps&) = delete;
+    AlarmTimestamps& operator=(const AlarmTimestamps&) = delete;
+    AlarmTimestamps(AlarmTimestamps&&) = delete;
+    AlarmTimestamps& operator=(AlarmTimestamps&&) = delete;
+
+    /**
+     * @brief Constructor
+     *
+     * Loads any saved timestamps. A missing or unreadable file results
+     * in an empty map rather than an exception.
+     */
+    AlarmTimestamps()
+    {
+        load();
+    }
+
+    /**
+     * @brief Adds an entry to the timestamps map and persists it.
+     *
+     * If an entry with the same key already exists it is left untouched
+     * and nothing is written.
+     *
+     * @param[in] key - The AlarmKey value
+     * @param[in] timestamp - The start timestamp to save
+     */
+    void add(const AlarmKey& key, uint64_t timestamp)
+    {
+        // emplace() won't do anything if an entry with that key was
+        // already present, so only save if an entry was actually added.
+        if (timestamps.emplace(key, timestamp).second)
+        {
+            save();
+        }
+    }
+
+    /**
+     * @brief Erase an entry using the passed in alarm key.
+     *
+     * The file is only rewritten when an entry was actually removed.
+     *
+     * @param[in] key - The AlarmKey value
+     */
+    void erase(const AlarmKey& key)
+    {
+        if (timestamps.erase(key) != 0)
+        {
+            save();
+        }
+    }
+
+    /**
+     * @brief Erase an entry using an iterator.
+     *
+     * @param[in,out] entry - Iterator into the map returned by get().
+     *                        Passing end() is a no-op. On return it
+     *                        refers to the element following the erased
+     *                        one, or end(), instead of being left
+     *                        dangling.
+     */
+    void erase(std::map<AlarmKey, uint64_t>::const_iterator& entry)
+    {
+        // Erasing end() is undefined behaviour for std::map.
+        if (entry == timestamps.cend())
+        {
+            return;
+        }
+
+        entry = timestamps.erase(entry);
+        save();
+    }
+
+    /**
+     * @brief Clear all entries.
+     *
+     * The file is only rewritten when there was something to clear.
+     */
+    void clear()
+    {
+        if (!timestamps.empty())
+        {
+            timestamps.clear();
+            save();
+        }
+    }
+
+    /**
+     * @brief Remove any entries for which there is no running timer.
+     *        This is used on startup, when an alarm could have cleared
+     *        during a restart, to get rid of the old entries.
+     *
+     * An entry is kept only if @p alarms holds a non-null timer for the
+     * same key and that timer is enabled.
+     *
+     * @param[in] alarms - The current alarms map.
+     */
+    void prune(
+        const std::map<AlarmKey, std::unique_ptr<sdeventplus::utility::Timer<
+                                     sdeventplus::ClockId::Monotonic>>>& alarms)
+    {
+        const auto isTimerRunning = [&alarms](const AlarmKey& key) {
+            const auto alarm = alarms.find(key);
+            return alarm != alarms.end() && alarm->second &&
+                   alarm->second->isEnabled();
+        };
+
+        bool pruned = false;
+
+        for (auto it = timestamps.begin(); it != timestamps.end();)
+        {
+            if (isTimerRunning(it->first))
+            {
+                ++it;
+            }
+            else
+            {
+                it = timestamps.erase(it);
+                pruned = true;
+            }
+        }
+
+        // Avoid rewriting the file when nothing changed.
+        if (pruned)
+        {
+            save();
+        }
+    }
+
+    /**
+     * @brief Returns the timestamps map
+     */
+    const std::map<AlarmKey, uint64_t>& get() const
+    {
+        return timestamps;
+    }
+
+    /**
+     * @brief Saves the timestamps map in the filesystem using cereal.
+     *
+     * Since cereal doesn't understand the AlarmType or ShutdownType
+     * enums, they are converted to ints before being written.
+     */
+    void save()
+    {
+        const std::filesystem::path path =
+            std::filesystem::path{SENSOR_MONITOR_PERSIST_ROOT_PATH} /
+            timestampsFilename;
+
+        if (!std::filesystem::exists(path.parent_path()))
+        {
+            // create_directories() also creates any missing intermediate
+            // directories, which create_directory() would fail on.
+            std::filesystem::create_directories(path.parent_path());
+        }
+
+        std::vector<std::tuple<std::string, int, int, uint64_t>> times;
+        times.reserve(timestamps.size());
+
+        for (const auto& [key, startTime] : timestamps)
+        {
+            times.emplace_back(std::get<std::string>(key),
+                               static_cast<int>(std::get<ShutdownType>(key)),
+                               static_cast<int>(std::get<AlarmType>(key)),
+                               startTime);
+        }
+
+        std::ofstream stream{path.c_str()};
+        cereal::JSONOutputArchive oarchive{stream};
+
+        oarchive(times);
+    }
+
+  private:
+    static constexpr auto timestampsFilename = "shutdownAlarmStartTimes";
+
+    /**
+     * @brief Loads the saved timestamps from the filesystem.
+     *
+     * As with save(), cereal doesn't understand the ShutdownType or AlarmType
+     * enums so they have to have been saved as ints and converted.
+     *
+     * If the file cannot be read or parsed, it is removed and the map is
+     * left empty so that a bad file cannot make every construction fail.
+     */
+    void load()
+    {
+        const std::filesystem::path path =
+            std::filesystem::path{SENSOR_MONITOR_PERSIST_ROOT_PATH} /
+            timestampsFilename;
+
+        std::error_code ec;
+        if (!std::filesystem::exists(path, ec))
+        {
+            return;
+        }
+
+        try
+        {
+            std::vector<std::tuple<std::string, int, int, uint64_t>> times;
+
+            std::ifstream stream{path.c_str()};
+            cereal::JSONInputArchive iarchive{stream};
+            iarchive(times);
+
+            for (const auto& [sensorPath, shutdownType, alarmType, timestamp] :
+                 times)
+            {
+                timestamps.emplace(
+                    AlarmKey{sensorPath,
+                             static_cast<ShutdownType>(shutdownType),
+                             static_cast<AlarmType>(alarmType)},
+                    timestamp);
+            }
+        }
+        catch (const std::exception&)
+        {
+            // The file is unusable (e.g. truncated or not valid JSON).
+            // Drop anything partially loaded and delete the file so the
+            // next save() starts clean. No logging facility is included
+            // by this header, so the failure is not reported here.
+            timestamps.clear();
+            std::filesystem::remove(path, ec);
+        }
+    }
+
+    /**
+     * @brief The map of AlarmKeys and timer start times.
+     */
+    std::map<AlarmKey, uint64_t> timestamps;
+};
+
+} // namespace sensor::monitor
diff --git a/sensor-monitor/shutdown_alarm_monitor.cpp b/sensor-monitor/shutdown_alarm_monitor.cpp
index 6023f88..d1508c0 100644
--- a/sensor-monitor/shutdown_alarm_monitor.cpp
+++ b/sensor-monitor/shutdown_alarm_monitor.cpp
@@ -81,6 +81,14 @@
if (_powerState->isPowerOn())
{
checkAlarms();
+
+ // Get rid of any previous saved timestamps that don't
+ // apply anymore.
+ timestamps.prune(alarms);
+ }
+ else
+ {
+ timestamps.clear();
}
}
@@ -226,6 +234,48 @@
// threshold immediately.
}
+    // Current system-clock time in milliseconds since the epoch. This is
+    // the same kind of value that gets saved as a timer's start time.
+    uint64_t now = std::chrono::duration_cast<std::chrono::milliseconds>(
+                       std::chrono::system_clock::now().time_since_epoch())
+                       .count();
+
+    // If there is a saved timestamp for this timer, then we were restarted
+    // while the timer was running.  Calculate the remaining time to use
+    // for the timer.
+    auto previousStartTime = timestamps.get().find(alarmKey);
+    if (previousStartTime != timestamps.get().end())
+    {
+        const uint64_t& original = previousStartTime->second;
+
+        log<level::INFO>(fmt::format("Found previously running {} timer "
+                                     "for {} with start time {}",
+                                     propertyName, sensorPath, original)
+                             .c_str());
+
+        // Sanity check it isn't total garbage: a saved start time that is
+        // later than the current time cannot be used to compute an
+        // elapsed time.
+        if (now >= original)
+        {
+            // Stays at zero when the full delay has already elapsed.
+            uint64_t remainingTime = 0;
+            auto elapsedTime = now - original;
+
+            if (elapsedTime < static_cast<uint64_t>(shutdownDelay.count()))
+            {
+                remainingTime =
+                    static_cast<uint64_t>(shutdownDelay.count()) - elapsedTime;
+            }
+
+            shutdownDelay = std::chrono::milliseconds{remainingTime};
+        }
+        else
+        {
+            // Four placeholders need four arguments; sensorPath fills the
+            // second one.
+            log<level::WARNING>(
+                fmt::format(
+                    "Restarting {} shutdown timer for {} for full "
+                    "time because saved time {} is after current time {}",
+                    propertyName, sensorPath, original, now)
+                    .c_str());
+        }
+    }
+
+
log<level::INFO>(
fmt::format("Starting {}ms {} shutdown timer due to sensor {} value {}",
shutdownDelay.count(), propertyName, sensorPath, *value)
@@ -238,6 +288,10 @@
event, std::bind(&ShutdownAlarmMonitor::timerExpired, this, alarmKey));
timer->restartOnce(shutdownDelay);
+
+    // Note that if this key is already in the timestamps map because
+    // the timer was already running, the timestamp will not be updated.
+ timestamps.add(alarmKey, now);
}
void ShutdownAlarmMonitor::stopTimer(const AlarmKey& alarmKey)
@@ -262,6 +316,8 @@
auto& timer = alarm->second;
timer->setEnabled(false);
timer.reset();
+
+ timestamps.erase(alarmKey);
}
void ShutdownAlarmMonitor::timerExpired(const AlarmKey& alarmKey)
@@ -278,6 +334,8 @@
SDBusPlus::callMethod(systemdService, systemdPath, systemdMgrIface,
"StartUnit", "obmc-chassis-hard-poweroff@0.target",
"replace");
+
+ timestamps.erase(alarmKey);
}
void ShutdownAlarmMonitor::powerStateChanged(bool powerStateOn)
@@ -288,6 +346,8 @@
}
else
{
+ timestamps.clear();
+
// Cancel and delete all timers
std::for_each(alarms.begin(), alarms.end(), [](auto& alarm) {
auto& timer = alarm.second;
diff --git a/sensor-monitor/shutdown_alarm_monitor.hpp b/sensor-monitor/shutdown_alarm_monitor.hpp
index de1c49e..002c883 100644
--- a/sensor-monitor/shutdown_alarm_monitor.hpp
+++ b/sensor-monitor/shutdown_alarm_monitor.hpp
@@ -14,6 +14,7 @@
* limitations under the License.
*/
#pragma once
+#include "alarm_timestamps.hpp"
#include "power_state.hpp"
#include "types.hpp"
@@ -183,6 +184,11 @@
std::map<AlarmKey, std::unique_ptr<sdeventplus::utility::Timer<
sdeventplus::ClockId::Monotonic>>>
alarms;
+
+ /**
+ * @brief The running alarm timer timestamps.
+ */
+ AlarmTimestamps timestamps;
};
} // namespace sensor::monitor