blob: 52e0b85782b4b2f1c914f77d855ff360d7ec14de [file] [log] [blame]
/*
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
16
AppaRao Puli67d184c2020-05-29 00:48:33 +053017#include "pfr.hpp"
18#include "pfr_mgr.hpp"
19
AppaRao Puli88aa33b2019-07-18 23:49:55 +053020#include <systemd/sd-journal.h>
21
AppaRao Puli46cead92019-07-22 16:50:09 +053022#include <boost/asio.hpp>
AppaRao Pulie63eeda2019-07-05 16:25:38 +053023
AppaRao Puli88aa33b2019-07-18 23:49:55 +053024// Caches the last Recovery/Panic Count to
25// identify any new Recovery/panic actions.
26/* TODO: When BMC Reset's, these values will be lost
27 * Persist this info using settingsd */
28static uint8_t lastRecoveryCount = 0;
29static uint8_t lastPanicCount = 0;
30static uint8_t lastMajorErr = 0;
31static uint8_t lastMinorErr = 0;
32
33static bool stateTimerRunning = false;
AppaRao Puli46cead92019-07-22 16:50:09 +053034bool finishedSettingChkPoint = false;
35static constexpr uint8_t bmcBootFinishedChkPoint = 0x09;
36
AppaRao Puli88aa33b2019-07-18 23:49:55 +053037std::unique_ptr<boost::asio::steady_timer> stateTimer = nullptr;
AppaRao Puli46cead92019-07-22 16:50:09 +053038std::unique_ptr<boost::asio::steady_timer> initTimer = nullptr;
AppaRao Puli88aa33b2019-07-18 23:49:55 +053039
AppaRao Pulie4e95652019-07-19 16:52:01 +053040std::vector<std::unique_ptr<intel::pfr::PfrVersion>> pfrVersionObjects;
41std::unique_ptr<intel::pfr::PfrConfig> pfrConfigObject;
42
43using namespace intel::pfr;
44// List holds <ObjPath> <ImageType> <VersionPurpose>
45static std::vector<std::tuple<std::string, ImageType, std::string>>
46 verComponentList = {
47 std::make_tuple("bmc_active", ImageType::bmcActive, versionPurposeBMC),
48 std::make_tuple("bmc_recovery", ImageType::bmcRecovery,
49 versionPurposeBMC),
50 std::make_tuple("bios_active", ImageType::biosActive,
51 versionPurposeHost),
52 std::make_tuple("bios_recovery", ImageType::biosRecovery,
53 versionPurposeHost),
Vikram Bodireddy3c6c8c32019-12-05 11:06:15 +053054 std::make_tuple("cpld_active", ImageType::cpldActive,
55 versionPurposeOther),
56 std::make_tuple("cpld_recovery", ImageType::cpldRecovery,
57 versionPurposeOther),
58};
AppaRao Pulie4e95652019-07-19 16:52:01 +053059
AppaRao Pulie90f1282019-11-05 01:07:05 +053060// Recovery reason map.
61// {<CPLD association>,{<Redfish MessageID>, <Recovery Reason>}}
62static const boost::container::flat_map<uint8_t,
63 std::pair<std::string, std::string>>
64 recoveryReasonMap = {
65 {0x01,
66 {"BIOSFirmwareRecoveryReason",
Chalapathi3fb544b2020-02-14 15:43:49 +000067 "BIOS active image authentication failure"}},
AppaRao Pulie90f1282019-11-05 01:07:05 +053068 {0x02,
69 {"BIOSFirmwareRecoveryReason",
Chalapathi3fb544b2020-02-14 15:43:49 +000070 "BIOS recovery image authentication failure"}},
AppaRao Pulie90f1282019-11-05 01:07:05 +053071 {0x03, {"MEFirmwareRecoveryReason", "ME launch failure"}},
72 {0x04, {"BIOSFirmwareRecoveryReason", "ACM launch failure"}},
73 {0x05, {"BIOSFirmwareRecoveryReason", "IBB launch failure"}},
74 {0x06, {"BIOSFirmwareRecoveryReason", "OBB launch failure"}},
75 {0x07,
76 {"BMCFirmwareRecoveryReason",
77 "BMC active image authentication failure"}},
78 {0x08,
79 {"BMCFirmwareRecoveryReason",
80 "BMC recovery image authentication failure"}},
81 {0x09, {"BMCFirmwareRecoveryReason", "BMC launch failure"}},
82 {0x0A, {"CPLDFirmwareRecoveryReason", "CPLD watchdog expired"}}};
AppaRao Puli88aa33b2019-07-18 23:49:55 +053083
AppaRao Pulie90f1282019-11-05 01:07:05 +053084// Panic Reason map.
85// {<CPLD association>, {<Redfish MessageID>, <Panic reason> })
86static const boost::container::flat_map<uint8_t,
87 std::pair<std::string, std::string>>
88 panicReasonMap = {
Chalapathi3fb544b2020-02-14 15:43:49 +000089 {0x01, {"BIOSFirmwarePanicReason", "BIOS update intent"}},
90 {0x02, {"BMCFirmwarePanicReason", "BMC update intent"}},
91 {0x03, {"BMCFirmwarePanicReason", "BMC reset detected"}},
92 {0x04, {"BMCFirmwarePanicReason", "BMC watchdog expired"}},
93 {0x05, {"MEFirmwarePanicReason", "ME watchdog expired"}},
94 {0x06, {"BIOSFirmwarePanicReason", "ACM watchdog expired"}},
95 {0x07, {"BIOSFirmwarePanicReason", "IBB watchdog expired"}},
96 {0x08, {"BIOSFirmwarePanicReason", "OBB watchdog expired"}},
AppaRao Pulie90f1282019-11-05 01:07:05 +053097 {0x09,
98 {"BIOSFirmwarePanicReason",
Chalapathi3fb544b2020-02-14 15:43:49 +000099 "ACM or IBB or OBB authentication failure"}}};
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530100
AppaRao Puli24766942019-11-13 19:27:08 +0530101// Firmware resiliency major map.
102// {<CPLD association>, {<Redfish MessageID>, <Error reason> })
103static const boost::container::flat_map<uint8_t,
104 std::pair<std::string, std::string>>
105 majorErrorCodeMap = {
106 {0x01,
107 {"BMCFirmwareResiliencyError", "BMC image authentication failed"}},
108 {0x02,
109 {"BIOSFirmwareResiliencyError", "BIOS image authentication failed"}},
Chalapathi3fb544b2020-02-14 15:43:49 +0000110 {0x03, {"BIOSFirmwareResiliencyError", "Update from BIOS failed"}},
111 {0x04, {"BMCFirmwareResiliencyError", "Update from BMC failed"}}};
AppaRao Puli24766942019-11-13 19:27:08 +0530112
AppaRao Pulie4e95652019-07-19 16:52:01 +0530113static void updateDbusPropertiesCache()
114{
115 for (const auto& pfrVerObj : pfrVersionObjects)
116 {
117 pfrVerObj->updateVersion();
118 }
119
120 // Update provisoningStatus properties
121 pfrConfigObject->updateProvisioningStatus();
122
123 phosphor::logging::log<phosphor::logging::level::INFO>(
124 "PFR Manager service cache data updated.");
125}
126
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530127static void logLastRecoveryEvent()
128{
129 uint8_t reason = 0;
130 if (0 !=
131 intel::pfr::readCpldReg(intel::pfr::ActionType::recoveryReason, reason))
132 {
133 return;
134 }
135
AppaRao Pulie90f1282019-11-05 01:07:05 +0530136 auto it = recoveryReasonMap.find(reason);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530137 if (it == recoveryReasonMap.end())
138 {
139 // No matching found. So just return without logging event.
140 return;
141 }
AppaRao Pulie90f1282019-11-05 01:07:05 +0530142 std::string msgId = "OpenBMC.0.1." + it->second.first;
143 sd_journal_send("MESSAGE=%s", "Platform firmware recovery occurred.",
144 "PRIORITY=%i", LOG_WARNING, "REDFISH_MESSAGE_ID=%s",
145 msgId.c_str(), "REDFISH_MESSAGE_ARGS=%s",
146 it->second.second.c_str(), NULL);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530147}
148
149static void logLastPanicEvent()
150{
151 uint8_t reason = 0;
152 if (0 !=
153 intel::pfr::readCpldReg(intel::pfr::ActionType::panicReason, reason))
154 {
155 return;
156 }
157
AppaRao Pulie90f1282019-11-05 01:07:05 +0530158 auto it = panicReasonMap.find(reason);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530159 if (it == panicReasonMap.end())
160 {
161 // No matching found. So just return without logging event.
162 return;
163 }
164
AppaRao Pulie90f1282019-11-05 01:07:05 +0530165 std::string msgId = "OpenBMC.0.1." + it->second.first;
166 sd_journal_send("MESSAGE=%s", "Platform firmware panic occurred.",
167 "PRIORITY=%i", LOG_WARNING, "REDFISH_MESSAGE_ID=%s",
168 msgId.c_str(), "REDFISH_MESSAGE_ARGS=%s",
169 it->second.second.c_str(), NULL);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530170}
171
AppaRao Puli24766942019-11-13 19:27:08 +0530172static void logResiliencyErrorEvent(const uint8_t majorErrorCode,
173 const uint8_t minorErrorCode)
174{
175 auto it = majorErrorCodeMap.find(majorErrorCode);
176 if (it == majorErrorCodeMap.end())
177 {
178 // No matching found. So just return without logging event.
179 return;
180 }
181
182 std::string errorStr =
183 it->second.second + "(MinorCode:0x" + toHexString(minorErrorCode) + ")";
184 std::string msgId = "OpenBMC.0.1." + it->second.first;
185 sd_journal_send(
186 "MESSAGE=%s", "Platform firmware resiliency error occurred.",
187 "PRIORITY=%i", LOG_ERR, "REDFISH_MESSAGE_ID=%s", msgId.c_str(),
188 "REDFISH_MESSAGE_ARGS=%s", errorStr.c_str(), NULL);
189}
190
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530191static void checkAndLogEvents()
192{
193 uint8_t currPanicCount = 0;
194 if (0 == intel::pfr::readCpldReg(intel::pfr::ActionType::panicCount,
195 currPanicCount))
196 {
197 if (lastPanicCount != currPanicCount)
198 {
199 // Update cached data and log redfish event by reading reason.
200 lastPanicCount = currPanicCount;
201 logLastPanicEvent();
202 }
203 }
204
205 uint8_t currRecoveryCount = 0;
206 if (0 == intel::pfr::readCpldReg(intel::pfr::ActionType::recoveryCount,
207 currRecoveryCount))
208 {
209 if (lastRecoveryCount != currRecoveryCount)
210 {
211 // Update cached data and log redfish event by reading reason.
212 lastRecoveryCount = currRecoveryCount;
213 logLastRecoveryEvent();
214 }
215 }
216
217 uint8_t majorErr = 0;
218 uint8_t minorErr = 0;
219 if ((0 == intel::pfr::readCpldReg(intel::pfr::ActionType::majorError,
Chalapathic97962c2020-05-11 10:27:41 +0000220 majorErr)) &&
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530221 (0 ==
222 intel::pfr::readCpldReg(intel::pfr::ActionType::minorError, minorErr)))
223 {
224 if ((lastMajorErr != majorErr) || (lastMinorErr != minorErr))
225 {
226 lastMajorErr = majorErr;
227 lastMinorErr = minorErr;
228
AppaRao Puli24766942019-11-13 19:27:08 +0530229 logResiliencyErrorEvent(majorErr, minorErr);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530230 }
231 }
232}
233
234static void monitorPlatformStateChange(
235 sdbusplus::asio::object_server& server,
236 std::shared_ptr<sdbusplus::asio::connection>& conn)
237{
238 constexpr size_t pollTimeout = 10; // seconds
239 stateTimer->expires_after(std::chrono::seconds(pollTimeout));
240 stateTimer->async_wait(
241 [&server, &conn](const boost::system::error_code& ec) {
242 if (ec == boost::asio::error::operation_aborted)
243 {
244 // Timer reset.
245 return;
246 }
247 if (ec)
248 {
249 // Platform State Monitor - Timer cancelled.
250 return;
251 }
252 checkAndLogEvents();
253 monitorPlatformStateChange(server, conn);
254 });
255}
256
AppaRao Puli46cead92019-07-22 16:50:09 +0530257void checkAndSetCheckpoint(sdbusplus::asio::object_server& server,
258 std::shared_ptr<sdbusplus::asio::connection>& conn)
259{
260 // Check whether systemd completed all the loading.
261 conn->async_method_call(
262 [&server, &conn](boost::system::error_code ec,
263 const std::variant<uint64_t>& value) {
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530264 if (!ec)
AppaRao Puli46cead92019-07-22 16:50:09 +0530265 {
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530266 if (std::get<uint64_t>(value))
AppaRao Puli46cead92019-07-22 16:50:09 +0530267 {
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530268 phosphor::logging::log<phosphor::logging::level::INFO>(
269 "PFR: BMC boot completed. Setting checkpoint 9.");
270 if (!finishedSettingChkPoint)
271 {
272 finishedSettingChkPoint = true;
273 intel::pfr::setBMCBootCheckpoint(
274 bmcBootFinishedChkPoint);
275 }
276 return;
AppaRao Puli46cead92019-07-22 16:50:09 +0530277 }
278 }
279 else
280 {
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530281 // Failed to get data from systemd. System might not
282 // be ready yet. Attempt again for data.
283 phosphor::logging::log<phosphor::logging::level::ERR>(
284 "PFR: aync call failed to get FinishTimestamp.",
285 phosphor::logging::entry("MSG=%s", ec.message().c_str()));
AppaRao Puli46cead92019-07-22 16:50:09 +0530286 }
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530287 // FIX-ME: Latest up-stream sync caused issue in receiving
288 // StartupFinished signal. Unable to get StartupFinished signal
289 // from systemd1 hence using poll method too, to trigger it
290 // properly.
291 constexpr size_t pollTimeout = 10; // seconds
292 initTimer->expires_after(std::chrono::seconds(pollTimeout));
293 initTimer->async_wait([&server,
294 &conn](const boost::system::error_code& ec) {
295 if (ec == boost::asio::error::operation_aborted)
296 {
297 // Timer reset.
298 phosphor::logging::log<phosphor::logging::level::INFO>(
299 "PFR: Set boot Checkpoint - Timer aborted or stopped.");
300 return;
301 }
302 if (ec)
303 {
304 phosphor::logging::log<phosphor::logging::level::ERR>(
305 "PFR: Set boot Checkpoint - async wait error.");
306 return;
307 }
308 checkAndSetCheckpoint(server, conn);
309 });
AppaRao Puli46cead92019-07-22 16:50:09 +0530310 },
311 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
312 "org.freedesktop.DBus.Properties", "Get",
313 "org.freedesktop.systemd1.Manager", "FinishTimestamp");
314}
315
AppaRao Pulia9bf9712020-01-12 05:45:48 +0530316void monitorSignals(sdbusplus::asio::object_server& server,
317 std::shared_ptr<sdbusplus::asio::connection>& conn)
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530318{
AppaRao Puli46cead92019-07-22 16:50:09 +0530319 // Monitor Boot finished signal and set the checkpoint 9 to
320 // notify CPLD about BMC boot finish.
321 auto bootFinishedSignal = std::make_unique<sdbusplus::bus::match::match>(
322 static_cast<sdbusplus::bus::bus&>(*conn),
323 "type='signal',"
324 "member='StartupFinished',path='/org/freedesktop/systemd1',"
325 "interface='org.freedesktop.systemd1.Manager'",
326 [&server, &conn](sdbusplus::message::message& msg) {
327 if (!finishedSettingChkPoint)
328 {
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530329 phosphor::logging::log<phosphor::logging::level::INFO>(
330 "PFR: BMC boot completed(StartupFinished). Setting "
331 "checkpoint 9.");
AppaRao Puli46cead92019-07-22 16:50:09 +0530332 finishedSettingChkPoint = true;
333 intel::pfr::setBMCBootCheckpoint(bmcBootFinishedChkPoint);
334 }
335 });
336 checkAndSetCheckpoint(server, conn);
337
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530338 // Capture the Chassis state and Start the monitor timer
339 // if state changed to 'On'. Run timer until OS boot.
340 // Stop timer if state changed to 'Off'.
341 static auto matchChassisState = sdbusplus::bus::match::match(
342 static_cast<sdbusplus::bus::bus&>(*conn),
343 "type='signal',member='PropertiesChanged', "
344 "interface='org.freedesktop.DBus.Properties', "
345 "sender='xyz.openbmc_project.State.Chassis', "
346 "arg0namespace='xyz.openbmc_project.State.Chassis'",
347 [&server, &conn](sdbusplus::message::message& message) {
348 std::string intfName;
349 std::map<std::string, std::variant<std::string>> properties;
350 message.read(intfName, properties);
351
352 const auto it = properties.find("CurrentPowerState");
353 if (it != properties.end())
354 {
355 const std::string* state =
356 std::get_if<std::string>(&it->second);
357 if (state != nullptr)
358 {
359 if ((*state ==
360 "xyz.openbmc_project.State.Chassis.PowerState.On") &&
361 (!stateTimerRunning))
362 {
363 stateTimerRunning = true;
364 monitorPlatformStateChange(server, conn);
365 }
366 else if ((*state == "xyz.openbmc_project.State.Chassis."
367 "PowerState.Off") &&
368 (stateTimerRunning))
369 {
370 stateTimer->cancel();
371 checkAndLogEvents();
372 stateTimerRunning = false;
373 }
374 }
AppaRao Pulie4e95652019-07-19 16:52:01 +0530375
376 // Update the D-Bus properties when chassis state changes.
377 updateDbusPropertiesCache();
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530378 }
379 });
380
381 // Capture the Host state and Start the monitor timer
382 // if state changed to 'Running'. Run timer until OS boot.
383 // Stop timer if state changed to 'Off'.
384 static auto matchHostState = sdbusplus::bus::match::match(
385 static_cast<sdbusplus::bus::bus&>(*conn),
386 "type='signal',member='PropertiesChanged', "
387 "interface='org.freedesktop.DBus.Properties', "
388 "sender='xyz.openbmc_project.State.Chassis', "
389 "arg0namespace='xyz.openbmc_project.State.Host'",
390 [&server, &conn](sdbusplus::message::message& message) {
391 std::string intfName;
392 std::map<std::string, std::variant<std::string>> properties;
393 message.read(intfName, properties);
394
395 const auto it = properties.find("CurrentHostState");
396 if (it != properties.end())
397 {
398 const std::string* state =
399 std::get_if<std::string>(&it->second);
400 if (state != nullptr)
401 {
402 if ((*state ==
403 "xyz.openbmc_project.State.Host.HostState.Running") &&
404 (!stateTimerRunning))
405 {
406 stateTimerRunning = true;
407 monitorPlatformStateChange(server, conn);
408 }
409 else if (((*state == "xyz.openbmc_project.State.Host."
410 "HostState.Off") ||
411 (*state == "xyz.openbmc_project.State.Host."
412 "HostState.Quiesced")) &&
413 (stateTimerRunning))
414 {
415 stateTimer->cancel();
416 checkAndLogEvents();
417 stateTimerRunning = false;
418 }
419 }
AppaRao Pulie4e95652019-07-19 16:52:01 +0530420
421 // Update the D-Bus properties when host state changes.
422 updateDbusPropertiesCache();
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530423 }
424 });
425
426 // Capture the OS state change and stop monitor timer
427 // if OS boots completly or becomes Inactive.
428 // start timer in other cases to mnitor states.
429 static auto matchOsState = sdbusplus::bus::match::match(
430 static_cast<sdbusplus::bus::bus&>(*conn),
431 "type='signal',member='PropertiesChanged', "
432 "interface='org.freedesktop.DBus.Properties', "
433 "sender='xyz.openbmc_project.State.Chassis', "
434 "arg0namespace='xyz.openbmc_project.State.OperatingSystem.Status'",
435 [&server, &conn](sdbusplus::message::message& message) {
436 std::string intfName;
437 std::map<std::string, std::variant<std::string>> properties;
438 message.read(intfName, properties);
439
440 const auto it = properties.find("OperatingSystemState");
441 if (it != properties.end())
442 {
443 const std::string* state =
444 std::get_if<std::string>(&it->second);
445 if (state != nullptr)
446 {
447 if (((*state == "BootComplete") ||
448 (*state == "Inactive")) &&
449 (stateTimerRunning))
450 {
451 stateTimer->cancel();
452 checkAndLogEvents();
453 stateTimerRunning = false;
454 }
455 else if (!stateTimerRunning)
456 {
457 stateTimerRunning = true;
458 monitorPlatformStateChange(server, conn);
459 }
460 }
461 }
462 });
463
464 // First time, check and log events if any.
465 checkAndLogEvents();
AppaRao Pulia9bf9712020-01-12 05:45:48 +0530466}
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530467
AppaRao Pulia9bf9712020-01-12 05:45:48 +0530468int main()
469{
470 // setup connection to dbus
471 boost::asio::io_service io;
472 auto conn = std::make_shared<sdbusplus::asio::connection>(io);
473 stateTimer = std::make_unique<boost::asio::steady_timer>(io);
474 initTimer = std::make_unique<boost::asio::steady_timer>(io);
475 auto server = sdbusplus::asio::object_server(conn, true);
476 monitorSignals(server, conn);
477
478 auto rootInterface = server.add_interface("/xyz/openbmc_project/pfr", "");
479 rootInterface->initialize();
480 server.add_manager("/xyz/openbmc_project/pfr");
481
482 // Create PFR attributes object and interface
483 pfrConfigObject = std::make_unique<intel::pfr::PfrConfig>(server, conn);
484
485 // Create Software objects using Versions interface
486 for (const auto& entry : verComponentList)
487 {
488 pfrVersionObjects.emplace_back(std::make_unique<intel::pfr::PfrVersion>(
489 server, conn, std::get<0>(entry), std::get<1>(entry),
490 std::get<2>(entry)));
491 }
492
493 conn->request_name("xyz.openbmc_project.PFR.Manager");
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530494 phosphor::logging::log<phosphor::logging::level::INFO>(
495 "Intel PFR service started successfully");
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530496 io.run();
497
498 return 0;
499}