blob: ae809122eda46e16aa87908ab4125a651a72dfe4 [file] [log] [blame]
AppaRao Pulie63eeda2019-07-05 16:25:38 +05301/*
2// Copyright (c) 2019 Intel Corporation
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15*/
16
AppaRao Puli88aa33b2019-07-18 23:49:55 +053017#include <systemd/sd-journal.h>
18
AppaRao Pulie63eeda2019-07-05 16:25:38 +053019#include "pfr_mgr.hpp"
AppaRao Puli88aa33b2019-07-18 23:49:55 +053020#include "pfr.hpp"
AppaRao Puli46cead92019-07-22 16:50:09 +053021#include <boost/asio.hpp>
AppaRao Pulie63eeda2019-07-05 16:25:38 +053022
AppaRao Puli88aa33b2019-07-18 23:49:55 +053023// Caches the last Recovery/Panic Count to
24// identify any new Recovery/panic actions.
25/* TODO: When BMC Reset's, these values will be lost
26 * Persist this info using settingsd */
27static uint8_t lastRecoveryCount = 0;
28static uint8_t lastPanicCount = 0;
29static uint8_t lastMajorErr = 0;
30static uint8_t lastMinorErr = 0;
31
32static bool stateTimerRunning = false;
AppaRao Puli46cead92019-07-22 16:50:09 +053033bool finishedSettingChkPoint = false;
34static constexpr uint8_t bmcBootFinishedChkPoint = 0x09;
35
AppaRao Puli88aa33b2019-07-18 23:49:55 +053036std::unique_ptr<boost::asio::steady_timer> stateTimer = nullptr;
AppaRao Puli46cead92019-07-22 16:50:09 +053037std::unique_ptr<boost::asio::steady_timer> initTimer = nullptr;
AppaRao Puli88aa33b2019-07-18 23:49:55 +053038
AppaRao Pulie4e95652019-07-19 16:52:01 +053039std::vector<std::unique_ptr<intel::pfr::PfrVersion>> pfrVersionObjects;
40std::unique_ptr<intel::pfr::PfrConfig> pfrConfigObject;
41
42using namespace intel::pfr;
43// List holds <ObjPath> <ImageType> <VersionPurpose>
44static std::vector<std::tuple<std::string, ImageType, std::string>>
45 verComponentList = {
46 std::make_tuple("bmc_active", ImageType::bmcActive, versionPurposeBMC),
47 std::make_tuple("bmc_recovery", ImageType::bmcRecovery,
48 versionPurposeBMC),
49 std::make_tuple("bios_active", ImageType::biosActive,
50 versionPurposeHost),
51 std::make_tuple("bios_recovery", ImageType::biosRecovery,
52 versionPurposeHost),
Vikram Bodireddy3c6c8c32019-12-05 11:06:15 +053053 std::make_tuple("cpld_active", ImageType::cpldActive,
54 versionPurposeOther),
55 std::make_tuple("cpld_recovery", ImageType::cpldRecovery,
56 versionPurposeOther),
57};
AppaRao Pulie4e95652019-07-19 16:52:01 +053058
AppaRao Pulie90f1282019-11-05 01:07:05 +053059// Recovery reason map.
60// {<CPLD association>,{<Redfish MessageID>, <Recovery Reason>}}
61static const boost::container::flat_map<uint8_t,
62 std::pair<std::string, std::string>>
63 recoveryReasonMap = {
64 {0x01,
65 {"BIOSFirmwareRecoveryReason",
66 "PCH active image authentication failure"}},
67 {0x02,
68 {"BIOSFirmwareRecoveryReason",
69 "PCH recovery image authentication failure"}},
70 {0x03, {"MEFirmwareRecoveryReason", "ME launch failure"}},
71 {0x04, {"BIOSFirmwareRecoveryReason", "ACM launch failure"}},
72 {0x05, {"BIOSFirmwareRecoveryReason", "IBB launch failure"}},
73 {0x06, {"BIOSFirmwareRecoveryReason", "OBB launch failure"}},
74 {0x07,
75 {"BMCFirmwareRecoveryReason",
76 "BMC active image authentication failure"}},
77 {0x08,
78 {"BMCFirmwareRecoveryReason",
79 "BMC recovery image authentication failure"}},
80 {0x09, {"BMCFirmwareRecoveryReason", "BMC launch failure"}},
81 {0x0A, {"CPLDFirmwareRecoveryReason", "CPLD watchdog expired"}}};
AppaRao Puli88aa33b2019-07-18 23:49:55 +053082
AppaRao Pulie90f1282019-11-05 01:07:05 +053083// Panic Reason map.
84// {<CPLD association>, {<Redfish MessageID>, <Panic reason> })
85static const boost::container::flat_map<uint8_t,
86 std::pair<std::string, std::string>>
87 panicReasonMap = {
88 {0x01, {"CPLDFirmwarePanicReason", "CPLD watchdog expired"}},
89 {0x02, {"BMCFirmwarePanicReason", "BMC watchdog expired"}},
90 {0x03, {"MEFirmwarePanicReason", "ME watchdog expired"}},
91 {0x04, {"BIOSFirmwarePanicReason", "ACM watchdog expired"}},
92 {0x05, {"BIOSFirmwarePanicReason", "IBB watchdog expired"}},
93 {0x06, {"BIOSFirmwarePanicReason", "OBB watchdog expired"}},
94 {0x07,
95 {"BMCFirmwarePanicReason", "BMC active image authentication failure"}},
96 {0x08,
97 {"BMCFirmwarePanicReason",
98 "BMC recovery image authentication failure"}},
99 {0x09,
100 {"BIOSFirmwarePanicReason",
101 "PCH active image authentication failure"}},
102 {0x0A,
103 {"BIOSFirmwarePanicReason",
104 "PCH recovery image authentication failure"}},
105 {0x0B, {"MEFirmwarePanicReason", "ME authentication failure"}},
106 {0x0C,
107 {"BIOSFirmwarePanicReason",
108 "ACM or IBB or OBB authentication failure"}},
109 {0x0D, {"BIOSFirmwarePanicReason", "PCH update intent"}},
110 {0x0E, {"BMCFirmwarePanicReason", "BMC update intent"}},
111 {0x0F, {"BMCFirmwarePanicReason", "BMC reset detected"}}};
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530112
AppaRao Puli24766942019-11-13 19:27:08 +0530113// Firmware resiliency major map.
114// {<CPLD association>, {<Redfish MessageID>, <Error reason> })
115static const boost::container::flat_map<uint8_t,
116 std::pair<std::string, std::string>>
117 majorErrorCodeMap = {
118 {0x01,
119 {"BMCFirmwareResiliencyError", "BMC image authentication failed"}},
120 {0x02,
121 {"BIOSFirmwareResiliencyError", "BIOS image authentication failed"}},
122 {0x03, {"BMCFirmwareResiliencyError", "BMC boot failed"}},
123 {0x04, {"MEFirmwareResiliencyError", "ME boot failed"}},
124 {0x05, {"BIOSFirmwareResiliencyError", "ACM boot failed"}},
125 {0x06, {"BIOSFirmwareResiliencyError", "BIOS boot failed"}},
126 {0x07, {"BIOSFirmwareResiliencyError", "Update from PCH failed"}},
127 {0x08, {"BIOSFirmwarePanicReason", "Update from BMC failed"}}};
128
AppaRao Pulie4e95652019-07-19 16:52:01 +0530129static void updateDbusPropertiesCache()
130{
131 for (const auto& pfrVerObj : pfrVersionObjects)
132 {
133 pfrVerObj->updateVersion();
134 }
135
136 // Update provisoningStatus properties
137 pfrConfigObject->updateProvisioningStatus();
138
139 phosphor::logging::log<phosphor::logging::level::INFO>(
140 "PFR Manager service cache data updated.");
141}
142
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530143static void logLastRecoveryEvent()
144{
145 uint8_t reason = 0;
146 if (0 !=
147 intel::pfr::readCpldReg(intel::pfr::ActionType::recoveryReason, reason))
148 {
149 return;
150 }
151
AppaRao Pulie90f1282019-11-05 01:07:05 +0530152 auto it = recoveryReasonMap.find(reason);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530153 if (it == recoveryReasonMap.end())
154 {
155 // No matching found. So just return without logging event.
156 return;
157 }
AppaRao Pulie90f1282019-11-05 01:07:05 +0530158 std::string msgId = "OpenBMC.0.1." + it->second.first;
159 sd_journal_send("MESSAGE=%s", "Platform firmware recovery occurred.",
160 "PRIORITY=%i", LOG_WARNING, "REDFISH_MESSAGE_ID=%s",
161 msgId.c_str(), "REDFISH_MESSAGE_ARGS=%s",
162 it->second.second.c_str(), NULL);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530163}
164
165static void logLastPanicEvent()
166{
167 uint8_t reason = 0;
168 if (0 !=
169 intel::pfr::readCpldReg(intel::pfr::ActionType::panicReason, reason))
170 {
171 return;
172 }
173
AppaRao Pulie90f1282019-11-05 01:07:05 +0530174 auto it = panicReasonMap.find(reason);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530175 if (it == panicReasonMap.end())
176 {
177 // No matching found. So just return without logging event.
178 return;
179 }
180
AppaRao Pulie90f1282019-11-05 01:07:05 +0530181 std::string msgId = "OpenBMC.0.1." + it->second.first;
182 sd_journal_send("MESSAGE=%s", "Platform firmware panic occurred.",
183 "PRIORITY=%i", LOG_WARNING, "REDFISH_MESSAGE_ID=%s",
184 msgId.c_str(), "REDFISH_MESSAGE_ARGS=%s",
185 it->second.second.c_str(), NULL);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530186}
187
AppaRao Puli24766942019-11-13 19:27:08 +0530188static void logResiliencyErrorEvent(const uint8_t majorErrorCode,
189 const uint8_t minorErrorCode)
190{
191 auto it = majorErrorCodeMap.find(majorErrorCode);
192 if (it == majorErrorCodeMap.end())
193 {
194 // No matching found. So just return without logging event.
195 return;
196 }
197
198 std::string errorStr =
199 it->second.second + "(MinorCode:0x" + toHexString(minorErrorCode) + ")";
200 std::string msgId = "OpenBMC.0.1." + it->second.first;
201 sd_journal_send(
202 "MESSAGE=%s", "Platform firmware resiliency error occurred.",
203 "PRIORITY=%i", LOG_ERR, "REDFISH_MESSAGE_ID=%s", msgId.c_str(),
204 "REDFISH_MESSAGE_ARGS=%s", errorStr.c_str(), NULL);
205}
206
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530207static void checkAndLogEvents()
208{
209 uint8_t currPanicCount = 0;
210 if (0 == intel::pfr::readCpldReg(intel::pfr::ActionType::panicCount,
211 currPanicCount))
212 {
213 if (lastPanicCount != currPanicCount)
214 {
215 // Update cached data and log redfish event by reading reason.
216 lastPanicCount = currPanicCount;
217 logLastPanicEvent();
218 }
219 }
220
221 uint8_t currRecoveryCount = 0;
222 if (0 == intel::pfr::readCpldReg(intel::pfr::ActionType::recoveryCount,
223 currRecoveryCount))
224 {
225 if (lastRecoveryCount != currRecoveryCount)
226 {
227 // Update cached data and log redfish event by reading reason.
228 lastRecoveryCount = currRecoveryCount;
229 logLastRecoveryEvent();
230 }
231 }
232
233 uint8_t majorErr = 0;
234 uint8_t minorErr = 0;
235 if ((0 == intel::pfr::readCpldReg(intel::pfr::ActionType::majorError,
236 majorErr)) ||
237 (0 ==
238 intel::pfr::readCpldReg(intel::pfr::ActionType::minorError, minorErr)))
239 {
240 if ((lastMajorErr != majorErr) || (lastMinorErr != minorErr))
241 {
242 lastMajorErr = majorErr;
243 lastMinorErr = minorErr;
244
AppaRao Puli24766942019-11-13 19:27:08 +0530245 logResiliencyErrorEvent(majorErr, minorErr);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530246 }
247 }
248}
249
250static void monitorPlatformStateChange(
251 sdbusplus::asio::object_server& server,
252 std::shared_ptr<sdbusplus::asio::connection>& conn)
253{
254 constexpr size_t pollTimeout = 10; // seconds
255 stateTimer->expires_after(std::chrono::seconds(pollTimeout));
256 stateTimer->async_wait(
257 [&server, &conn](const boost::system::error_code& ec) {
258 if (ec == boost::asio::error::operation_aborted)
259 {
260 // Timer reset.
261 return;
262 }
263 if (ec)
264 {
265 // Platform State Monitor - Timer cancelled.
266 return;
267 }
268 checkAndLogEvents();
269 monitorPlatformStateChange(server, conn);
270 });
271}
272
AppaRao Puli46cead92019-07-22 16:50:09 +0530273void checkAndSetCheckpoint(sdbusplus::asio::object_server& server,
274 std::shared_ptr<sdbusplus::asio::connection>& conn)
275{
276 // Check whether systemd completed all the loading.
277 conn->async_method_call(
278 [&server, &conn](boost::system::error_code ec,
279 const std::variant<uint64_t>& value) {
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530280 if (!ec)
AppaRao Puli46cead92019-07-22 16:50:09 +0530281 {
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530282 if (std::get<uint64_t>(value))
AppaRao Puli46cead92019-07-22 16:50:09 +0530283 {
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530284 phosphor::logging::log<phosphor::logging::level::INFO>(
285 "PFR: BMC boot completed. Setting checkpoint 9.");
286 if (!finishedSettingChkPoint)
287 {
288 finishedSettingChkPoint = true;
289 intel::pfr::setBMCBootCheckpoint(
290 bmcBootFinishedChkPoint);
291 }
292 return;
AppaRao Puli46cead92019-07-22 16:50:09 +0530293 }
294 }
295 else
296 {
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530297 // Failed to get data from systemd. System might not
298 // be ready yet. Attempt again for data.
299 phosphor::logging::log<phosphor::logging::level::ERR>(
300 "PFR: aync call failed to get FinishTimestamp.",
301 phosphor::logging::entry("MSG=%s", ec.message().c_str()));
AppaRao Puli46cead92019-07-22 16:50:09 +0530302 }
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530303 // FIX-ME: Latest up-stream sync caused issue in receiving
304 // StartupFinished signal. Unable to get StartupFinished signal
305 // from systemd1 hence using poll method too, to trigger it
306 // properly.
307 constexpr size_t pollTimeout = 10; // seconds
308 initTimer->expires_after(std::chrono::seconds(pollTimeout));
309 initTimer->async_wait([&server,
310 &conn](const boost::system::error_code& ec) {
311 if (ec == boost::asio::error::operation_aborted)
312 {
313 // Timer reset.
314 phosphor::logging::log<phosphor::logging::level::INFO>(
315 "PFR: Set boot Checkpoint - Timer aborted or stopped.");
316 return;
317 }
318 if (ec)
319 {
320 phosphor::logging::log<phosphor::logging::level::ERR>(
321 "PFR: Set boot Checkpoint - async wait error.");
322 return;
323 }
324 checkAndSetCheckpoint(server, conn);
325 });
AppaRao Puli46cead92019-07-22 16:50:09 +0530326 },
327 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
328 "org.freedesktop.DBus.Properties", "Get",
329 "org.freedesktop.systemd1.Manager", "FinishTimestamp");
330}
331
AppaRao Pulia9bf9712020-01-12 05:45:48 +0530332void monitorSignals(sdbusplus::asio::object_server& server,
333 std::shared_ptr<sdbusplus::asio::connection>& conn)
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530334{
AppaRao Puli46cead92019-07-22 16:50:09 +0530335 // Monitor Boot finished signal and set the checkpoint 9 to
336 // notify CPLD about BMC boot finish.
337 auto bootFinishedSignal = std::make_unique<sdbusplus::bus::match::match>(
338 static_cast<sdbusplus::bus::bus&>(*conn),
339 "type='signal',"
340 "member='StartupFinished',path='/org/freedesktop/systemd1',"
341 "interface='org.freedesktop.systemd1.Manager'",
342 [&server, &conn](sdbusplus::message::message& msg) {
343 if (!finishedSettingChkPoint)
344 {
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530345 phosphor::logging::log<phosphor::logging::level::INFO>(
346 "PFR: BMC boot completed(StartupFinished). Setting "
347 "checkpoint 9.");
AppaRao Puli46cead92019-07-22 16:50:09 +0530348 finishedSettingChkPoint = true;
349 intel::pfr::setBMCBootCheckpoint(bmcBootFinishedChkPoint);
350 }
351 });
352 checkAndSetCheckpoint(server, conn);
353
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530354 // Capture the Chassis state and Start the monitor timer
355 // if state changed to 'On'. Run timer until OS boot.
356 // Stop timer if state changed to 'Off'.
357 static auto matchChassisState = sdbusplus::bus::match::match(
358 static_cast<sdbusplus::bus::bus&>(*conn),
359 "type='signal',member='PropertiesChanged', "
360 "interface='org.freedesktop.DBus.Properties', "
361 "sender='xyz.openbmc_project.State.Chassis', "
362 "arg0namespace='xyz.openbmc_project.State.Chassis'",
363 [&server, &conn](sdbusplus::message::message& message) {
364 std::string intfName;
365 std::map<std::string, std::variant<std::string>> properties;
366 message.read(intfName, properties);
367
368 const auto it = properties.find("CurrentPowerState");
369 if (it != properties.end())
370 {
371 const std::string* state =
372 std::get_if<std::string>(&it->second);
373 if (state != nullptr)
374 {
375 if ((*state ==
376 "xyz.openbmc_project.State.Chassis.PowerState.On") &&
377 (!stateTimerRunning))
378 {
379 stateTimerRunning = true;
380 monitorPlatformStateChange(server, conn);
381 }
382 else if ((*state == "xyz.openbmc_project.State.Chassis."
383 "PowerState.Off") &&
384 (stateTimerRunning))
385 {
386 stateTimer->cancel();
387 checkAndLogEvents();
388 stateTimerRunning = false;
389 }
390 }
AppaRao Pulie4e95652019-07-19 16:52:01 +0530391
392 // Update the D-Bus properties when chassis state changes.
393 updateDbusPropertiesCache();
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530394 }
395 });
396
397 // Capture the Host state and Start the monitor timer
398 // if state changed to 'Running'. Run timer until OS boot.
399 // Stop timer if state changed to 'Off'.
400 static auto matchHostState = sdbusplus::bus::match::match(
401 static_cast<sdbusplus::bus::bus&>(*conn),
402 "type='signal',member='PropertiesChanged', "
403 "interface='org.freedesktop.DBus.Properties', "
404 "sender='xyz.openbmc_project.State.Chassis', "
405 "arg0namespace='xyz.openbmc_project.State.Host'",
406 [&server, &conn](sdbusplus::message::message& message) {
407 std::string intfName;
408 std::map<std::string, std::variant<std::string>> properties;
409 message.read(intfName, properties);
410
411 const auto it = properties.find("CurrentHostState");
412 if (it != properties.end())
413 {
414 const std::string* state =
415 std::get_if<std::string>(&it->second);
416 if (state != nullptr)
417 {
418 if ((*state ==
419 "xyz.openbmc_project.State.Host.HostState.Running") &&
420 (!stateTimerRunning))
421 {
422 stateTimerRunning = true;
423 monitorPlatformStateChange(server, conn);
424 }
425 else if (((*state == "xyz.openbmc_project.State.Host."
426 "HostState.Off") ||
427 (*state == "xyz.openbmc_project.State.Host."
428 "HostState.Quiesced")) &&
429 (stateTimerRunning))
430 {
431 stateTimer->cancel();
432 checkAndLogEvents();
433 stateTimerRunning = false;
434 }
435 }
AppaRao Pulie4e95652019-07-19 16:52:01 +0530436
437 // Update the D-Bus properties when host state changes.
438 updateDbusPropertiesCache();
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530439 }
440 });
441
442 // Capture the OS state change and stop monitor timer
443 // if OS boots completly or becomes Inactive.
444 // start timer in other cases to mnitor states.
445 static auto matchOsState = sdbusplus::bus::match::match(
446 static_cast<sdbusplus::bus::bus&>(*conn),
447 "type='signal',member='PropertiesChanged', "
448 "interface='org.freedesktop.DBus.Properties', "
449 "sender='xyz.openbmc_project.State.Chassis', "
450 "arg0namespace='xyz.openbmc_project.State.OperatingSystem.Status'",
451 [&server, &conn](sdbusplus::message::message& message) {
452 std::string intfName;
453 std::map<std::string, std::variant<std::string>> properties;
454 message.read(intfName, properties);
455
456 const auto it = properties.find("OperatingSystemState");
457 if (it != properties.end())
458 {
459 const std::string* state =
460 std::get_if<std::string>(&it->second);
461 if (state != nullptr)
462 {
463 if (((*state == "BootComplete") ||
464 (*state == "Inactive")) &&
465 (stateTimerRunning))
466 {
467 stateTimer->cancel();
468 checkAndLogEvents();
469 stateTimerRunning = false;
470 }
471 else if (!stateTimerRunning)
472 {
473 stateTimerRunning = true;
474 monitorPlatformStateChange(server, conn);
475 }
476 }
477 }
478 });
479
480 // First time, check and log events if any.
481 checkAndLogEvents();
AppaRao Pulia9bf9712020-01-12 05:45:48 +0530482}
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530483
AppaRao Pulia9bf9712020-01-12 05:45:48 +0530484int main()
485{
486 // setup connection to dbus
487 boost::asio::io_service io;
488 auto conn = std::make_shared<sdbusplus::asio::connection>(io);
489 stateTimer = std::make_unique<boost::asio::steady_timer>(io);
490 initTimer = std::make_unique<boost::asio::steady_timer>(io);
491 auto server = sdbusplus::asio::object_server(conn, true);
492 monitorSignals(server, conn);
493
494 auto rootInterface = server.add_interface("/xyz/openbmc_project/pfr", "");
495 rootInterface->initialize();
496 server.add_manager("/xyz/openbmc_project/pfr");
497
498 // Create PFR attributes object and interface
499 pfrConfigObject = std::make_unique<intel::pfr::PfrConfig>(server, conn);
500
501 // Create Software objects using Versions interface
502 for (const auto& entry : verComponentList)
503 {
504 pfrVersionObjects.emplace_back(std::make_unique<intel::pfr::PfrVersion>(
505 server, conn, std::get<0>(entry), std::get<1>(entry),
506 std::get<2>(entry)));
507 }
508
509 conn->request_name("xyz.openbmc_project.PFR.Manager");
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530510 phosphor::logging::log<phosphor::logging::level::INFO>(
511 "Intel PFR service started successfully");
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530512 io.run();
513
514 return 0;
515}