blob: c92a5d054e878816d9c380a3e2bc4e630a9a0509 [file] [log] [blame]
AppaRao Pulie63eeda2019-07-05 16:25:38 +05301/*
2// Copyright (c) 2019 Intel Corporation
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15*/
16
AppaRao Puli88aa33b2019-07-18 23:49:55 +053017#include <systemd/sd-journal.h>
18
AppaRao Pulie63eeda2019-07-05 16:25:38 +053019#include "pfr_mgr.hpp"
AppaRao Puli88aa33b2019-07-18 23:49:55 +053020#include "pfr.hpp"
AppaRao Puli46cead92019-07-22 16:50:09 +053021#include <boost/asio.hpp>
AppaRao Pulie63eeda2019-07-05 16:25:38 +053022
AppaRao Puli88aa33b2019-07-18 23:49:55 +053023// Caches the last Recovery/Panic Count to
24// identify any new Recovery/panic actions.
25/* TODO: When BMC Reset's, these values will be lost
26 * Persist this info using settingsd */
27static uint8_t lastRecoveryCount = 0;
28static uint8_t lastPanicCount = 0;
29static uint8_t lastMajorErr = 0;
30static uint8_t lastMinorErr = 0;
31
32static bool stateTimerRunning = false;
AppaRao Puli46cead92019-07-22 16:50:09 +053033bool finishedSettingChkPoint = false;
34static constexpr uint8_t bmcBootFinishedChkPoint = 0x09;
35
AppaRao Puli88aa33b2019-07-18 23:49:55 +053036std::unique_ptr<boost::asio::steady_timer> stateTimer = nullptr;
AppaRao Puli46cead92019-07-22 16:50:09 +053037std::unique_ptr<boost::asio::steady_timer> initTimer = nullptr;
AppaRao Puli88aa33b2019-07-18 23:49:55 +053038
AppaRao Pulie4e95652019-07-19 16:52:01 +053039std::vector<std::unique_ptr<intel::pfr::PfrVersion>> pfrVersionObjects;
40std::unique_ptr<intel::pfr::PfrConfig> pfrConfigObject;
41
42using namespace intel::pfr;
43// List holds <ObjPath> <ImageType> <VersionPurpose>
44static std::vector<std::tuple<std::string, ImageType, std::string>>
45 verComponentList = {
46 std::make_tuple("bmc_active", ImageType::bmcActive, versionPurposeBMC),
47 std::make_tuple("bmc_recovery", ImageType::bmcRecovery,
48 versionPurposeBMC),
49 std::make_tuple("bios_active", ImageType::biosActive,
50 versionPurposeHost),
51 std::make_tuple("bios_recovery", ImageType::biosRecovery,
52 versionPurposeHost),
Vikram Bodireddy3c6c8c32019-12-05 11:06:15 +053053 std::make_tuple("cpld_active", ImageType::cpldActive,
54 versionPurposeOther),
55 std::make_tuple("cpld_recovery", ImageType::cpldRecovery,
56 versionPurposeOther),
57};
AppaRao Pulie4e95652019-07-19 16:52:01 +053058
AppaRao Pulie90f1282019-11-05 01:07:05 +053059// Recovery reason map.
60// {<CPLD association>,{<Redfish MessageID>, <Recovery Reason>}}
61static const boost::container::flat_map<uint8_t,
62 std::pair<std::string, std::string>>
63 recoveryReasonMap = {
64 {0x01,
65 {"BIOSFirmwareRecoveryReason",
66 "PCH active image authentication failure"}},
67 {0x02,
68 {"BIOSFirmwareRecoveryReason",
69 "PCH recovery image authentication failure"}},
70 {0x03, {"MEFirmwareRecoveryReason", "ME launch failure"}},
71 {0x04, {"BIOSFirmwareRecoveryReason", "ACM launch failure"}},
72 {0x05, {"BIOSFirmwareRecoveryReason", "IBB launch failure"}},
73 {0x06, {"BIOSFirmwareRecoveryReason", "OBB launch failure"}},
74 {0x07,
75 {"BMCFirmwareRecoveryReason",
76 "BMC active image authentication failure"}},
77 {0x08,
78 {"BMCFirmwareRecoveryReason",
79 "BMC recovery image authentication failure"}},
80 {0x09, {"BMCFirmwareRecoveryReason", "BMC launch failure"}},
81 {0x0A, {"CPLDFirmwareRecoveryReason", "CPLD watchdog expired"}}};
AppaRao Puli88aa33b2019-07-18 23:49:55 +053082
AppaRao Pulie90f1282019-11-05 01:07:05 +053083// Panic Reason map.
84// {<CPLD association>, {<Redfish MessageID>, <Panic reason> })
85static const boost::container::flat_map<uint8_t,
86 std::pair<std::string, std::string>>
87 panicReasonMap = {
88 {0x01, {"CPLDFirmwarePanicReason", "CPLD watchdog expired"}},
89 {0x02, {"BMCFirmwarePanicReason", "BMC watchdog expired"}},
90 {0x03, {"MEFirmwarePanicReason", "ME watchdog expired"}},
91 {0x04, {"BIOSFirmwarePanicReason", "ACM watchdog expired"}},
92 {0x05, {"BIOSFirmwarePanicReason", "IBB watchdog expired"}},
93 {0x06, {"BIOSFirmwarePanicReason", "OBB watchdog expired"}},
94 {0x07,
95 {"BMCFirmwarePanicReason", "BMC active image authentication failure"}},
96 {0x08,
97 {"BMCFirmwarePanicReason",
98 "BMC recovery image authentication failure"}},
99 {0x09,
100 {"BIOSFirmwarePanicReason",
101 "PCH active image authentication failure"}},
102 {0x0A,
103 {"BIOSFirmwarePanicReason",
104 "PCH recovery image authentication failure"}},
105 {0x0B, {"MEFirmwarePanicReason", "ME authentication failure"}},
106 {0x0C,
107 {"BIOSFirmwarePanicReason",
108 "ACM or IBB or OBB authentication failure"}},
109 {0x0D, {"BIOSFirmwarePanicReason", "PCH update intent"}},
110 {0x0E, {"BMCFirmwarePanicReason", "BMC update intent"}},
111 {0x0F, {"BMCFirmwarePanicReason", "BMC reset detected"}}};
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530112
AppaRao Puli24766942019-11-13 19:27:08 +0530113// Firmware resiliency major map.
114// {<CPLD association>, {<Redfish MessageID>, <Error reason> })
115static const boost::container::flat_map<uint8_t,
116 std::pair<std::string, std::string>>
117 majorErrorCodeMap = {
118 {0x01,
119 {"BMCFirmwareResiliencyError", "BMC image authentication failed"}},
120 {0x02,
121 {"BIOSFirmwareResiliencyError", "BIOS image authentication failed"}},
122 {0x03, {"BMCFirmwareResiliencyError", "BMC boot failed"}},
123 {0x04, {"MEFirmwareResiliencyError", "ME boot failed"}},
124 {0x05, {"BIOSFirmwareResiliencyError", "ACM boot failed"}},
125 {0x06, {"BIOSFirmwareResiliencyError", "BIOS boot failed"}},
126 {0x07, {"BIOSFirmwareResiliencyError", "Update from PCH failed"}},
127 {0x08, {"BIOSFirmwarePanicReason", "Update from BMC failed"}}};
128
AppaRao Pulie4e95652019-07-19 16:52:01 +0530129static void updateDbusPropertiesCache()
130{
131 for (const auto& pfrVerObj : pfrVersionObjects)
132 {
133 pfrVerObj->updateVersion();
134 }
135
136 // Update provisoningStatus properties
137 pfrConfigObject->updateProvisioningStatus();
138
139 phosphor::logging::log<phosphor::logging::level::INFO>(
140 "PFR Manager service cache data updated.");
141}
142
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530143static void logLastRecoveryEvent()
144{
145 uint8_t reason = 0;
146 if (0 !=
147 intel::pfr::readCpldReg(intel::pfr::ActionType::recoveryReason, reason))
148 {
149 return;
150 }
151
AppaRao Pulie90f1282019-11-05 01:07:05 +0530152 auto it = recoveryReasonMap.find(reason);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530153 if (it == recoveryReasonMap.end())
154 {
155 // No matching found. So just return without logging event.
156 return;
157 }
AppaRao Pulie90f1282019-11-05 01:07:05 +0530158 std::string msgId = "OpenBMC.0.1." + it->second.first;
159 sd_journal_send("MESSAGE=%s", "Platform firmware recovery occurred.",
160 "PRIORITY=%i", LOG_WARNING, "REDFISH_MESSAGE_ID=%s",
161 msgId.c_str(), "REDFISH_MESSAGE_ARGS=%s",
162 it->second.second.c_str(), NULL);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530163}
164
165static void logLastPanicEvent()
166{
167 uint8_t reason = 0;
168 if (0 !=
169 intel::pfr::readCpldReg(intel::pfr::ActionType::panicReason, reason))
170 {
171 return;
172 }
173
AppaRao Pulie90f1282019-11-05 01:07:05 +0530174 auto it = panicReasonMap.find(reason);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530175 if (it == panicReasonMap.end())
176 {
177 // No matching found. So just return without logging event.
178 return;
179 }
180
AppaRao Pulie90f1282019-11-05 01:07:05 +0530181 std::string msgId = "OpenBMC.0.1." + it->second.first;
182 sd_journal_send("MESSAGE=%s", "Platform firmware panic occurred.",
183 "PRIORITY=%i", LOG_WARNING, "REDFISH_MESSAGE_ID=%s",
184 msgId.c_str(), "REDFISH_MESSAGE_ARGS=%s",
185 it->second.second.c_str(), NULL);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530186}
187
AppaRao Puli24766942019-11-13 19:27:08 +0530188static void logResiliencyErrorEvent(const uint8_t majorErrorCode,
189 const uint8_t minorErrorCode)
190{
191 auto it = majorErrorCodeMap.find(majorErrorCode);
192 if (it == majorErrorCodeMap.end())
193 {
194 // No matching found. So just return without logging event.
195 return;
196 }
197
198 std::string errorStr =
199 it->second.second + "(MinorCode:0x" + toHexString(minorErrorCode) + ")";
200 std::string msgId = "OpenBMC.0.1." + it->second.first;
201 sd_journal_send(
202 "MESSAGE=%s", "Platform firmware resiliency error occurred.",
203 "PRIORITY=%i", LOG_ERR, "REDFISH_MESSAGE_ID=%s", msgId.c_str(),
204 "REDFISH_MESSAGE_ARGS=%s", errorStr.c_str(), NULL);
205}
206
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530207static void checkAndLogEvents()
208{
209 uint8_t currPanicCount = 0;
210 if (0 == intel::pfr::readCpldReg(intel::pfr::ActionType::panicCount,
211 currPanicCount))
212 {
213 if (lastPanicCount != currPanicCount)
214 {
215 // Update cached data and log redfish event by reading reason.
216 lastPanicCount = currPanicCount;
217 logLastPanicEvent();
218 }
219 }
220
221 uint8_t currRecoveryCount = 0;
222 if (0 == intel::pfr::readCpldReg(intel::pfr::ActionType::recoveryCount,
223 currRecoveryCount))
224 {
225 if (lastRecoveryCount != currRecoveryCount)
226 {
227 // Update cached data and log redfish event by reading reason.
228 lastRecoveryCount = currRecoveryCount;
229 logLastRecoveryEvent();
230 }
231 }
232
233 uint8_t majorErr = 0;
234 uint8_t minorErr = 0;
235 if ((0 == intel::pfr::readCpldReg(intel::pfr::ActionType::majorError,
236 majorErr)) ||
237 (0 ==
238 intel::pfr::readCpldReg(intel::pfr::ActionType::minorError, minorErr)))
239 {
240 if ((lastMajorErr != majorErr) || (lastMinorErr != minorErr))
241 {
242 lastMajorErr = majorErr;
243 lastMinorErr = minorErr;
244
AppaRao Puli24766942019-11-13 19:27:08 +0530245 logResiliencyErrorEvent(majorErr, minorErr);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530246 }
247 }
248}
249
250static void monitorPlatformStateChange(
251 sdbusplus::asio::object_server& server,
252 std::shared_ptr<sdbusplus::asio::connection>& conn)
253{
254 constexpr size_t pollTimeout = 10; // seconds
255 stateTimer->expires_after(std::chrono::seconds(pollTimeout));
256 stateTimer->async_wait(
257 [&server, &conn](const boost::system::error_code& ec) {
258 if (ec == boost::asio::error::operation_aborted)
259 {
260 // Timer reset.
261 return;
262 }
263 if (ec)
264 {
265 // Platform State Monitor - Timer cancelled.
266 return;
267 }
268 checkAndLogEvents();
269 monitorPlatformStateChange(server, conn);
270 });
271}
272
AppaRao Puli46cead92019-07-22 16:50:09 +0530273void checkAndSetCheckpoint(sdbusplus::asio::object_server& server,
274 std::shared_ptr<sdbusplus::asio::connection>& conn)
275{
276 // Check whether systemd completed all the loading.
277 conn->async_method_call(
278 [&server, &conn](boost::system::error_code ec,
279 const std::variant<uint64_t>& value) {
280 if (ec)
281 {
282 phosphor::logging::log<phosphor::logging::level::ERR>(
283 "async_method_call error: FinishTimestamp failed");
284 return;
285 }
286 if (std::get<uint64_t>(value))
287 {
288 if (!finishedSettingChkPoint)
289 {
290 finishedSettingChkPoint = true;
291 intel::pfr::setBMCBootCheckpoint(bmcBootFinishedChkPoint);
292 }
293 }
294 else
295 {
296 // FIX-ME: Latest up-stream sync caused issue in receiving
297 // StartupFinished signal. Unable to get StartupFinished signal
298 // from systemd1 hence using poll method too, to trigger it
299 // properly.
300 constexpr size_t pollTimeout = 10; // seconds
301 initTimer->expires_after(std::chrono::seconds(pollTimeout));
302 initTimer->async_wait([&server, &conn](
303 const boost::system::error_code& ec) {
304 if (ec == boost::asio::error::operation_aborted)
305 {
306 // Timer reset.
307 return;
308 }
309 if (ec)
310 {
311 phosphor::logging::log<phosphor::logging::level::ERR>(
312 "Set boot Checkpoint - async wait error.");
313 return;
314 }
315 checkAndSetCheckpoint(server, conn);
316 });
317 }
318 },
319 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
320 "org.freedesktop.DBus.Properties", "Get",
321 "org.freedesktop.systemd1.Manager", "FinishTimestamp");
322}
323
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530324int main()
325{
326 // setup connection to dbus
327 boost::asio::io_service io;
328 auto conn = std::make_shared<sdbusplus::asio::connection>(io);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530329 stateTimer = std::make_unique<boost::asio::steady_timer>(io);
AppaRao Puli46cead92019-07-22 16:50:09 +0530330 initTimer = std::make_unique<boost::asio::steady_timer>(io);
AppaRao Pulicc1ed682019-10-01 12:29:40 +0530331 conn->request_name("xyz.openbmc_project.PFR.Manager");
AppaRao Pulie4e95652019-07-19 16:52:01 +0530332 auto server = sdbusplus::asio::object_server(conn);
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530333
AppaRao Pulicc1ed682019-10-01 12:29:40 +0530334 // Create PFR attributes object and interface
AppaRao Pulie4e95652019-07-19 16:52:01 +0530335 pfrConfigObject = std::make_unique<intel::pfr::PfrConfig>(server, conn);
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530336
AppaRao Pulie4e95652019-07-19 16:52:01 +0530337 pfrVersionObjects.clear();
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530338 // Create Software objects using Versions interface
AppaRao Pulie4e95652019-07-19 16:52:01 +0530339 for (const auto& entry : verComponentList)
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530340 {
AppaRao Pulie4e95652019-07-19 16:52:01 +0530341 pfrVersionObjects.emplace_back(std::make_unique<intel::pfr::PfrVersion>(
342 server, conn, std::get<0>(entry), std::get<1>(entry),
343 std::get<2>(entry)));
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530344 }
345
AppaRao Puli46cead92019-07-22 16:50:09 +0530346 // Monitor Boot finished signal and set the checkpoint 9 to
347 // notify CPLD about BMC boot finish.
348 auto bootFinishedSignal = std::make_unique<sdbusplus::bus::match::match>(
349 static_cast<sdbusplus::bus::bus&>(*conn),
350 "type='signal',"
351 "member='StartupFinished',path='/org/freedesktop/systemd1',"
352 "interface='org.freedesktop.systemd1.Manager'",
353 [&server, &conn](sdbusplus::message::message& msg) {
354 if (!finishedSettingChkPoint)
355 {
356 finishedSettingChkPoint = true;
357 intel::pfr::setBMCBootCheckpoint(bmcBootFinishedChkPoint);
358 }
359 });
360 checkAndSetCheckpoint(server, conn);
361
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530362 // Capture the Chassis state and Start the monitor timer
363 // if state changed to 'On'. Run timer until OS boot.
364 // Stop timer if state changed to 'Off'.
365 static auto matchChassisState = sdbusplus::bus::match::match(
366 static_cast<sdbusplus::bus::bus&>(*conn),
367 "type='signal',member='PropertiesChanged', "
368 "interface='org.freedesktop.DBus.Properties', "
369 "sender='xyz.openbmc_project.State.Chassis', "
370 "arg0namespace='xyz.openbmc_project.State.Chassis'",
371 [&server, &conn](sdbusplus::message::message& message) {
372 std::string intfName;
373 std::map<std::string, std::variant<std::string>> properties;
374 message.read(intfName, properties);
375
376 const auto it = properties.find("CurrentPowerState");
377 if (it != properties.end())
378 {
379 const std::string* state =
380 std::get_if<std::string>(&it->second);
381 if (state != nullptr)
382 {
383 if ((*state ==
384 "xyz.openbmc_project.State.Chassis.PowerState.On") &&
385 (!stateTimerRunning))
386 {
387 stateTimerRunning = true;
388 monitorPlatformStateChange(server, conn);
389 }
390 else if ((*state == "xyz.openbmc_project.State.Chassis."
391 "PowerState.Off") &&
392 (stateTimerRunning))
393 {
394 stateTimer->cancel();
395 checkAndLogEvents();
396 stateTimerRunning = false;
397 }
398 }
AppaRao Pulie4e95652019-07-19 16:52:01 +0530399
400 // Update the D-Bus properties when chassis state changes.
401 updateDbusPropertiesCache();
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530402 }
403 });
404
405 // Capture the Host state and Start the monitor timer
406 // if state changed to 'Running'. Run timer until OS boot.
407 // Stop timer if state changed to 'Off'.
408 static auto matchHostState = sdbusplus::bus::match::match(
409 static_cast<sdbusplus::bus::bus&>(*conn),
410 "type='signal',member='PropertiesChanged', "
411 "interface='org.freedesktop.DBus.Properties', "
412 "sender='xyz.openbmc_project.State.Chassis', "
413 "arg0namespace='xyz.openbmc_project.State.Host'",
414 [&server, &conn](sdbusplus::message::message& message) {
415 std::string intfName;
416 std::map<std::string, std::variant<std::string>> properties;
417 message.read(intfName, properties);
418
419 const auto it = properties.find("CurrentHostState");
420 if (it != properties.end())
421 {
422 const std::string* state =
423 std::get_if<std::string>(&it->second);
424 if (state != nullptr)
425 {
426 if ((*state ==
427 "xyz.openbmc_project.State.Host.HostState.Running") &&
428 (!stateTimerRunning))
429 {
430 stateTimerRunning = true;
431 monitorPlatformStateChange(server, conn);
432 }
433 else if (((*state == "xyz.openbmc_project.State.Host."
434 "HostState.Off") ||
435 (*state == "xyz.openbmc_project.State.Host."
436 "HostState.Quiesced")) &&
437 (stateTimerRunning))
438 {
439 stateTimer->cancel();
440 checkAndLogEvents();
441 stateTimerRunning = false;
442 }
443 }
AppaRao Pulie4e95652019-07-19 16:52:01 +0530444
445 // Update the D-Bus properties when host state changes.
446 updateDbusPropertiesCache();
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530447 }
448 });
449
450 // Capture the OS state change and stop monitor timer
451 // if OS boots completly or becomes Inactive.
452 // start timer in other cases to mnitor states.
453 static auto matchOsState = sdbusplus::bus::match::match(
454 static_cast<sdbusplus::bus::bus&>(*conn),
455 "type='signal',member='PropertiesChanged', "
456 "interface='org.freedesktop.DBus.Properties', "
457 "sender='xyz.openbmc_project.State.Chassis', "
458 "arg0namespace='xyz.openbmc_project.State.OperatingSystem.Status'",
459 [&server, &conn](sdbusplus::message::message& message) {
460 std::string intfName;
461 std::map<std::string, std::variant<std::string>> properties;
462 message.read(intfName, properties);
463
464 const auto it = properties.find("OperatingSystemState");
465 if (it != properties.end())
466 {
467 const std::string* state =
468 std::get_if<std::string>(&it->second);
469 if (state != nullptr)
470 {
471 if (((*state == "BootComplete") ||
472 (*state == "Inactive")) &&
473 (stateTimerRunning))
474 {
475 stateTimer->cancel();
476 checkAndLogEvents();
477 stateTimerRunning = false;
478 }
479 else if (!stateTimerRunning)
480 {
481 stateTimerRunning = true;
482 monitorPlatformStateChange(server, conn);
483 }
484 }
485 }
486 });
487
488 // First time, check and log events if any.
489 checkAndLogEvents();
490
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530491 phosphor::logging::log<phosphor::logging::level::INFO>(
492 "Intel PFR service started successfully");
493
494 io.run();
495
496 return 0;
497}