blob: 60ec796e27fd5858925e1151d2a9189b039c27aa [file] [log] [blame]
AppaRao Pulie63eeda2019-07-05 16:25:38 +05301/*
2// Copyright (c) 2019 Intel Corporation
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15*/
16
AppaRao Puli88aa33b2019-07-18 23:49:55 +053017#include <systemd/sd-journal.h>
18
AppaRao Pulie63eeda2019-07-05 16:25:38 +053019#include "pfr_mgr.hpp"
AppaRao Puli88aa33b2019-07-18 23:49:55 +053020#include "pfr.hpp"
AppaRao Puli46cead92019-07-22 16:50:09 +053021#include <boost/asio.hpp>
AppaRao Pulie63eeda2019-07-05 16:25:38 +053022
AppaRao Puli88aa33b2019-07-18 23:49:55 +053023// Caches the last Recovery/Panic Count to
24// identify any new Recovery/panic actions.
25/* TODO: When BMC Reset's, these values will be lost
26 * Persist this info using settingsd */
27static uint8_t lastRecoveryCount = 0;
28static uint8_t lastPanicCount = 0;
29static uint8_t lastMajorErr = 0;
30static uint8_t lastMinorErr = 0;
31
32static bool stateTimerRunning = false;
AppaRao Puli46cead92019-07-22 16:50:09 +053033bool finishedSettingChkPoint = false;
34static constexpr uint8_t bmcBootFinishedChkPoint = 0x09;
35
AppaRao Puli88aa33b2019-07-18 23:49:55 +053036std::unique_ptr<boost::asio::steady_timer> stateTimer = nullptr;
AppaRao Puli46cead92019-07-22 16:50:09 +053037std::unique_ptr<boost::asio::steady_timer> initTimer = nullptr;
AppaRao Puli88aa33b2019-07-18 23:49:55 +053038
AppaRao Pulie4e95652019-07-19 16:52:01 +053039std::vector<std::unique_ptr<intel::pfr::PfrVersion>> pfrVersionObjects;
40std::unique_ptr<intel::pfr::PfrConfig> pfrConfigObject;
41
42using namespace intel::pfr;
43// List holds <ObjPath> <ImageType> <VersionPurpose>
44static std::vector<std::tuple<std::string, ImageType, std::string>>
45 verComponentList = {
46 std::make_tuple("bmc_active", ImageType::bmcActive, versionPurposeBMC),
47 std::make_tuple("bmc_recovery", ImageType::bmcRecovery,
48 versionPurposeBMC),
49 std::make_tuple("bios_active", ImageType::biosActive,
50 versionPurposeHost),
51 std::make_tuple("bios_recovery", ImageType::biosRecovery,
52 versionPurposeHost),
Vikram Bodireddy3c6c8c32019-12-05 11:06:15 +053053 std::make_tuple("cpld_active", ImageType::cpldActive,
54 versionPurposeOther),
55 std::make_tuple("cpld_recovery", ImageType::cpldRecovery,
56 versionPurposeOther),
57};
AppaRao Pulie4e95652019-07-19 16:52:01 +053058
AppaRao Pulie90f1282019-11-05 01:07:05 +053059// Recovery reason map.
60// {<CPLD association>,{<Redfish MessageID>, <Recovery Reason>}}
61static const boost::container::flat_map<uint8_t,
62 std::pair<std::string, std::string>>
63 recoveryReasonMap = {
64 {0x01,
65 {"BIOSFirmwareRecoveryReason",
66 "PCH active image authentication failure"}},
67 {0x02,
68 {"BIOSFirmwareRecoveryReason",
69 "PCH recovery image authentication failure"}},
70 {0x03, {"MEFirmwareRecoveryReason", "ME launch failure"}},
71 {0x04, {"BIOSFirmwareRecoveryReason", "ACM launch failure"}},
72 {0x05, {"BIOSFirmwareRecoveryReason", "IBB launch failure"}},
73 {0x06, {"BIOSFirmwareRecoveryReason", "OBB launch failure"}},
74 {0x07,
75 {"BMCFirmwareRecoveryReason",
76 "BMC active image authentication failure"}},
77 {0x08,
78 {"BMCFirmwareRecoveryReason",
79 "BMC recovery image authentication failure"}},
80 {0x09, {"BMCFirmwareRecoveryReason", "BMC launch failure"}},
81 {0x0A, {"CPLDFirmwareRecoveryReason", "CPLD watchdog expired"}}};
AppaRao Puli88aa33b2019-07-18 23:49:55 +053082
AppaRao Pulie90f1282019-11-05 01:07:05 +053083// Panic Reason map.
84// {<CPLD association>, {<Redfish MessageID>, <Panic reason> })
85static const boost::container::flat_map<uint8_t,
86 std::pair<std::string, std::string>>
87 panicReasonMap = {
88 {0x01, {"CPLDFirmwarePanicReason", "CPLD watchdog expired"}},
89 {0x02, {"BMCFirmwarePanicReason", "BMC watchdog expired"}},
90 {0x03, {"MEFirmwarePanicReason", "ME watchdog expired"}},
91 {0x04, {"BIOSFirmwarePanicReason", "ACM watchdog expired"}},
92 {0x05, {"BIOSFirmwarePanicReason", "IBB watchdog expired"}},
93 {0x06, {"BIOSFirmwarePanicReason", "OBB watchdog expired"}},
94 {0x07,
95 {"BMCFirmwarePanicReason", "BMC active image authentication failure"}},
96 {0x08,
97 {"BMCFirmwarePanicReason",
98 "BMC recovery image authentication failure"}},
99 {0x09,
100 {"BIOSFirmwarePanicReason",
101 "PCH active image authentication failure"}},
102 {0x0A,
103 {"BIOSFirmwarePanicReason",
104 "PCH recovery image authentication failure"}},
105 {0x0B, {"MEFirmwarePanicReason", "ME authentication failure"}},
106 {0x0C,
107 {"BIOSFirmwarePanicReason",
108 "ACM or IBB or OBB authentication failure"}},
109 {0x0D, {"BIOSFirmwarePanicReason", "PCH update intent"}},
110 {0x0E, {"BMCFirmwarePanicReason", "BMC update intent"}},
111 {0x0F, {"BMCFirmwarePanicReason", "BMC reset detected"}}};
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530112
AppaRao Puli24766942019-11-13 19:27:08 +0530113// Firmware resiliency major map.
114// {<CPLD association>, {<Redfish MessageID>, <Error reason> })
115static const boost::container::flat_map<uint8_t,
116 std::pair<std::string, std::string>>
117 majorErrorCodeMap = {
118 {0x01,
119 {"BMCFirmwareResiliencyError", "BMC image authentication failed"}},
120 {0x02,
121 {"BIOSFirmwareResiliencyError", "BIOS image authentication failed"}},
122 {0x03, {"BMCFirmwareResiliencyError", "BMC boot failed"}},
123 {0x04, {"MEFirmwareResiliencyError", "ME boot failed"}},
124 {0x05, {"BIOSFirmwareResiliencyError", "ACM boot failed"}},
125 {0x06, {"BIOSFirmwareResiliencyError", "BIOS boot failed"}},
126 {0x07, {"BIOSFirmwareResiliencyError", "Update from PCH failed"}},
127 {0x08, {"BIOSFirmwarePanicReason", "Update from BMC failed"}}};
128
AppaRao Pulie4e95652019-07-19 16:52:01 +0530129static void updateDbusPropertiesCache()
130{
131 for (const auto& pfrVerObj : pfrVersionObjects)
132 {
133 pfrVerObj->updateVersion();
134 }
135
136 // Update provisoningStatus properties
137 pfrConfigObject->updateProvisioningStatus();
138
139 phosphor::logging::log<phosphor::logging::level::INFO>(
140 "PFR Manager service cache data updated.");
141}
142
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530143static void logLastRecoveryEvent()
144{
145 uint8_t reason = 0;
146 if (0 !=
147 intel::pfr::readCpldReg(intel::pfr::ActionType::recoveryReason, reason))
148 {
149 return;
150 }
151
AppaRao Pulie90f1282019-11-05 01:07:05 +0530152 auto it = recoveryReasonMap.find(reason);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530153 if (it == recoveryReasonMap.end())
154 {
155 // No matching found. So just return without logging event.
156 return;
157 }
AppaRao Pulie90f1282019-11-05 01:07:05 +0530158 std::string msgId = "OpenBMC.0.1." + it->second.first;
159 sd_journal_send("MESSAGE=%s", "Platform firmware recovery occurred.",
160 "PRIORITY=%i", LOG_WARNING, "REDFISH_MESSAGE_ID=%s",
161 msgId.c_str(), "REDFISH_MESSAGE_ARGS=%s",
162 it->second.second.c_str(), NULL);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530163}
164
165static void logLastPanicEvent()
166{
167 uint8_t reason = 0;
168 if (0 !=
169 intel::pfr::readCpldReg(intel::pfr::ActionType::panicReason, reason))
170 {
171 return;
172 }
173
AppaRao Pulie90f1282019-11-05 01:07:05 +0530174 auto it = panicReasonMap.find(reason);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530175 if (it == panicReasonMap.end())
176 {
177 // No matching found. So just return without logging event.
178 return;
179 }
180
AppaRao Pulie90f1282019-11-05 01:07:05 +0530181 std::string msgId = "OpenBMC.0.1." + it->second.first;
182 sd_journal_send("MESSAGE=%s", "Platform firmware panic occurred.",
183 "PRIORITY=%i", LOG_WARNING, "REDFISH_MESSAGE_ID=%s",
184 msgId.c_str(), "REDFISH_MESSAGE_ARGS=%s",
185 it->second.second.c_str(), NULL);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530186}
187
AppaRao Puli24766942019-11-13 19:27:08 +0530188static void logResiliencyErrorEvent(const uint8_t majorErrorCode,
189 const uint8_t minorErrorCode)
190{
191 auto it = majorErrorCodeMap.find(majorErrorCode);
192 if (it == majorErrorCodeMap.end())
193 {
194 // No matching found. So just return without logging event.
195 return;
196 }
197
198 std::string errorStr =
199 it->second.second + "(MinorCode:0x" + toHexString(minorErrorCode) + ")";
200 std::string msgId = "OpenBMC.0.1." + it->second.first;
201 sd_journal_send(
202 "MESSAGE=%s", "Platform firmware resiliency error occurred.",
203 "PRIORITY=%i", LOG_ERR, "REDFISH_MESSAGE_ID=%s", msgId.c_str(),
204 "REDFISH_MESSAGE_ARGS=%s", errorStr.c_str(), NULL);
205}
206
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530207static void checkAndLogEvents()
208{
209 uint8_t currPanicCount = 0;
210 if (0 == intel::pfr::readCpldReg(intel::pfr::ActionType::panicCount,
211 currPanicCount))
212 {
213 if (lastPanicCount != currPanicCount)
214 {
215 // Update cached data and log redfish event by reading reason.
216 lastPanicCount = currPanicCount;
217 logLastPanicEvent();
218 }
219 }
220
221 uint8_t currRecoveryCount = 0;
222 if (0 == intel::pfr::readCpldReg(intel::pfr::ActionType::recoveryCount,
223 currRecoveryCount))
224 {
225 if (lastRecoveryCount != currRecoveryCount)
226 {
227 // Update cached data and log redfish event by reading reason.
228 lastRecoveryCount = currRecoveryCount;
229 logLastRecoveryEvent();
230 }
231 }
232
233 uint8_t majorErr = 0;
234 uint8_t minorErr = 0;
235 if ((0 == intel::pfr::readCpldReg(intel::pfr::ActionType::majorError,
236 majorErr)) ||
237 (0 ==
238 intel::pfr::readCpldReg(intel::pfr::ActionType::minorError, minorErr)))
239 {
240 if ((lastMajorErr != majorErr) || (lastMinorErr != minorErr))
241 {
242 lastMajorErr = majorErr;
243 lastMinorErr = minorErr;
244
AppaRao Puli24766942019-11-13 19:27:08 +0530245 logResiliencyErrorEvent(majorErr, minorErr);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530246 }
247 }
248}
249
250static void monitorPlatformStateChange(
251 sdbusplus::asio::object_server& server,
252 std::shared_ptr<sdbusplus::asio::connection>& conn)
253{
254 constexpr size_t pollTimeout = 10; // seconds
255 stateTimer->expires_after(std::chrono::seconds(pollTimeout));
256 stateTimer->async_wait(
257 [&server, &conn](const boost::system::error_code& ec) {
258 if (ec == boost::asio::error::operation_aborted)
259 {
260 // Timer reset.
261 return;
262 }
263 if (ec)
264 {
265 // Platform State Monitor - Timer cancelled.
266 return;
267 }
268 checkAndLogEvents();
269 monitorPlatformStateChange(server, conn);
270 });
271}
272
AppaRao Puli46cead92019-07-22 16:50:09 +0530273void checkAndSetCheckpoint(sdbusplus::asio::object_server& server,
274 std::shared_ptr<sdbusplus::asio::connection>& conn)
275{
276 // Check whether systemd completed all the loading.
277 conn->async_method_call(
278 [&server, &conn](boost::system::error_code ec,
279 const std::variant<uint64_t>& value) {
280 if (ec)
281 {
282 phosphor::logging::log<phosphor::logging::level::ERR>(
283 "async_method_call error: FinishTimestamp failed");
284 return;
285 }
286 if (std::get<uint64_t>(value))
287 {
288 if (!finishedSettingChkPoint)
289 {
290 finishedSettingChkPoint = true;
291 intel::pfr::setBMCBootCheckpoint(bmcBootFinishedChkPoint);
292 }
293 }
294 else
295 {
296 // FIX-ME: Latest up-stream sync caused issue in receiving
297 // StartupFinished signal. Unable to get StartupFinished signal
298 // from systemd1 hence using poll method too, to trigger it
299 // properly.
300 constexpr size_t pollTimeout = 10; // seconds
301 initTimer->expires_after(std::chrono::seconds(pollTimeout));
302 initTimer->async_wait([&server, &conn](
303 const boost::system::error_code& ec) {
304 if (ec == boost::asio::error::operation_aborted)
305 {
306 // Timer reset.
307 return;
308 }
309 if (ec)
310 {
311 phosphor::logging::log<phosphor::logging::level::ERR>(
312 "Set boot Checkpoint - async wait error.");
313 return;
314 }
315 checkAndSetCheckpoint(server, conn);
316 });
317 }
318 },
319 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
320 "org.freedesktop.DBus.Properties", "Get",
321 "org.freedesktop.systemd1.Manager", "FinishTimestamp");
322}
323
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530324int main()
325{
326 // setup connection to dbus
327 boost::asio::io_service io;
328 auto conn = std::make_shared<sdbusplus::asio::connection>(io);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530329 stateTimer = std::make_unique<boost::asio::steady_timer>(io);
AppaRao Puli46cead92019-07-22 16:50:09 +0530330 initTimer = std::make_unique<boost::asio::steady_timer>(io);
AppaRao Pulicc1ed682019-10-01 12:29:40 +0530331 conn->request_name("xyz.openbmc_project.PFR.Manager");
AppaRao Pulid901ad52019-12-12 23:27:32 +0530332 auto server = sdbusplus::asio::object_server(conn, true);
333 auto rootInterface = server.add_interface("/xyz/openbmc_project/pfr", "");
334 rootInterface->initialize();
335 server.add_manager("/xyz/openbmc_project/pfr");
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530336
AppaRao Pulicc1ed682019-10-01 12:29:40 +0530337 // Create PFR attributes object and interface
AppaRao Pulie4e95652019-07-19 16:52:01 +0530338 pfrConfigObject = std::make_unique<intel::pfr::PfrConfig>(server, conn);
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530339
AppaRao Pulie4e95652019-07-19 16:52:01 +0530340 pfrVersionObjects.clear();
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530341 // Create Software objects using Versions interface
AppaRao Pulie4e95652019-07-19 16:52:01 +0530342 for (const auto& entry : verComponentList)
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530343 {
AppaRao Pulie4e95652019-07-19 16:52:01 +0530344 pfrVersionObjects.emplace_back(std::make_unique<intel::pfr::PfrVersion>(
345 server, conn, std::get<0>(entry), std::get<1>(entry),
346 std::get<2>(entry)));
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530347 }
348
AppaRao Puli46cead92019-07-22 16:50:09 +0530349 // Monitor Boot finished signal and set the checkpoint 9 to
350 // notify CPLD about BMC boot finish.
351 auto bootFinishedSignal = std::make_unique<sdbusplus::bus::match::match>(
352 static_cast<sdbusplus::bus::bus&>(*conn),
353 "type='signal',"
354 "member='StartupFinished',path='/org/freedesktop/systemd1',"
355 "interface='org.freedesktop.systemd1.Manager'",
356 [&server, &conn](sdbusplus::message::message& msg) {
357 if (!finishedSettingChkPoint)
358 {
359 finishedSettingChkPoint = true;
360 intel::pfr::setBMCBootCheckpoint(bmcBootFinishedChkPoint);
361 }
362 });
363 checkAndSetCheckpoint(server, conn);
364
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530365 // Capture the Chassis state and Start the monitor timer
366 // if state changed to 'On'. Run timer until OS boot.
367 // Stop timer if state changed to 'Off'.
368 static auto matchChassisState = sdbusplus::bus::match::match(
369 static_cast<sdbusplus::bus::bus&>(*conn),
370 "type='signal',member='PropertiesChanged', "
371 "interface='org.freedesktop.DBus.Properties', "
372 "sender='xyz.openbmc_project.State.Chassis', "
373 "arg0namespace='xyz.openbmc_project.State.Chassis'",
374 [&server, &conn](sdbusplus::message::message& message) {
375 std::string intfName;
376 std::map<std::string, std::variant<std::string>> properties;
377 message.read(intfName, properties);
378
379 const auto it = properties.find("CurrentPowerState");
380 if (it != properties.end())
381 {
382 const std::string* state =
383 std::get_if<std::string>(&it->second);
384 if (state != nullptr)
385 {
386 if ((*state ==
387 "xyz.openbmc_project.State.Chassis.PowerState.On") &&
388 (!stateTimerRunning))
389 {
390 stateTimerRunning = true;
391 monitorPlatformStateChange(server, conn);
392 }
393 else if ((*state == "xyz.openbmc_project.State.Chassis."
394 "PowerState.Off") &&
395 (stateTimerRunning))
396 {
397 stateTimer->cancel();
398 checkAndLogEvents();
399 stateTimerRunning = false;
400 }
401 }
AppaRao Pulie4e95652019-07-19 16:52:01 +0530402
403 // Update the D-Bus properties when chassis state changes.
404 updateDbusPropertiesCache();
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530405 }
406 });
407
408 // Capture the Host state and Start the monitor timer
409 // if state changed to 'Running'. Run timer until OS boot.
410 // Stop timer if state changed to 'Off'.
411 static auto matchHostState = sdbusplus::bus::match::match(
412 static_cast<sdbusplus::bus::bus&>(*conn),
413 "type='signal',member='PropertiesChanged', "
414 "interface='org.freedesktop.DBus.Properties', "
415 "sender='xyz.openbmc_project.State.Chassis', "
416 "arg0namespace='xyz.openbmc_project.State.Host'",
417 [&server, &conn](sdbusplus::message::message& message) {
418 std::string intfName;
419 std::map<std::string, std::variant<std::string>> properties;
420 message.read(intfName, properties);
421
422 const auto it = properties.find("CurrentHostState");
423 if (it != properties.end())
424 {
425 const std::string* state =
426 std::get_if<std::string>(&it->second);
427 if (state != nullptr)
428 {
429 if ((*state ==
430 "xyz.openbmc_project.State.Host.HostState.Running") &&
431 (!stateTimerRunning))
432 {
433 stateTimerRunning = true;
434 monitorPlatformStateChange(server, conn);
435 }
436 else if (((*state == "xyz.openbmc_project.State.Host."
437 "HostState.Off") ||
438 (*state == "xyz.openbmc_project.State.Host."
439 "HostState.Quiesced")) &&
440 (stateTimerRunning))
441 {
442 stateTimer->cancel();
443 checkAndLogEvents();
444 stateTimerRunning = false;
445 }
446 }
AppaRao Pulie4e95652019-07-19 16:52:01 +0530447
448 // Update the D-Bus properties when host state changes.
449 updateDbusPropertiesCache();
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530450 }
451 });
452
453 // Capture the OS state change and stop monitor timer
454 // if OS boots completly or becomes Inactive.
455 // start timer in other cases to mnitor states.
456 static auto matchOsState = sdbusplus::bus::match::match(
457 static_cast<sdbusplus::bus::bus&>(*conn),
458 "type='signal',member='PropertiesChanged', "
459 "interface='org.freedesktop.DBus.Properties', "
460 "sender='xyz.openbmc_project.State.Chassis', "
461 "arg0namespace='xyz.openbmc_project.State.OperatingSystem.Status'",
462 [&server, &conn](sdbusplus::message::message& message) {
463 std::string intfName;
464 std::map<std::string, std::variant<std::string>> properties;
465 message.read(intfName, properties);
466
467 const auto it = properties.find("OperatingSystemState");
468 if (it != properties.end())
469 {
470 const std::string* state =
471 std::get_if<std::string>(&it->second);
472 if (state != nullptr)
473 {
474 if (((*state == "BootComplete") ||
475 (*state == "Inactive")) &&
476 (stateTimerRunning))
477 {
478 stateTimer->cancel();
479 checkAndLogEvents();
480 stateTimerRunning = false;
481 }
482 else if (!stateTimerRunning)
483 {
484 stateTimerRunning = true;
485 monitorPlatformStateChange(server, conn);
486 }
487 }
488 }
489 });
490
491 // First time, check and log events if any.
492 checkAndLogEvents();
493
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530494 phosphor::logging::log<phosphor::logging::level::INFO>(
495 "Intel PFR service started successfully");
496
497 io.run();
498
499 return 0;
500}