blob: 761b25e4d4c0768c9a5be567f7fad86c5bafb0af [file] [log] [blame]
AppaRao Pulie63eeda2019-07-05 16:25:38 +05301/*
2// Copyright (c) 2019 Intel Corporation
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15*/
16
AppaRao Puli88aa33b2019-07-18 23:49:55 +053017#include <systemd/sd-journal.h>
18
AppaRao Pulie63eeda2019-07-05 16:25:38 +053019#include "pfr_mgr.hpp"
AppaRao Puli88aa33b2019-07-18 23:49:55 +053020#include "pfr.hpp"
AppaRao Puli46cead92019-07-22 16:50:09 +053021#include <boost/asio.hpp>
AppaRao Pulie63eeda2019-07-05 16:25:38 +053022
AppaRao Puli88aa33b2019-07-18 23:49:55 +053023// Caches the last Recovery/Panic Count to
24// identify any new Recovery/panic actions.
25/* TODO: When BMC Reset's, these values will be lost
26 * Persist this info using settingsd */
27static uint8_t lastRecoveryCount = 0;
28static uint8_t lastPanicCount = 0;
29static uint8_t lastMajorErr = 0;
30static uint8_t lastMinorErr = 0;
31
32static bool stateTimerRunning = false;
AppaRao Puli46cead92019-07-22 16:50:09 +053033bool finishedSettingChkPoint = false;
34static constexpr uint8_t bmcBootFinishedChkPoint = 0x09;
35
AppaRao Puli88aa33b2019-07-18 23:49:55 +053036std::unique_ptr<boost::asio::steady_timer> stateTimer = nullptr;
AppaRao Puli46cead92019-07-22 16:50:09 +053037std::unique_ptr<boost::asio::steady_timer> initTimer = nullptr;
AppaRao Puli88aa33b2019-07-18 23:49:55 +053038
AppaRao Pulie4e95652019-07-19 16:52:01 +053039std::vector<std::unique_ptr<intel::pfr::PfrVersion>> pfrVersionObjects;
40std::unique_ptr<intel::pfr::PfrConfig> pfrConfigObject;
41
42using namespace intel::pfr;
43// List holds <ObjPath> <ImageType> <VersionPurpose>
44static std::vector<std::tuple<std::string, ImageType, std::string>>
45 verComponentList = {
46 std::make_tuple("bmc_active", ImageType::bmcActive, versionPurposeBMC),
47 std::make_tuple("bmc_recovery", ImageType::bmcRecovery,
48 versionPurposeBMC),
49 std::make_tuple("bios_active", ImageType::biosActive,
50 versionPurposeHost),
51 std::make_tuple("bios_recovery", ImageType::biosRecovery,
52 versionPurposeHost),
53 std::make_tuple("cpld", ImageType::cpld, versionPurposeOther)};
54
AppaRao Pulie90f1282019-11-05 01:07:05 +053055// Recovery reason map.
56// {<CPLD association>,{<Redfish MessageID>, <Recovery Reason>}}
57static const boost::container::flat_map<uint8_t,
58 std::pair<std::string, std::string>>
59 recoveryReasonMap = {
60 {0x01,
61 {"BIOSFirmwareRecoveryReason",
62 "PCH active image authentication failure"}},
63 {0x02,
64 {"BIOSFirmwareRecoveryReason",
65 "PCH recovery image authentication failure"}},
66 {0x03, {"MEFirmwareRecoveryReason", "ME launch failure"}},
67 {0x04, {"BIOSFirmwareRecoveryReason", "ACM launch failure"}},
68 {0x05, {"BIOSFirmwareRecoveryReason", "IBB launch failure"}},
69 {0x06, {"BIOSFirmwareRecoveryReason", "OBB launch failure"}},
70 {0x07,
71 {"BMCFirmwareRecoveryReason",
72 "BMC active image authentication failure"}},
73 {0x08,
74 {"BMCFirmwareRecoveryReason",
75 "BMC recovery image authentication failure"}},
76 {0x09, {"BMCFirmwareRecoveryReason", "BMC launch failure"}},
77 {0x0A, {"CPLDFirmwareRecoveryReason", "CPLD watchdog expired"}}};
AppaRao Puli88aa33b2019-07-18 23:49:55 +053078
AppaRao Pulie90f1282019-11-05 01:07:05 +053079// Panic Reason map.
80// {<CPLD association>, {<Redfish MessageID>, <Panic reason> })
81static const boost::container::flat_map<uint8_t,
82 std::pair<std::string, std::string>>
83 panicReasonMap = {
84 {0x01, {"CPLDFirmwarePanicReason", "CPLD watchdog expired"}},
85 {0x02, {"BMCFirmwarePanicReason", "BMC watchdog expired"}},
86 {0x03, {"MEFirmwarePanicReason", "ME watchdog expired"}},
87 {0x04, {"BIOSFirmwarePanicReason", "ACM watchdog expired"}},
88 {0x05, {"BIOSFirmwarePanicReason", "IBB watchdog expired"}},
89 {0x06, {"BIOSFirmwarePanicReason", "OBB watchdog expired"}},
90 {0x07,
91 {"BMCFirmwarePanicReason", "BMC active image authentication failure"}},
92 {0x08,
93 {"BMCFirmwarePanicReason",
94 "BMC recovery image authentication failure"}},
95 {0x09,
96 {"BIOSFirmwarePanicReason",
97 "PCH active image authentication failure"}},
98 {0x0A,
99 {"BIOSFirmwarePanicReason",
100 "PCH recovery image authentication failure"}},
101 {0x0B, {"MEFirmwarePanicReason", "ME authentication failure"}},
102 {0x0C,
103 {"BIOSFirmwarePanicReason",
104 "ACM or IBB or OBB authentication failure"}},
105 {0x0D, {"BIOSFirmwarePanicReason", "PCH update intent"}},
106 {0x0E, {"BMCFirmwarePanicReason", "BMC update intent"}},
107 {0x0F, {"BMCFirmwarePanicReason", "BMC reset detected"}}};
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530108
AppaRao Puli24766942019-11-13 19:27:08 +0530109// Firmware resiliency major map.
110// {<CPLD association>, {<Redfish MessageID>, <Error reason> })
111static const boost::container::flat_map<uint8_t,
112 std::pair<std::string, std::string>>
113 majorErrorCodeMap = {
114 {0x01,
115 {"BMCFirmwareResiliencyError", "BMC image authentication failed"}},
116 {0x02,
117 {"BIOSFirmwareResiliencyError", "BIOS image authentication failed"}},
118 {0x03, {"BMCFirmwareResiliencyError", "BMC boot failed"}},
119 {0x04, {"MEFirmwareResiliencyError", "ME boot failed"}},
120 {0x05, {"BIOSFirmwareResiliencyError", "ACM boot failed"}},
121 {0x06, {"BIOSFirmwareResiliencyError", "BIOS boot failed"}},
122 {0x07, {"BIOSFirmwareResiliencyError", "Update from PCH failed"}},
123 {0x08, {"BIOSFirmwarePanicReason", "Update from BMC failed"}}};
124
AppaRao Pulie4e95652019-07-19 16:52:01 +0530125static void updateDbusPropertiesCache()
126{
127 for (const auto& pfrVerObj : pfrVersionObjects)
128 {
129 pfrVerObj->updateVersion();
130 }
131
132 // Update provisoningStatus properties
133 pfrConfigObject->updateProvisioningStatus();
134
135 phosphor::logging::log<phosphor::logging::level::INFO>(
136 "PFR Manager service cache data updated.");
137}
138
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530139static void logLastRecoveryEvent()
140{
141 uint8_t reason = 0;
142 if (0 !=
143 intel::pfr::readCpldReg(intel::pfr::ActionType::recoveryReason, reason))
144 {
145 return;
146 }
147
AppaRao Pulie90f1282019-11-05 01:07:05 +0530148 auto it = recoveryReasonMap.find(reason);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530149 if (it == recoveryReasonMap.end())
150 {
151 // No matching found. So just return without logging event.
152 return;
153 }
AppaRao Pulie90f1282019-11-05 01:07:05 +0530154 std::string msgId = "OpenBMC.0.1." + it->second.first;
155 sd_journal_send("MESSAGE=%s", "Platform firmware recovery occurred.",
156 "PRIORITY=%i", LOG_WARNING, "REDFISH_MESSAGE_ID=%s",
157 msgId.c_str(), "REDFISH_MESSAGE_ARGS=%s",
158 it->second.second.c_str(), NULL);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530159}
160
161static void logLastPanicEvent()
162{
163 uint8_t reason = 0;
164 if (0 !=
165 intel::pfr::readCpldReg(intel::pfr::ActionType::panicReason, reason))
166 {
167 return;
168 }
169
AppaRao Pulie90f1282019-11-05 01:07:05 +0530170 auto it = panicReasonMap.find(reason);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530171 if (it == panicReasonMap.end())
172 {
173 // No matching found. So just return without logging event.
174 return;
175 }
176
AppaRao Pulie90f1282019-11-05 01:07:05 +0530177 std::string msgId = "OpenBMC.0.1." + it->second.first;
178 sd_journal_send("MESSAGE=%s", "Platform firmware panic occurred.",
179 "PRIORITY=%i", LOG_WARNING, "REDFISH_MESSAGE_ID=%s",
180 msgId.c_str(), "REDFISH_MESSAGE_ARGS=%s",
181 it->second.second.c_str(), NULL);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530182}
183
AppaRao Puli24766942019-11-13 19:27:08 +0530184static void logResiliencyErrorEvent(const uint8_t majorErrorCode,
185 const uint8_t minorErrorCode)
186{
187 auto it = majorErrorCodeMap.find(majorErrorCode);
188 if (it == majorErrorCodeMap.end())
189 {
190 // No matching found. So just return without logging event.
191 return;
192 }
193
194 std::string errorStr =
195 it->second.second + "(MinorCode:0x" + toHexString(minorErrorCode) + ")";
196 std::string msgId = "OpenBMC.0.1." + it->second.first;
197 sd_journal_send(
198 "MESSAGE=%s", "Platform firmware resiliency error occurred.",
199 "PRIORITY=%i", LOG_ERR, "REDFISH_MESSAGE_ID=%s", msgId.c_str(),
200 "REDFISH_MESSAGE_ARGS=%s", errorStr.c_str(), NULL);
201}
202
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530203static void checkAndLogEvents()
204{
205 uint8_t currPanicCount = 0;
206 if (0 == intel::pfr::readCpldReg(intel::pfr::ActionType::panicCount,
207 currPanicCount))
208 {
209 if (lastPanicCount != currPanicCount)
210 {
211 // Update cached data and log redfish event by reading reason.
212 lastPanicCount = currPanicCount;
213 logLastPanicEvent();
214 }
215 }
216
217 uint8_t currRecoveryCount = 0;
218 if (0 == intel::pfr::readCpldReg(intel::pfr::ActionType::recoveryCount,
219 currRecoveryCount))
220 {
221 if (lastRecoveryCount != currRecoveryCount)
222 {
223 // Update cached data and log redfish event by reading reason.
224 lastRecoveryCount = currRecoveryCount;
225 logLastRecoveryEvent();
226 }
227 }
228
229 uint8_t majorErr = 0;
230 uint8_t minorErr = 0;
231 if ((0 == intel::pfr::readCpldReg(intel::pfr::ActionType::majorError,
232 majorErr)) ||
233 (0 ==
234 intel::pfr::readCpldReg(intel::pfr::ActionType::minorError, minorErr)))
235 {
236 if ((lastMajorErr != majorErr) || (lastMinorErr != minorErr))
237 {
238 lastMajorErr = majorErr;
239 lastMinorErr = minorErr;
240
AppaRao Puli24766942019-11-13 19:27:08 +0530241 logResiliencyErrorEvent(majorErr, minorErr);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530242 }
243 }
244}
245
246static void monitorPlatformStateChange(
247 sdbusplus::asio::object_server& server,
248 std::shared_ptr<sdbusplus::asio::connection>& conn)
249{
250 constexpr size_t pollTimeout = 10; // seconds
251 stateTimer->expires_after(std::chrono::seconds(pollTimeout));
252 stateTimer->async_wait(
253 [&server, &conn](const boost::system::error_code& ec) {
254 if (ec == boost::asio::error::operation_aborted)
255 {
256 // Timer reset.
257 return;
258 }
259 if (ec)
260 {
261 // Platform State Monitor - Timer cancelled.
262 return;
263 }
264 checkAndLogEvents();
265 monitorPlatformStateChange(server, conn);
266 });
267}
268
AppaRao Puli46cead92019-07-22 16:50:09 +0530269void checkAndSetCheckpoint(sdbusplus::asio::object_server& server,
270 std::shared_ptr<sdbusplus::asio::connection>& conn)
271{
272 // Check whether systemd completed all the loading.
273 conn->async_method_call(
274 [&server, &conn](boost::system::error_code ec,
275 const std::variant<uint64_t>& value) {
276 if (ec)
277 {
278 phosphor::logging::log<phosphor::logging::level::ERR>(
279 "async_method_call error: FinishTimestamp failed");
280 return;
281 }
282 if (std::get<uint64_t>(value))
283 {
284 if (!finishedSettingChkPoint)
285 {
286 finishedSettingChkPoint = true;
287 intel::pfr::setBMCBootCheckpoint(bmcBootFinishedChkPoint);
288 }
289 }
290 else
291 {
292 // FIX-ME: Latest up-stream sync caused issue in receiving
293 // StartupFinished signal. Unable to get StartupFinished signal
294 // from systemd1 hence using poll method too, to trigger it
295 // properly.
296 constexpr size_t pollTimeout = 10; // seconds
297 initTimer->expires_after(std::chrono::seconds(pollTimeout));
298 initTimer->async_wait([&server, &conn](
299 const boost::system::error_code& ec) {
300 if (ec == boost::asio::error::operation_aborted)
301 {
302 // Timer reset.
303 return;
304 }
305 if (ec)
306 {
307 phosphor::logging::log<phosphor::logging::level::ERR>(
308 "Set boot Checkpoint - async wait error.");
309 return;
310 }
311 checkAndSetCheckpoint(server, conn);
312 });
313 }
314 },
315 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
316 "org.freedesktop.DBus.Properties", "Get",
317 "org.freedesktop.systemd1.Manager", "FinishTimestamp");
318}
319
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530320int main()
321{
322 // setup connection to dbus
323 boost::asio::io_service io;
324 auto conn = std::make_shared<sdbusplus::asio::connection>(io);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530325 stateTimer = std::make_unique<boost::asio::steady_timer>(io);
AppaRao Puli46cead92019-07-22 16:50:09 +0530326 initTimer = std::make_unique<boost::asio::steady_timer>(io);
AppaRao Pulicc1ed682019-10-01 12:29:40 +0530327 conn->request_name("xyz.openbmc_project.PFR.Manager");
AppaRao Pulie4e95652019-07-19 16:52:01 +0530328 auto server = sdbusplus::asio::object_server(conn);
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530329
AppaRao Pulicc1ed682019-10-01 12:29:40 +0530330 // Create PFR attributes object and interface
AppaRao Pulie4e95652019-07-19 16:52:01 +0530331 pfrConfigObject = std::make_unique<intel::pfr::PfrConfig>(server, conn);
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530332
AppaRao Pulie4e95652019-07-19 16:52:01 +0530333 pfrVersionObjects.clear();
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530334 // Create Software objects using Versions interface
AppaRao Pulie4e95652019-07-19 16:52:01 +0530335 for (const auto& entry : verComponentList)
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530336 {
AppaRao Pulie4e95652019-07-19 16:52:01 +0530337 pfrVersionObjects.emplace_back(std::make_unique<intel::pfr::PfrVersion>(
338 server, conn, std::get<0>(entry), std::get<1>(entry),
339 std::get<2>(entry)));
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530340 }
341
AppaRao Puli46cead92019-07-22 16:50:09 +0530342 // Monitor Boot finished signal and set the checkpoint 9 to
343 // notify CPLD about BMC boot finish.
344 auto bootFinishedSignal = std::make_unique<sdbusplus::bus::match::match>(
345 static_cast<sdbusplus::bus::bus&>(*conn),
346 "type='signal',"
347 "member='StartupFinished',path='/org/freedesktop/systemd1',"
348 "interface='org.freedesktop.systemd1.Manager'",
349 [&server, &conn](sdbusplus::message::message& msg) {
350 if (!finishedSettingChkPoint)
351 {
352 finishedSettingChkPoint = true;
353 intel::pfr::setBMCBootCheckpoint(bmcBootFinishedChkPoint);
354 }
355 });
356 checkAndSetCheckpoint(server, conn);
357
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530358 // Capture the Chassis state and Start the monitor timer
359 // if state changed to 'On'. Run timer until OS boot.
360 // Stop timer if state changed to 'Off'.
361 static auto matchChassisState = sdbusplus::bus::match::match(
362 static_cast<sdbusplus::bus::bus&>(*conn),
363 "type='signal',member='PropertiesChanged', "
364 "interface='org.freedesktop.DBus.Properties', "
365 "sender='xyz.openbmc_project.State.Chassis', "
366 "arg0namespace='xyz.openbmc_project.State.Chassis'",
367 [&server, &conn](sdbusplus::message::message& message) {
368 std::string intfName;
369 std::map<std::string, std::variant<std::string>> properties;
370 message.read(intfName, properties);
371
372 const auto it = properties.find("CurrentPowerState");
373 if (it != properties.end())
374 {
375 const std::string* state =
376 std::get_if<std::string>(&it->second);
377 if (state != nullptr)
378 {
379 if ((*state ==
380 "xyz.openbmc_project.State.Chassis.PowerState.On") &&
381 (!stateTimerRunning))
382 {
383 stateTimerRunning = true;
384 monitorPlatformStateChange(server, conn);
385 }
386 else if ((*state == "xyz.openbmc_project.State.Chassis."
387 "PowerState.Off") &&
388 (stateTimerRunning))
389 {
390 stateTimer->cancel();
391 checkAndLogEvents();
392 stateTimerRunning = false;
393 }
394 }
AppaRao Pulie4e95652019-07-19 16:52:01 +0530395
396 // Update the D-Bus properties when chassis state changes.
397 updateDbusPropertiesCache();
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530398 }
399 });
400
401 // Capture the Host state and Start the monitor timer
402 // if state changed to 'Running'. Run timer until OS boot.
403 // Stop timer if state changed to 'Off'.
404 static auto matchHostState = sdbusplus::bus::match::match(
405 static_cast<sdbusplus::bus::bus&>(*conn),
406 "type='signal',member='PropertiesChanged', "
407 "interface='org.freedesktop.DBus.Properties', "
408 "sender='xyz.openbmc_project.State.Chassis', "
409 "arg0namespace='xyz.openbmc_project.State.Host'",
410 [&server, &conn](sdbusplus::message::message& message) {
411 std::string intfName;
412 std::map<std::string, std::variant<std::string>> properties;
413 message.read(intfName, properties);
414
415 const auto it = properties.find("CurrentHostState");
416 if (it != properties.end())
417 {
418 const std::string* state =
419 std::get_if<std::string>(&it->second);
420 if (state != nullptr)
421 {
422 if ((*state ==
423 "xyz.openbmc_project.State.Host.HostState.Running") &&
424 (!stateTimerRunning))
425 {
426 stateTimerRunning = true;
427 monitorPlatformStateChange(server, conn);
428 }
429 else if (((*state == "xyz.openbmc_project.State.Host."
430 "HostState.Off") ||
431 (*state == "xyz.openbmc_project.State.Host."
432 "HostState.Quiesced")) &&
433 (stateTimerRunning))
434 {
435 stateTimer->cancel();
436 checkAndLogEvents();
437 stateTimerRunning = false;
438 }
439 }
AppaRao Pulie4e95652019-07-19 16:52:01 +0530440
441 // Update the D-Bus properties when host state changes.
442 updateDbusPropertiesCache();
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530443 }
444 });
445
446 // Capture the OS state change and stop monitor timer
447 // if OS boots completly or becomes Inactive.
448 // start timer in other cases to mnitor states.
449 static auto matchOsState = sdbusplus::bus::match::match(
450 static_cast<sdbusplus::bus::bus&>(*conn),
451 "type='signal',member='PropertiesChanged', "
452 "interface='org.freedesktop.DBus.Properties', "
453 "sender='xyz.openbmc_project.State.Chassis', "
454 "arg0namespace='xyz.openbmc_project.State.OperatingSystem.Status'",
455 [&server, &conn](sdbusplus::message::message& message) {
456 std::string intfName;
457 std::map<std::string, std::variant<std::string>> properties;
458 message.read(intfName, properties);
459
460 const auto it = properties.find("OperatingSystemState");
461 if (it != properties.end())
462 {
463 const std::string* state =
464 std::get_if<std::string>(&it->second);
465 if (state != nullptr)
466 {
467 if (((*state == "BootComplete") ||
468 (*state == "Inactive")) &&
469 (stateTimerRunning))
470 {
471 stateTimer->cancel();
472 checkAndLogEvents();
473 stateTimerRunning = false;
474 }
475 else if (!stateTimerRunning)
476 {
477 stateTimerRunning = true;
478 monitorPlatformStateChange(server, conn);
479 }
480 }
481 }
482 });
483
484 // First time, check and log events if any.
485 checkAndLogEvents();
486
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530487 phosphor::logging::log<phosphor::logging::level::INFO>(
488 "Intel PFR service started successfully");
489
490 io.run();
491
492 return 0;
493}