/*
// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
16
AppaRao Puli88aa33b2019-07-18 23:49:55 +053017#include <systemd/sd-journal.h>
18
AppaRao Pulie63eeda2019-07-05 16:25:38 +053019#include "pfr_mgr.hpp"
AppaRao Puli88aa33b2019-07-18 23:49:55 +053020#include "pfr.hpp"
AppaRao Puli46cead92019-07-22 16:50:09 +053021#include <boost/asio.hpp>
AppaRao Pulie63eeda2019-07-05 16:25:38 +053022
AppaRao Puli88aa33b2019-07-18 23:49:55 +053023// Caches the last Recovery/Panic Count to
24// identify any new Recovery/panic actions.
25/* TODO: When BMC Reset's, these values will be lost
26 * Persist this info using settingsd */
27static uint8_t lastRecoveryCount = 0;
28static uint8_t lastPanicCount = 0;
29static uint8_t lastMajorErr = 0;
30static uint8_t lastMinorErr = 0;
31
32static bool stateTimerRunning = false;
AppaRao Puli46cead92019-07-22 16:50:09 +053033bool finishedSettingChkPoint = false;
34static constexpr uint8_t bmcBootFinishedChkPoint = 0x09;
35
AppaRao Puli88aa33b2019-07-18 23:49:55 +053036std::unique_ptr<boost::asio::steady_timer> stateTimer = nullptr;
AppaRao Puli46cead92019-07-22 16:50:09 +053037std::unique_ptr<boost::asio::steady_timer> initTimer = nullptr;
AppaRao Puli88aa33b2019-07-18 23:49:55 +053038
AppaRao Pulie4e95652019-07-19 16:52:01 +053039std::vector<std::unique_ptr<intel::pfr::PfrVersion>> pfrVersionObjects;
40std::unique_ptr<intel::pfr::PfrConfig> pfrConfigObject;
41
42using namespace intel::pfr;
43// List holds <ObjPath> <ImageType> <VersionPurpose>
44static std::vector<std::tuple<std::string, ImageType, std::string>>
45 verComponentList = {
46 std::make_tuple("bmc_active", ImageType::bmcActive, versionPurposeBMC),
47 std::make_tuple("bmc_recovery", ImageType::bmcRecovery,
48 versionPurposeBMC),
49 std::make_tuple("bios_active", ImageType::biosActive,
50 versionPurposeHost),
51 std::make_tuple("bios_recovery", ImageType::biosRecovery,
52 versionPurposeHost),
Vikram Bodireddy3c6c8c32019-12-05 11:06:15 +053053 std::make_tuple("cpld_active", ImageType::cpldActive,
54 versionPurposeOther),
55 std::make_tuple("cpld_recovery", ImageType::cpldRecovery,
56 versionPurposeOther),
57};
AppaRao Pulie4e95652019-07-19 16:52:01 +053058
AppaRao Pulie90f1282019-11-05 01:07:05 +053059// Recovery reason map.
60// {<CPLD association>,{<Redfish MessageID>, <Recovery Reason>}}
61static const boost::container::flat_map<uint8_t,
62 std::pair<std::string, std::string>>
63 recoveryReasonMap = {
64 {0x01,
65 {"BIOSFirmwareRecoveryReason",
Chalapathi3fb544b2020-02-14 15:43:49 +000066 "BIOS active image authentication failure"}},
AppaRao Pulie90f1282019-11-05 01:07:05 +053067 {0x02,
68 {"BIOSFirmwareRecoveryReason",
Chalapathi3fb544b2020-02-14 15:43:49 +000069 "BIOS recovery image authentication failure"}},
AppaRao Pulie90f1282019-11-05 01:07:05 +053070 {0x03, {"MEFirmwareRecoveryReason", "ME launch failure"}},
71 {0x04, {"BIOSFirmwareRecoveryReason", "ACM launch failure"}},
72 {0x05, {"BIOSFirmwareRecoveryReason", "IBB launch failure"}},
73 {0x06, {"BIOSFirmwareRecoveryReason", "OBB launch failure"}},
74 {0x07,
75 {"BMCFirmwareRecoveryReason",
76 "BMC active image authentication failure"}},
77 {0x08,
78 {"BMCFirmwareRecoveryReason",
79 "BMC recovery image authentication failure"}},
80 {0x09, {"BMCFirmwareRecoveryReason", "BMC launch failure"}},
81 {0x0A, {"CPLDFirmwareRecoveryReason", "CPLD watchdog expired"}}};
AppaRao Puli88aa33b2019-07-18 23:49:55 +053082
AppaRao Pulie90f1282019-11-05 01:07:05 +053083// Panic Reason map.
84// {<CPLD association>, {<Redfish MessageID>, <Panic reason> })
85static const boost::container::flat_map<uint8_t,
86 std::pair<std::string, std::string>>
87 panicReasonMap = {
Chalapathi3fb544b2020-02-14 15:43:49 +000088 {0x01, {"BIOSFirmwarePanicReason", "BIOS update intent"}},
89 {0x02, {"BMCFirmwarePanicReason", "BMC update intent"}},
90 {0x03, {"BMCFirmwarePanicReason", "BMC reset detected"}},
91 {0x04, {"BMCFirmwarePanicReason", "BMC watchdog expired"}},
92 {0x05, {"MEFirmwarePanicReason", "ME watchdog expired"}},
93 {0x06, {"BIOSFirmwarePanicReason", "ACM watchdog expired"}},
94 {0x07, {"BIOSFirmwarePanicReason", "IBB watchdog expired"}},
95 {0x08, {"BIOSFirmwarePanicReason", "OBB watchdog expired"}},
AppaRao Pulie90f1282019-11-05 01:07:05 +053096 {0x09,
97 {"BIOSFirmwarePanicReason",
Chalapathi3fb544b2020-02-14 15:43:49 +000098 "ACM or IBB or OBB authentication failure"}}};
AppaRao Puli88aa33b2019-07-18 23:49:55 +053099
AppaRao Puli24766942019-11-13 19:27:08 +0530100// Firmware resiliency major map.
101// {<CPLD association>, {<Redfish MessageID>, <Error reason> })
102static const boost::container::flat_map<uint8_t,
103 std::pair<std::string, std::string>>
104 majorErrorCodeMap = {
105 {0x01,
106 {"BMCFirmwareResiliencyError", "BMC image authentication failed"}},
107 {0x02,
108 {"BIOSFirmwareResiliencyError", "BIOS image authentication failed"}},
Chalapathi3fb544b2020-02-14 15:43:49 +0000109 {0x03, {"BIOSFirmwareResiliencyError", "Update from BIOS failed"}},
110 {0x04, {"BMCFirmwareResiliencyError", "Update from BMC failed"}}};
AppaRao Puli24766942019-11-13 19:27:08 +0530111
AppaRao Pulie4e95652019-07-19 16:52:01 +0530112static void updateDbusPropertiesCache()
113{
114 for (const auto& pfrVerObj : pfrVersionObjects)
115 {
116 pfrVerObj->updateVersion();
117 }
118
119 // Update provisoningStatus properties
120 pfrConfigObject->updateProvisioningStatus();
121
122 phosphor::logging::log<phosphor::logging::level::INFO>(
123 "PFR Manager service cache data updated.");
124}
125
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530126static void logLastRecoveryEvent()
127{
128 uint8_t reason = 0;
129 if (0 !=
130 intel::pfr::readCpldReg(intel::pfr::ActionType::recoveryReason, reason))
131 {
132 return;
133 }
134
AppaRao Pulie90f1282019-11-05 01:07:05 +0530135 auto it = recoveryReasonMap.find(reason);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530136 if (it == recoveryReasonMap.end())
137 {
138 // No matching found. So just return without logging event.
139 return;
140 }
AppaRao Pulie90f1282019-11-05 01:07:05 +0530141 std::string msgId = "OpenBMC.0.1." + it->second.first;
142 sd_journal_send("MESSAGE=%s", "Platform firmware recovery occurred.",
143 "PRIORITY=%i", LOG_WARNING, "REDFISH_MESSAGE_ID=%s",
144 msgId.c_str(), "REDFISH_MESSAGE_ARGS=%s",
145 it->second.second.c_str(), NULL);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530146}
147
148static void logLastPanicEvent()
149{
150 uint8_t reason = 0;
151 if (0 !=
152 intel::pfr::readCpldReg(intel::pfr::ActionType::panicReason, reason))
153 {
154 return;
155 }
156
AppaRao Pulie90f1282019-11-05 01:07:05 +0530157 auto it = panicReasonMap.find(reason);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530158 if (it == panicReasonMap.end())
159 {
160 // No matching found. So just return without logging event.
161 return;
162 }
163
AppaRao Pulie90f1282019-11-05 01:07:05 +0530164 std::string msgId = "OpenBMC.0.1." + it->second.first;
165 sd_journal_send("MESSAGE=%s", "Platform firmware panic occurred.",
166 "PRIORITY=%i", LOG_WARNING, "REDFISH_MESSAGE_ID=%s",
167 msgId.c_str(), "REDFISH_MESSAGE_ARGS=%s",
168 it->second.second.c_str(), NULL);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530169}
170
AppaRao Puli24766942019-11-13 19:27:08 +0530171static void logResiliencyErrorEvent(const uint8_t majorErrorCode,
172 const uint8_t minorErrorCode)
173{
174 auto it = majorErrorCodeMap.find(majorErrorCode);
175 if (it == majorErrorCodeMap.end())
176 {
177 // No matching found. So just return without logging event.
178 return;
179 }
180
181 std::string errorStr =
182 it->second.second + "(MinorCode:0x" + toHexString(minorErrorCode) + ")";
183 std::string msgId = "OpenBMC.0.1." + it->second.first;
184 sd_journal_send(
185 "MESSAGE=%s", "Platform firmware resiliency error occurred.",
186 "PRIORITY=%i", LOG_ERR, "REDFISH_MESSAGE_ID=%s", msgId.c_str(),
187 "REDFISH_MESSAGE_ARGS=%s", errorStr.c_str(), NULL);
188}
189
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530190static void checkAndLogEvents()
191{
192 uint8_t currPanicCount = 0;
193 if (0 == intel::pfr::readCpldReg(intel::pfr::ActionType::panicCount,
194 currPanicCount))
195 {
196 if (lastPanicCount != currPanicCount)
197 {
198 // Update cached data and log redfish event by reading reason.
199 lastPanicCount = currPanicCount;
200 logLastPanicEvent();
201 }
202 }
203
204 uint8_t currRecoveryCount = 0;
205 if (0 == intel::pfr::readCpldReg(intel::pfr::ActionType::recoveryCount,
206 currRecoveryCount))
207 {
208 if (lastRecoveryCount != currRecoveryCount)
209 {
210 // Update cached data and log redfish event by reading reason.
211 lastRecoveryCount = currRecoveryCount;
212 logLastRecoveryEvent();
213 }
214 }
215
216 uint8_t majorErr = 0;
217 uint8_t minorErr = 0;
218 if ((0 == intel::pfr::readCpldReg(intel::pfr::ActionType::majorError,
219 majorErr)) ||
220 (0 ==
221 intel::pfr::readCpldReg(intel::pfr::ActionType::minorError, minorErr)))
222 {
223 if ((lastMajorErr != majorErr) || (lastMinorErr != minorErr))
224 {
225 lastMajorErr = majorErr;
226 lastMinorErr = minorErr;
227
AppaRao Puli24766942019-11-13 19:27:08 +0530228 logResiliencyErrorEvent(majorErr, minorErr);
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530229 }
230 }
231}
232
233static void monitorPlatformStateChange(
234 sdbusplus::asio::object_server& server,
235 std::shared_ptr<sdbusplus::asio::connection>& conn)
236{
237 constexpr size_t pollTimeout = 10; // seconds
238 stateTimer->expires_after(std::chrono::seconds(pollTimeout));
239 stateTimer->async_wait(
240 [&server, &conn](const boost::system::error_code& ec) {
241 if (ec == boost::asio::error::operation_aborted)
242 {
243 // Timer reset.
244 return;
245 }
246 if (ec)
247 {
248 // Platform State Monitor - Timer cancelled.
249 return;
250 }
251 checkAndLogEvents();
252 monitorPlatformStateChange(server, conn);
253 });
254}
255
AppaRao Puli46cead92019-07-22 16:50:09 +0530256void checkAndSetCheckpoint(sdbusplus::asio::object_server& server,
257 std::shared_ptr<sdbusplus::asio::connection>& conn)
258{
259 // Check whether systemd completed all the loading.
260 conn->async_method_call(
261 [&server, &conn](boost::system::error_code ec,
262 const std::variant<uint64_t>& value) {
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530263 if (!ec)
AppaRao Puli46cead92019-07-22 16:50:09 +0530264 {
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530265 if (std::get<uint64_t>(value))
AppaRao Puli46cead92019-07-22 16:50:09 +0530266 {
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530267 phosphor::logging::log<phosphor::logging::level::INFO>(
268 "PFR: BMC boot completed. Setting checkpoint 9.");
269 if (!finishedSettingChkPoint)
270 {
271 finishedSettingChkPoint = true;
272 intel::pfr::setBMCBootCheckpoint(
273 bmcBootFinishedChkPoint);
274 }
275 return;
AppaRao Puli46cead92019-07-22 16:50:09 +0530276 }
277 }
278 else
279 {
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530280 // Failed to get data from systemd. System might not
281 // be ready yet. Attempt again for data.
282 phosphor::logging::log<phosphor::logging::level::ERR>(
283 "PFR: aync call failed to get FinishTimestamp.",
284 phosphor::logging::entry("MSG=%s", ec.message().c_str()));
AppaRao Puli46cead92019-07-22 16:50:09 +0530285 }
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530286 // FIX-ME: Latest up-stream sync caused issue in receiving
287 // StartupFinished signal. Unable to get StartupFinished signal
288 // from systemd1 hence using poll method too, to trigger it
289 // properly.
290 constexpr size_t pollTimeout = 10; // seconds
291 initTimer->expires_after(std::chrono::seconds(pollTimeout));
292 initTimer->async_wait([&server,
293 &conn](const boost::system::error_code& ec) {
294 if (ec == boost::asio::error::operation_aborted)
295 {
296 // Timer reset.
297 phosphor::logging::log<phosphor::logging::level::INFO>(
298 "PFR: Set boot Checkpoint - Timer aborted or stopped.");
299 return;
300 }
301 if (ec)
302 {
303 phosphor::logging::log<phosphor::logging::level::ERR>(
304 "PFR: Set boot Checkpoint - async wait error.");
305 return;
306 }
307 checkAndSetCheckpoint(server, conn);
308 });
AppaRao Puli46cead92019-07-22 16:50:09 +0530309 },
310 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
311 "org.freedesktop.DBus.Properties", "Get",
312 "org.freedesktop.systemd1.Manager", "FinishTimestamp");
313}
314
AppaRao Pulia9bf9712020-01-12 05:45:48 +0530315void monitorSignals(sdbusplus::asio::object_server& server,
316 std::shared_ptr<sdbusplus::asio::connection>& conn)
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530317{
AppaRao Puli46cead92019-07-22 16:50:09 +0530318 // Monitor Boot finished signal and set the checkpoint 9 to
319 // notify CPLD about BMC boot finish.
320 auto bootFinishedSignal = std::make_unique<sdbusplus::bus::match::match>(
321 static_cast<sdbusplus::bus::bus&>(*conn),
322 "type='signal',"
323 "member='StartupFinished',path='/org/freedesktop/systemd1',"
324 "interface='org.freedesktop.systemd1.Manager'",
325 [&server, &conn](sdbusplus::message::message& msg) {
326 if (!finishedSettingChkPoint)
327 {
AppaRao Pulib7e172c2019-12-13 14:46:25 +0530328 phosphor::logging::log<phosphor::logging::level::INFO>(
329 "PFR: BMC boot completed(StartupFinished). Setting "
330 "checkpoint 9.");
AppaRao Puli46cead92019-07-22 16:50:09 +0530331 finishedSettingChkPoint = true;
332 intel::pfr::setBMCBootCheckpoint(bmcBootFinishedChkPoint);
333 }
334 });
335 checkAndSetCheckpoint(server, conn);
336
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530337 // Capture the Chassis state and Start the monitor timer
338 // if state changed to 'On'. Run timer until OS boot.
339 // Stop timer if state changed to 'Off'.
340 static auto matchChassisState = sdbusplus::bus::match::match(
341 static_cast<sdbusplus::bus::bus&>(*conn),
342 "type='signal',member='PropertiesChanged', "
343 "interface='org.freedesktop.DBus.Properties', "
344 "sender='xyz.openbmc_project.State.Chassis', "
345 "arg0namespace='xyz.openbmc_project.State.Chassis'",
346 [&server, &conn](sdbusplus::message::message& message) {
347 std::string intfName;
348 std::map<std::string, std::variant<std::string>> properties;
349 message.read(intfName, properties);
350
351 const auto it = properties.find("CurrentPowerState");
352 if (it != properties.end())
353 {
354 const std::string* state =
355 std::get_if<std::string>(&it->second);
356 if (state != nullptr)
357 {
358 if ((*state ==
359 "xyz.openbmc_project.State.Chassis.PowerState.On") &&
360 (!stateTimerRunning))
361 {
362 stateTimerRunning = true;
363 monitorPlatformStateChange(server, conn);
364 }
365 else if ((*state == "xyz.openbmc_project.State.Chassis."
366 "PowerState.Off") &&
367 (stateTimerRunning))
368 {
369 stateTimer->cancel();
370 checkAndLogEvents();
371 stateTimerRunning = false;
372 }
373 }
AppaRao Pulie4e95652019-07-19 16:52:01 +0530374
375 // Update the D-Bus properties when chassis state changes.
376 updateDbusPropertiesCache();
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530377 }
378 });
379
380 // Capture the Host state and Start the monitor timer
381 // if state changed to 'Running'. Run timer until OS boot.
382 // Stop timer if state changed to 'Off'.
383 static auto matchHostState = sdbusplus::bus::match::match(
384 static_cast<sdbusplus::bus::bus&>(*conn),
385 "type='signal',member='PropertiesChanged', "
386 "interface='org.freedesktop.DBus.Properties', "
387 "sender='xyz.openbmc_project.State.Chassis', "
388 "arg0namespace='xyz.openbmc_project.State.Host'",
389 [&server, &conn](sdbusplus::message::message& message) {
390 std::string intfName;
391 std::map<std::string, std::variant<std::string>> properties;
392 message.read(intfName, properties);
393
394 const auto it = properties.find("CurrentHostState");
395 if (it != properties.end())
396 {
397 const std::string* state =
398 std::get_if<std::string>(&it->second);
399 if (state != nullptr)
400 {
401 if ((*state ==
402 "xyz.openbmc_project.State.Host.HostState.Running") &&
403 (!stateTimerRunning))
404 {
405 stateTimerRunning = true;
406 monitorPlatformStateChange(server, conn);
407 }
408 else if (((*state == "xyz.openbmc_project.State.Host."
409 "HostState.Off") ||
410 (*state == "xyz.openbmc_project.State.Host."
411 "HostState.Quiesced")) &&
412 (stateTimerRunning))
413 {
414 stateTimer->cancel();
415 checkAndLogEvents();
416 stateTimerRunning = false;
417 }
418 }
AppaRao Pulie4e95652019-07-19 16:52:01 +0530419
420 // Update the D-Bus properties when host state changes.
421 updateDbusPropertiesCache();
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530422 }
423 });
424
425 // Capture the OS state change and stop monitor timer
426 // if OS boots completly or becomes Inactive.
427 // start timer in other cases to mnitor states.
428 static auto matchOsState = sdbusplus::bus::match::match(
429 static_cast<sdbusplus::bus::bus&>(*conn),
430 "type='signal',member='PropertiesChanged', "
431 "interface='org.freedesktop.DBus.Properties', "
432 "sender='xyz.openbmc_project.State.Chassis', "
433 "arg0namespace='xyz.openbmc_project.State.OperatingSystem.Status'",
434 [&server, &conn](sdbusplus::message::message& message) {
435 std::string intfName;
436 std::map<std::string, std::variant<std::string>> properties;
437 message.read(intfName, properties);
438
439 const auto it = properties.find("OperatingSystemState");
440 if (it != properties.end())
441 {
442 const std::string* state =
443 std::get_if<std::string>(&it->second);
444 if (state != nullptr)
445 {
446 if (((*state == "BootComplete") ||
447 (*state == "Inactive")) &&
448 (stateTimerRunning))
449 {
450 stateTimer->cancel();
451 checkAndLogEvents();
452 stateTimerRunning = false;
453 }
454 else if (!stateTimerRunning)
455 {
456 stateTimerRunning = true;
457 monitorPlatformStateChange(server, conn);
458 }
459 }
460 }
461 });
462
463 // First time, check and log events if any.
464 checkAndLogEvents();
AppaRao Pulia9bf9712020-01-12 05:45:48 +0530465}
AppaRao Puli88aa33b2019-07-18 23:49:55 +0530466
AppaRao Pulia9bf9712020-01-12 05:45:48 +0530467int main()
468{
469 // setup connection to dbus
470 boost::asio::io_service io;
471 auto conn = std::make_shared<sdbusplus::asio::connection>(io);
472 stateTimer = std::make_unique<boost::asio::steady_timer>(io);
473 initTimer = std::make_unique<boost::asio::steady_timer>(io);
474 auto server = sdbusplus::asio::object_server(conn, true);
475 monitorSignals(server, conn);
476
477 auto rootInterface = server.add_interface("/xyz/openbmc_project/pfr", "");
478 rootInterface->initialize();
479 server.add_manager("/xyz/openbmc_project/pfr");
480
481 // Create PFR attributes object and interface
482 pfrConfigObject = std::make_unique<intel::pfr::PfrConfig>(server, conn);
483
484 // Create Software objects using Versions interface
485 for (const auto& entry : verComponentList)
486 {
487 pfrVersionObjects.emplace_back(std::make_unique<intel::pfr::PfrVersion>(
488 server, conn, std::get<0>(entry), std::get<1>(entry),
489 std::get<2>(entry)));
490 }
491
492 conn->request_name("xyz.openbmc_project.PFR.Manager");
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530493 phosphor::logging::log<phosphor::logging::level::INFO>(
494 "Intel PFR service started successfully");
AppaRao Pulie63eeda2019-07-05 16:25:38 +0530495 io.run();
496
497 return 0;
498}