psutils: Enhance PSU Update retry logic
- Update retry logic to align with IBM chart specifications.
- Add firmware file existence check before starting PSU update process.
- Switched log to lg2 in the updater for consistency.
- Fixed device intermittent issue during binding device.
- Modified the update flow to proceed even if the device driver is not
available in sysfs.
These changes improve the robustness of the PSU update process and
enhance logging for better debug isolation.
Test:
- Verified the updater does not start if the firmware file is missing.
- Checked some of the logs correctly reflect the changes using lg2
- Confirmed the PSU binds every time after FW update.
- Test PSU update process continue when the device driver not
available in sysfs.
Change-Id: Iaff5eaf9b9f3ee02d109cf3218f9b0614fa2bd92
Signed-off-by: Faisal Awada <faisal@us.ibm.com>
diff --git a/tools/power-utils/aei_updater.cpp b/tools/power-utils/aei_updater.cpp
index ffe2d1d..27da440 100644
--- a/tools/power-utils/aei_updater.cpp
+++ b/tools/power-utils/aei_updater.cpp
@@ -76,15 +76,26 @@
{
throw std::runtime_error("I2C interface error");
}
+ if (!getFirmwarePath() || !isFirmwareFileValid())
+ {
+ return 1; // No firmware file abort download
+ }
bool downloadFwFailed = false; // Download Firmware status
int retryProcessTwo(0);
int retryProcessOne(0);
+ int serviceableEvent(0);
while ((retryProcessTwo < MAX_RETRIES) && (retryProcessOne < MAX_RETRIES))
{
retryProcessTwo++;
// Write AEI PSU ISP key
if (!writeIspKey())
{
+ serviceableEvent++;
+ if (serviceableEvent == MAX_RETRIES)
+ {
+ createServiceableError(
+ "createServiceableError: ISP key failed");
+ }
lg2::error("Failed to set ISP Key");
downloadFwFailed = true; // Download Firmware status
continue;
@@ -117,6 +128,13 @@
}
else
{
+ if (retryProcessOne == MAX_RETRIES)
+ {
+ // no more retries report serviceable error
+ createServiceableError(
+ "serviceableError: Download firmware failed");
+ ispReboot(); // Try to set PSU to normal mode
+ }
downloadFwFailed = true;
continue;
}
@@ -201,6 +219,7 @@
e);
}
}
+ createServiceableError("createServiceableError: ISP Status failed");
lg2::error("Failed to set ISP Mode");
return false; // Failed to set ISP Mode after retries
}
@@ -246,34 +265,34 @@
"ERROR", e);
}
}
+ createServiceableError("createServiceableError: Failed to reset ISP");
lg2::error("Failed to reset ISP Status");
ispReboot();
return false;
}
-std::string AeiUpdater::getFirmwarePath()
+bool AeiUpdater::getFirmwarePath()
{
- const std::string fspath =
- updater::internal::getFWFilenamePath(getImageDir());
+ fspath = updater::internal::getFWFilenamePath(getImageDir());
if (fspath.empty())
{
lg2::error("Firmware file path not found");
- }
- return fspath;
-}
-
-bool AeiUpdater::isFirmwareFileValid(const std::string& fspath)
-{
- if (!updater::internal::validateFWFile(fspath))
- {
- lg2::error("Firmware validation failed");
return false;
}
return true;
}
-std::unique_ptr<std::ifstream> AeiUpdater::openFirmwareFile(
- const std::string& fspath)
+bool AeiUpdater::isFirmwareFileValid()
+{
+ if (!updater::internal::validateFWFile(fspath))
+ {
+ lg2::error("Firmware validation failed, fspath={PATH}", "PATH", fspath);
+ return false;
+ }
+ return true;
+}
+
+std::unique_ptr<std::ifstream> AeiUpdater::openFirmwareFile()
{
auto inputFile = updater::internal::openFirmwareFile(fspath);
if (!inputFile)
@@ -313,22 +332,8 @@
bool AeiUpdater::downloadPsuFirmware()
{
- // Get firmware path
- const std::string fspath = getFirmwarePath();
- if (fspath.empty())
- {
- lg2::error("Unable to find path");
- return false;
- }
- // Validate firmware file
- if (!isFirmwareFileValid(fspath))
- {
- lg2::error("Invalid file path");
- return false;
- }
-
// Open firmware file
- auto inputFile = openFirmwareFile(fspath);
+ auto inputFile = openFirmwareFile();
if (!inputFile)
{
lg2::error("Unable to open firmware file {FILE}", "FILE", fspath);
@@ -386,7 +391,13 @@
try
{
performI2cWriteRead(regAddr, readReplySize, readData, delayTime);
- if ((readData[STATUS_CML_INDEX] == 0) &&
+ if ((readData[STATUS_CML_INDEX] == 0 ||
+ // The first firmware data packet sent to the PSU have a
+ // response of 0x80 which indicates firmware update in
+ // progress. If retry to send the first packet again reply will
+ // be 0.
+ (readData[STATUS_CML_INDEX] == 0x80 &&
+ delayTime == MEM_WRITE_DELAY)) &&
(readReplySize == expectedReadSize) &&
!std::equal(readData, readData + 4, byteSwappedIndex.begin()))
{
@@ -395,7 +406,15 @@
}
else
{
- lg2::error("I2C write/read block failed");
+ uint32_t littleEndianValue =
+ *reinterpret_cast<uint32_t*>(readData);
+ uint32_t bigEndianValue =
+ ((littleEndianValue & 0x000000FF) << 24) |
+ ((littleEndianValue & 0x0000FF00) << 8) |
+ ((littleEndianValue & 0x00FF0000) >> 8) |
+ ((littleEndianValue & 0xFF000000) >> 24);
+ lg2::error("Write/read block {NUM} failed", "NUM",
+ bigEndianValue);
}
}
catch (const std::exception& e)
diff --git a/tools/power-utils/aei_updater.hpp b/tools/power-utils/aei_updater.hpp
index 99127fd..6766077 100644
--- a/tools/power-utils/aei_updater.hpp
+++ b/tools/power-utils/aei_updater.hpp
@@ -82,26 +82,24 @@
/**
* @brief Retrieves the path to the firmware file.
*
- * @return The file path of the firmware.
+ * @return True if successful, false otherwise.
*/
- std::string getFirmwarePath();
+ bool getFirmwarePath();
/**
* @brief Validates the firmware file.
*
- * @param fspath The file path to validate.
* @return True if the file is valid, false otherwise.
*/
- bool isFirmwareFileValid(const std::string& fspath);
+ bool isFirmwareFileValid();
/**
* @brief Opens a firmware file in binary mode.
*
- * @param fspath The path to the firmware file.
* @return A file stream to read the firmware
* file.
*/
- std::unique_ptr<std::ifstream> openFirmwareFile(const std::string& fspath);
+ std::unique_ptr<std::ifstream> openFirmwareFile();
/**
* @brief Reads a block of firmware data from the file.
@@ -184,6 +182,16 @@
bool ispReadRebootStatus();
/**
+ * @brief Create serviceable error
+ *
+ * Place holder for future enhancement
+ */
+ void createServiceableError(const std::string str)
+ {
+ std::cout << "createServiceableError: " << str << "\n";
+ }
+
+ /**
* @brief Pointer to the I2C interface for communication
*
* This pointer is not owned by the class. The caller is responsible for
@@ -201,5 +209,10 @@
* @brief Command block used for writing data to the device
*/
std::vector<uint8_t> cmdBlockWrite;
+
+ /**
+ * @brief The firmware file path
+ */
+ std::string fspath;
};
} // namespace aeiUpdater
diff --git a/tools/power-utils/main.cpp b/tools/power-utils/main.cpp
index 236f0d6..5adbb1b 100644
--- a/tools/power-utils/main.cpp
+++ b/tools/power-utils/main.cpp
@@ -15,10 +15,11 @@
*/
#include "model.hpp"
#include "updater.hpp"
+#include "utility.hpp"
#include "version.hpp"
#include <CLI/CLI.hpp>
-#include <phosphor-logging/log.hpp>
+#include <phosphor-logging/lg2.hpp>
#include <sdbusplus/bus.hpp>
#include <cassert>
@@ -71,6 +72,13 @@
if (updater::update(bus, updateArguments[0], updateArguments[1]))
{
ret = "Update successful";
+ lg2::info("Successful update to PSU: {PSU}", "PSU",
+ updateArguments[0]);
+ }
+ else
+ {
+ lg2::error("Failed to update PSU: {PSU}", "PSU",
+ updateArguments[0]);
}
}
diff --git a/tools/power-utils/updater.cpp b/tools/power-utils/updater.cpp
index 1bf729c..7da839f 100644
--- a/tools/power-utils/updater.cpp
+++ b/tools/power-utils/updater.cpp
@@ -21,7 +21,7 @@
#include "utility.hpp"
#include "utils.hpp"
-#include <phosphor-logging/log.hpp>
+#include <phosphor-logging/lg2.hpp>
#include <chrono>
#include <fstream>
@@ -118,8 +118,7 @@
// Ensure the file exists and get the file size.
if (!std::filesystem::exists(fileName))
{
- log<level::ERR>(
- std::format("Firmware file not found: {}", fileName).c_str());
+ lg2::error("Firmware file not found: {FILE}", "FILE", fileName);
return false;
}
@@ -127,7 +126,7 @@
auto fileSize = std::filesystem::file_size(fileName);
if (fileSize == 0)
{
- log<level::ERR>("Firmware file is empty");
+ lg2::error("Firmware {FILE} is empty", "FILE", fileName);
return false;
}
return true;
@@ -138,15 +137,14 @@
{
if (fileName.empty())
{
- log<level::ERR>("Firmware file path is not provided");
+ lg2::error("Firmware file path is not provided");
return nullptr;
}
auto inputFile =
std::make_unique<std::ifstream>(fileName, std::ios::binary);
if (!inputFile->is_open())
{
- log<level::ERR>(
- std::format("Failed to open firmware file: {}", fileName).c_str());
+ lg2::error("Failed to open firmware file: {FILE}", "FILE", fileName);
return nullptr;
}
return inputFile;
@@ -171,8 +169,7 @@
}
catch (const std::ios_base::failure& e)
{
- log<level::ERR>(
- std::format("Error reading firmware: {}", e.what()).c_str());
+ lg2::error("Error reading firmware: {ERROR}", "ERROR", e.what());
readDataBytes.clear();
}
return readDataBytes;
@@ -197,8 +194,8 @@
if (!updaterPtr->isReadyToUpdate())
{
- log<level::ERR>("PSU not ready to update",
- entry("PSU=%s", psuInventoryPath.c_str()));
+ lg2::error("PSU not ready to update PSU = {PATH}", "PATH",
+ psuInventoryPath);
return false;
}
@@ -222,10 +219,8 @@
}
catch (const fs::filesystem_error& e)
{
- log<level::ERR>("Failed to get canonical path",
- entry("DEVPATH=%s", devPath.c_str()),
- entry("ERROR=%s", e.what()));
- throw;
+ lg2::error("Failed to get canonical path DEVPATH= {PATH}, ERROR= {ERR}",
+ "PATH", devPath, "ERR", e.what());
}
}
@@ -245,6 +240,11 @@
p /= doBind ? "bind" : "unbind";
std::ofstream out(p.string());
out << devName;
+ if (doBind)
+ {
+ internal::delay(500);
+ }
+ out.close();
if (doBind)
{
@@ -263,9 +263,9 @@
}
catch (const std::exception& e)
{
- log<level::ERR>("Failed to set present property",
- entry("PSU=%s", psuInventoryPath.c_str()),
- entry("PRESENT=%d", present));
+ lg2::error(
+ "Failed to set present property PSU= {PATH}, PRESENT= {PRESENT}",
+ "PATH", psuInventoryPath, "PRESENT", present);
}
}
@@ -281,7 +281,7 @@
if (util::isPoweredOn(bus, true))
{
- log<level::WARNING>("Unable to update PSU when host is on");
+ lg2::warning("Unable to update PSU when host is on");
return false;
}
@@ -305,12 +305,11 @@
}
catch (const std::exception& e)
{
- log<level::ERR>("Failed to get present property",
- entry("PSU=%s", p.c_str()));
+ lg2::error("Failed to get present property PSU={PSU}", "PSU", p);
}
if (!present)
{
- log<level::WARNING>("PSU not present", entry("PSU=%s", p.c_str()));
+ lg2::warning("PSU not present PSU={PSU}", "PSU", p);
continue;
}
hasOtherPresent = true;
@@ -333,11 +332,10 @@
(voutStatus & status_vout::UV_FAULT) ||
(voutStatus & status_vout::OV_FAULT))
{
- log<level::WARNING>(
- "Unable to update PSU when other PSU has input/ouput fault",
- entry("PSU=%s", p.c_str()),
- entry("STATUS_WORD=0x%04x", statusWord),
- entry("VOUT_BYTE=0x%02x", voutStatus));
+ lg2::warning(
+ "Unable to update PSU when other PSU has input/ouput fault PSU={PSU}, STATUS_WORD={STATUS}, VOUT_BYTE={VOUT}",
+ "PSU", p, "STATUS", lg2::hex, statusWord, "VOUT", lg2::hex,
+ voutStatus);
return false;
}
}
@@ -345,7 +343,7 @@
{
// If error occurs on accessing the debugfs, it means something went
// wrong, e.g. PSU is not present, and it's not ready to update.
- log<level::ERR>(ex.what());
+ lg2::error("{EX}", "EX", ex.what());
return false;
}
}