Handle Exceptions and Uncorrectable Errors

We are seeing process crashes in the fleet, which we want to avoid.
Handle exceptions gracefully by reinitializing instead of relying on
systemd reinit (crash and restart).

Also, ensure that we are reading the Uncorrectable Error region first.

The logic should be:
```
1. BIOS_switch ^ BMC_switch == 0 → reserved region has nothing to read
  a. CONTINUE
2. BIOS_switch ^ BMC_switch == 1 → reserved region contains unread log
  a. Read the Uncorrectable Error log
  b. If corruption is detected (log is not parsable)
    i. Go through corruption handling flow (reinit)
  c. Else toggle the BMC Uncorrectable Error log flag
```

Tested: Added unit test

Signed-off-by: Brandon Kim <brandonkim@google.com>
Change-Id: I8212476be11ea7f13f68e42ff440c2bdd2fc8c2a
diff --git a/include/buffer.hpp b/include/buffer.hpp
index 37bcb9f..06e282f 100644
--- a/include/buffer.hpp
+++ b/include/buffer.hpp
@@ -22,10 +22,14 @@
 // EntryPair.second = Error entry in vector of bytes
 using EntryPair = std::pair<struct QueueEntryHeader, std::vector<uint8_t>>;
 
+enum class BufferFlags : uint32_t
+{
+    ueSwitch = 1 << 0,
+    overflow = 1 << 1,
+};
+
 enum class BmcFlags : uint32_t
 {
-    ueSwitch = 1,
-    overflow = 1 << 1,
     ready = 1 << 2,
 };
 
@@ -99,6 +103,10 @@
     virtual void initialize(uint32_t bmcInterfaceVersion, uint16_t queueSize,
                             uint16_t ueRegionSize,
                             const std::array<uint32_t, 4>& magicNumber) = 0;
+    /**
+     * Check for unread Uncorrectable Error (UE) logs and read them if present
+     */
+    virtual std::vector<uint8_t> readUeLogFromReservedRegion() = 0;
 
     /**
      * Read the buffer header from shared buffer
@@ -182,6 +190,7 @@
     void initialize(uint32_t bmcInterfaceVersion, uint16_t queueSize,
                     uint16_t ueRegionSize,
                     const std::array<uint32_t, 4>& magicNumber) override;
+    std::vector<uint8_t> readUeLogFromReservedRegion() override;
     void readBufferHeader() override;
     struct CircularBufferHeader getCachedBufferHeader() const override;
     void updateReadPtr(const uint32_t newReadPtr) override;
diff --git a/src/buffer.cpp b/src/buffer.cpp
index 20427cb..0254cac 100644
--- a/src/buffer.cpp
+++ b/src/buffer.cpp
@@ -6,6 +6,7 @@
 
 #include <boost/endian/arithmetic.hpp>
 #include <boost/endian/conversion.hpp>
+#include <stdplus/print.hpp>
 
 #include <algorithm>
 #include <array>
@@ -222,6 +223,52 @@
     return *reinterpret_cast<struct QueueEntryHeader*>(bytesRead.data());
 }
 
+std::vector<uint8_t> BufferImpl::readUeLogFromReservedRegion()
+{
+    // Ensure cachedBufferHeader is up-to-date
+    readBufferHeader();
+
+    uint16_t currentUeRegionSize =
+        boost::endian::little_to_native(cachedBufferHeader.ueRegionSize);
+    if (currentUeRegionSize == 0)
+    {
+        stdplus::print(stderr,
+                       "[readUeLogFromReservedRegion] UE Region size is 0\n");
+        return {};
+    }
+
+    uint32_t biosSideFlags =
+        boost::endian::little_to_native(cachedBufferHeader.biosFlags);
+    uint32_t bmcSideFlags =
+        boost::endian::little_to_native(cachedBufferHeader.bmcFlags);
+
+    // (BIOS_switch ^ BMC_switch) & BIT0 == BIT0 -> unread log
+    // This means if the ueSwitch bit differs, there's an unread log.
+    if (!((biosSideFlags ^ bmcSideFlags) &
+          static_cast<uint32_t>(BufferFlags::ueSwitch)))
+    {
+        return {};
+    }
+    // UE log should be present and unread by BMC, read from end of header
+    // (0x30) to the size of the UE region specified in the header.
+    size_t ueRegionOffset = sizeof(struct CircularBufferHeader);
+    std::vector<uint8_t> ueLogData =
+        dataInterface->read(ueRegionOffset, currentUeRegionSize);
+
+    if (ueLogData.size() == currentUeRegionSize)
+    {
+        return ueLogData;
+    }
+    stdplus::print(stderr,
+                   "[readUeLogFromReservedRegion] Failed to read "
+                   "full UE log. Expected {}, got {}\n",
+                   currentUeRegionSize, ueLogData.size());
+    // Throwing an exception allows main loop to handle re-init.
+    throw std::runtime_error(
+        std::format("Failed to read full UE log. Expected {}, got {}",
+                    currentUeRegionSize, ueLogData.size()));
+}
+
 EntryPair BufferImpl::readEntry()
 {
     struct QueueEntryHeader entryHeader = readEntryHeader();
diff --git a/src/main.cpp b/src/main.cpp
index 60fc8b5..4465185 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -46,20 +46,113 @@
         stdplus::print(stderr, "Async wait failed {}\n", error.message());
         return;
     }
-    std::vector<EntryPair> entryPairs = bufferInterface->readErrorLogs();
-    for (const auto& [entryHeader, entry] : entryPairs)
+
+    try
     {
-        rde::RdeDecodeStatus rdeDecodeStatus =
-            rdeCommandHandler->decodeRdeCommand(
-                entry,
-                static_cast<rde::RdeCommandType>(entryHeader.rdeCommandType));
-        if (rdeDecodeStatus == rde::RdeDecodeStatus::RdeStopFlagReceived)
+        std::vector<uint8_t> ueLog =
+            bufferInterface->readUeLogFromReservedRegion();
+        if (!ueLog.empty())
         {
+            stdplus::print(
+                stdout,
+                "UE log found in reserved region, attempting to process\n");
+
+            // UE log is BEJ encoded data, requiring RdeOperationInitRequest
+            rde::RdeDecodeStatus ueDecodeStatus =
+                rdeCommandHandler->decodeRdeCommand(
+                    ueLog, rde::RdeCommandType::RdeOperationInitRequest);
+
+            if (ueDecodeStatus != rde::RdeDecodeStatus::RdeOk &&
+                ueDecodeStatus != rde::RdeDecodeStatus::RdeStopFlagReceived)
+            {
+                throw std::runtime_error(std::format(
+                    "Corruption detected processing UE log from reserved region. RDE decode status: {}",
+                    static_cast<int>(ueDecodeStatus)));
+            }
+            stdplus::print(stdout, "UE log processed successfully.\n");
+            // Successfully processed. Toggle BMC's view of ueSwitch flag.
             auto bufferHeader = bufferInterface->getCachedBufferHeader();
-            auto newbmcFlags =
-                boost::endian::little_to_native(bufferHeader.bmcFlags) |
-                static_cast<uint32_t>(BmcFlags::ready);
-            bufferInterface->updateBmcFlags(newbmcFlags);
+            uint32_t bmcSideFlags =
+                boost::endian::little_to_native(bufferHeader.bmcFlags);
+            uint32_t newBmcFlags =
+                bmcSideFlags ^ static_cast<uint32_t>(BufferFlags::ueSwitch);
+            bufferInterface->updateBmcFlags(newBmcFlags);
+        }
+
+        std::vector<EntryPair> entryPairs = bufferInterface->readErrorLogs();
+        for (const auto& [entryHeader, entry] : entryPairs)
+        {
+            rde::RdeDecodeStatus rdeDecodeStatus =
+                rdeCommandHandler->decodeRdeCommand(
+                    entry, static_cast<rde::RdeCommandType>(
+                               entryHeader.rdeCommandType));
+            if (rdeDecodeStatus == rde::RdeDecodeStatus::RdeStopFlagReceived)
+            {
+                auto bufferHeader = bufferInterface->getCachedBufferHeader();
+                auto newbmcFlags =
+                    boost::endian::little_to_native(bufferHeader.bmcFlags) |
+                    static_cast<uint32_t>(BmcFlags::ready);
+                bufferInterface->updateBmcFlags(newbmcFlags);
+            }
+        }
+    }
+    catch (const std::exception& e)
+    {
+        stdplus::print(
+            stderr,
+            "Error during log processing (std::exception): {}. Attempting to reinitialize buffer.\n",
+            e.what());
+        try
+        {
+            bufferInterface->initialize(bmcInterfaceVersion, queueSize,
+                                        ueRegionSize, magicNumber);
+            stdplus::print(
+                stdout,
+                "Buffer reinitialized successfully after std::exception.\n");
+        }
+        catch (const std::exception& reinit_e)
+        {
+            stdplus::print(
+                stderr,
+                "CRITICAL: Failed to reinitialize buffer (std::exception): {}. Terminating read loop.\n",
+                reinit_e.what());
+            return;
+        }
+        catch (...)
+        {
+            stdplus::print(
+                stderr,
+                "CRITICAL: Failed to reinitialize buffer (unknown exception). Terminating read loop.\n");
+            return;
+        }
+    }
+    catch (...)
+    {
+        stdplus::print(
+            stderr,
+            "Unknown error during log processing. Attempting to reinitialize buffer.\n");
+        try
+        {
+            bufferInterface->initialize(bmcInterfaceVersion, queueSize,
+                                        ueRegionSize, magicNumber);
+            stdplus::print(
+                stdout,
+                "Buffer reinitialized successfully after unknown error.\n");
+        }
+        catch (const std::exception& reinit_e)
+        {
+            stdplus::print(
+                stderr,
+                "CRITICAL: Failed to reinitialize buffer (std::exception): {}. Terminating read loop.\n",
+                reinit_e.what());
+            return;
+        }
+        catch (...)
+        {
+            stdplus::print(
+                stderr,
+                "CRITICAL: Failed to reinitialize buffer (unknown exception). Terminating read loop.\n");
+            return;
         }
     }
 
diff --git a/test/buffer_test.cpp b/test/buffer_test.cpp
index 9207c2d..675be79 100644
--- a/test/buffer_test.cpp
+++ b/test/buffer_test.cpp
@@ -323,6 +323,103 @@
         std::runtime_error);
 }
 
+TEST_F(BufferTest, ReadUeLog_NoUeRegionConfigured)
+{
+    struct CircularBufferHeader header = testInitializationHeader;
+    header.ueRegionSize =
+        boost::endian::native_to_little<uint16_t>(0); // No UE region
+
+    uint8_t* headerPtr = reinterpret_cast<uint8_t*>(&header);
+    std::vector<uint8_t> headerBytes(headerPtr, headerPtr + bufferHeaderSize);
+    EXPECT_CALL(*dataInterfaceMockPtr, read(0, bufferHeaderSize))
+        .WillOnce(Return(headerBytes));
+
+    auto result = bufferImpl->readUeLogFromReservedRegion();
+    EXPECT_TRUE(result.empty());
+}
+
+TEST_F(BufferTest, ReadUeLog_NotPresentDueToFlags)
+{
+    struct CircularBufferHeader header = testInitializationHeader;
+    header.ueRegionSize = boost::endian::native_to_little<uint16_t>(0x20);
+    // Flags are the same, so no new UE log
+    header.biosFlags = boost::endian::native_to_little<uint32_t>(
+        static_cast<uint32_t>((BufferFlags::ueSwitch)));
+    header.bmcFlags = boost::endian::native_to_little<uint32_t>(
+        static_cast<uint32_t>(BufferFlags::ueSwitch));
+
+    uint8_t* headerPtr = reinterpret_cast<uint8_t*>(&header);
+    std::vector<uint8_t> headerBytes(headerPtr, headerPtr + bufferHeaderSize);
+    EXPECT_CALL(*dataInterfaceMockPtr, read(0, bufferHeaderSize))
+        .WillOnce(Return(headerBytes));
+
+    auto result = bufferImpl->readUeLogFromReservedRegion();
+    EXPECT_TRUE(result.empty());
+}
+
+TEST_F(BufferTest, ReadUeLog_PresentAndSuccessfullyRead)
+{
+    struct CircularBufferHeader header = testInitializationHeader;
+    uint16_t ueSize = 0x20;
+    header.ueRegionSize = boost::endian::native_to_little(ueSize);
+    header.biosFlags = boost::endian::native_to_little<uint32_t>(
+        static_cast<uint32_t>(BufferFlags::ueSwitch));
+    header.bmcFlags =
+        boost::endian::native_to_little<uint32_t>(0); // BIOS set, BMC not yet
+
+    uint8_t* headerPtr = reinterpret_cast<uint8_t*>(&header);
+    std::vector<uint8_t> headerBytes(headerPtr, headerPtr + bufferHeaderSize);
+    EXPECT_CALL(*dataInterfaceMockPtr, read(0, bufferHeaderSize))
+        .WillOnce(Return(headerBytes));
+
+    size_t ueRegionOffset = bufferHeaderSize;
+    std::vector<uint8_t> ueData(ueSize, 0xAA);
+    EXPECT_CALL(*dataInterfaceMockPtr, read(ueRegionOffset, ueSize))
+        .WillOnce(Return(ueData));
+
+    auto result = bufferImpl->readUeLogFromReservedRegion();
+    ASSERT_FALSE(result.empty());
+    EXPECT_THAT(result, ElementsAreArray(ueData));
+
+    // The initial bmcFlags (0) should remain unchanged in the cache
+    struct CircularBufferHeader cachedHeaderAfterRead =
+        bufferImpl->getCachedBufferHeader();
+    EXPECT_EQ(boost::endian::little_to_native(cachedHeaderAfterRead.bmcFlags),
+              0);
+}
+
+TEST_F(BufferTest, ReadUeLog_PresentButReadFails)
+{
+    struct CircularBufferHeader header = testInitializationHeader;
+    uint16_t ueSize = 0x20;
+    header.ueRegionSize = boost::endian::native_to_little(ueSize);
+    header.biosFlags = boost::endian::native_to_little<uint32_t>(
+        static_cast<uint32_t>(BufferFlags::ueSwitch));
+    header.bmcFlags = boost::endian::native_to_little<uint32_t>(0);
+
+    uint8_t* headerPtr = reinterpret_cast<uint8_t*>(&header);
+    std::vector<uint8_t> headerBytes(headerPtr, headerPtr + bufferHeaderSize);
+    EXPECT_CALL(*dataInterfaceMockPtr, read(0, bufferHeaderSize))
+        .WillOnce(Return(headerBytes));
+
+    size_t ueRegionOffset = bufferHeaderSize;
+    std::vector<uint8_t> shortUeData(ueSize - 1, 0xAA); // Short read
+    EXPECT_CALL(*dataInterfaceMockPtr, read(ueRegionOffset, ueSize))
+        .WillOnce(Return(shortUeData));
+
+    // Expect an exception due to short read, which is treated as corruption for
+    // UE log
+    EXPECT_THROW(
+        try {
+            bufferImpl->readUeLogFromReservedRegion();
+        } catch (const std::runtime_error& e) {
+            EXPECT_THAT(e.what(),
+                        ::testing::HasSubstr("Failed to read full UE log"));
+            throw;
+        },
+        std::runtime_error);
+}
+
 class BufferWraparoundReadTest : public BufferTest
 {
   protected: