Recover host devtree file if corrupted
This commit addresses the issue where device tree file corruption could
lead to the failure of the BMC to boot up. Corruption of the file may
occur during a kernel panic. In such instances, the file is now restored
from the available read-only copy when its size differs from that of the
read-only copy.
A PEL is logged to record the recovery, and a BMC dump is generated.
Tested and working as expected.
```
peltool -i 0x50001C86
{
"Private Header": {
"Section Version": "1",
"Sub-section type": "0",
"Created by": "bmc code management",
"Created at": "04/22/2024 08:37:41",
"Committed at": "04/22/2024 08:37:41",
"Creator Subsystem": "BMC",
"CSSVER": "",
"Platform Log Id": "0x50001C86",
"Entry Id": "0x50001C86",
"BMC Event Log Id": "127"
},
"User Header": {
"Section Version": "1",
"Sub-section type": "0",
"Log Committed by": "bmc error logging",
"Subsystem": "BMC Firmware",
"Event Scope": "Entire Platform",
"Event Severity": "Unrecoverable Error",
"Event Type": "Not Applicable",
"Action Flags": [
"Service Action Required",
"Report Externally",
"HMC Call Home"
],
"Host Transmission": "Not Sent",
"HMC Transmission": "Acked"
},
"Primary SRC": {
"Section Version": "1",
"Sub-section type": "1",
"Created by": "bmc code management",
"SRC Version": "0x02",
"SRC Format": "0x55",
"Virtual Progress SRC": "False",
"I5/OS Service Event Bit": "False",
"Hypervisor Dump Initiated":"False",
"Backplane CCIN": "2E2D",
"Terminate FW Error": "False",
"Deconfigured": "False",
"Guarded": "False",
"Error Details": {
"Message": "Partition was not preserved
on reboot",
"CURRENT_FILE_SIZE": [
"0x100000",
"Size of the current running
partition"
],
"EXPECTED_FILE_SIZE": [
"0x65000",
"Size of the read only partition"
]
},
"Valid Word Count": "0x09",
"Reference Code": "BD8D360A",
"Hex Word 2": "00080055",
"Hex Word 3": "2E2D0010",
"Hex Word 4": "00000000",
"Hex Word 5": "00000000",
"Hex Word 6": "00100000",
"Hex Word 7": "00065000",
"Hex Word 8": "00000000",
"Hex Word 9": "00000000",
"Callout Section": {
"Callout Count": "1",
"Callouts": [{
"FRU Type": "Maintenance Procedure Required",
"Priority": "Mandatory, replace all with this
type as a unit",
"Procedure": "BMC0001"
}]
}
},
"Extended User Header": {
"Section Version": "1",
"Sub-section type": "0",
"Created by": "bmc error logging",
"Reporting Machine Type": "9105-22B",
"Reporting Serial Number": "139F210",
"FW Released Ver": "",
"FW SubSys Version": "fw1060.00-7",
"Common Ref Time": "00/00/0000 00:00:00",
"Symptom Id Len": "20",
"Symptom Id": "BD8D360A_2E2D0010"
},
"Failing MTMS": {
"Section Version": "1",
"Sub-section type": "0",
"Created by": "bmc error logging",
"Machine Type Model": "9105-22B",
"Serial Number": "139F210"
},
"User Data 0": {
"Section Version": "1",
"Sub-section type": "1",
"Created by": "bmc error logging",
"BMCLoad": "1.35 1.02 0.76",
"BMCState": "Ready",
"BMCUptime": "0y 2d 13h 38m 12s",
"BootState": "Unspecified",
"ChassisState": "Off",
"FW Version ID": "fw1060.00-7-2-g4d0fcde41f",
"HostState": "Off",
"System IM": "50001001"
},
"User Data 1": {
"Section Version": "1",
"Sub-section type": "1",
"Created by": "bmc error logging",
"CURRENT_FILE_SIZE": "1048576",
"EXPECTED_FILE_SIZE": "413696",
"FILE_NAME": "81e00672.lid"
}
}
```
Change-Id: I2ce9c7a471f6e1ba43ac5b061a62e27cfd61da23
Signed-off-by: deepakala-k <deepakala.karthikeyan@ibm.com>
diff --git a/meson.build b/meson.build
index c8590f7..17232b1 100644
--- a/meson.build
+++ b/meson.build
@@ -122,11 +122,13 @@
]
extra_scripts += [
'mmc/obmc-flash-bios',
+ 'mmc/recover_pnor_files',
]
extra_unit_files += [
'mmc/obmc-flash-bios-init.service',
'mmc/obmc-flash-bios-patch.service',
'mmc/openpower-bios-factory-reset.service',
+ 'mmc/obmc-recover-pnor.service',
]
endif
diff --git a/mmc/obmc-recover-pnor.service b/mmc/obmc-recover-pnor.service
new file mode 100644
index 0000000..fea7f32
--- /dev/null
+++ b/mmc/obmc-recover-pnor.service
@@ -0,0 +1,17 @@
+# One-shot unit that runs recover_pnor_files once; RemainAfterExit keeps it
+# reported as active after the script exits.
+[Unit]
+Description=Recover Host0 PHAL devtree and lid files
+# This unit is not a template, so "%i" would expand to an empty string.
+# The host instance is spelled out as 0, as in the Before= line below.
+After=obmc-host-reset-running@0.target
+After=openpower-update-bios-attr-table.service
+Before=phal-import-devtree@0.service
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+ExecStart=/usr/bin/recover_pnor_files
+
+[Install]
+WantedBy=multi-user.target
diff --git a/mmc/recover_pnor_files b/mmc/recover_pnor_files
new file mode 100644
index 0000000..20a2fe5
--- /dev/null
+++ b/mmc/recover_pnor_files
@@ -0,0 +1,70 @@
+#!/bin/sh
+#
+# Restore host firmware files in the running directory from the read-only
+# copy when their sizes no longer match.
+#
+# 81e00994.lid in the read-only directory is searched for DEVTREE and
+# PRESERVED entries. Every entry that is a symlink in the running directory
+# is resolved to its target, and the target's size is compared with the file
+# of the same name in the read-only directory. On a mismatch the read-only
+# file is copied over the running one and a HostFile error is logged. When
+# at least one mismatch was found, a single BMC dump is requested at the end.
+
+base_dir="/var/lib/phosphor-software-manager/hostfw"
+ro_dir="/media/hostfw/running-ro"
+running_dir="${base_dir}/running"
+toc_file="${ro_dir}/81e00994.lid"
+file_recovered=0
+
+if [ -f "${toc_file}" ]; then
+    # Pick the DEVTREE and PRESERVED entries. Two -e patterns are used rather
+    # than relying on the '\|' extension to basic regular expressions.
+    filesList=$(grep -e 'PRESERVED' -e 'DEVTREE' "${toc_file}")
+    # Unquoted on purpose: iterate over each whitespace-separated entry.
+    for eachFile in ${filesList}; do
+        # Drop the leading "partition...=" and keep the text before the first
+        # comma.
+        eachFile=${eachFile##partition*=}
+        eachFile=${eachFile%%,*}
+        # Only entries that are symbolic links in the running directory are
+        # handled.
+        [ -L "${running_dir}/${eachFile}" ] || continue
+        # Replace the entry with the symlink target. The target is then used
+        # as a name under both directories (assumes a bare file name and not
+        # a path -- TODO confirm).
+        eachFile="$(readlink "${running_dir}/${eachFile}")"
+        [ -f "${running_dir}/${eachFile}" ] || continue
+        [ -f "${ro_dir}/${eachFile}" ] || continue
+        runsize="$(stat -c '%s' "${running_dir}/${eachFile}")"
+        rosize="$(stat -c '%s' "${ro_dir}/${eachFile}")"
+        # Equal sizes: nothing to do. A difference means the partition size
+        # changed or the running file became corrupted.
+        [ "${runsize}" != "${rosize}" ] || continue
+
+        # Restore from the read-only copy. A failed copy is reported but
+        # does not stop the error log or the dump, which record the mismatch.
+        if ! cp -p "${ro_dir}/${eachFile}" "${running_dir}/${eachFile}"; then
+            printf 'recover_pnor_files: failed to restore %s\n' \
+                "${running_dir}/${eachFile}" >&2
+        fi
+
+        # Log a PEL carrying the file name and both sizes.
+        busctl call xyz.openbmc_project.Logging \
+            /xyz/openbmc_project/logging \
+            xyz.openbmc_project.Logging.Create Create "ssa{ss}" \
+            xyz.openbmc_project.Software.Version.Error.HostFile \
+            xyz.openbmc_project.Logging.Entry.Level.Error 3 \
+            "FILE_NAME" "${eachFile}" \
+            "CURRENT_FILE_SIZE" "${runsize}" \
+            "EXPECTED_FILE_SIZE" "${rosize}"
+
+        file_recovered=1
+    done
+    # Several files may have been restored, so request the dump once, after
+    # the loop.
+    if [ "${file_recovered}" -eq 1 ]; then
+        busctl call xyz.openbmc_project.Dump.Manager \
+            /xyz/openbmc_project/dump/bmc xyz.openbmc_project.Dump.Create \
+            CreateDump "a{sv}" 0
+    fi
+fi