meta-facebook: yosemite4: Power fault event
Summary:
The system shall register an event in the case of BIC and CPU power fault.
When INT_SMB_BMC_SLOT1_4_N or INT_SMB_BMC_SLOT5_8_N is asserted, BMC checks
which SLOT got the power fault by reading the MUX (i2c-bus 8, 0x70 for SLOT1-4,
and i2c-bus 9, 0x71 for SLOT5-8).
Then, BMC reads CPLD IOE for each failed SLOT to check affected power rails.
For BIC power fault, check address 0x24, port 1:
bit
[1] PWRGD_P1V2_STBY_FAULT
[0] PWRGD_P1V8_STBY_FAULT
For CPU power fault, check address 0x24, port 2:
bit
[4] PWRGD_PVDD11_S3_R_FAULT
[3] PWRGD_PVDDIO_FAULT
[2] PWRGD_PVDDCR_CPU1_FAULT
[1] PWRGD_PVDDCR_SOC_FAULT
[0] PWRGD_PVDDCR_CPU0_FAULT
Test Plan:
Trigger power fault by setting CPLD (0x0F) on server board
to pull INT_SMB_BMC_SLOT1_4_N or INT_SMB_BMC_SLOT5_8_N low.
Testing:
Take SLOT2 for example.
1. Check gpio status
root@bmc:~# cat /sys/kernel/debug/gpio | grep -i 1_4
gpio-612 (INT_SMB_BMC_SLOT1_4_|gpio_monitor ) in hi IRQ
2. Check (1 to 4)MUX status
root@bmc:~# i2ctransfer -f -y 8 w1@0x70 0 r1
0x00
3. Check SLOT2 CPLD IOE status
root@bmc:~# i2ctransfer -y -f 1 w1@0x24 0x01 r1
0x00
root@bmc:~# i2ctransfer -y -f 1 w1@0x24 0x02 r1
0x00
4. Trigger power fault by setting CPLD (0x0F) on server board
root@bmc:~# i2ctransfer -y -f 1 w2@0xf 0x3f 0x1
5. Check event log
root@bmc:~# mfg-tool log-display
...
},
"2": {
"additional_data": [
"FAULT=PWRGD_P1V8_STBY_FAULT"
],
"event_id": "",
"message": "SLOT2 AC Power Fault",
"resolution": "",
"resolved": false,
"severity": "xyz.openbmc_project.Logging.Entry.Level.Error",
"timestamp": "2024-07-18T08:37:13.179000000Z",
"updated_timestamp": "2024-07-18T08:37:13.179000000Z"
},
"3": {
"additional_data": [
"FAULT=PWRGD_PVDDCR_CPU0_FAULT"
],
"event_id": "",
"message": "SLOT2 DC Power Fault",
"resolution": "",
"resolved": false,
"severity": "xyz.openbmc_project.Logging.Entry.Level.Error",
"timestamp": "2024-07-18T08:37:13.231000000Z",
"updated_timestamp": "2024-07-18T08:37:13.231000000Z"
}
6. Check gpio status
root@bmc:~# cat /sys/kernel/debug/gpio | grep -i 1_4
gpio-612 (INT_SMB_BMC_SLOT1_4_|gpio_monitor ) in lo IRQ
7. Check (1 to 4)MUX status
root@bmc:~# i2ctransfer -f -y 8 w1@0x70 0 r1
0x20
8. Check SLOT2 CPLD IOE status
root@bmc:~# i2ctransfer -y -f 1 w1@0x24 0x01 r1
0x01
root@bmc:~# i2ctransfer -y -f 1 w1@0x24 0x02 r1
0x01
Change-Id: I67b93e9a55c949af54aee4ca90dc998448bdfdcc
Signed-off-by: Yikai Tsai <yikai.tsai.wiwynn@gmail.com>
diff --git a/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/slot-power-fault b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/slot-power-fault
new file mode 100644
index 0000000..73ab5be
--- /dev/null
+++ b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/slot-power-fault
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+log_message() {
+ local slot_num=$(($1+1))
+ local MESSAGE="SLOT$slot_num $2 Power Fault"
+ local COMBINED_INFO="$3"
+
+ busctl call \
+ xyz.openbmc_project.Logging /xyz/openbmc_project/logging \
+ xyz.openbmc_project.Logging.Create Create "ssa{ss}" "$MESSAGE" \
+ "xyz.openbmc_project.Logging.Entry.Level.Error" 1 "FAULT" "$COMBINED_INFO"
+}
+
+
+
+# if $1 == 14, slot1 to slot4. Check mux0 at i2c-8 0x70.
+# if $1 == 58, slot5 to slot8. Check mux1 at i2c-9 0x71.
+
+if [ "$1" -eq 14 ]; then
+ i2c_bus=8
+ mux_addr=0x70
+ slot_num_offset=0
+elif [ "$1" -eq 58 ]; then
+ i2c_bus=9
+ mux_addr=0x71
+ slot_num_offset=4
+else
+ echo "Invalid input. Please provide 14 or 58."
+ exit 1
+fi
+
+
+# i2ctranster to check which slot got interrupt
+read -r INT_BYTE <<< "$(i2ctransfer -f -y $i2c_bus w1@$mux_addr 0 r1)"
+INT_BYTE=$((INT_BYTE >> 4))
+
+# Traverse INT_BYTE
+count=0
+SLOT_RANGE=4
+while [ $count -lt $SLOT_RANGE ]
+do
+ fault=$(( (INT_BYTE >> count) & 1 ))
+ # if fault[i] == 1, means slot_x got ISR
+ if [ $fault -eq 1 ]; then
+ slot_num=$((count + slot_num_offset))
+
+ # i2ctranster to get CPLD IOE (0x24) addr 0x01 (port1, ac)
+ read -r FAULT_BYTE <<< "$(i2ctransfer -f -y $slot_num w1@0x24 0x01 r1)"
+
+ # check which pin fault, add to fault info
+ if (( (FAULT_BYTE >> 0) & 1 == 1 )); then
+ [ -n "$COMBINED_INFO" ] && COMBINED_INFO="${COMBINED_INFO},PWRGD_P1V8_STBY_FAULT" || COMBINED_INFO="PWRGD_P1V8_STBY_FAULT"
+ elif (( (FAULT_BYTE >> 1) & 1 == 1 )); then
+ [ -n "$COMBINED_INFO" ] && COMBINED_INFO="${COMBINED_INFO},PWRGD_P1V2_STBY_FAULT" || COMBINED_INFO="PWRGD_P1V2_STBY_FAULT"
+ fi
+
+ # call logging function
+ if [ -n "$COMBINED_INFO" ]; then
+ log_message $slot_num "AC" "$COMBINED_INFO"
+ fi
+
+ # clean
+ COMBINED_INFO=""
+ # i2ctranster to get CPLD IOE (0x24) addr 0x02 (port2, dc)
+ read -r FAULT_BYTE <<< "$(i2ctransfer -f -y $slot_num w1@0x24 0x02 r1)"
+
+ # check which pin fault, add to fault info
+ if (( (FAULT_BYTE >> 0) & 1 == 1 )); then
+ [ -n "$COMBINED_INFO" ] && COMBINED_INFO="${COMBINED_INFO},PWRGD_PVDDCR_CPU0_FAULT" || COMBINED_INFO="PWRGD_PVDDCR_CPU0_FAULT"
+ elif (( (FAULT_BYTE >> 1) & 1 == 1 )); then
+ [ -n "$COMBINED_INFO" ] && COMBINED_INFO="${COMBINED_INFO},PWRGD_PVDDCR_SOC_FAULT" || COMBINED_INFO="PWRGD_PVDDCR_SOC_FAULT"
+ elif (( (FAULT_BYTE >> 2) & 1 == 1 )); then
+ [ -n "$COMBINED_INFO" ] && COMBINED_INFO="${COMBINED_INFO},PWRGD_PVDDCR_CPU1_FAULT" || COMBINED_INFO="PWRGD_PVDDCR_CPU1_FAULT"
+ elif (( (FAULT_BYTE >> 3) & 1 == 1 )); then
+ [ -n "$COMBINED_INFO" ] && COMBINED_INFO="${COMBINED_INFO},PWRGD_PVDDIO_FAULT" || COMBINED_INFO="PWRGD_PVDDIO_FAULT"
+ elif (( (FAULT_BYTE >> 4) & 1 == 1 )); then
+ [ -n "$COMBINED_INFO" ] && COMBINED_INFO="${COMBINED_INFO},PWRGD_PVDD11_S3_R_FAULT" || COMBINED_INFO="PWRGD_PVDD11_S3_R_FAULT"
+ fi
+
+ # call logging function
+ if [ -n "$COMBINED_INFO" ]; then
+ log_message $slot_num "DC" $COMBINED_INFO
+ fi
+ fi
+
+ count=$((count + 1))
+done
diff --git a/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/slot-power-fault@.service b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/slot-power-fault@.service
new file mode 100644
index 0000000..f81ce05
--- /dev/null
+++ b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/slot-power-fault@.service
@@ -0,0 +1,7 @@
+[Unit]
+Description=slot power-fault:%i
+
+[Service]
+Type=oneshot
+ExecStart=/usr/libexec/phosphor-gpio-monitor/slot-power-fault %i
+SyslogIdentifier=slot-power-fault%i
diff --git a/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/yosemite4-phosphor-multi-gpio-monitor.json b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/yosemite4-phosphor-multi-gpio-monitor.json
index bf625a6..b283316 100644
--- a/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/yosemite4-phosphor-multi-gpio-monitor.json
+++ b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/yosemite4-phosphor-multi-gpio-monitor.json
@@ -313,5 +313,27 @@
"slot-hsc-fault@8.service"]
},
"Continue": true
+ },
+ {
+ "Name": "INT_SMB_BMC_SLOT1_4_BMC_N",
+ "ChipId": "0",
+ "GpioNum": 100,
+ "EventMon": "FALLING",
+ "Targets": {
+ "FALLING": [
+ "slot-power-fault@14.service"]
+ },
+ "Continue": true
+ },
+ {
+ "Name": "INT_SMB_BMC_SLOT5_8_BMC_N",
+ "ChipId": "0",
+ "GpioNum": 137,
+ "EventMon": "FALLING",
+ "Targets": {
+ "FALLING": [
+ "slot-power-fault@58.service"]
+ },
+ "Continue": true
}
]
diff --git a/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor_%.bbappend b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor_%.bbappend
index 6ca4f74..91bfebc 100644
--- a/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor_%.bbappend
+++ b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor_%.bbappend
@@ -18,6 +18,8 @@
file://rescan-wf-bic@.service \
file://slot-hsc-fault \
file://slot-hsc-fault@.service \
+ file://slot-power-fault \
+ file://slot-power-fault@.service \
"
RDEPENDS:${PN}:append = " bash"
@@ -33,6 +35,7 @@
remove-nic-endpoint-slot@.service \
rescan-wf-bic@.service \
slot-hsc-fault@.service \
+ slot-power-fault@.service \
reconfig-net-interface@.service \
"
@@ -51,12 +54,14 @@
install -m 0644 ${WORKDIR}/remove-nic-endpoint-slot@.service ${D}${systemd_system_unitdir}/
install -m 0644 ${WORKDIR}/rescan-wf-bic@.service ${D}${systemd_system_unitdir}/
install -m 0644 ${WORKDIR}/slot-hsc-fault@.service ${D}${systemd_system_unitdir}/
+ install -m 0644 ${WORKDIR}/slot-power-fault@.service ${D}${systemd_system_unitdir}/
install -d ${D}${libexecdir}/${PN}
install -m 0755 ${WORKDIR}/probe-slot-device ${D}${libexecdir}/${PN}/
install -m 0755 ${WORKDIR}/reconfig-net-interface ${D}${libexecdir}/${PN}/
install -m 0755 ${WORKDIR}/rescan-fru-device ${D}${libexecdir}/${PN}/
install -m 0755 ${WORKDIR}/rescan-wf-bic ${D}${libexecdir}/${PN}/
install -m 0755 ${WORKDIR}/slot-hsc-fault ${D}${libexecdir}/${PN}/
+ install -m 0755 ${WORKDIR}/slot-power-fault ${D}${libexecdir}/${PN}/
install -d ${D}/${bindir}
install -m 0755 ${WORKDIR}/configure-nic-mctp-endpoint.sh ${D}/${bindir}/
}