meta-facebook: yosemite4: Power fault event

Summary:
The system shall register an event in the case of BIC and CPU power fault.
BMC is triggered by INT_SMB_BMC_SLOT1_4_N or INT_SMB_BMC_SLOT5_8_N and checks
which SLOT got a power fault by reading the MUX (i2c-bus 8, 0x70 for SLOT1-4,
and i2c-bus 9, 0x71 for SLOT5-8).
Then, BMC reads CPLD IOE for each failed SLOT to check affected power rails.

For BIC power fault, check address 0x24, port 1:

bit
[1]	PWRGD_P1V2_STBY_FAULT
[0]	PWRGD_P1V8_STBY_FAULT

For CPU power fault, check address 0x24, port 2:

bit
[4]	PWRGD_PVDD11_S3_R_FAULT
[3]	PWRGD_PVDDIO_FAULT
[2]	PWRGD_PVDDCR_CPU1_FAULT
[1]	PWRGD_PVDDCR_SOC_FAULT
[0]	PWRGD_PVDDCR_CPU0_FAULT

Test Plan:
Trigger power fault by setting CPLD (0x0F) on server board
to pull INT_SMB_BMC_SLOT1_4_N or INT_SMB_BMC_SLOT5_8_N low.

Testing:
Take SLOT2 for example.

1. Check gpio status
root@bmc:~# cat /sys/kernel/debug/gpio | grep -i 1_4
 gpio-612 (INT_SMB_BMC_SLOT1_4_|gpio_monitor        ) in  hi IRQ

2. Check (1 to 4)MUX status
root@bmc:~# i2ctransfer -f -y 8 w1@0x70 0 r1
0x00

3. Check SLOT2 CPLD IOE status
root@bmc:~# i2ctransfer -y -f 1 w1@0x24 0x01 r1
0x00
root@bmc:~# i2ctransfer -y -f 1 w1@0x24 0x02 r1
0x00

4. Trigger power fault by setting CPLD (0x0F) on server board
root@bmc:~# i2ctransfer -y -f 1 w2@0xf 0x3f 0x1

5. Check event log
root@bmc:~# mfg-tool log-display
...
    },
    "2": {
        "additional_data": [
            "FAULT=PWRGD_P1V8_STBY_FAULT"
        ],
        "event_id": "",
        "message": "SLOT2 AC Power Fault",
        "resolution": "",
        "resolved": false,
        "severity": "xyz.openbmc_project.Logging.Entry.Level.Error",
        "timestamp": "2024-07-18T08:37:13.179000000Z",
        "updated_timestamp": "2024-07-18T08:37:13.179000000Z"
    },
    "3": {
        "additional_data": [
            "FAULT=PWRGD_PVDDCR_CPU0_FAULT"
        ],
        "event_id": "",
        "message": "SLOT2 DC Power Fault",
        "resolution": "",
        "resolved": false,
        "severity": "xyz.openbmc_project.Logging.Entry.Level.Error",
        "timestamp": "2024-07-18T08:37:13.231000000Z",
        "updated_timestamp": "2024-07-18T08:37:13.231000000Z"
    }

6. Check gpio status
root@bmc:~# cat /sys/kernel/debug/gpio | grep -i 1_4
 gpio-612 (INT_SMB_BMC_SLOT1_4_|gpio_monitor        ) in  lo IRQ

7. Check (1 to 4)MUX status
root@bmc:~# i2ctransfer -f -y 8 w1@0x70 0 r1
0x20

8. Check SLOT2 CPLD IOE status
root@bmc:~# i2ctransfer -y -f 1 w1@0x24 0x01 r1
0x01
root@bmc:~# i2ctransfer -y -f 1 w1@0x24 0x02 r1
0x01

Change-Id: I67b93e9a55c949af54aee4ca90dc998448bdfdcc
Signed-off-by: Yikai Tsai <yikai.tsai.wiwynn@gmail.com>
diff --git a/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/slot-power-fault b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/slot-power-fault
new file mode 100644
index 0000000..73ab5be
--- /dev/null
+++ b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/slot-power-fault
@@ -0,0 +1,119 @@
+#!/bin/bash
+#
+# Register power-fault events for the slots behind one interrupt line.
+#
+# Usage: slot-power-fault <14|58>
+#   14 - SLOT1 to SLOT4, interrupt status read from the MUX at i2c-8 0x70
+#   58 - SLOT5 to SLOT8, interrupt status read from the MUX at i2c-9 0x71
+#
+# For each slot flagged in the upper nibble of the status byte, the slot's
+# CPLD IOE (0x24) is read and one event is logged per port that reports a
+# fault: port 1 (register 0x01) as "AC", port 2 (register 0x02) as "DC".
+
+# Create one Error-level log entry through xyz.openbmc_project.Logging.
+#   $1 - zero-based slot index; the message shows it as SLOT<$1 + 1>
+#   $2 - fault type placed in the message ("AC" or "DC")
+#   $3 - comma-separated fault names stored under the FAULT key
+log_message() {
+    local slot_num=$(($1+1))
+    local MESSAGE="SLOT$slot_num $2 Power Fault"
+    local COMBINED_INFO="$3"
+
+    busctl call \
+        xyz.openbmc_project.Logging /xyz/openbmc_project/logging \
+        xyz.openbmc_project.Logging.Create Create "ssa{ss}" "$MESSAGE" \
+        "xyz.openbmc_project.Logging.Entry.Level.Error" 1 "FAULT" "$COMBINED_INFO"
+}
+
+# Print the comma-separated names of all bits that are set in a fault byte.
+#   $1   - fault byte as printed by i2ctransfer (e.g. 0x03)
+#   $2.. - fault names, one per bit, starting with bit 0
+# Prints nothing when none of the listed bits is set.
+decode_faults() {
+    local fault_byte=$(($1))
+    local bit=0
+    local info=""
+    local name
+    shift
+
+    for name in "$@"; do
+        # Each bit is tested on its own so simultaneous faults are all reported.
+        if (( (fault_byte >> bit) & 1 )); then
+            info="${info:+$info,}$name"
+        fi
+        bit=$((bit + 1))
+    done
+
+    printf '%s' "$info"
+}
+
+# Read one CPLD IOE (0x24) port of a slot and log an event if any fault bit is set.
+#   $1   - zero-based slot index, also used as the i2c bus number of the slot
+#   $2   - IOE register to read (0x01 for port 1, 0x02 for port 2)
+#   $3   - fault type passed on to log_message ("AC" or "DC")
+#   $4.. - fault names, one per bit, starting with bit 0
+# Returns 1 when the register cannot be read.
+check_port() {
+    local slot=$1
+    local reg=$2
+    local kind=$3
+    local fault_byte
+    local info
+    shift 3
+
+    if ! fault_byte=$(i2ctransfer -f -y "$slot" w1@0x24 "$reg" r1); then
+        echo "Failed to read CPLD IOE register $reg on i2c-$slot" >&2
+        return 1
+    fi
+
+    # The fault list is rebuilt from scratch for every read, so nothing is
+    # carried over from another port or another slot.
+    info=$(decode_faults "$fault_byte" "$@")
+    if [ -n "$info" ]; then
+        log_message "$slot" "$kind" "$info"
+    fi
+}
+
+# if $1 == 14, slot1 to slot4. Check mux0 at i2c-8 0x70.
+# if $1 == 58, slot5 to slot8. Check mux1 at i2c-9 0x71.
+if [ "$1" -eq 14 ]; then
+    i2c_bus=8
+    mux_addr=0x70
+    slot_num_offset=0
+elif [ "$1" -eq 58 ]; then
+    i2c_bus=9
+    mux_addr=0x71
+    slot_num_offset=4
+else
+    echo "Invalid input. Please provide 14 or 58." >&2
+    exit 1
+fi
+
+# i2ctransfer to check which slot got interrupt
+if ! INT_BYTE=$(i2ctransfer -f -y "$i2c_bus" w1@"$mux_addr" 0 r1); then
+    echo "Failed to read slot interrupt status from i2c-$i2c_bus $mux_addr" >&2
+    exit 1
+fi
+# The per-slot interrupt flags are in the upper nibble.
+INT_BYTE=$((INT_BYTE >> 4))
+
+# Traverse INT_BYTE: bit i set means slot (i + slot_num_offset) got an interrupt.
+SLOT_RANGE=4
+for (( count = 0; count < SLOT_RANGE; count++ )); do
+    if (( (INT_BYTE >> count) & 1 )); then
+        slot_num=$((count + slot_num_offset))
+
+        # CPLD IOE (0x24) addr 0x01 (port1, ac)
+        check_port "$slot_num" 0x01 "AC" \
+            PWRGD_P1V8_STBY_FAULT \
+            PWRGD_P1V2_STBY_FAULT
+
+        # CPLD IOE (0x24) addr 0x02 (port2, dc)
+        check_port "$slot_num" 0x02 "DC" \
+            PWRGD_PVDDCR_CPU0_FAULT \
+            PWRGD_PVDDCR_SOC_FAULT \
+            PWRGD_PVDDCR_CPU1_FAULT \
+            PWRGD_PVDDIO_FAULT \
+            PWRGD_PVDD11_S3_R_FAULT
+    fi
+done
diff --git a/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/slot-power-fault@.service b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/slot-power-fault@.service
new file mode 100644
index 0000000..f81ce05
--- /dev/null
+++ b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/slot-power-fault@.service
@@ -0,0 +1,7 @@
+[Unit]
+Description=slot power-fault:%i
+
+[Service]
+Type=oneshot
+ExecStart=/usr/libexec/phosphor-gpio-monitor/slot-power-fault %i
+SyslogIdentifier=slot-power-fault%i
diff --git a/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/yosemite4-phosphor-multi-gpio-monitor.json b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/yosemite4-phosphor-multi-gpio-monitor.json
index bf625a6..b283316 100644
--- a/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/yosemite4-phosphor-multi-gpio-monitor.json
+++ b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor/yosemite4-phosphor-multi-gpio-monitor.json
@@ -313,5 +313,27 @@
                 "slot-hsc-fault@8.service"]
         },
         "Continue": true
+    },
+    {
+        "Name": "INT_SMB_BMC_SLOT1_4_BMC_N",
+        "ChipId": "0",
+        "GpioNum": 100,
+        "EventMon": "FALLING",
+        "Targets": {
+            "FALLING": [
+                "slot-power-fault@14.service"]
+        },
+        "Continue": true
+    },
+    {
+        "Name": "INT_SMB_BMC_SLOT5_8_BMC_N",
+        "ChipId": "0",
+        "GpioNum": 137,
+        "EventMon": "FALLING",
+        "Targets": {
+            "FALLING": [
+                "slot-power-fault@58.service"]
+        },
+        "Continue": true
     }
 ]
diff --git a/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor_%.bbappend b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor_%.bbappend
index 6ca4f74..91bfebc 100644
--- a/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor_%.bbappend
+++ b/meta-facebook/meta-yosemite4/recipes-phosphor/gpio/phosphor-gpio-monitor_%.bbappend
@@ -18,6 +18,8 @@
             file://rescan-wf-bic@.service \
             file://slot-hsc-fault \
             file://slot-hsc-fault@.service \
+            file://slot-power-fault \
+            file://slot-power-fault@.service \
             "
 
 RDEPENDS:${PN}:append = " bash"
@@ -33,6 +35,7 @@
     remove-nic-endpoint-slot@.service \
     rescan-wf-bic@.service \
     slot-hsc-fault@.service \
+    slot-power-fault@.service \
     reconfig-net-interface@.service \
     "
 
@@ -51,12 +54,14 @@
     install -m 0644 ${WORKDIR}/remove-nic-endpoint-slot@.service ${D}${systemd_system_unitdir}/
     install -m 0644 ${WORKDIR}/rescan-wf-bic@.service ${D}${systemd_system_unitdir}/
     install -m 0644 ${WORKDIR}/slot-hsc-fault@.service ${D}${systemd_system_unitdir}/
+    install -m 0644 ${WORKDIR}/slot-power-fault@.service ${D}${systemd_system_unitdir}/
     install -d ${D}${libexecdir}/${PN}
     install -m 0755 ${WORKDIR}/probe-slot-device ${D}${libexecdir}/${PN}/
     install -m 0755 ${WORKDIR}/reconfig-net-interface ${D}${libexecdir}/${PN}/
     install -m 0755 ${WORKDIR}/rescan-fru-device ${D}${libexecdir}/${PN}/
     install -m 0755 ${WORKDIR}/rescan-wf-bic ${D}${libexecdir}/${PN}/
     install -m 0755 ${WORKDIR}/slot-hsc-fault ${D}${libexecdir}/${PN}/
+    install -m 0755 ${WORKDIR}/slot-power-fault ${D}${libexecdir}/${PN}/
     install -d ${D}/${bindir}
     install -m 0755 ${WORKDIR}/configure-nic-mctp-endpoint.sh ${D}/${bindir}/
 }