#!/bin/bash

# This script monitors the S0/S1 fault GPIO and detects errors or warnings
# reported by the CPUs.
#
# According to OpenBMC_Software_Funcional_Specification, section 3.16:
#
# When the BMC detects the GPIO_FAULT signal indicating an SCP booting failure:
#   - If a non-critical error/warning from the SCP occurs, the BMC blinks the
#     Fault LED once.
#   - If a critical error from the SCP occurs, the BMC turns on the Fault LED.
# The BMC monitors the GPIO_FAULT signal from the SCP during SCP booting to
# determine whether the error is non-critical or critical. A fatal error is
# indicated when the signal toggles On and then Off continuously, followed by
# a "quiet" period of about three seconds, and this pattern repeats. If the
# "quiet" period is longer than three seconds, the error is non-fatal. The BMC
# must set up appropriate debounce times to detect such errors. The BMC is
# expected to turn on the Fault LED forever for fatal errors, or to turn on
# the Fault LED and turn it off when the fault clears for non-fatal errors.
#
# Usage: <app_name> <socket 0/1>

# shellcheck source=/dev/null
source /usr/sbin/gpio-lib.sh

# global variables

# Flag files created (with touch) once a fault has been classified.
error_flag='/tmp/fault_err'
warning_flag='/tmp/fault_warning'

# Delays handed to usleep, which takes its argument in microseconds.
duty_cycle=250000   # polling interval of the main loop while the line is idle
scan_pulse=100000   # sampling interval while a pulse train is being decoded
# Number of consecutive low observations that marks the end of a pulse train.
blank_num=8

# Falling-edge count of the pulse train just decoded, and of the one before it.
curr_pattern=0
prev_pattern=0

# Most recent level read from the fault GPIO value file.
gpio_status=0
# Set to 1 by detect_patern_repeat when a new pulse train starts inside its
# scan window.
repeat=0

# Socket selector taken from the first command-line argument.
socket=$1

# Line offset handed to "gpioget 0" to sense socket 1; a reading of 1 is
# treated as "socket 1 not present".
socket1_present=15
socket1_status=1

# Fault GPIO of each socket, before translation by gpio_number.
S0_fault_gpio=73
S1_fault_gpio=201

# Translate a fault-pattern code into its RAS event name.
#
# Arguments: $1 - optional pattern code; when omitted, $curr_pattern is used.
# Globals:   curr_pattern (read when $1 is omitted), event_name (written)
#
# Codes 1 through 15 map to the names listed below; every other value
# (including 0 and an empty string) yields "NOT_SUPPORT".
map_event_name() {
    local code="${1-$curr_pattern}"

    case "$code" in
        1)  event_name="RAS_GPIO_INVALID_LCS" ;;
        2)  event_name="RAS_GPIO_FILE_HDR_INVALID" ;;
        3)  event_name="RAS_GPIO_FILE_INTEGRITY_INVALID" ;;
        4)  event_name="RAS_GPIO_KEY_CERT_AUTH_ERR" ;;
        5)  event_name="RAS_GPIO_CNT_CERT_AUTH_ERR" ;;
        6)  event_name="RAS_GPIO_I2C_HARDWARE_ERR" ;;
        7)  event_name="RAS_GPIO_CRYPTO_ENGINE_ERR" ;;
        8)  event_name="RAS_GPIO_ROTPK_EFUSE_INVALID" ;;
        9)  event_name="RAS_GPIO_SEED_EFUSE_INVALID" ;;
        10) event_name="RAS_GPIO_LCS_FROM_EFUSE_INVALID" ;;
        11) event_name="RAS_GPIO_PRIM_ROLLBACK_EFUSE_INVALID" ;;
        12) event_name="RAS_GPIO_SEC_ROLLBACK_EFUSE_INVALID" ;;
        13) event_name="RAS_GPIO_HUK_EFUSE_INVALID" ;;
        14) event_name="RAS_GPIO_CERT_DATA_INVALID" ;;
        15) event_name="RAS_GPIO_INTERNAL_HW_ERR" ;;
        *)  event_name="NOT_SUPPORT" ;;
    esac
}

# After a pulse train has ended, watch the fault GPIO for the start of the
# next train to decide whether the pattern repeats.
#
# The line is sampled every scan_pulse for at most 14 samples. A low-to-high
# transition inside that window sets repeat=1. If the window elapses without
# one, the event is reported as a warning and the warning flag file is
# created.
#
# Globals: scan_pulse, gpio_Id, fault_gpio, socket, warning_flag (read)
#          gpio_status, repeat (written)
#          event_name (written through map_event_name on the warning path)
detect_patern_repeat() {
    local prev=0
    local curr=0
    local remaining=13

    # Start from a clean result. Without this, a repeat=1 left over from an
    # earlier detection whose patterns did not match would still be set when
    # this call ends on the warning path, and the caller would take it for a
    # repeat of the current pattern.
    repeat=0

    while true; do
        usleep "$scan_pulse"
        gpio_status=$(cat /sys/class/gpio/gpio"$gpio_Id"/value)
        prev=$curr
        curr=$gpio_status
        if [ "$prev" == 0 ] && [ "$curr" == 1 ]; then
            # The pattern starts again; the caller checks whether the previous
            # and the current pattern are the same.
            repeat=1
            break
        fi
        if [ "$remaining" == 0 ]; then
            map_event_name
            echo "detected a warning from fault GPIO #$fault_gpio $socket, event $event_name"
            # The pattern did not repeat: this is a warning, raise its flag.
            touch "$warning_flag"
            break
        fi
        remaining=$(( remaining - 1 ))
    done
}

# Decode one pulse train on the fault GPIO.
#
# Starting from the level already held in gpio_status, the line is sampled
# every scan_pulse. Each high-to-low transition is counted; once blank_num
# consecutive low observations have been seen, the count is stored in
# curr_pattern and detect_patern_repeat is called to check whether the train
# starts again.
#
# Globals: scan_pulse, blank_num, gpio_Id (read)
#          gpio_status, curr_pattern (written)
detect_pattern() {
    local cnt_falling_edge=0
    local cnt_blank=0

    local prev=0
    local curr=0

    while true; do
        prev=$curr
        curr=$gpio_status
        # Count the falling edges; whenever one appears, restart cnt_blank.
        if [ "$prev" == 1 ] && [ "$curr" == 0 ]; then
            cnt_falling_edge=$(( cnt_falling_edge + 1 ))
            cnt_blank=0
            # No new sample is taken before the next pass, so the same low
            # reading is evaluated again and becomes the first blank
            # observation.
            continue
        # Check whether we are in the quiet gap.
        elif [ "$prev" == 0 ] && [ "$curr" == 0 ]; then
            cnt_blank=$(( cnt_blank + 1 ))
            if [ "$cnt_blank" == "$blank_num" ]; then
                # echo "pattern number falling_edge=$cnt_falling_edge blank=$cnt_blank"
                curr_pattern=$cnt_falling_edge
                # All falling edges are counted; now check whether the
                # pattern repeats.
                detect_patern_repeat
                break
            fi
        fi
        usleep "$scan_pulse"
        gpio_status=$(cat /sys/class/gpio/gpio"$gpio_Id"/value)
    done
}

# Export the fault GPIO through sysfs and configure it as an input.
#
# Globals:   gpio_Id (read) - sysfs GPIO number to export
# Arguments: none are used; any positional argument is ignored and the pin is
#            always taken from gpio_Id.
gpio_config_input() {
    echo "$gpio_Id" > /sys/class/gpio/export
    echo "in" > /sys/class/gpio/gpio"${gpio_Id}"/direction
}

# Resolve the selected fault GPIO to its sysfs number and make sure it is
# exported.
#
# Globals: fault_gpio (read), gpio_Id (written)
# gpio_number is not defined in this file; presumably it is provided by the
# sourced gpio-lib.sh.
init_sysfs_fault_gpio() {
    gpio_Id=$(gpio_number "$fault_gpio")
    # Already exported: leave its current configuration untouched.
    if [ -d /sys/class/gpio/gpio"$gpio_Id" ]; then
        return
    fi
    # gpio_config_input operates on the resolved sysfs number, so hand it
    # gpio_Id rather than the untranslated fault_gpio value.
    gpio_config_input "$gpio_Id"
}

# init
# Select the fault GPIO for the requested socket. Only "0" and "1" are
# accepted; anything else (including a missing argument) is rejected instead
# of being silently treated as socket 1.
case "$socket" in
    0)
        fault_gpio=$S0_fault_gpio
        ;;
    1)
        # A presence reading of 1 means socket 1 is not populated.
        socket1_status=$(gpioget 0 "$socket1_present")
        if [ "$socket1_status" == 1 ]; then
            echo "socket 1 not present"
            exit 1
        fi
        fault_gpio=$S1_fault_gpio
        ;;
    *)
        echo "Usage: ${0##*/} <socket 0/1>" >&2
        exit 1
        ;;
esac

init_sysfs_fault_gpio

# daemon start
# While the fault line is idle, poll it every duty_cycle. As soon as it reads
# 1, decode the pulse train; an error is reported when detect_patern_repeat
# (called through detect_pattern) has set repeat=1 and the decoded pattern
# equals the previous one.
while true; do
    # detect when pattern starts
    if [ "$gpio_status" == 1 ]; then
        # now, there is something on gpio, check if that is a pattern
        detect_pattern
        if [ "$repeat" == 1 ] && [ "$prev_pattern" == "$curr_pattern" ]; then
            map_event_name
            echo "detected an error from fault GPIO #$fault_gpio $socket, event#$curr_pattern $event_name"
            touch "$error_flag"
            repeat=0
        fi
        prev_pattern=$curr_pattern
        curr_pattern=0
        # Re-check gpio_status right away: it still holds the last level
        # sampled during detection, so a train that has already restarted is
        # decoded without the idle delay.
        continue
    fi
    usleep "$duty_cycle"
    gpio_status=$(cat /sys/class/gpio/gpio"$gpio_Id"/value)
done

# Not reached: the loop above never terminates.
exit 1