#!/bin/bash
# This script monitors fan, over-temperature, PSU and CPU/SCP failures and
# updates the fault LED status accordingly.
#
# gpio_name_get and gpio_name_set are not defined in this file; presumably
# they are provided by the sourced gpio-lib.sh.

# shellcheck disable=SC2004
# shellcheck source=meta-ampere/meta-mitchell/recipes-ampere/platform/ampere-platform-init/gpio-lib.sh
source /usr/sbin/gpio-lib.sh

# common variables
on=1
off=0

# Over-temperature state; the flag file is only tested for existence here.
overtemp_occured="false"
overtemp_fault_flag='/tmp/fault_overtemp'

# gpio fault
gpio_fault="false"
gpio_fault_flag="/tmp/gpio_fault"

# fan variables
fan_failed="false"
fan_failed_flag='/tmp/fan_failed'

# PSU variables
psu_failed="false"
psu_bus=2
psu0_addr=0x58
psu1_addr=0x59
# PMBus STATUS_WORD command code
status_word_cmd=0x79
# Following the PMBus Specification, STATUS_WORD bits treated as a PSU fault:
# Bit[1]:  CML faults
# Bit[2]:  Over temperature faults
# Bit[3]:  Under voltage faults
# Bit[4]:  Over current faults
# Bit[5]:  Over voltage fault
# Bit[10]: Fan faults
psu_fault_bitmask=0x43e

# Aggregated fault state driving the system fault LED
fault="false"

# led variables
# Cached LED states; both LEDs are assumed to be off when the daemon starts.
fan_fault_led_status=$off
psu_fault_led_status=$off
led_bus=15
led_addr=0x22
led_port0_config=0x06
led_port0_output=0x02

# Function declarations
# Refresh the fan failure state from its flag file.
# Globals:
#   fan_failed_flag (read)    - path whose existence signals a fan failure
#   fan_failed      (written) - "true" if the flag file exists, else "false"
check_fan_failed() {
    if [[ -f "$fan_failed_flag" ]]; then
        fan_failed="true"
    else
        fan_failed="false"
    fi
}

# Drive the fan fault LED (Port0 bit 0 of the CPLD I/O expander).
# NOTE(review): the previous comment named the bus "I2C16" while led_bus is
# 15 - confirm which is intended.
# Globals:   led_bus, led_addr, led_port0_config, led_port0_output, on (read)
# Arguments: $1 - $on to light the LED, anything else to turn it off
# Returns:   0 on success; non-zero if any I2C read/write fails or a read
#            does not yield a hex value (nothing further is written then)
turn_on_off_fan_fault_led() {
    local cfg_val led_val
    local -r hex_re='^0[xX][0-9a-fA-F]+$'

    # Read the Port0 configuration register. Validate it before doing
    # arithmetic: an empty/garbled value would raise a shell arithmetic
    # error that aborts the caller's polling loop.
    cfg_val=$(i2cget -f -y "$led_bus" "$led_addr" "$led_port0_config") || return 1
    [[ "$cfg_val" =~ $hex_re ]] || return 1

    # Config CPLD's IOepx Port0[0] from input to output by clearing bit 0.
    cfg_val=$((cfg_val & ~1))
    i2cset -f -y "$led_bus" "$led_addr" "$led_port0_config" "$cfg_val" || return 1

    # Read-modify-write the output register so the other bits are preserved.
    led_val=$(i2cget -f -y "$led_bus" "$led_addr" "$led_port0_output") || return 1
    [[ "$led_val" =~ $hex_re ]] || return 1

    if [[ "$1" == "$on" ]]; then
        led_val=$((led_val | 1))
    else
        led_val=$((led_val & ~1))
    fi

    # Turn on/off fan fault led; the write status is the function's status.
    i2cset -f -y "$led_bus" "$led_addr" "$led_port0_output" "$led_val"
}

# Drive the PSU fault LED (Port0 bit 1 of the CPLD I/O expander).
# NOTE(review): the previous comment named the bus "I2C16" while led_bus is
# 15 - confirm which is intended.
# Globals:   led_bus, led_addr, led_port0_config, led_port0_output, on (read)
# Arguments: $1 - $on to light the LED, anything else to turn it off
# Returns:   0 on success; non-zero if any I2C read/write fails or a read
#            does not yield a hex value (nothing further is written then)
turn_on_off_psu_fault_led() {
    local cfg_val led_val
    local -r hex_re='^0[xX][0-9a-fA-F]+$'

    # Read the Port0 configuration register. Validate it before doing
    # arithmetic: an empty/garbled value would raise a shell arithmetic
    # error that aborts the caller's polling loop.
    cfg_val=$(i2cget -f -y "$led_bus" "$led_addr" "$led_port0_config") || return 1
    [[ "$cfg_val" =~ $hex_re ]] || return 1

    # Config CPLD's IOepx Port0[1] from input to output by clearing bit 1.
    cfg_val=$((cfg_val & ~2))
    i2cset -f -y "$led_bus" "$led_addr" "$led_port0_config" "$cfg_val" || return 1

    # Read-modify-write the output register so the other bits are preserved.
    led_val=$(i2cget -f -y "$led_bus" "$led_addr" "$led_port0_output") || return 1
    [[ "$led_val" =~ $hex_re ]] || return 1

    if [[ "$1" == "$on" ]]; then
        led_val=$((led_val | 2))
    else
        led_val=$((led_val & ~2))
    fi

    # Turn on/off psu fault led; the write status is the function's status.
    i2cset -f -y "$led_bus" "$led_addr" "$led_port0_output" "$led_val"
}

# Bring the fan fault LED in line with the current fan state, touching the
# hardware only when the cached LED state differs from the wanted one.
# Globals:
#   fan_failed           (read)       - "true" when a fan failure is flagged
#   fan_fault_led_status (read/write) - cached LED state ($on / $off)
#   on, off              (read)
# The cache is updated only when turn_on_off_fan_fault_led succeeds, so a
# failed write is retried on the next call instead of being forgotten.
control_fan_fault_led() {
    if [[ "$fan_failed" == "true" ]]; then
        if [[ "$fan_fault_led_status" == "$off" ]] \
            && turn_on_off_fan_fault_led "$on"; then
            fan_fault_led_status=$on
        fi
    elif [[ "$fan_fault_led_status" == "$on" ]] \
        && turn_on_off_fan_fault_led "$off"; then
        fan_fault_led_status=$off
    fi
}

# Report whether a single PSU slot is healthy.
# Globals:   psu_bus, status_word_cmd, psu_fault_bitmask (read)
# Arguments: $1 - presence GPIO name, $2 - PMBus I2C address of the PSU
# Returns:   0 when the presence GPIO reads "0" and none of the monitored
#            STATUS_WORD bits is set; 1 when the PSU is absent, the
#            STATUS_WORD read fails or is not a hex value, or a monitored
#            fault bit is set.
psu_slot_ok() {
    local presence status_word

    presence=$(gpio_name_get "$1")
    [[ "$presence" == "0" ]] || return 1

    # PSU present: monitor it using pmbus, check the STATUS_WORD. A failed or
    # empty read counts as a fault; feeding it to shell arithmetic would
    # raise an error that aborts the caller's polling loop.
    status_word=$(i2cget -f -y "$psu_bus" "$2" "$status_word_cmd" w) || return 1
    [[ "$status_word" =~ ^0[xX][0-9a-fA-F]+$ ]] || return 1

    (( (status_word & psu_fault_bitmask) == 0 ))
}

# Refresh the PSU failure state for both PSU slots.
# Globals:
#   psu0_addr, psu1_addr     (read)
#   psu0_failed, psu1_failed (written) - per-slot "true"/"false"
#   psu_failed               (written) - "true" if either slot is not healthy
# An absent PSU is treated as failed.
check_psu_failed() {
    psu0_failed="true"
    if psu_slot_ok presence-ps0 "$psu0_addr"; then
        psu0_failed="false"
    fi

    psu1_failed="true"
    if psu_slot_ok presence-ps1 "$psu1_addr"; then
        psu1_failed="false"
    fi

    if [[ "$psu0_failed" == "true" || "$psu1_failed" == "true" ]]; then
        psu_failed="true"
    else
        psu_failed="false"
    fi
}

# Bring the PSU fault LED in line with the current PSU state, touching the
# hardware only when the cached LED state differs from the wanted one.
# Globals:
#   psu_failed           (read)       - "true" when a PSU failure is detected
#   psu_fault_led_status (read/write) - cached LED state ($on / $off)
#   on, off              (read)
# The cache is updated only when turn_on_off_psu_fault_led succeeds, so a
# failed write is retried on the next call instead of being forgotten.
control_psu_fault_led() {
    if [[ "$psu_failed" == "true" ]]; then
        if [[ "$psu_fault_led_status" == "$off" ]] \
            && turn_on_off_psu_fault_led "$on"; then
            psu_fault_led_status=$on
        fi
    elif [[ "$psu_fault_led_status" == "$on" ]] \
        && turn_on_off_psu_fault_led "$off"; then
        psu_fault_led_status=$off
    fi
}

# Refresh the over-temperature state from its flag file.
# Globals:
#   overtemp_fault_flag (read)       - path whose existence signals the event
#   overtemp_occured    (read/write) - "true" if the flag file exists,
#                                      else "false"
# Outputs: one message on stdout when the state changes to "true". It is not
#          repeated while the flag stays present, so periodic polling does
#          not flood the log.
check_overtemp_occured() {
    if [[ -f "$overtemp_fault_flag" ]]; then
        if [[ "$overtemp_occured" != "true" ]]; then
            echo "Over temperature occured, turn on fault LED"
        fi
        overtemp_occured="true"
    else
        overtemp_occured="false"
    fi
}


# Refresh the GPIO fault state from its flag file.
# Globals:
#   gpio_fault_flag (read)       - path whose existence signals a GPIO fault
#   gpio_fault      (read/write) - "true" if the flag file exists, else "false"
# Outputs: one message on stdout when the state changes to "true". It is not
#          repeated while the flag stays present, so periodic polling does
#          not flood the log.
check_gpio_fault() {
    if [[ -f "$gpio_fault_flag" ]]; then
        if [[ "$gpio_fault" != "true" ]]; then
            echo "GPIO fault event(s) occured, turn on fault LED"
        fi
        gpio_fault="true"
    else
        gpio_fault="false"
    fi
}

# Aggregate the individual fault states into the overall fault state.
# Globals:
#   fan_failed, psu_failed, overtemp_occured, gpio_fault (read)
#   fault (written) - "true" if any of the above equals "true", else "false"
check_fault() {
    local state

    fault="false"
    for state in "$fan_failed" "$psu_failed" "$overtemp_occured" "$gpio_fault"; do
        if [[ "$state" == "true" ]]; then
            fault="true"
            break
        fi
    done
}

# The System Fault LED turns on upon a system error. Drive it from the
# aggregated fault state; the GPIO is written on every call.
# Globals:  fault, on, off (read)
# Returns:  the status of gpio_name_set
control_sys_fault_led() {
    local led_state=$off

    if [[ "$fault" == "true" ]]; then
        led_state=$on
    fi

    # Turn on/off the System Fault Led
    gpio_name_set led-fault "$led_state"
}

# daemon start: poll every fault source, then update the LEDs, every 2 s.
while true; do
    # Refresh the individual fault states
    check_fan_failed
    check_psu_failed
    check_overtemp_occured
    check_gpio_fault

    # Aggregate them and drive the system fault LED
    check_fault
    control_sys_fault_led

    # Drive the dedicated fan / PSU fault LEDs
    control_fan_fault_led
    control_psu_fault_led

    sleep 2
done

# Only reached if the polling loop terminates unexpectedly; report failure.
exit 1