#!/bin/sh
| 2 | |
# When the system uses only a single PSU (e.g. PSU1) to power on the normal
# 12V rail, HSC2 will trigger a Fault event (FET health).
# If PSU2 is then plugged into the system, PSU2 won't deliver power to
# +12V_MB because HSC2 is protected by the Fault event.
# Because HSC2 is protected, PSU redundancy cannot be established.
# If PSU1 is unplugged at this moment, the system will crash (reset)
# because +12V_MB drops.
| 10 | |
# BMC SW work-around solution:
# - When the BMC detects that a PSU has been plugged into the system, it
#   resets the HSC by disabling and then re-enabling HOT SWAP through PMBus
#   commands to clear the Fault event.
| 15 | |
# Note:
# If a hot swap occurs during a BMC reset, the BMC is not yet in an
# operational state and can't detect the PSU plug/unplug, so the work-around
# won't be executed.
| 19 | |
| 20 | # Author: Chanh Nguyen <chnguyen@amperecomputing.com> |
| 21 | |
# I2C bus number and slave address of each hot swap controller (HSC).
# HSC1 is used for PSU index 0 and HSC2 for PSU index 1.
readonly HSC1_PMBUS_NUM=10
readonly HSC2_PMBUS_NUM=10
readonly HSC1_SLAVE_ADDR=0x10
readonly HSC2_SLAVE_ADDR=0x11
# Register written to switch the hot swap output off and back on.
readonly OPERATION=0x01
# Register read to detect the FET health fault (bit 7 of the value).
readonly STATUS_MFR_SPECIFIC=0x80
| 28 | |
# $1 is the PSU index: 0 selects PSU1 (handled by HSC1), 1 selects PSU2
# (handled by HSC2). Sets HSC_PMBUS_NUM and HSC_SLAVE_ADDR for the rest of
# the script.
PSU=$1

case "$PSU" in
    0)
        HSC_PMBUS_NUM=$HSC1_PMBUS_NUM
        HSC_SLAVE_ADDR=$HSC1_SLAVE_ADDR
        ;;
    1)
        HSC_PMBUS_NUM=$HSC2_PMBUS_NUM
        HSC_SLAVE_ADDR=$HSC2_SLAVE_ADDR
        ;;
    *)
        # A missing or unknown PSU index is a usage error: report it on
        # stderr and exit non-zero so the caller can detect the failure.
        echo "Please choose PSU1 (0) or PSU2 (1)" >&2
        echo "Ex: ampere_psu_reset_hotswap.sh 0" >&2
        exit 1
        ;;
esac
| 43 | |
# Print the last dot-separated field of the `obmcutil chassisstate` output
# (for example "Off" when the reported state ends in ".Off").
get_chassis_state() {
    obmcutil chassisstate | awk -F. '{print $NF}'
}

# Check HOST state: stop here when the chassis is reported as Off, since the
# HSC i2c bus cannot be accessed in that state.
chassisstate=$(get_chassis_state)
if [ "$chassisstate" = 'Off' ]; then
    echo "HOST is being OFF, so can't access the i2c $HSC_PMBUS_NUM. Please Turn ON HOST !" >&2
    exit 1
fi
| 50 | |
# Check FET health problems: read STATUS_MFR_SPECIFIC from the selected HSC.
if ! data=$(i2cget -f -y "$HSC_PMBUS_NUM" "$HSC_SLAVE_ADDR" "$STATUS_MFR_SPECIFIC"); then
    echo "ERROR: Can't access the i2c. Please check /dev/i2c-$HSC_PMBUS_NUM" >&2
    exit 1
fi

# psu_sts is 1 when bit 7 of the status value is set, 0 otherwise.
psu_sts=$(((data & 0x80) != 0))

if [ "$psu_sts" = 1 ]; then
    echo "PSU $PSU: FET health problems have been detected"
    echo "Reset Hot swap output on PSU $PSU"
    reset_rc=0

    # Disable Hot swap output
    if ! i2cset -f -y "$HSC_PMBUS_NUM" "$HSC_SLAVE_ADDR" "$OPERATION" 0x00 b; then
        echo "ERROR: Failed to disable Hot swap output on PSU $PSU" >&2
        reset_rc=1
    fi

    # Enable Hot swap output. This write is always attempted, even when the
    # disable write reported an error, so the output is not left switched off.
    if ! i2cset -f -y "$HSC_PMBUS_NUM" "$HSC_SLAVE_ADDR" "$OPERATION" 0x80 b; then
        echo "ERROR: Failed to enable Hot swap output on PSU $PSU" >&2
        reset_rc=1
    fi

    # Report a failed reset to the caller instead of silently succeeding.
    [ "$reset_rc" -eq 0 ] || exit 1
else
    echo "PSU $PSU: FET health problems have not been detected"
fi