RAS comment fix
Fixing optional master/non-master processor parameter
Fixing signature description parameter
Resolves openbmc/openbmc-test-automation#1051
Change-Id: I5ac06268047c99582d25f43c2b0782ba88494d70
Signed-off-by: Sridevi Ramesh <sridevra@in.ibm.com>
diff --git a/extended/test_host_ras.robot b/extended/test_host_ras.robot
index 07f0082..9f9cae2 100755
--- a/extended/test_host_ras.robot
+++ b/extended/test_host_ras.robot
@@ -23,8 +23,8 @@
*** Variables ***
${stack_mode} normal
-*** Test Cases ***
+*** Test Cases ***
# Memory channel (MCACALIFIR) related error injection.
Verify Recoverable Callout Handling For MCA With Threshold 1
@@ -37,6 +37,7 @@
Inject Recoverable Error With Threshold Limit Through Host
... ${value[0]} ${value[1]} 1 ${value[2]} ${err_log_path}
+
Verify Recoverable Callout Handling For MCA With Threshold 32
[Documentation] Verify recoverable callout handling for MCACALIFIR with
... threshold 32.
@@ -164,7 +165,7 @@
[Tags] Verify_Recoverable_Callout_Handling_For_L2FIR_With_Threshold_1
${value}= Get From Dictionary ${ERROR_INJECT_DICT} L2FIR_RECV1
- ${translated_fir}= Fetch FIR Address Translation Value 0 ${value[0]} EX
+ ${translated_fir}= Fetch FIR Address Translation Value ${value[0]} EX
${err_log_path}= Catenate ${RAS_LOG_DIR_PATH}l2fir_th1
Inject Recoverable Error With Threshold Limit Through Host
... ${translated_fir} ${value[1]} 1 ${value[2]} ${err_log_path}
@@ -177,7 +178,7 @@
[Tags] Verify_Recoverable_Callout_Handling_For_L3FIR_With_Threshold_1
${value}= Get From Dictionary ${ERROR_INJECT_DICT} L3FIR_RECV1
- ${translated_fir}= Fetch FIR Address Translation Value 0 ${value[0]} EX
+ ${translated_fir}= Fetch FIR Address Translation Value ${value[0]} EX
${err_log_path}= Catenate ${RAS_LOG_DIR_PATH}l3fir_th1
Inject Recoverable Error With Threshold Limit Through Host
... ${translated_fir} ${value[1]} 1 ${value[2]} ${err_log_path}
@@ -188,7 +189,7 @@
[Tags] Verify_Recoverable_Callout_Handling_For_L3FIR_With_Threshold_32
${value}= Get From Dictionary ${ERROR_INJECT_DICT} L3FIR_RECV32
- ${translated_fir}= Fetch FIR Address Translation Value 0 ${value[0]} EX
+ ${translated_fir}= Fetch FIR Address Translation Value ${value[0]} EX
${err_log_path}= Catenate ${RAS_LOG_DIR_PATH}l3fir_th32
Inject Recoverable Error With Threshold Limit Through Host
... ${translated_fir} ${value[1]} 32 ${value[2]} ${err_log_path}
@@ -213,7 +214,7 @@
[Tags] Verify_Recoverable_Callout_Handling_For_CMEFIR_With_Threshold_1
${value}= Get From Dictionary ${ERROR_INJECT_DICT} CMEFIR_RECV1
- ${translated_fir}= Fetch FIR Address Translation Value 0 ${value[0]} EX
+ ${translated_fir}= Fetch FIR Address Translation Value ${value[0]} EX
${err_log_path}= Catenate ${RAS_LOG_DIR_PATH}cmefir_th1
Inject Recoverable Error With Threshold Limit Through Host
... ${translated_fir} ${value[1]} 1 ${value[2]} ${err_log_path}
@@ -255,15 +256,19 @@
... 4. Verify & clear gard records.
[Arguments] ${fir} ${chip_address} ${threshold_limit}
... ${signature_desc} ${log_prefix}
+ ... ${master_proc_chip}=True
# Description of argument(s):
# fir FIR (Fault isolation register) value (e.g. 2011400).
# chip_address Chip address (e.g 2000000000000000).
# threshold_limit Threshold limit (e.g 1, 5, 32).
# signature_desc Error log signature description.
# log_prefix Log path prefix.
+ # master_proc_chip Use the master processor chip ('True' or 'False').
Set Auto Reboot 1
Inject Error Through HOST ${fir} ${chip_address} ${threshold_limit}
+ ... ${master_proc_chip}
+
Is Host Running
${output}= Gard Operations On OS list
Should Contain ${output} No GARD
@@ -281,6 +286,7 @@
... 4. Verify & clear gard records.
[Arguments] ${fir} ${chip_address} ${threshold_limit}
... ${signature_desc} ${log_prefix}
+ ... ${master_proc_chip}=True
# Description of argument(s):
# fir FIR (Fault isolation register) value (e.g. 2011400).
# chip_address Chip address (e.g 2000000000000000).
@@ -288,9 +294,11 @@
# signature_desc Error Log signature description.
# (e.g 'mcs(n0p0c0) (MCFIR[0]) mc internal recoverable')
# log_prefix Log path prefix.
+ # master_proc_chip Use the master processor chip ('True' or 'False').
Set Auto Reboot 1
Inject Error Through HOST ${fir} ${chip_address} ${threshold_limit}
+ ... ${master_proc_chip}
Wait Until Keyword Succeeds 500 sec 20 sec Is Host Rebooted
Wait for OS
Verify And Clear Gard Records On HOST
@@ -298,17 +306,26 @@
Fetch FIR Address Translation Value
[Documentation] Fetch FIR address translation value through HOST.
- [Arguments] ${proc_chip_id} ${fir} ${target_type}
+ [Arguments] ${fir} ${target_type} ${master_proc_chip}=True
# Description of argument(s):
- # proc_chip_id Processor chip ID (e.g '0', '8').
- # fir FIR (Fault isolation register) value (e.g. 2011400).
- # core_id Core ID (e.g. 9).
- # target_type Target type (e.g. 'EX', 'EQ', 'C').
+ # fir FIR (Fault isolation register) value (e.g. 2011400).
+ # core_id Core ID (e.g. 9).
+ # target_type Target type (e.g. 'EX', 'EQ', 'C').
+ # master_proc_chip Use the master processor chip ('True' or 'False').
Login To OS Host
Copy Address Translation Utils To HOST OS
- ${core_ids}= Get Core IDs From OS 0
+ # Fetch processor chip IDs.
+ ${proc_chip_id}= Get ProcChipId From OS Processor ${master_proc_chip}
+ # Example output:
+ # 00000000
+
+ ${core_ids}= Get Core IDs From OS ${proc_chip_id[-1]}
+ # Example output:
+ # ./probe_cpus.sh | grep 'CHIP ID: 0' | cut -c21-22
+ # ['14', '15', '16', '17']
+
# Ignoring master core ID.
${output}= Get Slice From List ${core_ids} 1
# Feth random non-master core ID.
diff --git a/lib/ras/host_utils.robot b/lib/ras/host_utils.robot
index 0b326e4..4e4efdd 100644
--- a/lib/ras/host_utils.robot
+++ b/lib/ras/host_utils.robot
@@ -45,16 +45,26 @@
Get ProcChipId From OS
[Documentation] Get processor chip ID values based on the input.
- [Arguments] ${chip_type}
+ [Arguments] ${chip_type} ${master_proc_chip}
# Description of arguments:
- # chip_type The chip type (Processor/Centaur).
+ # chip_type The chip type (Processor/Centaur).
+ # master_proc_chip Use the master processor chip ('True' or 'False').
${cmd}= Catenate -l | grep -i ${chip_type} | cut -c1-8
${proc_chip_id}= Getscom Operations On OS ${cmd}
# Example output:
+ # getscom -l | grep processor | cut -c1-8
+ # 00000008 - False
+ # 00000000 - True
+
+ ${proc_ids}= Split String ${proc_chip_id}
+ ${proc_id}= Run Keyword If '${master_proc_chip}' == 'True'
+ \ ... Get From List ${proc_ids} 1
+ \ ... ELSE Get From List ${proc_ids} 0
+
+ # Example output:
# 00000008
- # 00000000
- [Return] ${proc_chip_id}
+ [Return] ${proc_id}
Get Core IDs From OS
[Documentation] Get Core IDs corresponding to the input processor chip ID.
@@ -89,7 +99,6 @@
# 0x10010c00
[Return] ${translated_addr[1]}
-
Inject Error Through HOST
[Documentation] Inject checkstop on processor through HOST.
... Test sequence:
@@ -97,29 +106,29 @@
... 2. Clear any existing gard records
... 3. Inject Error on processor/centaur
[Arguments] ${fir} ${chip_address} ${threshold_limit}
+ ... ${master_proc_chip}=True
# Description of argument(s):
# fir FIR (Fault isolation register) value (e.g. 2011400).
# chip_address chip address (e.g 2000000000000000).
# threshold_limit Threshold limit (e.g 1, 5, 32).
+ # master_proc_chip Use the master processor chip ('True' or 'False').
Delete Error Logs
Login To OS Host
Gard Operations On OS clear all
# Fetch processor chip IDs.
- ${chip_ids}= Get ProcChipId From OS Processor
- ${proc_ids}= Split String ${chip_ids}
- ${proc_id}= Get From List ${proc_ids} 1
+ ${proc_chip_id}= Get ProcChipId From OS Processor ${master_proc_chip}
${threshold_limit}= Convert To Integer ${threshold_limit}
:FOR ${i} IN RANGE ${threshold_limit}
- \ Run Keyword Putscom Operations On OS ${proc_id} ${fir} ${chip_address}
+ \ Run Keyword Putscom Operations On OS ${proc_chip_id} ${fir}
+ ... ${chip_address}
# Adding delay after each error injection.
\ Sleep 10s
# Adding delay to get error log after error injection.
Sleep 120s
-
Code Update Unrecoverable Error Inject
[Documentation] Inject UE MCACALFIR checkstop on processor through
... host during PNOR code update.
diff --git a/lib/ras/variables.py b/lib/ras/variables.py
index bf69188..1bf97c8 100644
--- a/lib/ras/variables.py
+++ b/lib/ras/variables.py
@@ -3,34 +3,37 @@
Signature description in error log corresponding to error injection.
"""
-DES_MCA_RECV1 = "'mca.n0p0c0.*MCACALFIR[^0].*A MBA recoverable error'"
-DES_MCA_RECV32 = "'mca.n0p0c0.*MCACALFIR[^2].*Excessive refreshes'"
-DES_MCA_UE = "'mca.n0p0c0.*MCACALFIR[^10].*State machine'"
+DES_MCA_RECV1 = "'MCACALFIR[^0].*A MBA recoverable error'"
+DES_MCA_RECV32 = "'MCACALFIR[^2].*Excessive refreshes'"
+DES_MCA_UE = "'MCACALFIR[^10].*State machine'"
-DES_MCS_RECV1 = "'mcs.n0p0c0.*MCFIR[^0].*mc internal recoverable'"
-DES_MCS_UE = "'mcs.n0p0c0.*MCFIR[^1].*mc internal non recovervabl'"
+DES_MCS_RECV1 = "'MCFIR[^0].*mc internal recoverable'"
+DES_MCS_UE = "'MCFIR[^1].*mc internal non recovervabl'"
-DES_NX_RECV1 = "'pu.n0p0.*NXDMAENGFIR[^13].*Channel 4 GZIP ECC PE'"
-DES_NX_RECV32 = "'pu.n0p0.*NXDMAENGFIR[^4].*Channel 0 842 engine ECC'"
-DES_NX_UE = "'pu.n0p0.*NXDMAENGFIR[^5].*Channel 0 842 engine ECC'"
+DES_NX_RECV1 = "'NXDMAENGFIR[^13].*Channel 4 GZIP ECC PE'"
+DES_NX_RECV32 = "'NXDMAENGFIR[^4].*Channel 0 842 engine ECC'"
+DES_NX_UE = "'NXDMAENGFIR[^5].*Channel 0 842 engine ECC'"
-DES_OBUS_RECV32 = "'ob.n0p0c0.*OB_LFIR[^0].*CFIR internal parity error'"
+DES_OBUS_RECV32 = "'OB_LFIR[^0].*CFIR internal parity error'"
-DES_CXA_RECV5 = "'capp.n0p0c0.*CXAFIR[^34].*CXA CE on data received'"
-DES_CXA_RECV32 = "'capp.n0p0c0.*CXAFIR[^2].*CXA CE on Master array'"
+DES_CXA_RECV5 = "'CXAFIR[^34].*CXA CE on data received'"
+DES_CXA_RECV32 = "'CXAFIR[^2].*CXA CE on Master array'"
+DES_CXA_UE = "'CXAFIR[^1].*CXA System Xstop PE'"
-DES_NPU0_RECV32 = "'pu.n0p0.*NPU0FIR[^13].*CQ CTL/SM ASBE Array single'"
+DES_NPU0_RECV32 = "'NPU0FIR[^13].*CQ CTL/SM ASBE Array single'"
DES_L2_RECV1 = "'L2FIR[^8].*L2 directory CE'"
DES_L2_RECV32 = "'L2FIR[^6].*L2 directory read CE'"
+DES_L2_UE = "'L2FIR[^9].*L2 directory stuck bit CE'"
DES_L3_RECV1 = "'L3FIR[^17].*Received addr_error cresp'"
DES_L3_RECV32 = "'L3FIR[^7].*L3 cache write data CE'"
+DES_L3_UE = "'L3FIR[^8].*L3 cache write data UE'"
DES_OCC_RECV1 = "'OCCFIR[^45].*C405_ECC_CE'"
DES_CME_RECV1 = "'CMEFIR[^7].*PPE SRAM Uncorrectable Err'"
-DES_EQ_RECV32 = "'EQ_LFIR[^0].*CFIR internal parity'"
+DES_EQ_RECV32 = "'EQ_LFIR[^1].*CFIR internal parity'"
DES_NCU_RECV1 = "'NCUFIR[^8].*NCU Store Queue Data'"
# The following is an error injection dictionary with each entry consisting of:
@@ -52,17 +55,18 @@
'NX_RECV32': ['02011100', '0800000000000000', DES_NX_RECV32],
'CXA_RECV5': ['02010800', '0000000020000000', DES_CXA_RECV5],
'CXA_RECV32': ['02010800', '2000000000000000', DES_CXA_RECV32],
+ 'CXA_UE': ['02010800', '4000000000000000', DES_CXA_UE],
'OBUS_RECV32': ['0904000a', '8000000000000000', DES_OBUS_RECV32],
'NPU0_RECV32': ['05013C00', '0004000000000000', DES_NPU0_RECV32],
'L2FIR_RECV1': ['10010800', '0080000000000000', DES_L2_RECV1],
'L2FIR_RECV32': ['10010800', '0200000000000000', DES_L2_RECV32],
- 'L2FIR_UE': ['10010800', '4000000000000000', ''],
+ 'L2FIR_UE': ['10010800', '0040000000000000', DES_L2_UE],
'L3FIR_RECV1': ['10011800','0000400000000000', DES_L3_RECV1],
'L3FIR_RECV32': ['10011800', '0100000000000000', DES_L3_RECV32],
- 'L3FIR_UE': ['10011800', '0100000000000000', ''],
+ 'L3FIR_UE': ['10011800', '0080000000000000', DES_L3_UE],
'OCCFIR_RECV1': ['01010800', '0000000000040000', DES_OCC_RECV1],
'CMEFIR_RECV1': ['10012000', '0100000000000000', DES_CME_RECV1],
- 'EQFIR_RECV32': ['1004000A', '4000000000000000', DES_EQ_RECV32],
+ 'EQFIR_RECV32': ['1004000A', '8000000000000000', DES_EQ_RECV32],
'NCUFIR_RECV1': ['10011400', '0080000000000000', DES_NCU_RECV1],
}