Added a new Test Case for monitoring hardware ranges.

  - Verifies fan, temperature and power ranges while
    the system is idle.
  - Verifies fan, temperature and power ranges while
    the system is in a stressed state.

Change-Id: Ib672e8884f3421e05e85550c888e83d7fb349270
Signed-off-by: Joy Onyerikwu <onyekachukwu.joy.onyerikwu@ibm.com>
diff --git a/syslib/utils_os.robot b/syslib/utils_os.robot
index bbf8b8a..6555008 100755
--- a/syslib/utils_os.robot
+++ b/syslib/utils_os.robot
@@ -225,6 +225,40 @@
     [Return]  ${cpu_freq}
 
 
+Get CPU Max Temperature
+    [Documentation]  Get the highest CPU Temperature.
+
+    ${temperature_objs}=  Read Properties
+    ...  ${SENSORS_URI}temperature/enumerate
+    # Filter the dictionary to get just the CPU temperature info.
+    ${cmd}=  Catenate  {k:v for k,v in $temperature_objs.iteritems()
+    ...  if re.match('${SENSORS_URI}temperature/p.*core.*temp', k)}
+    ${cpu_temperatuture_objs}  Evaluate  ${cmd}  modules=re
+    # Create a list of the CPU temperature values (current).
+    ${cpu_temperatures}=  Evaluate
+    ...  [ x['Value'] for x in $cpu_temperatuture_objs.values() ]
+
+    ${cpu_max_temp}  Evaluate  max(map(int, $cpu_temperatures))/1000
+    [Return]  ${cpu_max_temp}
+
+
+Get CPU Min Temperature
+    [Documentation]  Get the  CPU Temperature.
+
+    ${temperature_objs}=  Read Properties
+    ...  ${SENSORS_URI}temperature/enumerate
+    # Filter the dictionary to get just the CPU temperature info.
+    ${cmd}=  Catenate  {k:v for k,v in $temperature_objs.iteritems()
+    ...  if re.match('${SENSORS_URI}temperature/p.*core.*temp', k)}
+    ${cpu_temperatuture_objs}=  Evaluate  ${cmd}  modules=re
+    # Create a list of the CPU temperature values (current).
+    ${cpu_temperatures}=  Evaluate
+    ...  [ x['Value'] for x in $cpu_temperatuture_objs.values() ]
+
+    ${cpu_min_temp}  Evaluate  min(map(int, $cpu_temperatures))/1000
+    [Return]  ${cpu_min_temp}
+
+
 Check For Errors On OS Dmesg Log
     [Documentation]  Check if dmesg has nvidia errors logged.
 
@@ -299,8 +333,8 @@
     [Return]  ${power_max}
 
 
-Get GPU Power
-    [Documentation]  Get the GPU power dissipation.
+Get GPU Max Power
+    [Documentation]  Get the maximum GPU power dissipation.
 
     # nvidia-smi --query-gpu=power.draw --format=csv returns
     # power.draw [W]
@@ -315,6 +349,16 @@
     [Return]  ${nvidia_out}
 
 
+Get GPU Min Power
+    [Documentation]  Return the minimum GPU power value as record by
+    ...  nvidia-smi.
+
+    ${cmd}=  Catenate  nvidia-smi --query-gpu=power.draw --format=csv |
+    ...  grep -v 'power.draw' | cut -f 1 -d ' ' | sort -n -u | head -1
+    ${gpu_min_power}  ${stderr}  ${rc}=  OS Execute Command  ${cmd}
+    [Return]  ${gpu_min_power}
+
+
 Get GPU Temperature Limit
     [Documentation]  Get NVIDIA GPU maximum permitted temperature.
 
@@ -330,8 +374,17 @@
     [Return]  ${nvidia_out}
 
 
-Get GPU Temperature
-    [Documentation]  Get the GPU temperature.
+Get GPU Min Temperature
+    [Documentation]  Get the minimum GPU temperature.
+
+    ${cmd}=  Catenate  nvidia-smi --query-gpu=temperature.gpu
+    ...  --format=csv | grep -v 'temperature.gpu' | sort -n -u | head -1
+    ${nvidia_out}  ${stderr}  ${rc}=  OS Execute Command  ${cmd}
+    [Return]  ${nvidia_out}
+
+
+Get GPU Max Temperature
+    [Documentation]  Get the maximum GPU temperature.
 
     # nvidia-smi --query-gpu=temperature.gpu --format=csv returns
     # 38
diff --git a/systest/gpu_stress_test.robot b/systest/gpu_stress_test.robot
index ffa070e..76ed6b2 100755
--- a/systest/gpu_stress_test.robot
+++ b/systest/gpu_stress_test.robot
@@ -102,8 +102,8 @@
     Check For Errors On OS Dmesg Log
 
     # Check NVIDIA power, temperature, and clocks.
-    ${power}=  Get GPU Power
-    ${temperature}=  Get GPU Temperature
+    ${power}=  Get GPU Max Power
+    ${temperature}=  Get GPU Max Temperature
     ${temperature_via_rest}=  Get GPU Temperature Via REST
     ${clock}=  Get GPU Clock
     Rprintn
diff --git a/systest/test_system_hardware_ranges.robot b/systest/test_system_hardware_ranges.robot
new file mode 100644
index 0000000..7e8e929
--- /dev/null
+++ b/systest/test_system_hardware_ranges.robot
@@ -0,0 +1,179 @@
+*** Settings ***
+
+Documentation  Verify that both the air and water cooled systems are
+...  operating in the allowable ranges for fans, power and temperature
+...  during idle and stress up at the OS.
+
+
+# TEST PARAMETERS:
+#   OPENBMC_HOST                The BMC host name or IP address.
+#   OPENBMC_USERNAME            The BMC user name.
+#   OPENBMC_PASSWORD            The BMC password.
+#   OS_HOST                     The OS host name or IP address.
+#   OS_USERNAME                 The OS user name.
+#   OS_PASSWORD                 The OS Host password.
+#   HTX_DURATION                Duration of the HTX run (e.g 1h, 20m).
+#   HTX_INTERVAL                The time delay between consecutive
+#                               checks for temperature, fan and power
+#                               ranges.
+#   HTX_MDT_PROFILE             The MDT Profile to run.
+#
+#   The parameters below should be comma-separated lists,
+#   (e.g "500,800"). See default ranges below.
+#   These ranges can vary based on the type of system under test.
+#
+#      FAN_SPEED_RANGE_IDLE     The allowable range of fan speeds,
+#                               expressed as RPMs, when the machine is
+#                               at an idle state.
+#      FAN_SPEED_RANGE_STRESS   The allowable range of fan speeds,
+#                               expressed as RPMs, when the machine is
+#                               at a stressed state.
+#      TEMPERATURE_RANGE_IDLE   The allowed range for temperature,
+#                               expressed as Celsius degrees, when
+#                               the machine is at an idle state.
+#      TEMPERATURE_RANGE_STRESS The allowable range for temperature,
+#                               expressed as Celsius degrees, when
+#                               the machine is at a stressed state.
+#      POWER_RANGE_IDLE         The allowable range for power, expressed
+#                               in Watts, while the machine is at an
+#                               idle state.
+#      POWER_RANGE_STRESS       The allowable range for power, expressed
+#                               in Watts, while the machine is at a
+#                               stressed state.
+
+
+Resource           ../syslib/utils_os.robot
+Resource           ../lib/fan_utils.robot
+Library            ../lib/gen_robot_valid.py
+Suite Setup         Suite Setup Execution
+Test Teardown      FFDC On Test Case Fail
+Suite Teardown     Shutdown HTX Exerciser
+
+*** Variables ***
+# Default Ranges.
+@{FAN_SPEED_RANGE_IDLE}=        0  6000
+@{FAN_SPEED_RANGE_STRESS}=      3000  8000
+@{TEMPERATURE_RANGE_IDLE}=      30  45
+@{TEMPERATURE_RANGE_STRESS}=    35  100
+@{POWER_RANGE_IDLE}=            15  60
+@{POWER_RANGE_STRESS}=          30  350
+
+
+*** Test Cases ***
+
+Verify Fan Speeds During Idle State
+    [Documentation]  Verify the fan speeds are within acceptable range
+    ...  while the system is idle.
+    [Tags]  Verify_Fan_Speeds_During_Idle_State
+
+    Verify Fan Speeds  ${FAN_SPEED_RANGE_IDLE}
+
+
+Verify Temperature During Idle State
+    [Documentation]  Verify the temperature values are within acceptable
+    ...  range while the system is idle.
+    [Tags]  Verify_Temperature_During_Idle_State
+
+    Verify Temperatures  ${TEMPERATURE_RANGE_IDLE}
+
+
+Verify Power During Idle State
+    [Documentation]  Verify the power values are within acceptable range
+    ...  while the system is idle.
+    [Tags]  Verify_Power_During_Idle_State
+
+    Verify Power Values  ${POWER_RANGE_IDLE}
+
+
+Test Hardware Limits During Stress
+    [Documentation]  Verify the hardware under stress is within
+    ...  acceptable range.
+    [Tags]  Test_Hardware_Limits_During_Stress
+
+    # Run HTX and verify, within intervals, that the hardware ranges
+    # are within the allowable ranges.
+    Run MDT Profile
+    Repeat Keyword  ${HTX_DURATION}  Run Keywords
+    ...  Verify Fan Speeds  ${FAN_SPEED_RANGE_STRESS}
+    ...  AND  Verify Temperatures  ${TEMPERATURE_RANGE_STRESS}
+    ...  AND  Verify Power Values  ${POWER_RANGE_STRESS}
+    ...  AND  Run Key  Sleep \ ${HTX_INTERVAL}
+
+
+*** Keywords ***
+
+Verify Fan Speeds
+    [Documentation]  Verify that the fan speeds are within the required
+    ...  range.
+    [Arguments]  ${range}
+
+    # Description of argument(s):
+    # range                     A 2-element list comprised of the lower
+    #                           and upper values which constitute the
+    #                           valid range for the fan speeds.
+    #                           (e.g [500,800]).
+
+    # Get the fans with the lowest and highest fan speeds. Verify that
+    # the speeds are within the proper range.
+    ${fan_objects}=  Read Properties  ${SENSORS_URI}fan_tach/enumerate
+    ${fan_speeds}=  Evaluate
+    ...  [ x['Value'] for x in $fan_objects.values() ]
+    ${max_fan_speed}  Evaluate  max(map(int, $fan_speeds))
+    ${min_fan_speed}  Evaluate  min(map(int, $fan_speeds))
+    Rvalid Range  max_fan_speed  ${range}
+    Rvalid Range  min_fan_speed  ${range}
+
+
+Verify Temperatures
+    [Documentation]  Verify that the temperature values are within the
+    ...  required range.
+    [Arguments]  ${range}
+
+    # Description of argument(s):
+    # range                     The allowable range for the temperature,
+    #                           values (e.g [20,60]).
+
+    # Get the lowest and highest temperatures for GPUs, verify
+    # that it is within the proper range.
+    ${gpu_max_temperature}=  Get GPU Max Temperature
+    ${gpu_min_temperature}=  Get GPU Min Temperature
+    Rvalid Range  gpu_max_temperature  ${range}
+    Rvalid Range  gpu_min_temperature  ${range}
+    # Verify for CPUs.
+    ${cpu_highest_temp}=  Get CPU Max Temperature
+    ${cpu_lowest_temp}=  Get CPU Min Temperature
+    Rvalid Range  cpu_highest_temp  ${range}
+    Rvalid Range  cpu_lowest_temp  ${range}
+
+
+Verify Power Values
+    [Documentation]  Verify that the power values for GPUs and CPUs
+    ...  are within the required range.
+    [Arguments]  ${range}
+
+    # Description of argument(s):
+    # range                     The allowable range for power values,
+    #                           (e.g [15,30]).
+
+    ${gpu_max}=  Get GPU Max Power
+    ${gpu_min}=  Get GPU Min Power
+    ${gpu_max_power}=  Evaluate  int(round(${gpu_max}))
+    ${gpu_min_power}=  Evaluate  int(round(${gpu_min}))
+    Rvalid Range  gpu_max_power  ${range}
+    Rvalid Range  gpu_min_power  ${range}
+
+    ${p0}=  Read Properties  ${SENSORS_URI}power/p0_power
+    ${p1}=  Read Properties  ${SENSORS_URI}power/p1_power
+    # The scaling factor for fans is -6 for CPU power values.
+    ${p0_value}=  Evaluate  ${p0}['Value']/1000000
+    ${p1_value}=  Evaluate  ${p1}['Value']/1000000
+    Rvalid Range  p0_value  ${range}
+    Rvalid Range  p1_value  ${range}
+
+
+Suite Setup Execution
+    [Documentation]  Do suite setup tasks.
+
+    REST Power On  stack_mode=skip
+    ${htx_running}=  Is HTX Running
+    Should Not Be True  ${htx_running}  msg=HTX needs to be shutdown.
\ No newline at end of file