Add fan performance tests

Add fan tests to:
  - Check that the fans are operating at or near their
    specified target speed.
  - Check operation of fans in manual mode.
  - Verify hwmon functionality by comparing what's on dbus
    (/xyz/openbmc_project/sensors/fan_tach/fan0_0, etc.)
    to what's in the BMC's hwmon file system (fan1_input,
    etc.).
Improve reliability of TC "Verify Fan Speed Increase" by polling
for the error to be asserted or removed instead of sleeping.

Resolves openbmc/openbmc-test-automation#1284
Resolves openbmc/openbmc-test-automation#493
Resolves openbmc/openbmc-test-automation#442

Change-Id: I4cb99ee20d3330611ef1570633c3328fdb08e3dd
Signed-off-by: Steven Sombar <ssombar@us.ibm.com>
diff --git a/lib/fan_utils.robot b/lib/fan_utils.robot
index 9bde67b..6ad0a40 100755
--- a/lib/fan_utils.robot
+++ b/lib/fan_utils.robot
@@ -55,6 +55,7 @@
 
     ${front_fault}=  Get System LED State  front_fault
     ${rear_fault}=  Get System LED State  rear_fault
+
     Run Keyword If
     ...  '${front_fault}' != '${state}' or '${rear_fault}' != '${state}'
     ...  Fail  msg=Expecting both enclosure LEDs to be ${state}.
@@ -74,8 +75,21 @@
     ...  Functional  data=${valueDict}
 
 
+Set Fan Target Speed
+    [Documentation]  Set the target speed of a fan.
+    [Arguments]  ${fan_name}  ${fan_speed}
+
+    # Description of argument(s):
+    # fan_name    Name of the fan whose target is to be set (e.g. "fan0").
+    # fan_speed   Value to write as the new target speed (e.g. "9000").
+
+    ${target_data}=  Create Dictionary  data=${fan_speed}
+    Write Attribute
+    ...  ${SENSORS_URI}fan_tach/${fan_name}_0  Target  data=${target_data}
+
+
 Get Target Speed Of Fans
-    [Documentation]  Return the maximum target RPM speed of the system fans.
+    [Documentation]  Return the maximum target speed of the system fans.
 
     ${max_target}=  Set Variable  0
     ${paths}=  Get Endpoint Paths  ${SENSORS_URI}fan_tach/  0
@@ -88,6 +102,63 @@
     [Return]  ${max_target}
 
 
+Get Target And Blade Speeds
+    [Documentation]  Return the fan target speed setting, the speed of the
+    ...  fan's clockwise blade, and the speed of the counter-clockwise blade.
+    # A fan unit contains two counter-rotating blades.  While the unit is
+    # transitioning to a new target speed, one blade is expected to be
+    # moving but the other blade may not be.
+    [Arguments]  ${fan_name}
+
+    # Description of argument(s):
+    # fan_name       The name of a fan (e.g. "fan0").
+
+    # Sensor <fan_name>_0 holds the target and the clockwise blade speed.
+    ${cw_uri}=  Set Variable  ${SENSORS_URI}fan_tach/${fan_name}_0
+    ${cw_reply}=  OpenBMC Get Request  ${cw_uri}
+    ${cw_data}=  To JSON  ${cw_reply.content}
+    ${cw_speed}=  Set Variable  ${cw_data["data"]["Value"]}
+    ${target}=  Set Variable  ${cw_data["data"]["Target"]}
+
+    # Sensor <fan_name>_1 holds the counter-clockwise blade speed.
+    ${ccw_uri}=  Set Variable  ${SENSORS_URI}fan_tach/${fan_name}_1
+    ${ccw_reply}=  OpenBMC Get Request  ${ccw_uri}
+    ${ccw_data}=  To JSON  ${ccw_reply.content}
+    ${ccw_speed}=  Set Variable  ${ccw_data["data"]["Value"]}
+
+    [Return]  ${target}
+    ...  ${cw_speed}  ${ccw_speed}
+
+
+Get Fan Target And Speed
+    [Documentation]  Return the fan target speed setting and the
+    ...  speed of the fastest blade.
+    [Arguments]  ${fan_name}
+
+    # Description of argument(s):
+    # fan_name       The name of a fan (e.g. "fan0").
+
+    ${target}  ${cw_speed}  ${ccw_speed}=
+    ...  Get Target And Blade Speeds  ${fan_name}
+    # Report whichever of the two blades is turning faster.
+    ${fastest}=  Set Variable If
+    ...  ${cw_speed} > ${ccw_speed}  ${cw_speed}
+    ...  ${ccw_speed}
+    [Return]  ${target}  ${fastest}
+
+
+Set Fan Daemon State
+    [Documentation]  Set the state of the fan control service.
+    [Arguments]  ${state}
+
+    # Description of argument(s):
+    # state     The systemctl action to apply to the fan control service,
+    #           typically "start", "stop", or "restart".
+
+    BMC Execute Command
+    ...  systemctl ${state} phosphor-fan-control@0.service
+
+
 Verify Minimum Number Of Fans With Cooling Type
     [Documentation]  Verify minimum number of fans.
     [Arguments]  ${num_fans}  ${water_cooled}
diff --git a/systest/test_fans_os.robot b/systest/test_fans_os.robot
index 8457fc6..98fce54 100755
--- a/systest/test_fans_os.robot
+++ b/systest/test_fans_os.robot
@@ -6,54 +6,36 @@
 # OPENBMC_HOST       The BMC host name or IP address.
 # OPENBMC_USERNAME   The userID to login to the BMC as.
 # OPENBMC_PASSWORD   The password for OPENBMC_USERNAME.
-# OS_HOST            The OS host name or IP Address.
-# OS_USERNAME        The OS login userid (usually root).
-# OS_PASSWORD        The password for the OS login.
 #
-# Approximate run time:   8 minutes.
+# Approximate run time:   18 minutes.
 
 Resource        ../syslib/utils_os.robot
 Resource        ../lib/logging_utils.robot
 Resource        ../lib/utils.robot
 Resource        ../lib/fan_utils.robot
+Library         ../syslib/utils_keywords.py
+Library         OperatingSystem
 
-Suite Setup      Suite Setup Execution
-Test Teardown    Test Teardown Execution
+Suite Setup     Suite Setup Execution
+Test Teardown   Test Teardown Execution
 
 
 *** Variables ***
 
-# The fan speed-monitoring daemon takes less than one second to
-# notice a fan failure. This is system configurable i.e. wspoon = 30sec before
-# marking a fan non-functional.
-# Allow system_response_time before checking if there was a measurable response
-# to the daemon, such as an increase in RPMs of the other fans.
-# NOTE: This time is relative to the BMC performance and can change
-# at anytime.
-${system_response_time}  5s
-
-# The @{fan_names} list holds the names of the fans in the system.
-@{fan_names}
-
 # Fan state values.
 ${fan_functional}      ${1}
 ${fan_nonfunctional}   ${0}
 
+# Any fan at this speed or greater is considered to be at maximum speed.
+${max_speed}=  ${10400}
+
 
 *** Test Cases ***
 
-
 Check Number Of Fans With Power On
     [Documentation]  Verify system has the minimum number of fans.
     [Tags]  Check_Number_Of_Fans_With_Power_On
 
-    @{fan_names}  Create List
-    # Populate the list with the names of the fans in the system.
-    ${fan_names}=  Get Fan Names  ${fan_names}
-    Set Suite Variable  ${fan_names}  children=true
-
-    ${number_of_fans}=  Get Length  ${fan_names}
-
     # Determine if system is water cooled.
     ${water_coooled}=  Is Water Cooled
 
@@ -68,32 +50,152 @@
     Verify Fan Monitors With State  On
 
 
-Verify Fan RPM Increase
-    [Documentation]  Verify that RPMs of working fans increase when one fan
-    ...  is disabled.
-    [Tags]  Verify_Fan_RPM_Increase
+Check Fan Speed
+    [Documentation]  Verify fans are running at or near target speed.
+    [Tags]  Check_Fan_Speed
+
+    # Set the speed tolerance.
+    # A base tolerance of .15 means that the fan's speed should be
+    # within 15% of its target speed.  Fans may be accelerating or
+    # decelerating to meet a new target, so allow .10 extra (.25 total).
+    ${tolerance}=  Set Variable  .25
+    Rpvars  tolerance
+
+    # Compare each fan's fastest blade speed with its target speed.
+    :FOR  ${fan_name}  IN  @{fan_names}
+    \  ${target_speed}  ${fan_speed}=  Get Fan Target And Speed  ${fan_name}
+    \  Rpvars  fan_name  target_speed  fan_speed
+    \  # Calculate the allowed deviation as a fraction of the target speed.
+    \  ${tolerance_value}=  Evaluate  ${tolerance}*${target_speed}
+    \  # Calculate upper and lower speed limits.
+    \  ${max_limit}=  Evaluate   ${target_speed}+${tolerance_value}
+    \  ${min_limit}=  Evaluate   ${target_speed}-${tolerance_value}
+    \  Run Keyword If
+    ...  ${fan_speed} < ${min_limit} or ${fan_speed} > ${max_limit}
+    ...  Fail  msg=${fan_name} speed of ${fan_speed} is out of range.
+
+
+Check Fan Manual Control
+    [Documentation]  Check direct control of fans.
+    [Tags]  Check_Fan_Manual_Control
+
+    # Test case overview:
+    # Turn off BMC's fan control daemon, then test to confirm
+    # that fans can be controlled manually.
+    # The app that takes data from sysfs and updates dbus is named hwmon.
+    # Verify hwmon functionality by comparing what's on dbus
+    # (/xyz/openbmc_project/sensors/fan_tach/fan0_0, fan0_1, etc.)
+    # with what's in the BMC's file system, e.g. at
+    # /sys/class/hwmon/hwmon9/fan*_input.
+
+    # The maximum target speed that can be set.
+    ${max_fan_target_setting}=  Set Variable  ${10500}
+
+    # Speed criteria for passing, which is 85% of max_fan_target_setting.
+    ${min_speed}=  Set Variable  ${8925}
+
+    # Time allowed for the fan daemon to take control and return
+    # fans to normal speed.
+    ${minutes_to_stabilize}=  Set Variable  4
+
+    # Login to BMC and disable the fan daemon. Disabling the daemon sets
+    # manual mode.
+    Open Connection And Log In
+    Set Fan Daemon State  stop
+
+    # For each fan, set a new target speed and wait for the fan to
+    # accelerate.  Then check that the fan is running near that
+    # target speed.
+    :FOR  ${fan_name}  IN  @{fan_names}
+    \  Set Fan Target Speed  ${fan_name}  ${max_fan_target_setting}
+    \  Run Key U  Sleep \ 60s
+    \  ${target_speed}  ${cw_speed}  ${ccw_speed}=
+    ...  Get Target And Blade Speeds  ${fan_name}
+    \  Rpvars  fan_name  target_speed  cw_speed  ccw_speed
+    \  Run Keyword If
+    ...  ${cw_speed} < ${min_speed} or ${ccw_speed} < ${min_speed}
+    ...  Fail  msg=${fan_name} failed manual speed test.
+
+    # Check the fan speeds in the BMC file system.
+
+    # Get the location of the fan hwmon.
+    ${controller_path}  ${stderr}  ${rc}=  BMC Execute Command
+    ...  grep -ir max31785a /sys/class/hwmon/hwmon* | grep name
+    # E.g., controller_path=/sys/class/hwmon/hwmon10/name:max31785a.
+
+    ${hwmon_path}  ${file_name}=  Split Path  ${controller_path}
+    # E.g.,  /sys/class/hwmon/hwmon10  or  /sys/class/hwmon/hwmon9.
+
+    Rpvars  controller_path  hwmon_path
+
+    # Run the BMC command which gets fan speeds from the file system.
+    ${cmd}=  Catenate  cat ${hwmon_path}/fan*_input
+    ${stdout}  ${stderr}  ${rc}=
+    ...  BMC Execute Command  ${cmd}
+
+    Rpvars  stdout
+
+    # Convert output to integer values.
+    ${speeds}=  Evaluate  map(int, $stdout.split(${\n}))
+    Rpvars  speeds
+    # Count the number of speeds > ${min_speed}.
+    ${count}=  Set Variable  ${0}
+    :FOR  ${speed}  IN  @{speeds}
+    \  ${count}=  Run Keyword If  ${speed} > ${min_speed}
+    ...  Evaluate  ${count}+1  ELSE  Set Variable  ${count}
+    # Because each fan has two rotating fan blades, the count should be
+    # equal to 2*${number_of_fans}.  On water-cooled systems some
+    # speeds may be reported by hwmon as 0.  That is expected,
+    # and the number_of_fans reported in the system will be less.
+    ${fail_test}=  Evaluate  (2*${number_of_fans})-${count}
+
+    # Re-enable the fan daemon
+    Set Fan Daemon State  restart
+
+    Run Keyword If  ${fail_test}  Fail
+    ...  msg=hwmon did not properly report fan speeds.
+
+    # Wait for the daemon to take control and gracefully set fan speeds
+    # back to normal.
+    ${msg}=  Catenate  Waiting ${minutes_to_stabilize} minutes
+    ...  for fan daemon to stabilize fans.
+    Rprint Timen  ${msg}
+    Run Key U  Sleep \ ${minutes_to_stabilize}m
+
+
+Verify Fan Speed Increase
+    [Documentation]  Verify that the speed of working fans increase when
+    ...  one fan is disabled.
+    [Tags]  Verify_Fan_Speed_Increase
     #  A non-functional fan should cause an error log and
     #  an enclosure LED will light.  The other fans should speed up.
 
-    # Any fan at this speed or greater will be considered to be at maximum RPM.
-    ${max_fan_rpm}=  Set Variable  10400
+    # Allow system_response_time before checking if there was a
+    # response by the system to an applied fault.
+    ${system_response_time}=  Set Variable  60s
 
-    # Choose a fan to test with, e.g., fan1.
-    ${test_fan_name}=  Get From List  ${fan_names}  1
-    Rpvars  test_fan_name
+    # Choose a fan to test with, e.g., fan0.
+    ${test_fan_name}=  Get From List  ${fan_names}  0
 
     ${initial_speed}=  Get Target Speed Of Fans
-    Rpvars  initial_speed
+    Rpvars  test_fan_name  initial_speed
 
     # If initial speed is not already at maximum, set expect_increase.
     # This flag is used later to determine if speed checking is
     # to be done or not.
     ${expect_increase}=  Run Keyword If
-    ...  ${initial_speed} < ${max_fan_rpm}
+    ...  ${initial_speed} < ${max_speed}
     ...  Set Variable  1  ELSE  Set Variable  0
 
     Set Fan State  ${test_fan_name}  ${fan_nonfunctional}
-    Sleep  ${system_response_time}
+
+    # Wait for error to be asserted.
+
+    :FOR  ${n}  IN RANGE  30
+    \  ${front_fault}=  Get System LED State  front_fault
+    \  ${rear_fault}=  Get System LED State  rear_fault
+    \  Run Key U  Sleep \ 1s
+    \  Exit For Loop If  '${front_fault}' == 'On' and '${rear_fault}' == 'On'
 
     Verify System Error Indication Due To Fans
 
@@ -106,6 +208,14 @@
     \  Should Contain  ${endpoint_name}  ${test_fan_name}
     ...  msg=Error log present but not for ${test_fan_name}.
 
+    Run Key U  Sleep \ ${system_response_time}
+
+    # A heavily loaded system may have powered off.
+    ${host_state}=  Get Host State
+    Rpvars  host_state
+    Run Keyword If  'Running' != '${host_state}'  Pass Execution
+    ...  msg=System shutdown so skipping remainder of test.
+
     ${new_fan_speed}=  Get Target Speed Of Fans
     Rpvars  expect_increase  initial_speed  new_fan_speed
 
@@ -115,29 +225,16 @@
     ...  ${expect_increase} == 1 and ${new_fan_speed} < ${initial_speed}
     ...  Fail  msg=Remaining fans did not increase speed with loss of one fan.
 
-    # Recover the fan.
-    Set Fan State  ${test_fan_name}  ${fan_functional}
-    Sleep  ${system_response_time}
-
-    Delete Error Logs
-    Sleep  2s
-
-    # Enclosure LEDs should go off immediately after deleting the error logs.
-    Verify Front And Rear LED State  Off
-
-    ${restored_fan_speed}=  Get Target Speed Of Fans
-    Rpvars  new_fan_speed  restored_fan_speed
-
-    # Fan speed should lower because the fan is now functional again.
-    Run Keyword If
-    ...  ${expect_increase} == 1 and ${new_fan_speed} < ${restored_fan_speed}
-    ...  Fail  msg=Fans did not recover speed with all fans functional again.
-
 
 Verify System Shutdown Due To Fans
     [Documentation]  Shut down when not enough fans.
     [Tags]  Verify_System_Shutdown_Due_To_Fans
 
+    ${wait_after_poweroff}=  Set Variable  15s
+
+    # The previous test may have shut down the system.
+    REST Power On  stack_mode=skip
+
     # Set fans to be non-functional.
     :FOR  ${fan_name}  IN  @{fan_names}
     \  Set Fan State  ${fan_name}  ${fan_nonfunctional}
@@ -147,6 +244,8 @@
     # an error if power off does not happen within a reasonable time.
     Wait For PowerOff
 
+    Run Key U  Sleep \ ${wait_after_poweroff}
+
     Verify System Error Indication Due To Fans
 
     # Verify there is an error log because of the shutdown.
@@ -164,7 +263,6 @@
 
 *** Keywords ***
 
-
 Reset Fans
     [Documentation]  Set the fans to functional state.
     # Set state of fans to functional by writing 1 to the Functional
@@ -183,7 +281,18 @@
     [Documentation]  Do the pre-test setup.
 
     REST Power On  stack_mode=skip
-    Delete All Error Logs
+
+    # The @{fan_names} list holds the names of the fans in the system.
+    @{fan_names}  Create List
+    ${fan_names}=  Get Fan Names  ${fan_names}
+    Set Suite Variable  ${fan_names}  children=true
+
+    ${number_of_fans}=  Get Length  ${fan_names}
+    Set Suite Variable  ${number_of_fans}  children=true
+
+    Reset Fans
+    Run Key U  Sleep \ 15s
+    Delete Error Logs
     Set System LED State  front_fault  Off
     Set System LED State  rear_fault  Off
 
@@ -193,6 +302,7 @@
 
     FFDC On Test Case Fail
     Reset Fans
+    Run Key U  Sleep \ 15s
     Delete Error Logs
     Set System LED State  front_fault  Off
     Set System LED State  rear_fault  Off