blob: 9ab2d8262852e2bfbfb7b9bbf755b9213a4f525e [file] [log] [blame]
William A. Kennington III7d6fa422021-02-08 17:04:02 -08001#ifndef PLATFORMS_NEMORA_PORTABLE_NCSI_FSM_H_
2#define PLATFORMS_NEMORA_PORTABLE_NCSI_FSM_H_
3
4/* Nemora NC-SI (Finite) State Machine implementation */
5
#include <stdbool.h>
#include <stdint.h>

#include "platforms/nemora/portable/ncsi.h"
#include "platforms/nemora/portable/net_types.h"
10
/*
 * Delay counts for the NC-SI state machines: one applies before re-starting
 * the L2 configuration sequence, the other before re-running the connection
 * test.
 *
 * NOTE(review): presumably these seed ncsi_state_t.restart_delay_count and
 * ncsi_state_t.retest_delay_count, which are both uint8_t, so the values
 * should stay <= 255. The unit of the count (presumably poll iterations) is
 * not visible from this header -- confirm against the implementation.
 */
/* TODO put this into config somewhere? */
#define NCSI_FSM_RESTART_DELAY_COUNT 100
#define NCSI_FSM_RETEST_DELAY_COUNT 100
14
/* The network state is defined as a combination of the NC-SI connection state
 * and the network configuration. However the two cannot be decoupled:
 * - we cannot DHCP unless the NC-SI connection is up
 * - we cannot do the OEM L3/L4 NC-SI configuration unless we have a valid
 *   network configuration
 *
 * For additional complexity we cannot get DHCP/ARP responses after the host
 * has loaded the Mellanox NIC driver, but we want to be able to periodically
 * test the NC-SI connection regardless of whether we have network configuration
 * (so that flaky cables can be troubleshot using the host interface).
 *
 * For this reason there are actually 3 NC-SI finite state machines:
 * - L2 configuration (i.e. enabling all available NC-SI channels for
 *   passthrough RX and TX, although only TX will work after the host loads
 *   the NIC driver)
 * - L3/L4 configuration (i.e. configuring flow steering for RX traffic that
 *   matches our IP address and dedicated Nemora port so that we can receive
 *   Nemora requests even after the host has loaded the NIC driver)
 * - Connection test (i.e. periodically doing a ping test between the EC and
 *   the NIC, and also ensuring that L3/L4 configuration parameters have not
 *   been wiped out)
 *
 * For good karma, try to keep the state machines as linear as possible (one
 * step after the other).
 */
39
/*
 * Steps of the L2 configuration state machine, declared in sequence order.
 *
 * NCSI_STATE_L2_CONFIG_BEGIN aliases the first step (NCSI_STATE_RESTART) and
 * NCSI_STATE_L2_CONFIG_END is one past the last step, so the two markers
 * bracket the valid range of steps.
 *
 * Apart from RESTART and STOPPED, every step FOO is immediately followed by a
 * FOO_RESPONSE step (NOTE(review): presumably "send the command" then "wait
 * for its response" -- confirm against the implementation). New steps must
 * be inserted between the BEGIN and END markers.
 */
typedef enum {
  // First
  NCSI_STATE_L2_CONFIG_BEGIN,
  // Actual sequence
  NCSI_STATE_RESTART = NCSI_STATE_L2_CONFIG_BEGIN,
  NCSI_STATE_CLEAR_0,
  NCSI_STATE_CLEAR_0_RESPONSE,
  NCSI_STATE_GET_VERSION,
  NCSI_STATE_GET_VERSION_RESPONSE,
  NCSI_STATE_GET_CAPABILITIES,
  NCSI_STATE_GET_CAPABILITIES_RESPONSE,
  NCSI_STATE_CLEAR_1,
  NCSI_STATE_CLEAR_1_RESPONSE,
  NCSI_STATE_RESET_CHANNEL_0,
  NCSI_STATE_RESET_CHANNEL_0_RESPONSE,
  NCSI_STATE_RESET_CHANNEL_1,
  NCSI_STATE_RESET_CHANNEL_1_RESPONSE,
  NCSI_STATE_STOPPED,
  NCSI_STATE_GET_MAC,
  NCSI_STATE_GET_MAC_RESPONSE,
  NCSI_STATE_SET_MAC_FILTER_0,
  NCSI_STATE_SET_MAC_FILTER_0_RESPONSE,
  NCSI_STATE_SET_MAC_FILTER_1,
  NCSI_STATE_SET_MAC_FILTER_1_RESPONSE,
  NCSI_STATE_ENABLE_CHANNEL_0,
  NCSI_STATE_ENABLE_CHANNEL_0_RESPONSE,
  NCSI_STATE_ENABLE_CHANNEL_1,
  NCSI_STATE_ENABLE_CHANNEL_1_RESPONSE,
  NCSI_STATE_ENABLE_TX,
  NCSI_STATE_ENABLE_TX_RESPONSE,
  // Last
  NCSI_STATE_L2_CONFIG_END
} ncsi_l2_config_state_t;
73
/*
 * Steps of the L3/L4 configuration state machine.
 *
 * Unlike the L2 and test enums, the first step is NOT aliased to the BEGIN
 * marker here: NCSI_STATE_L3L4_CONFIG_BEGIN (0) and NCSI_STATE_CONFIG_FILTERS
 * (1) are distinct values. NOTE(review): presumably intentional (BEGIN acting
 * as its own initial state) -- confirm before "normalizing" it, since that
 * would renumber the states.
 */
typedef enum {
  // First
  NCSI_STATE_L3L4_CONFIG_BEGIN,
  // Actual sequence
  NCSI_STATE_CONFIG_FILTERS,
  // Last
  NCSI_STATE_L3L4_CONFIG_END
} ncsi_l3l4_config_state_t;
82
/*
 * Steps of the connection-test state machine, declared in sequence order.
 *
 * NCSI_STATE_TEST_BEGIN aliases the first step (NCSI_STATE_TEST_PARAMS) and
 * NCSI_STATE_TEST_END is one past the last step. After TEST_PARAMS, each step
 * FOO is immediately followed by a FOO_RESPONSE step.
 */
typedef enum {
  // First
  NCSI_STATE_TEST_BEGIN,
  // Actual sequence
  NCSI_STATE_TEST_PARAMS = NCSI_STATE_TEST_BEGIN,
  NCSI_STATE_ECHO,
  NCSI_STATE_ECHO_RESPONSE,
  NCSI_STATE_CHECK_FILTERS,
  NCSI_STATE_CHECK_FILTERS_RESPONSE,
  NCSI_STATE_GET_PT_STATS,
  NCSI_STATE_GET_PT_STATS_RESPONSE,
  NCSI_STATE_GET_LINK_STATUS,
  NCSI_STATE_GET_LINK_STATUS_RESPONSE,
  // Last
  NCSI_STATE_TEST_END
} ncsi_test_state_t;
99
100typedef struct {
101 ncsi_l2_config_state_t l2_config_state;
102 ncsi_l3l4_config_state_t l3l4_config_state;
103 ncsi_test_state_t test_state;
104 // Last (OEM) command that was sent. (L3L4 SM only)
105 // Valid only if l3l4_waiting_response is true.
106 uint8_t l3l4_command;
107 // Number of the channel we are currently operating on. (L3L4 SM only)
108 uint8_t l3l4_channel;
109 // If true, means the request was sent and we are waiting for response.
110 bool l3l4_waiting_response;
111 uint8_t channel_count;
112 // The re-start and re-test delays ensures that we can flush the DMA
113 // buffers of potential out-of-sequence NC-SI packets (e.g. from
114 // packet that may have been received shortly after we timed out on
115 // them). The re-test delays also reduce the effect of NC-SI
116 // testing on more useful traffic.
117 uint8_t restart_delay_count;
118 uint8_t retest_delay_count;
119 struct {
120 uint8_t flags;
121 uint8_t regid[8];
122 } flowsteering[2];
123} ncsi_state_t;
124
125// Debug variables.
126// TODO - Change name to something more meaningful since the NC-SI test
127// is not a debug-only feature.
128typedef struct {
129 uint32_t task_count;
130 uint32_t host_ctrl_flags;
131 struct {
132 bool enabled;
133 bool pending_stop;
134 bool pending_restart;
135 bool oem_filter_disable;
136 bool loopback;
137 bool mlx_legacy;
138 uint32_t fail_count;
139 ncsi_state_t state_that_failed;
140 uint32_t tx_count;
141 uint32_t rx_count;
142 uint32_t tx_error_count;
143 struct {
144 uint32_t timeout_count;
145 uint32_t oversized_count;
146 uint32_t undersized_count;
147 uint32_t nack_count;
148 uint32_t unexpected_size_count;
149 uint32_t unexpected_type_count;
150 } rx_error;
151 struct {
152 uint32_t runs;
153 uint8_t ch_under_test;
154 uint8_t tries;
155 uint8_t max_tries; // 0 = skip test, 1 = restart on failure, > 1 = retry
156 struct {
157 uint8_t tx[NCSI_OEM_ECHO_PATTERN_SIZE];
158 uint32_t tx_count;
159 uint32_t rx_count;
160 uint32_t bad_rx_count;
161 uint8_t last_bad_rx[NCSI_OEM_ECHO_PATTERN_SIZE];
162 } ping;
163 } test;
164 ncsi_passthrough_stats_t pt_stats_be[2]; // big-endian as received from NIC
165 } ncsi;
166} network_debug_t;
167
168typedef struct {
169 uint8_t data[ETH_BUFFER_SIZE];
170 uint32_t len; // Non-zero when there's a new NC-SI response.
171} ncsi_buf_t;
172
173
174#ifdef __cplusplus
175extern "C" {
176#endif
177
178ncsi_response_type_t ncsi_fsm_poll_l2_config(ncsi_state_t* ncsi_state,
179 network_debug_t* network_debug,
180 ncsi_buf_t* ncsi_buf,
181 mac_addr_t* mac);
182
183ncsi_response_type_t ncsi_fsm_poll_l3l4_config(ncsi_state_t* ncsi_state,
184 network_debug_t* network_debug,
185 ncsi_buf_t* ncsi_buf,
186 mac_addr_t* mac,
187 uint32_t ipv4_addr,
188 uint16_t rx_port);
189
190ncsi_response_type_t ncsi_fsm_poll_test(ncsi_state_t* ncsi_state,
191 network_debug_t* network_debug,
192 ncsi_buf_t* ncsi_buf, mac_addr_t* mac,
193 uint32_t ipv4_addr, uint16_t rx_port);
194
195/*
196 * Report a global state of the NC-SI connection as a function of the state
197 * of the 3 finite state machines.
198 * Note: Additionally for the case where the connection is down it reports
199 * whether a loopback is inferred.
200 */
201ncsi_connection_state_t ncsi_fsm_connection_state(
202 const ncsi_state_t* ncsi_state, const network_debug_t* network_debug);
203
204/*
205 * Returns true if we have executed an NC-SI Get OEM Filter command for all
206 * channels and the flags indicate that it is running in hostless mode.
207 * This means that we can DHCP/ARP if needed.
208 * Otherwise returns false.
209 *
210 * NOTE: We default to false, if we cannot complete the L2 config state
211 * machine or the test sequence.
212 */
213bool ncsi_fsm_is_nic_hostless(const ncsi_state_t* ncsi_state);
214
215#ifdef __cplusplus
216} /* extern "C" */
217#endif
218
219#endif // PLATFORMS_NEMORA_PORTABLE_NCSI_FSM_H_