/*
 * Copyright 2021 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef PLATFORMS_NEMORA_PORTABLE_NCSI_FSM_H_
#define PLATFORMS_NEMORA_PORTABLE_NCSI_FSM_H_

/* Nemora NC-SI (Finite) State Machine implementation */

#include <stdbool.h>
#include <stdint.h>

#include "platforms/nemora/portable/ncsi.h"
#include "platforms/nemora/portable/net_types.h"

/* TODO put this into config somewhere? */
/*
 * Lengths of the FSM restart and retest delays, expressed as a count.
 * NOTE(review): ncsi_state_t keeps restart_delay_count and
 * retest_delay_count in uint8_t fields; if these values are what gets loaded
 * into them, they must stay <= 255 -- confirm against the implementation.
 */
#define NCSI_FSM_RESTART_DELAY_COUNT 100
#define NCSI_FSM_RETEST_DELAY_COUNT 100

/* The network state is defined as a combination of the NC-SI connection state
 * and the network configuration. However, the two cannot be decoupled:
 * - we cannot DHCP unless the NC-SI connection is up
 * - we cannot do the OEM L3/L4 NC-SI configuration unless we have a valid
 *   network configuration
 *
 * For additional complexity, we cannot get DHCP/ARP responses after the host
 * has loaded the Mellanox NIC driver, but we want to be able to periodically
 * test the NC-SI connection regardless of whether we have a network
 * configuration (so that flaky cables can be troubleshot using the host
 * interface).
 *
 * For this reason there are actually 3 NC-SI finite state machines:
 * - L2 configuration (i.e. enabling all available NC-SI channels for
 *   passthrough RX and TX, although only TX will work after the host loads
 *   the NIC driver)
 * - L3/L4 configuration (i.e. configuring flow steering for RX traffic that
 *   matches our IP address and dedicated Nemora port so that we can receive
 *   Nemora requests even after the host loaded the NIC driver)
 * - Connection test (i.e. periodically doing a ping test between the EC and
 *   the NIC, and also ensuring that L3/L4 configuration parameters have not
 *   been wiped out)
 *
 * For good karma, try to keep the state machines as linear as possible (one
 * step after the other).
 */

/*
 * States of the L2 configuration state machine, declared in sequence order.
 *
 * NCSI_STATE_L2_CONFIG_BEGIN aliases the first state (NCSI_STATE_RESTART) and
 * NCSI_STATE_L2_CONFIG_END is one past the last state, so the states occupy
 * the half-open range [BEGIN, END).
 *
 * NOTE(review): going by the names, each *_RESPONSE state waits for the reply
 * to the command issued by the state declared right before it -- confirm
 * against the FSM implementation.
 */
typedef enum {
  /* First */
  NCSI_STATE_L2_CONFIG_BEGIN,
  /* Actual sequence */
  NCSI_STATE_RESTART = NCSI_STATE_L2_CONFIG_BEGIN,
  NCSI_STATE_CLEAR_0,
  NCSI_STATE_CLEAR_0_RESPONSE,
  NCSI_STATE_GET_VERSION,
  NCSI_STATE_GET_VERSION_RESPONSE,
  NCSI_STATE_GET_CAPABILITIES,
  NCSI_STATE_GET_CAPABILITIES_RESPONSE,
  NCSI_STATE_CLEAR_1,
  NCSI_STATE_CLEAR_1_RESPONSE,
  NCSI_STATE_RESET_CHANNEL_0,
  NCSI_STATE_RESET_CHANNEL_0_RESPONSE,
  NCSI_STATE_RESET_CHANNEL_1,
  NCSI_STATE_RESET_CHANNEL_1_RESPONSE,
  NCSI_STATE_STOPPED,
  NCSI_STATE_GET_MAC,
  NCSI_STATE_GET_MAC_RESPONSE,
  NCSI_STATE_SET_MAC_FILTER_0,
  NCSI_STATE_SET_MAC_FILTER_0_RESPONSE,
  NCSI_STATE_SET_MAC_FILTER_1,
  NCSI_STATE_SET_MAC_FILTER_1_RESPONSE,
  NCSI_STATE_ENABLE_CHANNEL_0,
  NCSI_STATE_ENABLE_CHANNEL_0_RESPONSE,
  NCSI_STATE_ENABLE_CHANNEL_1,
  NCSI_STATE_ENABLE_CHANNEL_1_RESPONSE,
  NCSI_STATE_ENABLE_TX,
  NCSI_STATE_ENABLE_TX_RESPONSE,
  /* Last */
  NCSI_STATE_L2_CONFIG_END
} ncsi_l2_config_state_t;

/*
 * States of the L3/L4 configuration state machine.
 *
 * NOTE(review): unlike the L2 and test enums, the first "actual" state here
 * (NCSI_STATE_CONFIG_FILTERS, value 1) is NOT aliased to the BEGIN marker
 * (value 0), so BEGIN is a distinct value of its own. Values are kept as-is
 * because the implementation may depend on them -- confirm it is intentional.
 */
typedef enum {
  /* First */
  NCSI_STATE_L3L4_CONFIG_BEGIN,
  /* Actual sequence */
  NCSI_STATE_CONFIG_FILTERS,
  /* Last */
  NCSI_STATE_L3L4_CONFIG_END
} ncsi_l3l4_config_state_t;

/*
 * States of the connection-test state machine, declared in sequence order.
 *
 * NCSI_STATE_TEST_BEGIN aliases the first state (NCSI_STATE_TEST_PARAMS) and
 * NCSI_STATE_TEST_END is one past the last state, so the states occupy the
 * half-open range [BEGIN, END).
 */
typedef enum {
  /* First */
  NCSI_STATE_TEST_BEGIN,
  /* Actual sequence */
  NCSI_STATE_TEST_PARAMS = NCSI_STATE_TEST_BEGIN,
  NCSI_STATE_ECHO,
  NCSI_STATE_ECHO_RESPONSE,
  NCSI_STATE_CHECK_FILTERS,
  NCSI_STATE_CHECK_FILTERS_RESPONSE,
  NCSI_STATE_GET_PT_STATS,
  NCSI_STATE_GET_PT_STATS_RESPONSE,
  NCSI_STATE_GET_LINK_STATUS,
  NCSI_STATE_GET_LINK_STATUS_RESPONSE,
  /* Last */
  NCSI_STATE_TEST_END
} ncsi_test_state_t;

116typedef struct {
117 ncsi_l2_config_state_t l2_config_state;
118 ncsi_l3l4_config_state_t l3l4_config_state;
119 ncsi_test_state_t test_state;
120 // Last (OEM) command that was sent. (L3L4 SM only)
121 // Valid only if l3l4_waiting_response is true.
122 uint8_t l3l4_command;
123 // Number of the channel we are currently operating on. (L3L4 SM only)
124 uint8_t l3l4_channel;
125 // If true, means the request was sent and we are waiting for response.
126 bool l3l4_waiting_response;
127 uint8_t channel_count;
128 // The re-start and re-test delays ensures that we can flush the DMA
129 // buffers of potential out-of-sequence NC-SI packets (e.g. from
130 // packet that may have been received shortly after we timed out on
131 // them). The re-test delays also reduce the effect of NC-SI
132 // testing on more useful traffic.
133 uint8_t restart_delay_count;
134 uint8_t retest_delay_count;
135 struct {
136 uint8_t flags;
137 uint8_t regid[8];
138 } flowsteering[2];
139} ncsi_state_t;
140
141// Debug variables.
142// TODO - Change name to something more meaningful since the NC-SI test
143// is not a debug-only feature.
144typedef struct {
145 uint32_t task_count;
146 uint32_t host_ctrl_flags;
147 struct {
148 bool enabled;
149 bool pending_stop;
150 bool pending_restart;
151 bool oem_filter_disable;
152 bool loopback;
153 bool mlx_legacy;
154 uint32_t fail_count;
155 ncsi_state_t state_that_failed;
156 uint32_t tx_count;
157 uint32_t rx_count;
158 uint32_t tx_error_count;
159 struct {
160 uint32_t timeout_count;
161 uint32_t oversized_count;
162 uint32_t undersized_count;
163 uint32_t nack_count;
164 uint32_t unexpected_size_count;
165 uint32_t unexpected_type_count;
166 } rx_error;
167 struct {
168 uint32_t runs;
169 uint8_t ch_under_test;
170 uint8_t tries;
171 uint8_t max_tries; // 0 = skip test, 1 = restart on failure, > 1 = retry
172 struct {
173 uint8_t tx[NCSI_OEM_ECHO_PATTERN_SIZE];
174 uint32_t tx_count;
175 uint32_t rx_count;
176 uint32_t bad_rx_count;
177 uint8_t last_bad_rx[NCSI_OEM_ECHO_PATTERN_SIZE];
178 } ping;
179 } test;
180 ncsi_passthrough_stats_t pt_stats_be[2]; // big-endian as received from NIC
181 } ncsi;
182} network_debug_t;
183
184typedef struct {
185 uint8_t data[ETH_BUFFER_SIZE];
186 uint32_t len; // Non-zero when there's a new NC-SI response.
187} ncsi_buf_t;
188
189
#ifdef __cplusplus
extern "C" {
#endif

194ncsi_response_type_t ncsi_fsm_poll_l2_config(ncsi_state_t* ncsi_state,
195 network_debug_t* network_debug,
196 ncsi_buf_t* ncsi_buf,
197 mac_addr_t* mac);
198
199ncsi_response_type_t ncsi_fsm_poll_l3l4_config(ncsi_state_t* ncsi_state,
200 network_debug_t* network_debug,
201 ncsi_buf_t* ncsi_buf,
202 mac_addr_t* mac,
203 uint32_t ipv4_addr,
204 uint16_t rx_port);
205
206ncsi_response_type_t ncsi_fsm_poll_test(ncsi_state_t* ncsi_state,
207 network_debug_t* network_debug,
208 ncsi_buf_t* ncsi_buf, mac_addr_t* mac,
209 uint32_t ipv4_addr, uint16_t rx_port);
210
211/*
212 * Report a global state of the NC-SI connection as a function of the state
213 * of the 3 finite state machines.
214 * Note: Additionally for the case where the connection is down it reports
215 * whether a loopback is inferred.
216 */
217ncsi_connection_state_t ncsi_fsm_connection_state(
218 const ncsi_state_t* ncsi_state, const network_debug_t* network_debug);
219
220/*
221 * Returns true if we have executed an NC-SI Get OEM Filter command for all
222 * channels and the flags indicate that it is running in hostless mode.
223 * This means that we can DHCP/ARP if needed.
224 * Otherwise returns false.
225 *
226 * NOTE: We default to false, if we cannot complete the L2 config state
227 * machine or the test sequence.
228 */
229bool ncsi_fsm_is_nic_hostless(const ncsi_state_t* ncsi_state);
230
#ifdef __cplusplus
} /* extern "C" */
#endif

#endif  // PLATFORMS_NEMORA_PORTABLE_NCSI_FSM_H_