blob: 215042ced32ab41dc2b0f960d800f889d7afa494 [file] [log] [blame]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright 2019 IBM Corporation
3
Matt Spinlerf60ac272019-12-11 13:47:50 -06004#include "host_notifier.hpp"
5
Matt Spinler1b418862023-06-29 12:37:41 -05006#include <phosphor-logging/lg2.hpp>
Matt Spinlerf60ac272019-12-11 13:47:50 -06007
8namespace openpower::pels
9{
10
// Name this class registers under for every callback subscription it makes.
constexpr auto subscriptionName = "PELHostNotifier";

// Once the retry count reaches this value, doNewLogNotify() stops trying
// to send until a new log arrives.
constexpr size_t maxRetryAttempts = 15;
Matt Spinlerf60ac272019-12-11 13:47:50 -060013
Matt Spinlerf60ac272019-12-11 13:47:50 -060014HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface,
15 std::unique_ptr<HostInterface> hostIface) :
Patrick Williams075c7922024-08-16 15:19:49 -040016 _repo(repo), _dataIface(dataIface), _hostIface(std::move(hostIface)),
Matt Spinlerf869fcf2019-12-11 15:02:20 -060017 _retryTimer(_hostIface->getEvent(),
Matt Spinler41293cb2019-12-12 13:11:09 -060018 std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)),
19 _hostFullTimer(
20 _hostIface->getEvent(),
Matt Spinlere5f75082022-01-24 16:09:51 -060021 std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this)),
Patrick Williams075c7922024-08-16 15:19:49 -040022 _hostUpTimer(_hostIface->getEvent(),
23 std::bind(std::mem_fn(&HostNotifier::hostUpTimerExpired),
24 this))
Matt Spinlerf60ac272019-12-11 13:47:50 -060025{
26 // Subscribe to be told about new PELs.
27 _repo.subscribeToAdds(subscriptionName,
28 std::bind(std::mem_fn(&HostNotifier::newLogCallback),
29 this, std::placeholders::_1));
30
Matt Spinler7cb985f2020-03-05 16:02:39 -060031 // Subscribe to be told about deleted PELs.
32 _repo.subscribeToDeletes(
33 subscriptionName,
34 std::bind(std::mem_fn(&HostNotifier::deleteLogCallback), this,
35 std::placeholders::_1));
36
Matt Spinlerf60ac272019-12-11 13:47:50 -060037 // Add any existing PELs to the queue to send them if necessary.
38 _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this,
39 std::placeholders::_1));
40
41 // Subscribe to be told about host state changes.
42 _dataIface.subscribeToHostStateChange(
Matt Spinler4f1bed72022-06-09 09:06:15 -050043 subscriptionName, std::bind(std::mem_fn(&HostNotifier::hostStateChange),
44 this, std::placeholders::_1));
Matt Spinlerf60ac272019-12-11 13:47:50 -060045
46 // Set the function to call when the async reponse is received.
47 _hostIface->setResponseFunction(
48 std::bind(std::mem_fn(&HostNotifier::commandResponse), this,
49 std::placeholders::_1));
50
51 // Start sending logs if the host is running
52 if (!_pelQueue.empty() && _dataIface.isHostUp())
53 {
Matt Spinler1b418862023-06-29 12:37:41 -050054 lg2::debug("Host is already up at startup");
Matt Spinlere5f75082022-01-24 16:09:51 -060055 _hostUpTimer.restartOnce(_hostIface->getHostUpDelay());
Matt Spinlerf60ac272019-12-11 13:47:50 -060056 }
57}
58
59HostNotifier::~HostNotifier()
60{
61 _repo.unsubscribeFromAdds(subscriptionName);
62 _dataIface.unsubscribeFromHostStateChange(subscriptionName);
63}
64
Matt Spinlere5f75082022-01-24 16:09:51 -060065void HostNotifier::hostUpTimerExpired()
66{
Matt Spinler1b418862023-06-29 12:37:41 -050067 lg2::debug("Host up timer expired");
Matt Spinlere5f75082022-01-24 16:09:51 -060068 doNewLogNotify();
69}
70
Matt Spinlerf60ac272019-12-11 13:47:50 -060071bool HostNotifier::addPELToQueue(const PEL& pel)
72{
73 if (enqueueRequired(pel.id()))
74 {
75 _pelQueue.push_back(pel.id());
76 }
77
78 // Return false so that Repo::for_each keeps going.
79 return false;
80}
81
82bool HostNotifier::enqueueRequired(uint32_t id) const
83{
84 bool required = true;
Matt Spinlera943b152019-12-11 14:44:50 -060085 Repository::LogID i{Repository::LogID::Pel{id}};
86
Matt Spinler24a85582020-01-27 16:40:21 -060087 // Manufacturing testing may turn off sending up PELs
88 if (!_dataIface.getHostPELEnablement())
89 {
90 return false;
91 }
92
Matt Spinlera943b152019-12-11 14:44:50 -060093 if (auto attributes = _repo.getPELAttributes(i); attributes)
94 {
95 auto a = attributes.value().get();
96
97 if ((a.hostState == TransmissionState::acked) ||
98 (a.hostState == TransmissionState::badPEL))
99 {
100 required = false;
101 }
102 else if (a.actionFlags.test(hiddenFlagBit) &&
103 (a.hmcState == TransmissionState::acked))
104 {
105 required = false;
106 }
107 else if (a.actionFlags.test(dontReportToHostFlagBit))
108 {
109 required = false;
110 }
111 }
112 else
113 {
114 using namespace phosphor::logging;
Matt Spinler1b418862023-06-29 12:37:41 -0500115 lg2::error("Host Enqueue: Unable to find PEL ID {ID} in repository",
116 "ID", lg2::hex, id);
Matt Spinlera943b152019-12-11 14:44:50 -0600117 required = false;
118 }
Matt Spinlerf60ac272019-12-11 13:47:50 -0600119
120 return required;
121}
122
Matt Spinlerf77debb2019-12-12 10:04:33 -0600123bool HostNotifier::notifyRequired(uint32_t id) const
124{
125 bool notify = true;
126 Repository::LogID i{Repository::LogID::Pel{id}};
127
128 if (auto attributes = _repo.getPELAttributes(i); attributes)
129 {
130 // If already acked by the host, don't send again.
131 // (A safety check as it shouldn't get to this point.)
132 auto a = attributes.value().get();
133 if (a.hostState == TransmissionState::acked)
134 {
135 notify = false;
136 }
137 else if (a.actionFlags.test(hiddenFlagBit))
138 {
139 // If hidden and acked (or will be) acked by the HMC,
140 // also don't send it. (HMC management can come and
141 // go at any time)
142 if ((a.hmcState == TransmissionState::acked) ||
143 _dataIface.isHMCManaged())
144 {
145 notify = false;
146 }
147 }
148 }
149 else
150 {
151 // Must have been deleted since put on the queue.
152 notify = false;
153 }
154
155 return notify;
156}
157
Matt Spinlerf60ac272019-12-11 13:47:50 -0600158void HostNotifier::newLogCallback(const PEL& pel)
159{
160 if (!enqueueRequired(pel.id()))
161 {
162 return;
163 }
164
Matt Spinler1b418862023-06-29 12:37:41 -0500165 lg2::debug("New PEL added to queue, PEL ID = {ID}", "ID", lg2::hex,
166 pel.id());
Matt Spinler5f5352e2020-03-05 16:23:27 -0600167
Matt Spinlerf60ac272019-12-11 13:47:50 -0600168 _pelQueue.push_back(pel.id());
169
Matt Spinlere5f75082022-01-24 16:09:51 -0600170 // Notify shouldn't happen if host is down, not up long enough, or full
171 if (!_dataIface.isHostUp() || _hostFull || _hostUpTimer.isEnabled())
Matt Spinler7d800a42019-12-12 10:35:01 -0600172 {
173 return;
174 }
175
176 // Dispatch a command now if there isn't currently a command
177 // in progress and this is the first log in the queue or it
178 // previously gave up from a hard failure.
179 auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() ||
180 _retryTimer.isEnabled();
181
182 auto firstPEL = _pelQueue.size() == 1;
183 auto gaveUp = _retryCount >= maxRetryAttempts;
184
185 if (!inProgress && (firstPEL || gaveUp))
186 {
187 _retryCount = 0;
188
189 // Send a log, but from the event loop, not from here.
190 scheduleDispatch();
191 }
192}
193
Matt Spinler7cb985f2020-03-05 16:02:39 -0600194void HostNotifier::deleteLogCallback(uint32_t id)
195{
196 auto queueIt = std::find(_pelQueue.begin(), _pelQueue.end(), id);
197 if (queueIt != _pelQueue.end())
198 {
Matt Spinler1b418862023-06-29 12:37:41 -0500199 lg2::debug("Host notifier removing deleted log from queue");
Matt Spinler7cb985f2020-03-05 16:02:39 -0600200 _pelQueue.erase(queueIt);
201 }
202
203 auto sentIt = std::find(_sentPELs.begin(), _sentPELs.end(), id);
204 if (sentIt != _sentPELs.end())
205 {
Matt Spinler1b418862023-06-29 12:37:41 -0500206 lg2::debug("Host notifier removing deleted log from sent list");
Matt Spinler7cb985f2020-03-05 16:02:39 -0600207 _sentPELs.erase(sentIt);
208 }
209
210 // Nothing we can do about this...
211 if (id == _inProgressPEL)
212 {
Matt Spinler1b418862023-06-29 12:37:41 -0500213 lg2::warning(
214 "A PEL was deleted while its host notification was in progress, PEL ID = {ID}",
215 "ID", lg2::hex, id);
Matt Spinler7cb985f2020-03-05 16:02:39 -0600216 }
217}
218
Matt Spinler7d800a42019-12-12 10:35:01 -0600219void HostNotifier::scheduleDispatch()
220{
221 _dispatcher = std::make_unique<sdeventplus::source::Defer>(
222 _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch),
223 this, std::placeholders::_1));
224}
225
Patrick Williamsd26fa3e2021-04-21 15:22:23 -0500226void HostNotifier::dispatch(sdeventplus::source::EventBase& /*source*/)
Matt Spinler7d800a42019-12-12 10:35:01 -0600227{
228 _dispatcher.reset();
229
230 doNewLogNotify();
Matt Spinlerf60ac272019-12-11 13:47:50 -0600231}
232
233void HostNotifier::doNewLogNotify()
234{
Matt Spinler41293cb2019-12-12 13:11:09 -0600235 if (!_dataIface.isHostUp() || _retryTimer.isEnabled() ||
236 _hostFullTimer.isEnabled())
Matt Spinlerf77debb2019-12-12 10:04:33 -0600237 {
238 return;
239 }
240
241 if (_retryCount >= maxRetryAttempts)
242 {
243 // Give up until a new log comes in.
244 if (_retryCount == maxRetryAttempts)
245 {
246 // If this were to really happen, the PLDM interface
247 // would be down and isolating that shouldn't left to
248 // a logging daemon, so just trace. Also, this will start
249 // trying again when the next new log comes in.
Matt Spinler1b418862023-06-29 12:37:41 -0500250 lg2::error(
251 "PEL Host notifier hit max retry attempts. Giving up for now. PEL ID = {ID}",
252 "ID", lg2::hex, _pelQueue.front());
Matt Spinler829b0522020-03-04 08:38:46 -0600253
254 // Tell the host interface object to clean itself up, especially to
255 // release the PLDM instance ID it's been using.
256 _hostIface->cancelCmd();
Matt Spinlerf77debb2019-12-12 10:04:33 -0600257 }
258 return;
259 }
260
261 bool doNotify = false;
262 uint32_t id = 0;
263
264 // Find the PEL to send
265 while (!doNotify && !_pelQueue.empty())
266 {
267 id = _pelQueue.front();
268 _pelQueue.pop_front();
269
270 if (notifyRequired(id))
271 {
272 doNotify = true;
273 }
274 }
275
276 if (doNotify)
277 {
278 // Get the size using the repo attributes
279 Repository::LogID i{Repository::LogID::Pel{id}};
280 if (auto attributes = _repo.getPELAttributes(i); attributes)
281 {
282 auto size = static_cast<size_t>(
283 std::filesystem::file_size((*attributes).get().path));
Matt Spinler5f5352e2020-03-05 16:23:27 -0600284
Matt Spinler1b418862023-06-29 12:37:41 -0500285 lg2::debug("sendNewLogCmd: ID {ID} size {SIZE}", "ID", lg2::hex, id,
286 "SIZE", size);
Matt Spinler5f5352e2020-03-05 16:23:27 -0600287
Matt Spinlerf77debb2019-12-12 10:04:33 -0600288 auto rc = _hostIface->sendNewLogCmd(id, size);
289
290 if (rc == CmdStatus::success)
291 {
292 _inProgressPEL = id;
293 }
294 else
295 {
296 // It failed. Retry
Matt Spinler1b418862023-06-29 12:37:41 -0500297 lg2::error("PLDM send failed, PEL ID = {ID}", "ID", lg2::hex,
298 id);
Matt Spinlerf77debb2019-12-12 10:04:33 -0600299 _pelQueue.push_front(id);
300 _inProgressPEL = 0;
301 _retryTimer.restartOnce(_hostIface->getSendRetryDelay());
302 }
303 }
304 else
305 {
Matt Spinler1b418862023-06-29 12:37:41 -0500306 lg2::error(
307 "PEL ID is not in repository. Cannot notify host. PEL ID = {ID}",
308 "ID", lg2::hex, id);
Matt Spinlerf77debb2019-12-12 10:04:33 -0600309 }
310 }
Matt Spinlerf60ac272019-12-11 13:47:50 -0600311}
312
313void HostNotifier::hostStateChange(bool hostUp)
314{
Matt Spinler3019c6f2019-12-11 15:24:45 -0600315 _retryCount = 0;
Matt Spinler41293cb2019-12-12 13:11:09 -0600316 _hostFull = false;
Matt Spinler3019c6f2019-12-11 15:24:45 -0600317
318 if (hostUp && !_pelQueue.empty())
319 {
Matt Spinler1b418862023-06-29 12:37:41 -0500320 lg2::debug("Host state change to on");
Matt Spinlere5f75082022-01-24 16:09:51 -0600321 _hostUpTimer.restartOnce(_hostIface->getHostUpDelay());
Matt Spinler3019c6f2019-12-11 15:24:45 -0600322 }
323 else if (!hostUp)
324 {
Matt Spinler1b418862023-06-29 12:37:41 -0500325 lg2::debug("Host state change to off");
Matt Spinler5f5352e2020-03-05 16:23:27 -0600326
Matt Spinler3019c6f2019-12-11 15:24:45 -0600327 stopCommand();
328
329 // Reset the state on any PELs that were sent but not acked back
330 // to new so they'll get sent again.
331 for (auto id : _sentPELs)
332 {
333 _pelQueue.push_back(id);
334 _repo.setPELHostTransState(id, TransmissionState::newPEL);
335 }
336
337 _sentPELs.clear();
Matt Spinler41293cb2019-12-12 13:11:09 -0600338
339 if (_hostFullTimer.isEnabled())
340 {
341 _hostFullTimer.setEnabled(false);
342 }
Matt Spinlere5f75082022-01-24 16:09:51 -0600343
344 if (_hostUpTimer.isEnabled())
345 {
346 _hostUpTimer.setEnabled(false);
347 }
Matt Spinler3019c6f2019-12-11 15:24:45 -0600348 }
Matt Spinlerf60ac272019-12-11 13:47:50 -0600349}
350
351void HostNotifier::commandResponse(ResponseStatus status)
352{
Matt Spinlerf869fcf2019-12-11 15:02:20 -0600353 auto id = _inProgressPEL;
354 _inProgressPEL = 0;
355
356 if (status == ResponseStatus::success)
357 {
Matt Spinler1b418862023-06-29 12:37:41 -0500358 lg2::debug("HostNotifier command response success, PEL ID = {ID}", "ID",
359 lg2::hex, id);
Matt Spinlerf869fcf2019-12-11 15:02:20 -0600360 _retryCount = 0;
361
362 _sentPELs.push_back(id);
363
364 _repo.setPELHostTransState(id, TransmissionState::sent);
365
Matt Spinler41293cb2019-12-12 13:11:09 -0600366 // If the host is full, don't send off the next PEL
367 if (!_hostFull && !_pelQueue.empty())
Matt Spinlerf869fcf2019-12-11 15:02:20 -0600368 {
369 doNewLogNotify();
370 }
371 }
372 else
373 {
Matt Spinler1b418862023-06-29 12:37:41 -0500374 lg2::error("PLDM command response failure, PEL ID = {ID}", "ID",
375 lg2::hex, id);
Matt Spinlerf869fcf2019-12-11 15:02:20 -0600376 // Retry
377 _pelQueue.push_front(id);
378 _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay());
379 }
380}
381
382void HostNotifier::retryTimerExpired()
383{
384 if (_dataIface.isHostUp())
385 {
Matt Spinler1b418862023-06-29 12:37:41 -0500386 lg2::info("Attempting command retry, PEL ID = {ID}", "ID", lg2::hex,
387 _pelQueue.front());
Matt Spinlerf869fcf2019-12-11 15:02:20 -0600388 _retryCount++;
389 doNewLogNotify();
390 }
Matt Spinlerf60ac272019-12-11 13:47:50 -0600391}
392
Matt Spinler41293cb2019-12-12 13:11:09 -0600393void HostNotifier::hostFullTimerExpired()
394{
Matt Spinler1b418862023-06-29 12:37:41 -0500395 lg2::debug("Host full timer expired, trying send again");
Matt Spinler41293cb2019-12-12 13:11:09 -0600396 doNewLogNotify();
397}
398
Matt Spinler3019c6f2019-12-11 15:24:45 -0600399void HostNotifier::stopCommand()
400{
401 _retryCount = 0;
402
403 if (_inProgressPEL != 0)
404 {
405 _pelQueue.push_front(_inProgressPEL);
406 _inProgressPEL = 0;
407 }
408
409 if (_retryTimer.isEnabled())
410 {
411 _retryTimer.setEnabled(false);
412 }
413
Matt Spinler829b0522020-03-04 08:38:46 -0600414 // Ensure the PLDM instance ID is released
415 _hostIface->cancelCmd();
Matt Spinler3019c6f2019-12-11 15:24:45 -0600416}
417
Matt Spinlercc3b64a2019-12-12 11:27:10 -0600418void HostNotifier::ackPEL(uint32_t id)
419{
420 _repo.setPELHostTransState(id, TransmissionState::acked);
421
422 // No longer just 'sent', so remove it from the sent list.
423 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
424 if (sent != _sentPELs.end())
425 {
426 _sentPELs.erase(sent);
427 }
Matt Spinler41293cb2019-12-12 13:11:09 -0600428
429 // An ack means the host is no longer full
430 if (_hostFullTimer.isEnabled())
431 {
432 _hostFullTimer.setEnabled(false);
433 }
434
435 if (_hostFull)
436 {
437 _hostFull = false;
438
Matt Spinler1b418862023-06-29 12:37:41 -0500439 lg2::debug("Host previously full, not anymore after this ack");
Matt Spinler5f5352e2020-03-05 16:23:27 -0600440
Matt Spinler41293cb2019-12-12 13:11:09 -0600441 // Start sending PELs again, from the event loop
442 if (!_pelQueue.empty())
443 {
444 scheduleDispatch();
445 }
446 }
447}
448
449void HostNotifier::setHostFull(uint32_t id)
450{
Matt Spinler610e80f2023-09-12 09:45:01 -0500451 lg2::debug("Received Host full indication, PEL ID = {ID}", "ID", lg2::hex,
452 id);
Matt Spinler41293cb2019-12-12 13:11:09 -0600453
454 _hostFull = true;
455
456 // This PEL needs to get re-sent
457 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
458 if (sent != _sentPELs.end())
459 {
460 _sentPELs.erase(sent);
461 _repo.setPELHostTransState(id, TransmissionState::newPEL);
462
463 if (std::find(_pelQueue.begin(), _pelQueue.end(), id) ==
464 _pelQueue.end())
465 {
466 _pelQueue.push_front(id);
467 }
468 }
469
470 // The only PELs that will be sent when the
471 // host is full is from this timer callback.
472 if (!_hostFullTimer.isEnabled())
473 {
Matt Spinler1b418862023-06-29 12:37:41 -0500474 lg2::debug("Starting host full timer");
Matt Spinler41293cb2019-12-12 13:11:09 -0600475 _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay());
476 }
Matt Spinlercc3b64a2019-12-12 11:27:10 -0600477}
478
Matt Spinlera19b6232019-12-12 13:30:14 -0600479void HostNotifier::setBadPEL(uint32_t id)
480{
Matt Spinler1b418862023-06-29 12:37:41 -0500481 lg2::error("PEL rejected by the host, PEL ID = {ID}", "ID", lg2::hex, id);
Matt Spinlera19b6232019-12-12 13:30:14 -0600482
483 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
484 if (sent != _sentPELs.end())
485 {
486 _sentPELs.erase(sent);
487 }
488
489 _repo.setPELHostTransState(id, TransmissionState::badPEL);
490}
491
Matt Spinlerf60ac272019-12-11 13:47:50 -0600492} // namespace openpower::pels