/**
 * Copyright © 2019 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "host_notifier.hpp"

#include <phosphor-logging/log.hpp>

#include <algorithm>
#include <filesystem>

namespace openpower::pels
{

const auto subscriptionName = "PELHostNotifier";
const size_t maxRetryAttempts = 15;

using namespace phosphor::logging;
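// The HostNotifier mirrors PELs to the host: it watches the repository for
// new and deleted PELs, queues the ones the host still needs to see, and
// sends them over the PLDM-based HostInterface, retrying on failure.  All
// of that is wired up here via the repository and data interface
// subscriptions and the three timers.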
HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface,
                           std::unique_ptr<HostInterface> hostIface) :
    _repo(repo),
    _dataIface(dataIface), _hostIface(std::move(hostIface)),
    _retryTimer(_hostIface->getEvent(),
                std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)),
    _hostFullTimer(
        _hostIface->getEvent(),
        std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this)),
    _hostUpTimer(
        _hostIface->getEvent(),
        std::bind(std::mem_fn(&HostNotifier::hostUpTimerExpired), this))
{
    // Subscribe to be told about new PELs.
    _repo.subscribeToAdds(subscriptionName,
                          std::bind(std::mem_fn(&HostNotifier::newLogCallback),
                                    this, std::placeholders::_1));

    // Subscribe to be told about deleted PELs.
    _repo.subscribeToDeletes(
        subscriptionName,
        std::bind(std::mem_fn(&HostNotifier::deleteLogCallback), this,
                  std::placeholders::_1));

    // Add any existing PELs to the queue to send them if necessary.
    _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this,
                             std::placeholders::_1));

    // Subscribe to be told about host state changes.
    _dataIface.subscribeToHostStateChange(
        subscriptionName,
        std::bind(std::mem_fn(&HostNotifier::hostStateChange), this,
                  std::placeholders::_1));

    // Set the function to call when the async response is received.
    _hostIface->setResponseFunction(
        std::bind(std::mem_fn(&HostNotifier::commandResponse), this,
                  std::placeholders::_1));

    // Start sending logs if the host is running.
    if (!_pelQueue.empty() && _dataIface.isHostUp())
    {
        log<level::DEBUG>("Host is already up at startup");
        _hostUpTimer.restartOnce(_hostIface->getHostUpDelay());
    }
}

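// A minimal construction sketch (illustrative only; the event loop setup and
// the concrete HostInterface subclass -- here called MyHostInterface -- are
// assumptions, not part of this file):
//
//   auto event = sdeventplus::Event::get_default();
//   Repository repo{pelDir};
//   MyDataInterface dataIface;  // some DataInterfaceBase implementation
//   HostNotifier notifier{
//       repo, dataIface, std::make_unique<MyHostInterface>(event, dataIface)};
//   event.loop();
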
HostNotifier::~HostNotifier()
{
    _repo.unsubscribeFromAdds(subscriptionName);
    _dataIface.unsubscribeFromHostStateChange(subscriptionName);
}

void HostNotifier::hostUpTimerExpired()
{
    log<level::DEBUG>("Host up timer expired");
    doNewLogNotify();
}

bool HostNotifier::addPELToQueue(const PEL& pel)
{
    if (enqueueRequired(pel.id()))
    {
        _pelQueue.push_back(pel.id());
    }

    // Return false so that Repo::for_each keeps going.
    return false;
}

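// A PEL is queued for the host unless one of the following filters it out:
//   * host PEL reporting is disabled (manufacturing test),
//   * the host already acked it, or rejected it as malformed,
//   * it is hidden and the HMC already acked it,
//   * its action flags say not to report it to the host.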
bool HostNotifier::enqueueRequired(uint32_t id) const
{
    bool required = true;
    Repository::LogID i{Repository::LogID::Pel{id}};

    // Manufacturing testing may turn off sending PELs to the host.
    if (!_dataIface.getHostPELEnablement())
    {
        return false;
    }

    if (auto attributes = _repo.getPELAttributes(i); attributes)
    {
        auto a = attributes.value().get();

        if ((a.hostState == TransmissionState::acked) ||
            (a.hostState == TransmissionState::badPEL))
        {
            required = false;
        }
        else if (a.actionFlags.test(hiddenFlagBit) &&
                 (a.hmcState == TransmissionState::acked))
        {
            required = false;
        }
        else if (a.actionFlags.test(dontReportToHostFlagBit))
        {
            required = false;
        }
    }
    else
    {
        log<level::ERR>("Host Enqueue: Unable to find PEL ID in repository",
                        entry("PEL_ID=0x%X", id));
        required = false;
    }

    return required;
}

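// enqueueRequired() is checked once, when a PEL is added; notifyRequired()
// is checked again at send time, because the conditions it looks at, such
// as HMC management, can change while a PEL sits in the queue.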
bool HostNotifier::notifyRequired(uint32_t id) const
{
    bool notify = true;
    Repository::LogID i{Repository::LogID::Pel{id}};

    if (auto attributes = _repo.getPELAttributes(i); attributes)
    {
        // If already acked by the host, don't send again.
        // (A safety check, as it shouldn't get to this point.)
        auto a = attributes.value().get();
        if (a.hostState == TransmissionState::acked)
        {
            notify = false;
        }
        else if (a.actionFlags.test(hiddenFlagBit))
        {
            // If hidden and acked (or about to be acked) by the HMC,
            // also don't send it.  (HMC management can come and
            // go at any time.)
            if ((a.hmcState == TransmissionState::acked) ||
                _dataIface.isHMCManaged())
            {
                notify = false;
            }
        }
    }
    else
    {
        // Must have been deleted since it was put on the queue.
        notify = false;
    }

    return notify;
}

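// Repository callback for newly added PELs.  Queues the PEL and, when no
// command is already in flight and the host is ready, schedules a send
// from the event loop.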
void HostNotifier::newLogCallback(const PEL& pel)
{
    if (!enqueueRequired(pel.id()))
    {
        return;
    }

    log<level::DEBUG>("new PEL added to queue", entry("PEL_ID=0x%X", pel.id()));

    _pelQueue.push_back(pel.id());

    // Notify shouldn't happen if host is down, not up long enough, or full.
    if (!_dataIface.isHostUp() || _hostFull || _hostUpTimer.isEnabled())
    {
        return;
    }

    // Dispatch a command now if there isn't currently a command
    // in progress and this is the first log in the queue, or it
    // previously gave up from a hard failure.
    auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() ||
                      _retryTimer.isEnabled();

    auto firstPEL = _pelQueue.size() == 1;
    auto gaveUp = _retryCount >= maxRetryAttempts;

    if (!inProgress && (firstPEL || gaveUp))
    {
        _retryCount = 0;

        // Send a log, but from the event loop, not from here.
        scheduleDispatch();
    }
}

void HostNotifier::deleteLogCallback(uint32_t id)
{
    auto queueIt = std::find(_pelQueue.begin(), _pelQueue.end(), id);
    if (queueIt != _pelQueue.end())
    {
        log<level::DEBUG>("Host notifier removing deleted log from queue");
        _pelQueue.erase(queueIt);
    }

    auto sentIt = std::find(_sentPELs.begin(), _sentPELs.end(), id);
    if (sentIt != _sentPELs.end())
    {
        log<level::DEBUG>("Host notifier removing deleted log from sent list");
        _sentPELs.erase(sentIt);
    }

    // Nothing we can do about this...
    if (id == _inProgressPEL)
    {
        log<level::WARNING>(
            "A PEL was deleted while its host notification was in progress",
            entry("PEL_ID=0x%X", id));
    }
}

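// Sends are deliberately decoupled from their callers: a
// sdeventplus::source::Defer runs dispatch() on the next event loop
// iteration, so a send never starts from inside a repository callback.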
void HostNotifier::scheduleDispatch()
{
    _dispatcher = std::make_unique<sdeventplus::source::Defer>(
        _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch),
                                          this, std::placeholders::_1));
}

void HostNotifier::dispatch(sdeventplus::source::EventBase& /*source*/)
{
    _dispatcher.reset();

    doNewLogNotify();
}

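// The main send path.  Guards first (host down, retry or host-full timer
// already armed, retry limit hit), then pops queue entries until one still
// needs to be sent, then issues the async new-log command with the PEL's
// ID and size.  Failures are requeued and retried off the retry timer.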
void HostNotifier::doNewLogNotify()
{
    if (!_dataIface.isHostUp() || _retryTimer.isEnabled() ||
        _hostFullTimer.isEnabled())
    {
        return;
    }

    if (_retryCount >= maxRetryAttempts)
    {
        // Give up until a new log comes in.
        if (_retryCount == maxRetryAttempts)
        {
            // If this were to really happen, the PLDM interface
            // would be down, and isolating that shouldn't be left to
            // a logging daemon, so just trace.  Also, this will start
            // trying again when the next new log comes in.
            log<level::ERR>(
                "PEL Host notifier hit max retry attempts. Giving up for now.",
                entry("PEL_ID=0x%X", _pelQueue.front()));

            // Tell the host interface object to clean itself up, especially to
            // release the PLDM instance ID it's been using.
            _hostIface->cancelCmd();
        }
        return;
    }

    bool doNotify = false;
    uint32_t id = 0;

    // Find the PEL to send.
    while (!doNotify && !_pelQueue.empty())
    {
        id = _pelQueue.front();
        _pelQueue.pop_front();

        if (notifyRequired(id))
        {
            doNotify = true;
        }
    }

    if (doNotify)
    {
        // Get the size using the repo attributes.
        Repository::LogID i{Repository::LogID::Pel{id}};
        if (auto attributes = _repo.getPELAttributes(i); attributes)
        {
            auto size = static_cast<size_t>(
                std::filesystem::file_size((*attributes).get().path));

            log<level::DEBUG>("sendNewLogCmd", entry("PEL_ID=0x%X", id),
                              entry("PEL_SIZE=%zu", size));

            auto rc = _hostIface->sendNewLogCmd(id, size);

            if (rc == CmdStatus::success)
            {
                _inProgressPEL = id;
            }
            else
            {
                // It failed.  Retry.
                log<level::ERR>("PLDM send failed", entry("PEL_ID=0x%X", id));
                _pelQueue.push_front(id);
                _inProgressPEL = 0;
                _retryTimer.restartOnce(_hostIface->getSendRetryDelay());
            }
        }
        else
        {
            log<level::ERR>("PEL ID not in repository. Cannot notify host",
                            entry("PEL_ID=0x%X", id));
        }
    }
}

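// On a power on, sending is delayed by getHostUpDelay(); on a power off,
// any in-flight command is abandoned and every sent-but-unacked PEL goes
// back on the queue so it will be sent again on the next boot.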
void HostNotifier::hostStateChange(bool hostUp)
{
    _retryCount = 0;
    _hostFull = false;

    if (hostUp && !_pelQueue.empty())
    {
        log<level::DEBUG>("Host state change to on");
        _hostUpTimer.restartOnce(_hostIface->getHostUpDelay());
    }
    else if (!hostUp)
    {
        log<level::DEBUG>("Host state change to off");

        stopCommand();

        // Reset the state on any PELs that were sent but not acked back
        // to new so they'll get sent again.
        for (auto id : _sentPELs)
        {
            _pelQueue.push_back(id);
            _repo.setPELHostTransState(id, TransmissionState::newPEL);
        }

        _sentPELs.clear();

        if (_hostFullTimer.isEnabled())
        {
            _hostFullTimer.setEnabled(false);
        }

        if (_hostUpTimer.isEnabled())
        {
            _hostUpTimer.setEnabled(false);
        }
    }
}

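// Handles the response to the async new-log command.  A successful response
// only means the host received the PEL; the PEL stays in the 'sent' state
// until the host acks it (ackPEL) or rejects it (setBadPEL).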
void HostNotifier::commandResponse(ResponseStatus status)
{
    auto id = _inProgressPEL;
    _inProgressPEL = 0;

    if (status == ResponseStatus::success)
    {
        log<level::DEBUG>("HostNotifier command response success",
                          entry("PEL_ID=0x%X", id));
        _retryCount = 0;

        _sentPELs.push_back(id);

        _repo.setPELHostTransState(id, TransmissionState::sent);

        // If the host is full, don't send off the next PEL.
        if (!_hostFull && !_pelQueue.empty())
        {
            doNewLogNotify();
        }
    }
    else
    {
        log<level::ERR>("PLDM command response failure",
                        entry("PEL_ID=0x%X", id));
        // Retry
        _pelQueue.push_front(id);
        _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay());
    }
}

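// Each expiration bumps _retryCount; doNewLogNotify() stops retrying once
// it reaches maxRetryAttempts and then waits for a new PEL to arrive.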
void HostNotifier::retryTimerExpired()
{
    if (_dataIface.isHostUp())
    {
        log<level::INFO>("Attempting command retry",
                         entry("PEL_ID=0x%X", _pelQueue.front()));
        _retryCount++;
        doNewLogNotify();
    }
}

void HostNotifier::hostFullTimerExpired()
{
    log<level::DEBUG>("Host full timer expired, trying send again");
    doNewLogNotify();
}

void HostNotifier::stopCommand()
{
    _retryCount = 0;

    if (_inProgressPEL != 0)
    {
        _pelQueue.push_front(_inProgressPEL);
        _inProgressPEL = 0;
    }

    if (_retryTimer.isEnabled())
    {
        _retryTimer.setEnabled(false);
    }

    // Ensure the PLDM instance ID is released.
    _hostIface->cancelCmd();
}

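// An ack from the host is the terminal success state for a PEL: it is
// removed from the sent list and never notified again.  It also proves the
// host can accept PELs, so any host-full condition is cleared and sending
// resumes.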
void HostNotifier::ackPEL(uint32_t id)
{
    _repo.setPELHostTransState(id, TransmissionState::acked);

    // No longer just 'sent', so remove it from the sent list.
    auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
    if (sent != _sentPELs.end())
    {
        _sentPELs.erase(sent);
    }

    // An ack means the host is no longer full.
    if (_hostFullTimer.isEnabled())
    {
        _hostFullTimer.setEnabled(false);
    }

    if (_hostFull)
    {
        _hostFull = false;

        log<level::DEBUG>("Host previously full, not anymore after this ack");

        // Start sending PELs again, from the event loop.
        if (!_pelQueue.empty())
        {
            scheduleDispatch();
        }
    }
}

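// The host reported it has no more room for PELs.  The PEL that hit the
// full condition goes back on the front of the queue, and the host-full
// timer retries the send until it gets through or an ack clears the
// condition.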
void HostNotifier::setHostFull(uint32_t id)
{
    log<level::INFO>("Received Host full indication", entry("PEL_ID=0x%X", id));

    _hostFull = true;

    // This PEL needs to get re-sent.
    auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
    if (sent != _sentPELs.end())
    {
        _sentPELs.erase(sent);
        _repo.setPELHostTransState(id, TransmissionState::newPEL);

        if (std::find(_pelQueue.begin(), _pelQueue.end(), id) ==
            _pelQueue.end())
        {
            _pelQueue.push_front(id);
        }
    }

    // While the host is full, sends are only attempted
    // from this timer's callback.
    if (!_hostFullTimer.isEnabled())
    {
        log<level::DEBUG>("Starting host full timer");
        _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay());
    }
}

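// The host rejected the PEL as malformed.  Mark it badPEL so that
// enqueueRequired() filters it out and it is never sent again.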
void HostNotifier::setBadPEL(uint32_t id)
{
    log<level::ERR>("PEL rejected by the host", entry("PEL_ID=0x%X", id));

    auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
    if (sent != _sentPELs.end())
    {
        _sentPELs.erase(sent);
    }

    _repo.setPELHostTransState(id, TransmissionState::badPEL);
}

} // namespace openpower::pels