/**
 * Copyright © 2019 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "host_notifier.hpp"

#include <phosphor-logging/log.hpp>

#include <algorithm>
#include <filesystem>
#include <functional>

namespace openpower::pels
{

const auto subscriptionName = "PELHostNotifier";
const size_t maxRetryAttempts = 15;

using namespace phosphor::logging;

HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface,
                           std::unique_ptr<HostInterface> hostIface) :
    _repo(repo),
    _dataIface(dataIface), _hostIface(std::move(hostIface)),
    _retryTimer(_hostIface->getEvent(),
                std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)),
    _hostFullTimer(
        _hostIface->getEvent(),
        std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this))
{
    // Subscribe to be told about new PELs.
    _repo.subscribeToAdds(subscriptionName,
                          std::bind(std::mem_fn(&HostNotifier::newLogCallback),
                                    this, std::placeholders::_1));

    // Add any existing PELs to the queue to send them if necessary.
    _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this,
                             std::placeholders::_1));

    // Subscribe to be told about host state changes.
    _dataIface.subscribeToHostStateChange(
        subscriptionName,
        std::bind(std::mem_fn(&HostNotifier::hostStateChange), this,
                  std::placeholders::_1));

    // Set the function to call when the async response is received.
    _hostIface->setResponseFunction(
        std::bind(std::mem_fn(&HostNotifier::commandResponse), this,
                  std::placeholders::_1));

    // Start sending logs if the host is running
    if (!_pelQueue.empty() && _dataIface.isHostUp())
    {
        doNewLogNotify();
    }
}

HostNotifier::~HostNotifier()
{
    _repo.unsubscribeFromAdds(subscriptionName);
    _dataIface.unsubscribeFromHostStateChange(subscriptionName);
}

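// Repository::for_each callback used in the constructor to seed the send
// queue with any PELs already in the repository that still need to go to
// the host.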
bool HostNotifier::addPELToQueue(const PEL& pel)
{
    if (enqueueRequired(pel.id()))
    {
        _pelQueue.push_back(pel.id());
    }

    // Return false so that Repo::for_each keeps going.
    return false;
}

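// A PEL only needs to be queued for the host if it hasn't already been
// acked or rejected by the host, isn't a hidden PEL already acked by the
// HMC, isn't flagged to be skipped for the host, and host PEL reporting
// hasn't been disabled for manufacturing testing.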
bool HostNotifier::enqueueRequired(uint32_t id) const
{
    bool required = true;
    Repository::LogID i{Repository::LogID::Pel{id}};

    // Manufacturing testing may turn off sending up PELs
    if (!_dataIface.getHostPELEnablement())
    {
        return false;
    }

    if (auto attributes = _repo.getPELAttributes(i); attributes)
    {
        auto a = attributes.value().get();

        if ((a.hostState == TransmissionState::acked) ||
            (a.hostState == TransmissionState::badPEL))
        {
            required = false;
        }
        else if (a.actionFlags.test(hiddenFlagBit) &&
                 (a.hmcState == TransmissionState::acked))
        {
            required = false;
        }
        else if (a.actionFlags.test(dontReportToHostFlagBit))
        {
            required = false;
        }
    }
    else
    {
        using namespace phosphor::logging;
        log<level::ERR>("Host Enqueue: Unable to find PEL ID in repository",
                        entry("PEL_ID=0x%X", id));
        required = false;
    }

    return required;
}

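// Called right before sending a PEL to check that it still needs to be
// sent, since its state may have changed while it sat in the queue.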
bool HostNotifier::notifyRequired(uint32_t id) const
{
    bool notify = true;
    Repository::LogID i{Repository::LogID::Pel{id}};

    if (auto attributes = _repo.getPELAttributes(i); attributes)
    {
        // If already acked by the host, don't send again.
        // (A safety check as it shouldn't get to this point.)
        auto a = attributes.value().get();
        if (a.hostState == TransmissionState::acked)
        {
            notify = false;
        }
        else if (a.actionFlags.test(hiddenFlagBit))
        {
            // If hidden and acked (or will be acked) by the HMC,
            // also don't send it.  (HMC management can come and
            // go at any time.)
            if ((a.hmcState == TransmissionState::acked) ||
                _dataIface.isHMCManaged())
            {
                notify = false;
            }
        }
    }
    else
    {
        // Must have been deleted since put on the queue.
        notify = false;
    }

    return notify;
}

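// Repository subscription callback - invoked whenever a new PEL is added
// to the repository so it can be queued and, when possible, sent to the
// host.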
void HostNotifier::newLogCallback(const PEL& pel)
{
    if (!enqueueRequired(pel.id()))
    {
        return;
    }

    _pelQueue.push_back(pel.id());

    // Notify shouldn't happen if host is down or full
    if (!_dataIface.isHostUp() || _hostFull)
    {
        return;
    }

    // Dispatch a command now if there isn't currently a command in
    // progress and either this is the first log in the queue or a
    // previous attempt gave up after a hard failure.
    auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() ||
                      _retryTimer.isEnabled();

    auto firstPEL = _pelQueue.size() == 1;
    auto gaveUp = _retryCount >= maxRetryAttempts;

    if (!inProgress && (firstPEL || gaveUp))
    {
        _retryCount = 0;

        // Send a log, but from the event loop, not from here.
        scheduleDispatch();
    }
}

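// Use a Defer event source so the actual send happens from the event loop
// instead of directly inside the caller (e.g. the repository's add
// callback).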
void HostNotifier::scheduleDispatch()
{
    _dispatcher = std::make_unique<sdeventplus::source::Defer>(
        _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch),
                                          this, std::placeholders::_1));
}

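// Deferred event callback scheduled by scheduleDispatch() - tears down the
// event source and kicks off the next send.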
void HostNotifier::dispatch(sdeventplus::source::EventBase& source)
{
    _dispatcher.reset();

    doNewLogNotify();
}

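// Pops the next PEL that still needs to go to the host off of the queue
// and sends the new log command for it, handling retries and the give-up
// case.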
void HostNotifier::doNewLogNotify()
{
    if (!_dataIface.isHostUp() || _retryTimer.isEnabled() ||
        _hostFullTimer.isEnabled())
    {
        return;
    }

    if (_retryCount >= maxRetryAttempts)
    {
        // Give up until a new log comes in.
        if (_retryCount == maxRetryAttempts)
        {
            // If this were to really happen, the PLDM interface
            // would be down and isolating that shouldn't be left to
            // a logging daemon, so just trace.  Also, this will start
            // trying again when the next new log comes in.
            log<level::ERR>(
                "PEL Host notifier hit max retry attempts. Giving up for now.",
                entry("PEL_ID=0x%X", _pelQueue.front()));
        }
        return;
    }

    bool doNotify = false;
    uint32_t id = 0;

    // Find the PEL to send
    while (!doNotify && !_pelQueue.empty())
    {
        id = _pelQueue.front();
        _pelQueue.pop_front();

        if (notifyRequired(id))
        {
            doNotify = true;
        }
    }

    if (doNotify)
    {
        // Get the size using the repo attributes
        Repository::LogID i{Repository::LogID::Pel{id}};
        if (auto attributes = _repo.getPELAttributes(i); attributes)
        {
            auto size = static_cast<size_t>(
                std::filesystem::file_size((*attributes).get().path));
            auto rc = _hostIface->sendNewLogCmd(id, size);

            if (rc == CmdStatus::success)
            {
                _inProgressPEL = id;
            }
            else
            {
                // It failed.  Retry
                log<level::ERR>("PLDM send failed", entry("PEL_ID=0x%X", id));
                _pelQueue.push_front(id);
                _inProgressPEL = 0;
                _retryTimer.restartOnce(_hostIface->getSendRetryDelay());
            }
        }
        else
        {
            log<level::ERR>("PEL ID not in repository. Cannot notify host",
                            entry("PEL_ID=0x%X", id));
        }
    }
}

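// Host state change subscription callback.  When the host comes up, start
// sending any queued PELs; when it goes down, cancel any command in flight
// and requeue anything that was sent but never acked.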
void HostNotifier::hostStateChange(bool hostUp)
{
    _retryCount = 0;
    _hostFull = false;

    if (hostUp && !_pelQueue.empty())
    {
        doNewLogNotify();
    }
    else if (!hostUp)
    {
        stopCommand();

        // Reset the state on any PELs that were sent but not acked back
        // to new so they'll get sent again.
        for (auto id : _sentPELs)
        {
            _pelQueue.push_back(id);
            _repo.setPELHostTransState(id, TransmissionState::newPEL);
        }

        _sentPELs.clear();

        if (_hostFullTimer.isEnabled())
        {
            _hostFullTimer.setEnabled(false);
        }
    }
}

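// Async response callback registered with the host interface.  On success
// the PEL is marked sent and the next one is dispatched; on failure the
// PEL goes back on the front of the queue and a retry is scheduled.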
void HostNotifier::commandResponse(ResponseStatus status)
{
    auto id = _inProgressPEL;
    _inProgressPEL = 0;

    if (status == ResponseStatus::success)
    {
        _retryCount = 0;

        _sentPELs.push_back(id);

        _repo.setPELHostTransState(id, TransmissionState::sent);

        // If the host is full, don't send off the next PEL
        if (!_hostFull && !_pelQueue.empty())
        {
            doNewLogNotify();
        }
    }
    else
    {
        log<level::ERR>("PLDM command response failure",
                        entry("PEL_ID=0x%X", id));
        // Retry
        _pelQueue.push_front(id);
        _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay());
    }
}

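// Retry timer callback - attempt the send again if the host is still up.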
void HostNotifier::retryTimerExpired()
{
    if (_dataIface.isHostUp())
    {
        log<level::INFO>("Attempting command retry",
                         entry("PEL_ID=0x%X", _pelQueue.front()));
        _retryCount++;
        doNewLogNotify();
    }
}

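// Host full timer callback - while the host reports it is full, this timer
// is the only path that attempts another send (see setHostFull()).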
void HostNotifier::hostFullTimerExpired()
{
    doNewLogNotify();
}

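// Stops any in-progress command and pending retry, putting an in-flight
// PEL back on the front of the queue.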
void HostNotifier::stopCommand()
{
    _retryCount = 0;

    if (_inProgressPEL != 0)
    {
        _pelQueue.push_front(_inProgressPEL);
        _inProgressPEL = 0;
    }

    if (_retryTimer.isEnabled())
    {
        _retryTimer.setEnabled(false);
    }

    if (_hostIface->cmdInProgress())
    {
        _hostIface->cancelCmd();
    }
}

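// Called when the host acknowledges a PEL.  Records the ack, and since an
// ack also means the host has room again, clears the host-full state and
// resumes sending.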
void HostNotifier::ackPEL(uint32_t id)
{
    _repo.setPELHostTransState(id, TransmissionState::acked);

    // No longer just 'sent', so remove it from the sent list.
    auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
    if (sent != _sentPELs.end())
    {
        _sentPELs.erase(sent);
    }

    // An ack means the host is no longer full
    if (_hostFullTimer.isEnabled())
    {
        _hostFullTimer.setEnabled(false);
    }

    if (_hostFull)
    {
        _hostFull = false;

        // Start sending PELs again, from the event loop
        if (!_pelQueue.empty())
        {
            scheduleDispatch();
        }
    }
}

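// Called when the host responds that it is full.  The PEL that triggered
// the response is requeued, and a timer is started to retry later since
// sends are otherwise paused until an ack arrives.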
void HostNotifier::setHostFull(uint32_t id)
{
    log<level::INFO>("Received Host full indication", entry("PEL_ID=0x%X", id));

    _hostFull = true;

    // This PEL needs to get re-sent
    auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
    if (sent != _sentPELs.end())
    {
        _sentPELs.erase(sent);
        _repo.setPELHostTransState(id, TransmissionState::newPEL);

        if (std::find(_pelQueue.begin(), _pelQueue.end(), id) ==
            _pelQueue.end())
        {
            _pelQueue.push_front(id);
        }
    }

    // While the host is full, the only sends that happen are the ones
    // kicked off by this timer's callback.
    if (!_hostFullTimer.isEnabled())
    {
        _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay());
    }
}

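// Called when the host rejects a PEL as malformed.  The PEL won't be sent
// again; it's just marked as bad in the repository.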
void HostNotifier::setBadPEL(uint32_t id)
{
    log<level::ERR>("PEL rejected by the host", entry("PEL_ID=0x%X", id));

    auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
    if (sent != _sentPELs.end())
    {
        _sentPELs.erase(sent);
    }

    _repo.setPELHostTransState(id, TransmissionState::badPEL);
}

} // namespace openpower::pels