// blob: 1cb2348d9f54d723e80cada111e716e5b253d4e2 [file] [log] [blame]
#pragma once
extern "C"
{
#include <libpdbg.h>
#include <libpdbg_sbe.h>
}
#include "dump_utils.hpp"
#include "sbe_consts.hpp"
#include "sbe_type.hpp"
#include <phal_exception.H>
#include <cstdint>
#include <filesystem>
#include <future>
#include <map>
#include <string>
#include <vector>
namespace openpower::dump::sbe_chipop
{
/**
 * @brief Associates a pdbg target with a list of related pdbg targets.
 *
 * As used by the dump collector, the key is a processor target and the
 * mapped vector holds the OCMB targets associated with that processor.
 */
using TargetMap = std::map<struct pdbg_target*,
                           std::vector<struct pdbg_target*>>;
/**
 * @class SbeDumpCollector
 * @brief Manages the collection of dumps from SBEs on failure.
 *
 * This class provides functionalities to orchestrate the collection of
 * diagnostic dumps from Self Boot Engines across multiple processors
 * in response to failures or for diagnostic purposes.
 */
class SbeDumpCollector
{
  public:
    /**
     * @brief Constructs a new SbeDumpCollector object.
     */
    SbeDumpCollector() = default;

    /**
     * @brief Destroys the SbeDumpCollector object.
     */
    ~SbeDumpCollector() = default;

    /**
     * @brief Orchestrates the collection of dumps from all available SBEs.
     *
     * Initiates the process of collecting diagnostic dumps from SBEs. This
     * involves identifying available processors, initiating the dump
     * collection process, and managing the collected dump files.
     *
     * @param type The type of dump to collect.
     * @param id A unique identifier for the dump collection operation.
     * @param failingUnit The identifier of the failing unit prompting the
     *        dump collection.
     * @param path The filesystem path where collected dumps should be
     *        stored.
     */
    void collectDump(uint8_t type, uint32_t id, uint64_t failingUnit,
                     const std::filesystem::path& path);

  private:
    /**
     * @brief Collects a dump from a single SBE.
     *
     * Executes the low-level operations required to collect a diagnostic
     * dump from the specified SBE.
     *
     * @param chip A pointer to the pdbg_target structure representing the
     *        SBE.
     * @param path The filesystem path where the dump should be stored.
     * @param id The unique identifier for this dump collection operation.
     * @param type The type of dump to collect.
     * @param clockState The clock state of the SBE during dump collection.
     * @param failingUnit The identifier of the failing unit.
     */
    void collectDumpFromSBE(struct pdbg_target* chip,
                            const std::filesystem::path& path, uint32_t id,
                            uint8_t type, uint8_t clockState,
                            uint64_t failingUnit);

    /**
     * @brief Initializes the PDBG library.
     *
     * Prepares the PDBG library for interacting with processor targets.
     * This must be called before any PDBG-related operations are performed.
     */
    void initializePdbg();

    /**
     * @brief Launches asynchronous dump collection tasks for a set of
     *        targets.
     *
     * This method initiates the dump collection process asynchronously for
     * each target provided in `targetMap`. It launches a separate
     * asynchronous task for each target, where each task calls
     * `collectDumpFromSBE` with the specified parameters, including the
     * clock state.
     *
     * @param type The type of the dump to collect. This could be a hardware
     *        dump, software dump, etc., as defined by the SBE dump type
     *        enumeration.
     * @param id A unique identifier for the dump collection operation. This
     *        ID is used to tag the collected dump for identification.
     * @param path The filesystem path where the collected dumps should be
     *        stored. Each dump file will be stored under this directory.
     * @param failingUnit The identifier of the unit or component that is
     *        failing or suspected to be the cause of the issue prompting
     *        the dump collection. This is used for diagnostic purposes.
     * @param cstate The clock state during the dump collection. This
     *        parameter dictates whether the dump should be collected with
     *        the clocks running (SBE_CLOCK_ON) or with the clocks stopped
     *        (SBE_CLOCK_OFF).
     * @param targetMap A map of `pdbg_target*` representing the targets
     *        from which dumps should be collected. The key is the proc
     *        target with the list of ocmb targets associated with the proc.
     *
     * @return A vector of `std::future<void>` objects. Each future
     *         represents the completion state of an asynchronous dump
     *         collection task. The caller can wait on these futures to
     *         determine when all dump collection tasks have completed.
     *         Exceptions thrown by the asynchronous tasks are captured by
     *         the futures and can be rethrown when the futures are
     *         accessed.
     */
    std::vector<std::future<void>> spawnDumpCollectionProcesses(
        uint8_t type, uint32_t id, const std::filesystem::path& path,
        uint64_t failingUnit, uint8_t cstate, const TargetMap& targetMap);

    /**
     * @brief Creates a new dump file named in the dump file name format and
     *        writes the supplied contents into it.
     *
     * @param path Path to the dump file.
     * @param id A unique id assigned to the dump to be collected.
     * @param clockState Clock state, on or off.
     * @param nodeNum Node containing the chip.
     * @param chipName Name of the chip.
     * @param chipPos Chip position of the failing unit.
     * @param dataPtr Content to write to the file.
     * @param len Length of the content.
     */
    void writeDumpFile(const std::filesystem::path& path, const uint32_t id,
                       const uint8_t clockState, const uint8_t nodeNum,
                       const std::string& chipName, const uint8_t chipPos,
                       util::DumpDataPtr& dataPtr, const uint32_t len);

    /**
     * @brief Determines if fastarray collection is needed based on dump
     *        type and unit.
     *
     * Fastarray collection is requested only when the clocks are off
     * (SBE_CLOCK_OFF) and either the dump is a hostboot dump, or it is a
     * hardware dump and this chip is the failing unit.
     *
     * @param clockState The current state of the clock.
     * @param type The type of the dump being collected.
     * @param failingUnit The ID of the failing unit.
     * @param chipPos The position of the chip for which the dump is being
     *        collected.
     *
     * @return uint8_t - Returns 1 if fastarray collection is needed, 0
     *         otherwise.
     */
    uint8_t checkFastarrayCollectionNeeded(
        const uint8_t clockState, const uint8_t type, uint64_t failingUnit,
        const uint8_t chipPos) const
    {
        using namespace openpower::dump::SBE;

        const bool clocksStopped = (clockState == SBE_CLOCK_OFF);
        const bool hostbootDump = (type == SBE_DUMP_TYPE_HOSTBOOT);
        // A hardware dump needs fastarray data only from the failing chip.
        const bool failingChipHwDump = (type == SBE_DUMP_TYPE_HARDWARE) &&
                                       (chipPos == failingUnit);

        return (clocksStopped && (hostbootDump || failingChipHwDump)) ? 1 : 0;
    }

    /**
     * @brief Logs an error and creates a PEL for SBE chip-op failures.
     *
     * @param sbeError An error object encapsulating details about the SBE
     *        error.
     * @param chipPos The position of the chip where the error occurred.
     * @param sbeType The type of SBE, used to determine the event log
     *        message.
     * @param cmdClass The command class associated with the SBE operation.
     * @param cmdType The specific type of command within the command class.
     *
     * @return bool - NOTE(review): the meaning of true/false is decided by
     *         the out-of-line implementation and is not visible from this
     *         header; confirm against the definition and document it here.
     */
    bool logErrorAndCreatePEL(const openpower::phal::sbeError_t& sbeError,
                              uint64_t chipPos, SBETypes sbeType,
                              uint32_t cmdClass, uint32_t cmdType);

    /**
     * @brief Determines the type of SBE for a given chip target.
     *
     * @param chip A pointer to a pdbg_target structure representing the
     *        chip.
     * @return SBETypes::OCMB when is_ody_ocmb_chip() reports true for the
     *         target, SBETypes::PROC otherwise.
     */
    SBETypes getSBEType(struct pdbg_target* chip) const
    {
        return is_ody_ocmb_chip(chip) ? SBETypes::OCMB : SBETypes::PROC;
    }

    /**
     * @brief Executes thread stop on a processor target
     *
     * If the Self Boot Engine (SBE) is not ready to accept chip operations
     * (chip-ops), it logs the condition and excludes the processor from the
     * dump collection process. For critical errors, such as a timeout
     * during the stop operation, it logs the error and again excludes the
     * processor. In case of SBE command failure or non-critical errors, it
     * continues with the dump collection process.
     *
     * @param target Pointer to the pdbg target structure representing the
     *        processor to perform the thread stop on.
     * @return true If the thread stop was successful or in case of
     *         non-critical errors where dump collection can proceed.
     * @return false If the SBE is not ready for chip-ops or in case of
     *         critical errors like timeouts, indicating the processor
     *         should be excluded from the dump collection.
     */
    bool executeThreadStop(struct pdbg_target* target);
};
} // namespace openpower::dump::sbe_chipop