Hardware error analyzer for checkstop analyses
When a checkstop occurs, the attention handler will use the hardware
error isolator to gather information pertaining to the error
condition(s).
Signed-off-by: Ben Tyner <ben.tyner@ibm.com>
Change-Id: If933be6bddfc653edf0ed3db660c32d3aae04c5f
diff --git a/analyzer/analyzer_main.cpp b/analyzer/analyzer_main.cpp
index 8de5924..6a47540 100644
--- a/analyzer/analyzer_main.cpp
+++ b/analyzer/analyzer_main.cpp
@@ -1,43 +1,152 @@
+#include <libpdbg.h>
+
#include <hei_main.hpp>
+#include <fstream>
+#include <iostream>
#include <map>
#include <string>
namespace analyzer
{
-/** Analyze error condition using the hardware error isolator */
-bool analyzeHardware(std::map<std::string, std::string>& i_errors)
+/** @brief Chip types that correlate device tree nodes to chip data files */
+static constexpr uint8_t chipTypeOcmb[4] = {0x00, 0x20, 0x0d, 0x16};
+static constexpr uint8_t chipTypeProc[4] = {0x49, 0xa0, 0x0d, 0x12};
+
+/**
+ * @brief send chip data file to isolator
+ *
+ * Read a chip data file into memory and then send it to the isolator via
+ * the initialize interface.
+ *
+ * @param i_filePath The file path and name to read into memory
+ *
+ * @return Returns true if the isolator was successfully initialized with
+ * a single chip data file. Returns false otherwise.
+ *
+ */
+bool initWithFile(const char* i_filePath)
{
using namespace libhei;
- std::vector<Chip> chipList; // data to isolator
- IsolationData isoData{}; // data from isolator
+ bool rc = true; // assume success
- // FIXME TEMP CODE - start
+ // open the file and seek to the end to get length
+ std::ifstream fileStream(i_filePath, std::ios::binary | std::ios::ate);
- initialize(nullptr, 0);
-
- chipList.emplace_back(Chip{"proc", static_cast<ChipType_t>(0xdeadbeef)});
-
- isolate(chipList, isoData); // isolate errors
-
- if (!(isoData.getSignatureList().empty()))
+ if (!fileStream)
{
- // Signature signature = isoData.getSignatureList().back();
+ std::cout << "could not open file" << std::endl;
+ rc = false;
}
else
{
- std::string signature = "0xfeed";
- std::string chip = "0xbeef";
- i_errors[signature] = chip;
+ // get file size based on seek position
+ std::ifstream::pos_type fileSize = fileStream.tellg();
+
+ // create a buffer large enough to hold the entire file
+ std::vector<char> fileBuffer(fileSize);
+
+ // seek to the beginning of the file
+ fileStream.seekg(0, std::ios::beg);
+
+ // read the entire file into the buffer
+ fileStream.read(fileBuffer.data(), fileSize);
+
+ // done with the file
+ fileStream.close();
+
+ // initialize the isolator with the chip data
+ initialize(fileBuffer.data(), fileSize); // hei initialize
}
- uninitialize();
+ return rc;
+}
- // FIXME TEMP CODE - end
+/**
+ * @brief Analyze using the hardware error isolator
+ *
+ * Query the hardware for each active chip that is a valid candidate for
+ * error analysis. Based on the list of active chips, initialize the
+ * isolator with the associated chip data files. Finally, request analysis
+ * from the hardware error isolator and log the results.
+ *
+ * @param o_errors A map for storing information about errors that were
+ * detected by the hardware error isolator.
+ *
+ * @return True if hardware error analysis was successful, false otherwise
+ */
+bool analyzeHardware(std::map<std::string, std::string>& o_errors)
+{
+ using namespace libhei;
- return true; // FIXME - error/success from isolator or isolation data
+ bool rc = true;
+
+ std::vector<Chip> chipList; // chips that need to be analyzed
+
+ IsolationData isoData{}; // data from isolator
+
+ pdbg_target *targetProc, *targetOcmb; // P10 and explorer targets
+
+ /** @brief gather list of chips to analyze */
+ pdbg_for_each_class_target("proc", targetProc)
+ {
+ if (PDBG_TARGET_ENABLED == pdbg_target_probe(targetProc))
+ {
+ // add each processor chip to the chip list
+ chipList.emplace_back(Chip(targetProc, *(uint32_t*)chipTypeProc));
+
+ pdbg_for_each_target("ocmb_chip", targetProc, targetOcmb)
+ {
+ if (PDBG_TARGET_ENABLED == pdbg_target_probe(targetOcmb))
+ {
+ // add each explorer chip (ocmb) to the chip list
+ chipList.emplace_back(
+ Chip(targetOcmb, *(uint32_t*)chipTypeOcmb));
+ }
+ }
+ }
+ }
+
+ // TODO select chip data files based on chip types detected
+ do
+ {
+ // TODO for now chip data files are local
+ // hei initialize
+ if (false ==
+ initWithFile("/usr/share/openpower-hw-diags/chip_data_ocmb.cdb"))
+ {
+ rc = false;
+ break;
+ }
+
+ // TODO for now chip data files are local
+ // hei initialize
+ if (false ==
+ initWithFile("/usr/share/openpower-hw-diags/chip_data_proc.cdb"))
+ {
+ rc = false;
+ break;
+ }
+
+ // hei isolate
+ isolate(chipList, isoData);
+
+ if (!(isoData.getSignatureList().empty()))
+ {
+ // TODO parse signature list
+ int numErrors = isoData.getSignatureList().size();
+
+ std::cout << "isolated: " << numErrors << std::endl;
+ }
+
+ // hei uninitialize
+ uninitialize();
+
+ } while (0);
+
+ return rc;
}
} // namespace analyzer
diff --git a/listener.cpp b/listener.cpp
index e5946c4..5360d81 100644
--- a/listener.cpp
+++ b/listener.cpp
@@ -51,9 +51,6 @@
// create config
attn::Config attnConfig;
- // initialize pdbg targets
- pdbg_targets_init(nullptr);
-
// This is the main listener loop. All the above code will be executed
// only once. All other communtication with the attention handler will
// originate from here via the message queue.
diff --git a/main.cpp b/main.cpp
index 26775ff..b7430ce 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,3 +1,5 @@
+#include <libpdbg.h>
+
#include <analyzer/analyzer_main.hpp>
#include <boost/interprocess/ipc/message_queue.hpp>
#include <cli.hpp>
@@ -47,7 +49,13 @@
}
else
{
- // todo usage
+ // Pdbg targets should only be initialized once according to
+ // libpdbg documentation. Initializing them here will make sure
+ // they are initialized for the attention handler, invocation of
+ // the analyzer via attention handler and direct invocation of
+ // the analyzer via command line (--analyze).
+
+ pdbg_targets_init(nullptr); // nullptr == use default fdt
// Either analyze (application mode) or daemon mode
if (true == getCliOption(argv, argv + argc, "--analyze"))
@@ -60,7 +68,7 @@
{
// TODO - add error processing/display
- printf("analyzer isolated %i errors", (int)errors.size());
+ printf("analyzer isolated %i error(s)\n", (int)errors.size());
}
else
{
diff --git a/main_nl.cpp b/main_nl.cpp
index 1eead08..d66e378 100644
--- a/main_nl.cpp
+++ b/main_nl.cpp
@@ -44,6 +44,14 @@
}
else
{
+ // Pdbg targets should only be initialized once according to
+ // libpdbg documentation. Initializing them here will make sure
+ // they are initialized for the attention handler, invocation of
+ // the analyzer via attention handler and direct invocation of
+ // the analyzer via command line (--analyze).
+
+ pdbg_targets_init(nullptr); // nullptr == use default fdt
+
// Either analyze (application mode) or daemon mode
if (true == getCliOption(argv, argv + argc, "--analyze"))
{
@@ -52,7 +60,7 @@
rc = analyzer::analyzeHardware(errors); // analyze hardware
- printf("analyzer isolated %i errors", (int)errors.size());
+ printf("analyzer isolated %i error(s)\n", (int)errors.size());
}
// daemon mode
else