blob: dc37b2413c2d0c245a76e5b2c7e09c1cf095e8e3 [file] [log] [blame]
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05001#include <assert.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05002#include <libpdbg.h>
3
Ben Tyner0205f3b2020-02-24 10:24:47 -06004#include <hei_main.hpp>
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05005#include <util/trace.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -06006
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05007#include <algorithm>
Ben Tyner87eabc62020-05-14 17:56:54 -05008#include <fstream>
9#include <iostream>
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050010#include <map>
11#include <string>
12
Ben Tyner0205f3b2020-02-24 10:24:47 -060013namespace analyzer
14{
15
/**
 * @brief Chip types that correlate device tree nodes to chip data files.
 *
 * Each chip type is stored as four individual bytes.
 */
static constexpr uint8_t chipTypeOcmb[4]{0x00, 0x20, 0x0d, 0x16};
static constexpr uint8_t chipTypeProc[4]{0x49, 0xa0, 0x0d, 0x12};
19
20/**
21 * @brief send chip data file to isolator
22 *
23 * Read a chip data file into memory and then send it to the isolator via
24 * the initialize interface.
25 *
26 * @param i_filePath The file path and name to read into memory
27 *
28 * @return Returns true if the isolator was successfully initialized with
29 * a single chip data file. Returns false otherwise.
30 *
31 */
Zane Shelley2e994bc2020-06-08 14:38:14 -050032void initWithFile(const char* i_filePath)
Ben Tyner0205f3b2020-02-24 10:24:47 -060033{
Ben Tyner87eabc62020-05-14 17:56:54 -050034 // open the file and seek to the end to get length
35 std::ifstream fileStream(i_filePath, std::ios::binary | std::ios::ate);
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050036
Zane Shelley2e994bc2020-06-08 14:38:14 -050037 if (!fileStream.good())
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050038 {
Zane Shelley2e994bc2020-06-08 14:38:14 -050039 trace::err("Unable to open file: %s", i_filePath);
40 assert(0);
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050041 }
42 else
43 {
Ben Tyner87eabc62020-05-14 17:56:54 -050044 // get file size based on seek position
Zane Shelley2e994bc2020-06-08 14:38:14 -050045 fileStream.seekg(0, std::ios::end);
Ben Tyner87eabc62020-05-14 17:56:54 -050046 std::ifstream::pos_type fileSize = fileStream.tellg();
47
48 // create a buffer large enough to hold the entire file
49 std::vector<char> fileBuffer(fileSize);
50
51 // seek to the beginning of the file
52 fileStream.seekg(0, std::ios::beg);
53
54 // read the entire file into the buffer
55 fileStream.read(fileBuffer.data(), fileSize);
56
57 // done with the file
58 fileStream.close();
59
Zane Shelley2e994bc2020-06-08 14:38:14 -050060 // initialize the isolator with the chip data
61 libhei::initialize(fileBuffer.data(), fileSize);
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050062 }
Ben Tyner87eabc62020-05-14 17:56:54 -050063}
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050064
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050065//------------------------------------------------------------------------------
66
67// Returns the chip model/level of the given target. Also, adds the chip
68// model/level to the list of type types needed to initialize the isolator.
69libhei::ChipType_t __getChipType(pdbg_target* i_trgt,
70 std::vector<libhei::ChipType_t>& o_types)
71{
72 libhei::ChipType_t type;
73
74 // START WORKAROUND
75 // TODO: Will need to grab the model/level from the target attributes when
76 // they are available. For now, use ATTR_TYPE to determine which
77 // currently supported value to use supported.
78 char* attrType = new char[1];
79
80 pdbg_target_get_attribute(i_trgt, "ATTR_TYPE", 1, 1, attrType);
81
82 switch (attrType[0])
83 {
84 case 0x05: // PROC
85 type = 0x120DA049;
86 break;
87
88 case 0x4b: // OCMB_CHIP
89 type = 0x160D2000;
90 break;
91
92 default:
93 trace::err("Unsupported ATTR_TYPE value: 0x%02x", attrType[0]);
94 assert(0);
95 }
96
97 delete[] attrType;
98 // END WORKAROUND
99
100 o_types.push_back(type);
101
102 return type;
103}
104
105//------------------------------------------------------------------------------
106
107// Gathers list of active chips to analyze. Also, returns the list of chip types
108// needed to initialize the isolator.
109void __getActiveChips(std::vector<libhei::Chip>& o_chips,
110 std::vector<libhei::ChipType_t>& o_types)
111{
112 // Iterate each processor.
113 pdbg_target* procTrgt;
114 pdbg_for_each_class_target("proc", procTrgt)
115 {
116 // Active processors only.
117 if (PDBG_TARGET_ENABLED != pdbg_target_probe(procTrgt))
118 continue;
119
120 // Add the processor to the list.
121 o_chips.emplace_back(procTrgt, __getChipType(procTrgt, o_types));
122
123 // Iterate the connected OCMBs, if they exist.
124 pdbg_target* ocmbTrgt;
125 pdbg_for_each_target("ocmb_chip", procTrgt, ocmbTrgt)
126 {
127 // Active OCMBs only.
128 if (PDBG_TARGET_ENABLED != pdbg_target_probe(ocmbTrgt))
129 continue;
130
131 // Add the OCMB to the list.
132 o_chips.emplace_back(ocmbTrgt, __getChipType(ocmbTrgt, o_types));
133 }
134 }
135
136 // Make sure the model/level list is of unique values only.
137 auto itr = std::unique(o_types.begin(), o_types.end());
138 o_types.resize(std::distance(o_types.begin(), itr));
139}
140
141//------------------------------------------------------------------------------
142
Zane Shelley2e994bc2020-06-08 14:38:14 -0500143// Initializes the isolator for each specified chip type.
144void __initializeIsolator(const std::vector<libhei::ChipType_t>& i_types)
145{
146 // START WORKAROUND
147 // TODO: The chip data will eventually come from the CHIPDATA section of the
148 // PNOR. Until that support is available, we'll use temporary chip
149 // data files.
150 for (const auto& type : i_types)
151 {
152 switch (type)
153 {
154 case 0x120DA049: // PROC
155 initWithFile(
156 "/usr/share/openpower-hw-diags/chip_data_proc.cdb");
157 break;
158
159 case 0x160D2000: // OCMB_CHIP
160 initWithFile(
161 "/usr/share/openpower-hw-diags/chip_data_ocmb.cdb");
162 break;
163
164 default:
165 trace::err("Unsupported ChipType_t value: 0x%0" PRIx32, type);
166 assert(0);
167 }
168 }
169 // END WORKAROUND
170}
171
172//------------------------------------------------------------------------------
173
Ben Tyner87eabc62020-05-14 17:56:54 -0500174/**
175 * @brief Analyze using the hardware error isolator
176 *
177 * Query the hardware for each active chip that is a valid candidate for
178 * error analyses. Based on the list of active chips initialize the
179 * isolator with the associated chip data files. Finally request analyses
180 * from the hardware error isolator and log the results.
181 *
182 * @param o_errors A map for storing information about erros that were
183 * detected by the hardware error isolator.
184 *
185 * @return True if hardware error analyses was successful, false otherwise
186 */
187bool analyzeHardware(std::map<std::string, std::string>& o_errors)
188{
189 using namespace libhei;
Ben Tynerb1ebfcb2020-05-08 18:52:48 -0500190
Ben Tyner87eabc62020-05-14 17:56:54 -0500191 bool rc = true;
192
Zane Shelleyd84ed6e2020-06-08 13:41:48 -0500193 // Get the active chips to be analyzed and their types.
194 std::vector<libhei::Chip> chipList;
195 std::vector<libhei::ChipType_t> chipTypes;
196 __getActiveChips(chipList, chipTypes);
Ben Tyner87eabc62020-05-14 17:56:54 -0500197
Zane Shelley2e994bc2020-06-08 14:38:14 -0500198 // Initialize the isolator for all chip types.
199 __initializeIsolator(chipTypes);
200
Ben Tyner87eabc62020-05-14 17:56:54 -0500201 IsolationData isoData{}; // data from isolato
202
Ben Tyner87eabc62020-05-14 17:56:54 -0500203 do
204 {
Ben Tyner87eabc62020-05-14 17:56:54 -0500205 // hei isolate
206 isolate(chipList, isoData);
207
208 if (!(isoData.getSignatureList().empty()))
209 {
210 // TODO parse signature list
211 int numErrors = isoData.getSignatureList().size();
212
213 std::cout << "isolated: " << numErrors << std::endl;
214 }
215
216 // hei uninitialize
217 uninitialize();
218
219 } while (0);
220
221 return rc;
Ben Tyner0205f3b2020-02-24 10:24:47 -0600222}
223
224} // namespace analyzer