blob: b84153bd8361e93a5c7b09db56fc29e3a626b31d [file] [log] [blame]
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05001#include <assert.h>
Ben Tyner87eabc62020-05-14 17:56:54 -05002#include <libpdbg.h>
3
Ben Tyner0205f3b2020-02-24 10:24:47 -06004#include <hei_main.hpp>
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05005#include <util/trace.hpp>
Ben Tyner0205f3b2020-02-24 10:24:47 -06006
Zane Shelleyd84ed6e2020-06-08 13:41:48 -05007#include <algorithm>
Ben Tyner87eabc62020-05-14 17:56:54 -05008#include <fstream>
9#include <iostream>
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050010#include <map>
11#include <string>
12
Ben Tyner0205f3b2020-02-24 10:24:47 -060013namespace analyzer
14{
15
/**
 * @brief Chip types that correlate device tree nodes to chip data files.
 *
 * Each array holds one chip type as four individual bytes, least significant
 * byte first (0x160D2000 for the OCMB, 0x120DA049 for the processor).
 */
static constexpr uint8_t chipTypeOcmb[4]{0x00, 0x20, 0x0d, 0x16};
static constexpr uint8_t chipTypeProc[4]{0x49, 0xa0, 0x0d, 0x12};
19
20/**
21 * @brief send chip data file to isolator
22 *
23 * Read a chip data file into memory and then send it to the isolator via
24 * the initialize interface.
25 *
26 * @param i_filePath The file path and name to read into memory
27 *
28 * @return Returns true if the isolator was successfully initialized with
29 * a single chip data file. Returns false otherwise.
30 *
31 */
Zane Shelley2e994bc2020-06-08 14:38:14 -050032void initWithFile(const char* i_filePath)
Ben Tyner0205f3b2020-02-24 10:24:47 -060033{
Ben Tyner87eabc62020-05-14 17:56:54 -050034 // open the file and seek to the end to get length
35 std::ifstream fileStream(i_filePath, std::ios::binary | std::ios::ate);
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050036
Zane Shelley2e994bc2020-06-08 14:38:14 -050037 if (!fileStream.good())
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050038 {
Zane Shelley2e994bc2020-06-08 14:38:14 -050039 trace::err("Unable to open file: %s", i_filePath);
40 assert(0);
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050041 }
42 else
43 {
Ben Tyner87eabc62020-05-14 17:56:54 -050044 // get file size based on seek position
Zane Shelley2e994bc2020-06-08 14:38:14 -050045 fileStream.seekg(0, std::ios::end);
Ben Tyner87eabc62020-05-14 17:56:54 -050046 std::ifstream::pos_type fileSize = fileStream.tellg();
47
48 // create a buffer large enough to hold the entire file
49 std::vector<char> fileBuffer(fileSize);
50
51 // seek to the beginning of the file
52 fileStream.seekg(0, std::ios::beg);
53
54 // read the entire file into the buffer
55 fileStream.read(fileBuffer.data(), fileSize);
56
57 // done with the file
58 fileStream.close();
59
Zane Shelley2e994bc2020-06-08 14:38:14 -050060 // initialize the isolator with the chip data
61 libhei::initialize(fileBuffer.data(), fileSize);
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050062 }
Ben Tyner87eabc62020-05-14 17:56:54 -050063}
Ben Tynerb1ebfcb2020-05-08 18:52:48 -050064
Zane Shelleyd84ed6e2020-06-08 13:41:48 -050065//------------------------------------------------------------------------------
66
67// Returns the chip model/level of the given target. Also, adds the chip
68// model/level to the list of type types needed to initialize the isolator.
69libhei::ChipType_t __getChipType(pdbg_target* i_trgt,
70 std::vector<libhei::ChipType_t>& o_types)
71{
72 libhei::ChipType_t type;
73
74 // START WORKAROUND
75 // TODO: Will need to grab the model/level from the target attributes when
76 // they are available. For now, use ATTR_TYPE to determine which
77 // currently supported value to use supported.
78 char* attrType = new char[1];
79
80 pdbg_target_get_attribute(i_trgt, "ATTR_TYPE", 1, 1, attrType);
81
82 switch (attrType[0])
83 {
84 case 0x05: // PROC
85 type = 0x120DA049;
86 break;
87
88 case 0x4b: // OCMB_CHIP
89 type = 0x160D2000;
90 break;
91
92 default:
93 trace::err("Unsupported ATTR_TYPE value: 0x%02x", attrType[0]);
94 assert(0);
95 }
96
97 delete[] attrType;
98 // END WORKAROUND
99
100 o_types.push_back(type);
101
102 return type;
103}
104
105//------------------------------------------------------------------------------
106
107// Gathers list of active chips to analyze. Also, returns the list of chip types
108// needed to initialize the isolator.
109void __getActiveChips(std::vector<libhei::Chip>& o_chips,
110 std::vector<libhei::ChipType_t>& o_types)
111{
112 // Iterate each processor.
113 pdbg_target* procTrgt;
114 pdbg_for_each_class_target("proc", procTrgt)
115 {
116 // Active processors only.
117 if (PDBG_TARGET_ENABLED != pdbg_target_probe(procTrgt))
118 continue;
119
120 // Add the processor to the list.
121 o_chips.emplace_back(procTrgt, __getChipType(procTrgt, o_types));
122
123 // Iterate the connected OCMBs, if they exist.
124 pdbg_target* ocmbTrgt;
125 pdbg_for_each_target("ocmb_chip", procTrgt, ocmbTrgt)
126 {
127 // Active OCMBs only.
128 if (PDBG_TARGET_ENABLED != pdbg_target_probe(ocmbTrgt))
129 continue;
130
131 // Add the OCMB to the list.
132 o_chips.emplace_back(ocmbTrgt, __getChipType(ocmbTrgt, o_types));
133 }
134 }
135
136 // Make sure the model/level list is of unique values only.
137 auto itr = std::unique(o_types.begin(), o_types.end());
138 o_types.resize(std::distance(o_types.begin(), itr));
139}
140
141//------------------------------------------------------------------------------
142
Zane Shelley2e994bc2020-06-08 14:38:14 -0500143// Initializes the isolator for each specified chip type.
144void __initializeIsolator(const std::vector<libhei::ChipType_t>& i_types)
145{
146 // START WORKAROUND
147 // TODO: The chip data will eventually come from the CHIPDATA section of the
148 // PNOR. Until that support is available, we'll use temporary chip
149 // data files.
150 for (const auto& type : i_types)
151 {
152 switch (type)
153 {
154 case 0x120DA049: // PROC
155 initWithFile(
156 "/usr/share/openpower-hw-diags/chip_data_proc.cdb");
157 break;
158
159 case 0x160D2000: // OCMB_CHIP
160 initWithFile(
161 "/usr/share/openpower-hw-diags/chip_data_ocmb.cdb");
162 break;
163
164 default:
165 trace::err("Unsupported ChipType_t value: 0x%0" PRIx32, type);
166 assert(0);
167 }
168 }
169 // END WORKAROUND
170}
171
172//------------------------------------------------------------------------------
173
Zane Shelley097a71a2020-06-08 15:55:29 -0500174// Takes a signature list that will be filtered and sorted. The first entry in
175// the returned list will be the root cause. If the returned list is empty,
176// analysis failed.
177void __filterRootCause(std::vector<libhei::Signature>& io_list)
178{
179 // Special and host attentions are not supported by this user application.
180 auto newEndItr =
181 std::remove_if(io_list.begin(), io_list.end(), [&](const auto& t) {
182 return (libhei::ATTN_TYPE_SP_ATTN == t.getAttnType() ||
183 libhei::ATTN_TYPE_HOST_ATTN == t.getAttnType());
184 });
185
186 // Shrink the vector, if needed.
187 io_list.resize(std::distance(io_list.begin(), newEndItr));
188
189 // START WORKAROUND
190 // TODO: Filtering should be determined by the RAS Data Files provided by
191 // the host firmware via the PNOR (similar to the Chip Data Files).
192 // Until that support is available, use a rudimentary filter that
193 // first looks for any recoverable attention, then any unit checkstop,
194 // and then any system checkstop. This is built on the premise that
195 // recoverable errors could be the root cause of an system checkstop
196 // attentions. Fortunately, we just need to sort the list by the
197 // greater attention type value.
198 std::sort(io_list.begin(), io_list.end(),
199 [&](const auto& a, const auto& b) {
200 return a.getAttnType() > b.getAttnType();
201 });
202 // END WORKAROUND
203}
204
205//------------------------------------------------------------------------------
206
Ben Tyner87eabc62020-05-14 17:56:54 -0500207bool analyzeHardware(std::map<std::string, std::string>& o_errors)
208{
Zane Shelley097a71a2020-06-08 15:55:29 -0500209 bool attnFound = false;
Ben Tyner87eabc62020-05-14 17:56:54 -0500210
Zane Shelleyd84ed6e2020-06-08 13:41:48 -0500211 // Get the active chips to be analyzed and their types.
212 std::vector<libhei::Chip> chipList;
213 std::vector<libhei::ChipType_t> chipTypes;
214 __getActiveChips(chipList, chipTypes);
Ben Tyner87eabc62020-05-14 17:56:54 -0500215
Zane Shelley2e994bc2020-06-08 14:38:14 -0500216 // Initialize the isolator for all chip types.
217 __initializeIsolator(chipTypes);
218
Zane Shelley097a71a2020-06-08 15:55:29 -0500219 // Isolate attentions.
220 libhei::IsolationData isoData{};
221 libhei::isolate(chipList, isoData);
Ben Tyner87eabc62020-05-14 17:56:54 -0500222
Zane Shelley097a71a2020-06-08 15:55:29 -0500223 // Filter signatures to determine root cause.
224 std::vector<libhei::Signature> sigList{isoData.getSignatureList()};
225 __filterRootCause(sigList);
226
227 if (sigList.empty())
Ben Tyner87eabc62020-05-14 17:56:54 -0500228 {
Zane Shelley097a71a2020-06-08 15:55:29 -0500229 // Don't throw an error here because it could happen for during TI
230 // analysis. Attention Handler will need to determine if this is an
231 // actual problem.
232 trace::inf("No active attentions found");
233 }
234 else
235 {
236 attnFound = true;
237 trace::inf("Active attentions found: %d", sigList.size());
Ben Tyner87eabc62020-05-14 17:56:54 -0500238
Zane Shelley097a71a2020-06-08 15:55:29 -0500239 libhei::Signature root = sigList.front();
240 trace::inf("Root cause attention: %p 0x%04x%02x%02x %d",
241 root.getChip().getChip(), root.getId(), root.getInstance(),
242 root.getBit(), root.getAttnType());
Ben Tyner87eabc62020-05-14 17:56:54 -0500243
Zane Shelley097a71a2020-06-08 15:55:29 -0500244 // TODO: generate log information
245 }
Ben Tyner87eabc62020-05-14 17:56:54 -0500246
Zane Shelley097a71a2020-06-08 15:55:29 -0500247 // All done, clean up the isolator.
248 libhei::uninitialize();
Ben Tyner87eabc62020-05-14 17:56:54 -0500249
Zane Shelley097a71a2020-06-08 15:55:29 -0500250 return attnFound;
Ben Tyner0205f3b2020-02-24 10:24:47 -0600251}
252
253} // namespace analyzer