blob: 118bdb4981c01a0dd9fab9c5d268b5d519eaf4c1 [file] [log] [blame]
Spencer Kub7028eb2021-10-26 15:27:35 +08001#pragma once
2
3#include <zlib.h>
4
5#include <array>
6#include <filesystem>
7#include <vector>
8
9class GzFileReader
10{
11 public:
12 bool gzGetLines(const std::string& filename, uint64_t& skip, uint64_t& top,
13 std::vector<std::string>& logEntries, size_t& logCount)
14 {
15 gzFile logStream = gzopen(filename.c_str(), "r");
16 if (!logStream)
17 {
18 BMCWEB_LOG_ERROR << "Can't open gz file: " << filename << '\n';
19 return false;
20 }
21
22 if (!readFile(logStream, skip, top, logEntries, logCount))
23 {
24 gzclose(logStream);
25 return false;
26 }
27 gzclose(logStream);
28 return true;
29 }
30
31 std::string getLastMessage()
32 {
33 return lastMessage;
34 }
35
36 private:
37 std::string lastMessage;
38 std::string lastDelimiter;
39 size_t totalFilesSize = 0;
40
41 void printErrorMessage(gzFile logStream)
42 {
43 int errNum = 0;
44 const char* errMsg = gzerror(logStream, &errNum);
45
46 BMCWEB_LOG_ERROR << "Error reading gz compressed data.\n"
47 << "Error Message: " << errMsg << '\n'
48 << "Error Number: " << errNum;
49 }
50
51 bool readFile(gzFile logStream, uint64_t& skip, uint64_t& top,
52 std::vector<std::string>& logEntries, size_t& logCount)
53 {
54 constexpr int bufferLimitSize = 1024;
55 do
56 {
57 std::string bufferStr;
58 bufferStr.resize(bufferLimitSize);
59
60 int bytesRead = gzread(logStream, bufferStr.data(),
61 static_cast<unsigned int>(bufferStr.size()));
62 // On errors, gzread() shall return a value less than 0.
63 if (bytesRead < 0)
64 {
65 printErrorMessage(logStream);
66 return false;
67 }
68 bufferStr.resize(static_cast<size_t>(bytesRead));
69 if (!hostLogEntryParser(bufferStr, skip, top, logEntries, logCount))
70 {
71 BMCWEB_LOG_ERROR << "Error occurs during parsing host log.\n";
72 return false;
73 }
74 } while (!gzeof(logStream));
75
76 return true;
77 }
78
79 bool hostLogEntryParser(const std::string& bufferStr, uint64_t& skip,
80 uint64_t& top, std::vector<std::string>& logEntries,
81 size_t& logCount)
82 {
83 // Assume we have 8 files, and the max size of each file is
84 // 16k, so define the max size as 256kb (double of 8 files *
85 // 16kb)
86 constexpr size_t maxTotalFilesSize = 262144;
87
88 // It may contain several log entry in one line, and
89 // the end of each log entry will be '\r\n' or '\r'.
90 // So we need to go through and split string by '\n' and '\r'
91 size_t pos = bufferStr.find_first_of("\n\r");
92 size_t initialPos = 0;
93 std::string newLastMessage;
94
95 while (pos != std::string::npos)
96 {
97 std::string logEntry =
98 bufferStr.substr(initialPos, pos - initialPos);
99 // Since there might be consecutive delimiters like "\r\n", we need
100 // to filter empty strings.
101 if (!logEntry.empty())
102 {
103 logCount++;
104 if (!lastMessage.empty())
105 {
106 logEntry.insert(0, lastMessage);
107 lastMessage.clear();
108 }
109 if (logCount > skip && logCount <= (skip + top))
110 {
111 totalFilesSize += logEntry.size();
112 if (totalFilesSize > maxTotalFilesSize)
113 {
114 BMCWEB_LOG_ERROR
115 << "File size exceeds maximum allowed size of "
116 << maxTotalFilesSize;
117 return false;
118 }
119 logEntries.push_back(logEntry);
120 }
121 }
122 else
123 {
124 // Handle consecutive delimiter. '\r\n' act as a single
125 // delimiter, the other case like '\n\n', '\n\r' or '\r\r' will
126 // push back a "\n" as a log.
127 std::string delimiters;
128 if (pos > 0)
129 {
130 delimiters = bufferStr.substr(pos - 1, 2);
131 }
132 // Handle consecutive delimiter but spilt between two files.
133 if (pos == 0 && !(lastDelimiter.empty()))
134 {
135 delimiters = lastDelimiter + bufferStr.substr(0, 1);
136 }
137 if (delimiters != "\r\n")
138 {
139 logCount++;
140 if (logCount > skip && logCount <= (skip + top))
141 {
142 totalFilesSize++;
143 if (totalFilesSize > maxTotalFilesSize)
144 {
145 BMCWEB_LOG_ERROR
146 << "File size exceeds maximum allowed size of "
147 << maxTotalFilesSize;
148 return false;
149 }
150 logEntries.emplace_back("\n");
151 }
152 }
153 }
154 initialPos = pos + 1;
155 pos = bufferStr.find_first_of("\n\r", initialPos);
156 }
157
158 // Store the last message
159 if (initialPos < bufferStr.size())
160 {
161 newLastMessage = bufferStr.substr(initialPos);
162 }
163 // If consecutive delimiter spilt by buffer or file, the last character
164 // must be the delimiter.
165 else if (initialPos == bufferStr.size())
166 {
167 lastDelimiter = std::string(1, bufferStr.back());
168 }
169 // If file doesn't contain any "\r" or "\n", initialPos should be zero
170 if (initialPos == 0)
171 {
172 // Solved an edge case that the log doesn't in skip and top range,
173 // but consecutive files don't contain a single delimiter, this
174 // lastMessage becomes unnecessarily large. Since last message will
175 // prepend to next log, logCount need to plus 1
176 if ((logCount + 1) > skip && (logCount + 1) <= (skip + top))
177 {
178 lastMessage.insert(
179 lastMessage.end(),
180 std::make_move_iterator(newLastMessage.begin()),
181 std::make_move_iterator(newLastMessage.end()));
182
183 // Following the previous question, protect lastMessage don't
184 // larger than max total files size
185 size_t tmpMessageSize = totalFilesSize + lastMessage.size();
186 if (tmpMessageSize > maxTotalFilesSize)
187 {
188 BMCWEB_LOG_ERROR
189 << "File size exceeds maximum allowed size of "
190 << maxTotalFilesSize;
191 return false;
192 }
193 }
194 }
195 else
196 {
197 if (!newLastMessage.empty())
198 {
199 lastMessage = std::move(newLastMessage);
200 }
201 }
202 return true;
203 }
204
205 public:
206 GzFileReader() = default;
207 ~GzFileReader() = default;
208 GzFileReader(const GzFileReader&) = delete;
209 GzFileReader& operator=(const GzFileReader&) = delete;
210};