Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 1 | #pragma once |
| 2 | |
#include <zlib.h>

#include <array>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
| 8 | |
| 9 | class GzFileReader |
| 10 | { |
| 11 | public: |
Nan Zhou | 9739de9 | 2022-04-06 11:07:27 -0700 | [diff] [blame] | 12 | bool gzGetLines(const std::string& filename, uint64_t skip, uint64_t top, |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 13 | std::vector<std::string>& logEntries, size_t& logCount) |
| 14 | { |
| 15 | gzFile logStream = gzopen(filename.c_str(), "r"); |
Ed Tanous | e662eae | 2022-01-25 10:39:19 -0800 | [diff] [blame] | 16 | if (logStream == nullptr) |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 17 | { |
| 18 | BMCWEB_LOG_ERROR << "Can't open gz file: " << filename << '\n'; |
| 19 | return false; |
| 20 | } |
| 21 | |
| 22 | if (!readFile(logStream, skip, top, logEntries, logCount)) |
| 23 | { |
| 24 | gzclose(logStream); |
| 25 | return false; |
| 26 | } |
| 27 | gzclose(logStream); |
| 28 | return true; |
| 29 | } |
| 30 | |
| 31 | std::string getLastMessage() |
| 32 | { |
| 33 | return lastMessage; |
| 34 | } |
| 35 | |
| 36 | private: |
| 37 | std::string lastMessage; |
| 38 | std::string lastDelimiter; |
| 39 | size_t totalFilesSize = 0; |
| 40 | |
Ed Tanous | 56d2396 | 2022-02-14 20:42:02 -0800 | [diff] [blame] | 41 | static void printErrorMessage(gzFile logStream) |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 42 | { |
| 43 | int errNum = 0; |
| 44 | const char* errMsg = gzerror(logStream, &errNum); |
| 45 | |
| 46 | BMCWEB_LOG_ERROR << "Error reading gz compressed data.\n" |
| 47 | << "Error Message: " << errMsg << '\n' |
| 48 | << "Error Number: " << errNum; |
| 49 | } |
| 50 | |
Nan Zhou | 9739de9 | 2022-04-06 11:07:27 -0700 | [diff] [blame] | 51 | bool readFile(gzFile logStream, uint64_t skip, uint64_t top, |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 52 | std::vector<std::string>& logEntries, size_t& logCount) |
| 53 | { |
| 54 | constexpr int bufferLimitSize = 1024; |
| 55 | do |
| 56 | { |
| 57 | std::string bufferStr; |
| 58 | bufferStr.resize(bufferLimitSize); |
| 59 | |
| 60 | int bytesRead = gzread(logStream, bufferStr.data(), |
| 61 | static_cast<unsigned int>(bufferStr.size())); |
| 62 | // On errors, gzread() shall return a value less than 0. |
| 63 | if (bytesRead < 0) |
| 64 | { |
| 65 | printErrorMessage(logStream); |
| 66 | return false; |
| 67 | } |
| 68 | bufferStr.resize(static_cast<size_t>(bytesRead)); |
| 69 | if (!hostLogEntryParser(bufferStr, skip, top, logEntries, logCount)) |
| 70 | { |
| 71 | BMCWEB_LOG_ERROR << "Error occurs during parsing host log.\n"; |
| 72 | return false; |
| 73 | } |
Ed Tanous | e662eae | 2022-01-25 10:39:19 -0800 | [diff] [blame] | 74 | } while (gzeof(logStream) != 1); |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 75 | |
| 76 | return true; |
| 77 | } |
| 78 | |
Nan Zhou | 9739de9 | 2022-04-06 11:07:27 -0700 | [diff] [blame] | 79 | bool hostLogEntryParser(const std::string& bufferStr, uint64_t skip, |
| 80 | uint64_t top, std::vector<std::string>& logEntries, |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 81 | size_t& logCount) |
| 82 | { |
| 83 | // Assume we have 8 files, and the max size of each file is |
| 84 | // 16k, so define the max size as 256kb (double of 8 files * |
| 85 | // 16kb) |
| 86 | constexpr size_t maxTotalFilesSize = 262144; |
| 87 | |
| 88 | // It may contain several log entry in one line, and |
| 89 | // the end of each log entry will be '\r\n' or '\r'. |
| 90 | // So we need to go through and split string by '\n' and '\r' |
| 91 | size_t pos = bufferStr.find_first_of("\n\r"); |
| 92 | size_t initialPos = 0; |
| 93 | std::string newLastMessage; |
| 94 | |
| 95 | while (pos != std::string::npos) |
| 96 | { |
| 97 | std::string logEntry = |
| 98 | bufferStr.substr(initialPos, pos - initialPos); |
| 99 | // Since there might be consecutive delimiters like "\r\n", we need |
| 100 | // to filter empty strings. |
| 101 | if (!logEntry.empty()) |
| 102 | { |
| 103 | logCount++; |
| 104 | if (!lastMessage.empty()) |
| 105 | { |
| 106 | logEntry.insert(0, lastMessage); |
| 107 | lastMessage.clear(); |
| 108 | } |
| 109 | if (logCount > skip && logCount <= (skip + top)) |
| 110 | { |
| 111 | totalFilesSize += logEntry.size(); |
| 112 | if (totalFilesSize > maxTotalFilesSize) |
| 113 | { |
| 114 | BMCWEB_LOG_ERROR |
| 115 | << "File size exceeds maximum allowed size of " |
| 116 | << maxTotalFilesSize; |
| 117 | return false; |
| 118 | } |
| 119 | logEntries.push_back(logEntry); |
| 120 | } |
| 121 | } |
| 122 | else |
| 123 | { |
| 124 | // Handle consecutive delimiter. '\r\n' act as a single |
| 125 | // delimiter, the other case like '\n\n', '\n\r' or '\r\r' will |
| 126 | // push back a "\n" as a log. |
| 127 | std::string delimiters; |
| 128 | if (pos > 0) |
| 129 | { |
| 130 | delimiters = bufferStr.substr(pos - 1, 2); |
| 131 | } |
| 132 | // Handle consecutive delimiter but spilt between two files. |
| 133 | if (pos == 0 && !(lastDelimiter.empty())) |
| 134 | { |
| 135 | delimiters = lastDelimiter + bufferStr.substr(0, 1); |
| 136 | } |
| 137 | if (delimiters != "\r\n") |
| 138 | { |
| 139 | logCount++; |
| 140 | if (logCount > skip && logCount <= (skip + top)) |
| 141 | { |
| 142 | totalFilesSize++; |
| 143 | if (totalFilesSize > maxTotalFilesSize) |
| 144 | { |
| 145 | BMCWEB_LOG_ERROR |
| 146 | << "File size exceeds maximum allowed size of " |
| 147 | << maxTotalFilesSize; |
| 148 | return false; |
| 149 | } |
| 150 | logEntries.emplace_back("\n"); |
| 151 | } |
| 152 | } |
| 153 | } |
| 154 | initialPos = pos + 1; |
| 155 | pos = bufferStr.find_first_of("\n\r", initialPos); |
| 156 | } |
| 157 | |
| 158 | // Store the last message |
| 159 | if (initialPos < bufferStr.size()) |
| 160 | { |
| 161 | newLastMessage = bufferStr.substr(initialPos); |
| 162 | } |
| 163 | // If consecutive delimiter spilt by buffer or file, the last character |
| 164 | // must be the delimiter. |
| 165 | else if (initialPos == bufferStr.size()) |
| 166 | { |
| 167 | lastDelimiter = std::string(1, bufferStr.back()); |
| 168 | } |
| 169 | // If file doesn't contain any "\r" or "\n", initialPos should be zero |
| 170 | if (initialPos == 0) |
| 171 | { |
| 172 | // Solved an edge case that the log doesn't in skip and top range, |
| 173 | // but consecutive files don't contain a single delimiter, this |
| 174 | // lastMessage becomes unnecessarily large. Since last message will |
| 175 | // prepend to next log, logCount need to plus 1 |
| 176 | if ((logCount + 1) > skip && (logCount + 1) <= (skip + top)) |
| 177 | { |
| 178 | lastMessage.insert( |
| 179 | lastMessage.end(), |
| 180 | std::make_move_iterator(newLastMessage.begin()), |
| 181 | std::make_move_iterator(newLastMessage.end())); |
| 182 | |
| 183 | // Following the previous question, protect lastMessage don't |
| 184 | // larger than max total files size |
| 185 | size_t tmpMessageSize = totalFilesSize + lastMessage.size(); |
| 186 | if (tmpMessageSize > maxTotalFilesSize) |
| 187 | { |
| 188 | BMCWEB_LOG_ERROR |
| 189 | << "File size exceeds maximum allowed size of " |
| 190 | << maxTotalFilesSize; |
| 191 | return false; |
| 192 | } |
| 193 | } |
| 194 | } |
| 195 | else |
| 196 | { |
| 197 | if (!newLastMessage.empty()) |
| 198 | { |
| 199 | lastMessage = std::move(newLastMessage); |
| 200 | } |
| 201 | } |
| 202 | return true; |
| 203 | } |
| 204 | |
| 205 | public: |
| 206 | GzFileReader() = default; |
| 207 | ~GzFileReader() = default; |
| 208 | GzFileReader(const GzFileReader&) = delete; |
| 209 | GzFileReader& operator=(const GzFileReader&) = delete; |
Ed Tanous | ecd6a3a | 2022-01-07 09:18:40 -0800 | [diff] [blame] | 210 | GzFileReader(GzFileReader&&) = delete; |
| 211 | GzFileReader& operator=(GzFileReader&&) = delete; |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 212 | }; |