Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 1 | #pragma once |
| 2 | |
Ed Tanous | 3ccb3ad | 2023-01-13 17:40:03 -0800 | [diff] [blame] | 3 | #include "logging.hpp" |
| 4 | |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 5 | #include <zlib.h> |
| 6 | |
| 7 | #include <array> |
| 8 | #include <filesystem> |
Ed Tanous | 3ccb3ad | 2023-01-13 17:40:03 -0800 | [diff] [blame] | 9 | #include <string> |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 10 | #include <vector> |
| 11 | |
| 12 | class GzFileReader |
| 13 | { |
| 14 | public: |
Nan Zhou | 9739de9 | 2022-04-06 11:07:27 -0700 | [diff] [blame] | 15 | bool gzGetLines(const std::string& filename, uint64_t skip, uint64_t top, |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 16 | std::vector<std::string>& logEntries, size_t& logCount) |
| 17 | { |
| 18 | gzFile logStream = gzopen(filename.c_str(), "r"); |
Ed Tanous | e662eae | 2022-01-25 10:39:19 -0800 | [diff] [blame] | 19 | if (logStream == nullptr) |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 20 | { |
| 21 | BMCWEB_LOG_ERROR << "Can't open gz file: " << filename << '\n'; |
| 22 | return false; |
| 23 | } |
| 24 | |
| 25 | if (!readFile(logStream, skip, top, logEntries, logCount)) |
| 26 | { |
| 27 | gzclose(logStream); |
| 28 | return false; |
| 29 | } |
| 30 | gzclose(logStream); |
| 31 | return true; |
| 32 | } |
| 33 | |
| 34 | std::string getLastMessage() |
| 35 | { |
| 36 | return lastMessage; |
| 37 | } |
| 38 | |
| 39 | private: |
| 40 | std::string lastMessage; |
| 41 | std::string lastDelimiter; |
| 42 | size_t totalFilesSize = 0; |
| 43 | |
Ed Tanous | 56d2396 | 2022-02-14 20:42:02 -0800 | [diff] [blame] | 44 | static void printErrorMessage(gzFile logStream) |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 45 | { |
| 46 | int errNum = 0; |
| 47 | const char* errMsg = gzerror(logStream, &errNum); |
| 48 | |
| 49 | BMCWEB_LOG_ERROR << "Error reading gz compressed data.\n" |
| 50 | << "Error Message: " << errMsg << '\n' |
| 51 | << "Error Number: " << errNum; |
| 52 | } |
| 53 | |
Nan Zhou | 9739de9 | 2022-04-06 11:07:27 -0700 | [diff] [blame] | 54 | bool readFile(gzFile logStream, uint64_t skip, uint64_t top, |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 55 | std::vector<std::string>& logEntries, size_t& logCount) |
| 56 | { |
| 57 | constexpr int bufferLimitSize = 1024; |
| 58 | do |
| 59 | { |
| 60 | std::string bufferStr; |
| 61 | bufferStr.resize(bufferLimitSize); |
| 62 | |
| 63 | int bytesRead = gzread(logStream, bufferStr.data(), |
| 64 | static_cast<unsigned int>(bufferStr.size())); |
| 65 | // On errors, gzread() shall return a value less than 0. |
| 66 | if (bytesRead < 0) |
| 67 | { |
| 68 | printErrorMessage(logStream); |
| 69 | return false; |
| 70 | } |
| 71 | bufferStr.resize(static_cast<size_t>(bytesRead)); |
| 72 | if (!hostLogEntryParser(bufferStr, skip, top, logEntries, logCount)) |
| 73 | { |
| 74 | BMCWEB_LOG_ERROR << "Error occurs during parsing host log.\n"; |
| 75 | return false; |
| 76 | } |
Ed Tanous | e662eae | 2022-01-25 10:39:19 -0800 | [diff] [blame] | 77 | } while (gzeof(logStream) != 1); |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 78 | |
| 79 | return true; |
| 80 | } |
| 81 | |
Nan Zhou | 9739de9 | 2022-04-06 11:07:27 -0700 | [diff] [blame] | 82 | bool hostLogEntryParser(const std::string& bufferStr, uint64_t skip, |
| 83 | uint64_t top, std::vector<std::string>& logEntries, |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 84 | size_t& logCount) |
| 85 | { |
| 86 | // Assume we have 8 files, and the max size of each file is |
| 87 | // 16k, so define the max size as 256kb (double of 8 files * |
| 88 | // 16kb) |
| 89 | constexpr size_t maxTotalFilesSize = 262144; |
| 90 | |
| 91 | // It may contain several log entry in one line, and |
| 92 | // the end of each log entry will be '\r\n' or '\r'. |
| 93 | // So we need to go through and split string by '\n' and '\r' |
| 94 | size_t pos = bufferStr.find_first_of("\n\r"); |
| 95 | size_t initialPos = 0; |
| 96 | std::string newLastMessage; |
| 97 | |
| 98 | while (pos != std::string::npos) |
| 99 | { |
| 100 | std::string logEntry = |
| 101 | bufferStr.substr(initialPos, pos - initialPos); |
| 102 | // Since there might be consecutive delimiters like "\r\n", we need |
| 103 | // to filter empty strings. |
| 104 | if (!logEntry.empty()) |
| 105 | { |
| 106 | logCount++; |
| 107 | if (!lastMessage.empty()) |
| 108 | { |
| 109 | logEntry.insert(0, lastMessage); |
| 110 | lastMessage.clear(); |
| 111 | } |
| 112 | if (logCount > skip && logCount <= (skip + top)) |
| 113 | { |
| 114 | totalFilesSize += logEntry.size(); |
| 115 | if (totalFilesSize > maxTotalFilesSize) |
| 116 | { |
| 117 | BMCWEB_LOG_ERROR |
| 118 | << "File size exceeds maximum allowed size of " |
| 119 | << maxTotalFilesSize; |
| 120 | return false; |
| 121 | } |
| 122 | logEntries.push_back(logEntry); |
| 123 | } |
| 124 | } |
| 125 | else |
| 126 | { |
| 127 | // Handle consecutive delimiter. '\r\n' act as a single |
| 128 | // delimiter, the other case like '\n\n', '\n\r' or '\r\r' will |
| 129 | // push back a "\n" as a log. |
| 130 | std::string delimiters; |
| 131 | if (pos > 0) |
| 132 | { |
| 133 | delimiters = bufferStr.substr(pos - 1, 2); |
| 134 | } |
| 135 | // Handle consecutive delimiter but spilt between two files. |
| 136 | if (pos == 0 && !(lastDelimiter.empty())) |
| 137 | { |
| 138 | delimiters = lastDelimiter + bufferStr.substr(0, 1); |
| 139 | } |
| 140 | if (delimiters != "\r\n") |
| 141 | { |
| 142 | logCount++; |
| 143 | if (logCount > skip && logCount <= (skip + top)) |
| 144 | { |
| 145 | totalFilesSize++; |
| 146 | if (totalFilesSize > maxTotalFilesSize) |
| 147 | { |
| 148 | BMCWEB_LOG_ERROR |
| 149 | << "File size exceeds maximum allowed size of " |
| 150 | << maxTotalFilesSize; |
| 151 | return false; |
| 152 | } |
| 153 | logEntries.emplace_back("\n"); |
| 154 | } |
| 155 | } |
| 156 | } |
| 157 | initialPos = pos + 1; |
| 158 | pos = bufferStr.find_first_of("\n\r", initialPos); |
| 159 | } |
| 160 | |
| 161 | // Store the last message |
| 162 | if (initialPos < bufferStr.size()) |
| 163 | { |
| 164 | newLastMessage = bufferStr.substr(initialPos); |
| 165 | } |
| 166 | // If consecutive delimiter spilt by buffer or file, the last character |
| 167 | // must be the delimiter. |
| 168 | else if (initialPos == bufferStr.size()) |
| 169 | { |
| 170 | lastDelimiter = std::string(1, bufferStr.back()); |
| 171 | } |
| 172 | // If file doesn't contain any "\r" or "\n", initialPos should be zero |
| 173 | if (initialPos == 0) |
| 174 | { |
| 175 | // Solved an edge case that the log doesn't in skip and top range, |
| 176 | // but consecutive files don't contain a single delimiter, this |
| 177 | // lastMessage becomes unnecessarily large. Since last message will |
| 178 | // prepend to next log, logCount need to plus 1 |
| 179 | if ((logCount + 1) > skip && (logCount + 1) <= (skip + top)) |
| 180 | { |
| 181 | lastMessage.insert( |
| 182 | lastMessage.end(), |
| 183 | std::make_move_iterator(newLastMessage.begin()), |
| 184 | std::make_move_iterator(newLastMessage.end())); |
| 185 | |
| 186 | // Following the previous question, protect lastMessage don't |
| 187 | // larger than max total files size |
| 188 | size_t tmpMessageSize = totalFilesSize + lastMessage.size(); |
| 189 | if (tmpMessageSize > maxTotalFilesSize) |
| 190 | { |
| 191 | BMCWEB_LOG_ERROR |
| 192 | << "File size exceeds maximum allowed size of " |
| 193 | << maxTotalFilesSize; |
| 194 | return false; |
| 195 | } |
| 196 | } |
| 197 | } |
| 198 | else |
| 199 | { |
| 200 | if (!newLastMessage.empty()) |
| 201 | { |
| 202 | lastMessage = std::move(newLastMessage); |
| 203 | } |
| 204 | } |
| 205 | return true; |
| 206 | } |
| 207 | |
| 208 | public: |
| 209 | GzFileReader() = default; |
| 210 | ~GzFileReader() = default; |
| 211 | GzFileReader(const GzFileReader&) = delete; |
| 212 | GzFileReader& operator=(const GzFileReader&) = delete; |
Ed Tanous | ecd6a3a | 2022-01-07 09:18:40 -0800 | [diff] [blame] | 213 | GzFileReader(GzFileReader&&) = delete; |
| 214 | GzFileReader& operator=(GzFileReader&&) = delete; |
Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 215 | }; |