#pragma once

#include <zlib.h>

#include <array>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>

 | 9 | class GzFileReader | 
 | 10 | { | 
 | 11 |   public: | 
 | 12 |     bool gzGetLines(const std::string& filename, uint64_t& skip, uint64_t& top, | 
 | 13 |                     std::vector<std::string>& logEntries, size_t& logCount) | 
 | 14 |     { | 
 | 15 |         gzFile logStream = gzopen(filename.c_str(), "r"); | 
| Ed Tanous | e662eae | 2022-01-25 10:39:19 -0800 | [diff] [blame] | 16 |         if (logStream == nullptr) | 
| Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 17 |         { | 
 | 18 |             BMCWEB_LOG_ERROR << "Can't open gz file: " << filename << '\n'; | 
 | 19 |             return false; | 
 | 20 |         } | 
 | 21 |  | 
 | 22 |         if (!readFile(logStream, skip, top, logEntries, logCount)) | 
 | 23 |         { | 
 | 24 |             gzclose(logStream); | 
 | 25 |             return false; | 
 | 26 |         } | 
 | 27 |         gzclose(logStream); | 
 | 28 |         return true; | 
 | 29 |     } | 
 | 30 |  | 
 | 31 |     std::string getLastMessage() | 
 | 32 |     { | 
 | 33 |         return lastMessage; | 
 | 34 |     } | 
 | 35 |  | 
 | 36 |   private: | 
 | 37 |     std::string lastMessage; | 
 | 38 |     std::string lastDelimiter; | 
 | 39 |     size_t totalFilesSize = 0; | 
 | 40 |  | 
| Ed Tanous | 56d2396 | 2022-02-14 20:42:02 -0800 | [diff] [blame] | 41 |     static void printErrorMessage(gzFile logStream) | 
| Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 42 |     { | 
 | 43 |         int errNum = 0; | 
 | 44 |         const char* errMsg = gzerror(logStream, &errNum); | 
 | 45 |  | 
 | 46 |         BMCWEB_LOG_ERROR << "Error reading gz compressed data.\n" | 
 | 47 |                          << "Error Message: " << errMsg << '\n' | 
 | 48 |                          << "Error Number: " << errNum; | 
 | 49 |     } | 
 | 50 |  | 
 | 51 |     bool readFile(gzFile logStream, uint64_t& skip, uint64_t& top, | 
 | 52 |                   std::vector<std::string>& logEntries, size_t& logCount) | 
 | 53 |     { | 
 | 54 |         constexpr int bufferLimitSize = 1024; | 
 | 55 |         do | 
 | 56 |         { | 
 | 57 |             std::string bufferStr; | 
 | 58 |             bufferStr.resize(bufferLimitSize); | 
 | 59 |  | 
 | 60 |             int bytesRead = gzread(logStream, bufferStr.data(), | 
 | 61 |                                    static_cast<unsigned int>(bufferStr.size())); | 
 | 62 |             // On errors, gzread() shall return a value less than 0. | 
 | 63 |             if (bytesRead < 0) | 
 | 64 |             { | 
 | 65 |                 printErrorMessage(logStream); | 
 | 66 |                 return false; | 
 | 67 |             } | 
 | 68 |             bufferStr.resize(static_cast<size_t>(bytesRead)); | 
 | 69 |             if (!hostLogEntryParser(bufferStr, skip, top, logEntries, logCount)) | 
 | 70 |             { | 
 | 71 |                 BMCWEB_LOG_ERROR << "Error occurs during parsing host log.\n"; | 
 | 72 |                 return false; | 
 | 73 |             } | 
| Ed Tanous | e662eae | 2022-01-25 10:39:19 -0800 | [diff] [blame] | 74 |         } while (gzeof(logStream) != 1); | 
| Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 75 |  | 
 | 76 |         return true; | 
 | 77 |     } | 
 | 78 |  | 
 | 79 |     bool hostLogEntryParser(const std::string& bufferStr, uint64_t& skip, | 
 | 80 |                             uint64_t& top, std::vector<std::string>& logEntries, | 
 | 81 |                             size_t& logCount) | 
 | 82 |     { | 
 | 83 |         // Assume we have 8 files, and the max size of each file is | 
 | 84 |         // 16k, so define the max size as 256kb (double of 8 files * | 
 | 85 |         // 16kb) | 
 | 86 |         constexpr size_t maxTotalFilesSize = 262144; | 
 | 87 |  | 
 | 88 |         // It may contain several log entry in one line, and | 
 | 89 |         // the end of each log entry will be '\r\n' or '\r'. | 
 | 90 |         // So we need to go through and split string by '\n' and '\r' | 
 | 91 |         size_t pos = bufferStr.find_first_of("\n\r"); | 
 | 92 |         size_t initialPos = 0; | 
 | 93 |         std::string newLastMessage; | 
 | 94 |  | 
 | 95 |         while (pos != std::string::npos) | 
 | 96 |         { | 
 | 97 |             std::string logEntry = | 
 | 98 |                 bufferStr.substr(initialPos, pos - initialPos); | 
 | 99 |             // Since there might be consecutive delimiters like "\r\n", we need | 
 | 100 |             // to filter empty strings. | 
 | 101 |             if (!logEntry.empty()) | 
 | 102 |             { | 
 | 103 |                 logCount++; | 
 | 104 |                 if (!lastMessage.empty()) | 
 | 105 |                 { | 
 | 106 |                     logEntry.insert(0, lastMessage); | 
 | 107 |                     lastMessage.clear(); | 
 | 108 |                 } | 
 | 109 |                 if (logCount > skip && logCount <= (skip + top)) | 
 | 110 |                 { | 
 | 111 |                     totalFilesSize += logEntry.size(); | 
 | 112 |                     if (totalFilesSize > maxTotalFilesSize) | 
 | 113 |                     { | 
 | 114 |                         BMCWEB_LOG_ERROR | 
 | 115 |                             << "File size exceeds maximum allowed size of " | 
 | 116 |                             << maxTotalFilesSize; | 
 | 117 |                         return false; | 
 | 118 |                     } | 
 | 119 |                     logEntries.push_back(logEntry); | 
 | 120 |                 } | 
 | 121 |             } | 
 | 122 |             else | 
 | 123 |             { | 
 | 124 |                 // Handle consecutive delimiter. '\r\n' act as a single | 
 | 125 |                 // delimiter, the other case like '\n\n', '\n\r' or '\r\r' will | 
 | 126 |                 // push back a "\n" as a log. | 
 | 127 |                 std::string delimiters; | 
 | 128 |                 if (pos > 0) | 
 | 129 |                 { | 
 | 130 |                     delimiters = bufferStr.substr(pos - 1, 2); | 
 | 131 |                 } | 
 | 132 |                 // Handle consecutive delimiter but spilt between two files. | 
 | 133 |                 if (pos == 0 && !(lastDelimiter.empty())) | 
 | 134 |                 { | 
 | 135 |                     delimiters = lastDelimiter + bufferStr.substr(0, 1); | 
 | 136 |                 } | 
 | 137 |                 if (delimiters != "\r\n") | 
 | 138 |                 { | 
 | 139 |                     logCount++; | 
 | 140 |                     if (logCount > skip && logCount <= (skip + top)) | 
 | 141 |                     { | 
 | 142 |                         totalFilesSize++; | 
 | 143 |                         if (totalFilesSize > maxTotalFilesSize) | 
 | 144 |                         { | 
 | 145 |                             BMCWEB_LOG_ERROR | 
 | 146 |                                 << "File size exceeds maximum allowed size of " | 
 | 147 |                                 << maxTotalFilesSize; | 
 | 148 |                             return false; | 
 | 149 |                         } | 
 | 150 |                         logEntries.emplace_back("\n"); | 
 | 151 |                     } | 
 | 152 |                 } | 
 | 153 |             } | 
 | 154 |             initialPos = pos + 1; | 
 | 155 |             pos = bufferStr.find_first_of("\n\r", initialPos); | 
 | 156 |         } | 
 | 157 |  | 
 | 158 |         // Store the last message | 
 | 159 |         if (initialPos < bufferStr.size()) | 
 | 160 |         { | 
 | 161 |             newLastMessage = bufferStr.substr(initialPos); | 
 | 162 |         } | 
 | 163 |         // If consecutive delimiter spilt by buffer or file, the last character | 
 | 164 |         // must be the delimiter. | 
 | 165 |         else if (initialPos == bufferStr.size()) | 
 | 166 |         { | 
 | 167 |             lastDelimiter = std::string(1, bufferStr.back()); | 
 | 168 |         } | 
 | 169 |         // If file doesn't contain any "\r" or "\n", initialPos should be zero | 
 | 170 |         if (initialPos == 0) | 
 | 171 |         { | 
 | 172 |             // Solved an edge case that the log doesn't in skip and top range, | 
 | 173 |             // but consecutive files don't contain a single delimiter, this | 
 | 174 |             // lastMessage becomes unnecessarily large. Since last message will | 
 | 175 |             // prepend to next log, logCount need to plus 1 | 
 | 176 |             if ((logCount + 1) > skip && (logCount + 1) <= (skip + top)) | 
 | 177 |             { | 
 | 178 |                 lastMessage.insert( | 
 | 179 |                     lastMessage.end(), | 
 | 180 |                     std::make_move_iterator(newLastMessage.begin()), | 
 | 181 |                     std::make_move_iterator(newLastMessage.end())); | 
 | 182 |  | 
 | 183 |                 // Following the previous question, protect lastMessage don't | 
 | 184 |                 // larger than max total files size | 
 | 185 |                 size_t tmpMessageSize = totalFilesSize + lastMessage.size(); | 
 | 186 |                 if (tmpMessageSize > maxTotalFilesSize) | 
 | 187 |                 { | 
 | 188 |                     BMCWEB_LOG_ERROR | 
 | 189 |                         << "File size exceeds maximum allowed size of " | 
 | 190 |                         << maxTotalFilesSize; | 
 | 191 |                     return false; | 
 | 192 |                 } | 
 | 193 |             } | 
 | 194 |         } | 
 | 195 |         else | 
 | 196 |         { | 
 | 197 |             if (!newLastMessage.empty()) | 
 | 198 |             { | 
 | 199 |                 lastMessage = std::move(newLastMessage); | 
 | 200 |             } | 
 | 201 |         } | 
 | 202 |         return true; | 
 | 203 |     } | 
 | 204 |  | 
 | 205 |   public: | 
 | 206 |     GzFileReader() = default; | 
 | 207 |     ~GzFileReader() = default; | 
 | 208 |     GzFileReader(const GzFileReader&) = delete; | 
 | 209 |     GzFileReader& operator=(const GzFileReader&) = delete; | 
| Ed Tanous | ecd6a3a | 2022-01-07 09:18:40 -0800 | [diff] [blame] | 210 |     GzFileReader(GzFileReader&&) = delete; | 
 | 211 |     GzFileReader& operator=(GzFileReader&&) = delete; | 
| Spencer Ku | b7028eb | 2021-10-26 15:27:35 +0800 | [diff] [blame] | 212 | }; |