// blob: adeeccb4df52dea4d839a0beee1cb5c8adb6858f
#pragma once

#include "http_request.hpp"

#include <boost/beast/http/fields.hpp>

#include <algorithm>
#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>

// Result codes returned by MultipartParser::parse().
enum class ParserError
{
    // The closing delimiter was reached; parsing finished normally.
    PARSER_SUCCESS,
    // The Content-Type header does not start with
    // "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,
    // The opening boundary line is not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    // The carriage return after the opening boundary is not followed by a
    // line feed.
    ERROR_BOUNDARY_LF,
    // The body does not begin with "--" followed by the boundary token.
    ERROR_BOUNDARY_DATA,
    // A header line starts with ':' (empty header name).
    ERROR_EMPTY_HEADER,
    // A header name contains a character other than a letter or '-'.
    ERROR_HEADER_NAME,
    // The carriage return ending a header value is not followed by a line
    // feed.
    ERROR_HEADER_VALUE,
    // The carriage return ending the header section is not followed by a
    // line feed.
    ERROR_HEADER_ENDING,
    // A header line ended before its ':' separator was seen.
    ERROR_UNEXPECTED_END_OF_HEADER,
    // The body ended before the closing delimiter was found.
    ERROR_UNEXPECTED_END_OF_INPUT,
    // Internal guard: a write would have landed outside the lookbehind
    // buffer.
    ERROR_OUT_OF_RANGE
};

// States of the byte-wise state machine driven by MultipartParser::parse().
enum class State
{
    // Nothing consumed yet; switches to START_BOUNDARY on the first byte.
    START,
    // Matching the opening "--<boundary>" line and its CRLF.
    START_BOUNDARY,
    // About to read the first byte of a header line.
    HEADER_FIELD_START,
    // Reading a header name, up to ':' (or CR when the line is empty).
    HEADER_FIELD,
    // Skipping spaces between ':' and the header value.
    HEADER_VALUE_START,
    // Reading a header value, up to CR.
    HEADER_VALUE,
    // CR seen after a header value; LF expected.
    HEADER_VALUE_ALMOST_DONE,
    // CR seen at the start of a header line; LF expected to end the headers.
    HEADERS_ALMOST_DONE,
    // About to read the first payload byte of a part.
    PART_DATA_START,
    // Reading payload while watching for the next boundary.
    PART_DATA,
    // Closing delimiter seen; remaining input is ignored.
    END
};

// Kind of delimiter recognised right after a complete boundary match inside
// part data.
enum class Boundary
{
    // No delimiter is currently being confirmed.
    NON_BOUNDARY,
    // The boundary was followed by CR: another part is expected to follow.
    PART_BOUNDARY,
    // The boundary was followed by '-': this may be the closing delimiter.
    END_BOUNDARY,
};

48struct FormPart
49{
50 boost::beast::http::fields fields;
51 std::string content;
52};
53
54class MultipartParser
55{
56 public:
57 MultipartParser() = default;
58
59 [[nodiscard]] ParserError parse(const crow::Request& req)
60 {
61 std::string_view contentType = req.getHeaderValue("content-type");
62
63 const std::string boundaryFormat = "multipart/form-data; boundary=";
Ed Tanous11ba3972022-07-11 09:50:41 -070064 if (!contentType.starts_with(boundaryFormat))
Ed Tanousaf4edf62020-07-21 08:46:25 -070065 {
66 return ParserError::ERROR_BOUNDARY_FORMAT;
67 }
68
69 std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
70
71 boundary = "\r\n--";
72 boundary += ctBoundary;
73 indexBoundary();
74 lookbehind.resize(boundary.size() + 8);
75 state = State::START;
76
Ed Tanous33c6b582023-02-14 15:05:48 -080077 const char* buffer = req.body().data();
78 size_t len = req.body().size();
Ed Tanousaf4edf62020-07-21 08:46:25 -070079 char cl = 0;
80
81 for (size_t i = 0; i < len; i++)
82 {
Ed Tanousca45aa32022-01-07 09:28:45 -080083 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -070084 char c = buffer[i];
85 switch (state)
86 {
87 case State::START:
88 index = 0;
89 state = State::START_BOUNDARY;
90 [[fallthrough]];
91 case State::START_BOUNDARY:
92 if (index == boundary.size() - 2)
93 {
94 if (c != cr)
95 {
96 return ParserError::ERROR_BOUNDARY_CR;
97 }
98 index++;
99 break;
100 }
101 else if (index - 1 == boundary.size() - 2)
102 {
103 if (c != lf)
104 {
105 return ParserError::ERROR_BOUNDARY_LF;
106 }
107 index = 0;
108 mime_fields.push_back({});
109 state = State::HEADER_FIELD_START;
110 break;
111 }
112 if (c != boundary[index + 2])
113 {
114 return ParserError::ERROR_BOUNDARY_DATA;
115 }
116 index++;
117 break;
118 case State::HEADER_FIELD_START:
119 currentHeaderName.resize(0);
120 state = State::HEADER_FIELD;
121 headerFieldMark = i;
122 index = 0;
123 [[fallthrough]];
124 case State::HEADER_FIELD:
125 if (c == cr)
126 {
127 headerFieldMark = 0;
128 state = State::HEADERS_ALMOST_DONE;
129 break;
130 }
131
132 index++;
133 if (c == hyphen)
134 {
135 break;
136 }
137
138 if (c == colon)
139 {
140 if (index == 1)
141 {
142 return ParserError::ERROR_EMPTY_HEADER;
143 }
Ed Tanousca45aa32022-01-07 09:28:45 -0800144
145 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700146 currentHeaderName.append(buffer + headerFieldMark,
147 i - headerFieldMark);
148 state = State::HEADER_VALUE_START;
149 break;
150 }
151 cl = lower(c);
152 if (cl < 'a' || cl > 'z')
153 {
154 return ParserError::ERROR_HEADER_NAME;
155 }
156 break;
157 case State::HEADER_VALUE_START:
158 if (c == space)
159 {
160 break;
161 }
162 headerValueMark = i;
163 state = State::HEADER_VALUE;
164 [[fallthrough]];
165 case State::HEADER_VALUE:
166 if (c == cr)
167 {
Ed Tanousca45aa32022-01-07 09:28:45 -0800168 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700169 std::string_view value(buffer + headerValueMark,
170 i - headerValueMark);
171 mime_fields.rbegin()->fields.set(currentHeaderName,
172 value);
173 state = State::HEADER_VALUE_ALMOST_DONE;
174 }
175 break;
176 case State::HEADER_VALUE_ALMOST_DONE:
177 if (c != lf)
178 {
179 return ParserError::ERROR_HEADER_VALUE;
180 }
181 state = State::HEADER_FIELD_START;
182 break;
183 case State::HEADERS_ALMOST_DONE:
184 if (c != lf)
185 {
186 return ParserError::ERROR_HEADER_ENDING;
187 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200188 if (index > 0)
189 {
190 return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
191 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700192 state = State::PART_DATA_START;
193 break;
194 case State::PART_DATA_START:
195 state = State::PART_DATA;
196 partDataMark = i;
197 [[fallthrough]];
198 case State::PART_DATA:
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200199 {
Ed Tanousaf4edf62020-07-21 08:46:25 -0700200 if (index == 0)
201 {
202 skipNonBoundary(buffer, len, boundary.size() - 1, i);
Ed Tanousca45aa32022-01-07 09:28:45 -0800203
204 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700205 c = buffer[i];
206 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200207 const ParserError ec = processPartData(buffer, i, c);
208 if (ec != ParserError::PARSER_SUCCESS)
209 {
210 return ec;
211 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700212 break;
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200213 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700214 case State::END:
215 break;
216 }
217 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200218
219 if (state != State::END)
220 {
221 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
222 }
223
Ed Tanousaf4edf62020-07-21 08:46:25 -0700224 return ParserError::PARSER_SUCCESS;
225 }
226 std::vector<FormPart> mime_fields;
227 std::string boundary;
228
229 private:
230 void indexBoundary()
231 {
232 std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0);
233 for (const char current : boundary)
234 {
235 boundaryIndex[static_cast<unsigned char>(current)] = true;
236 }
237 }
238
Ed Tanous56d23962022-02-14 20:42:02 -0800239 static char lower(char c)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700240 {
241 return static_cast<char>(c | 0x20);
242 }
243
244 inline bool isBoundaryChar(char c) const
245 {
246 return boundaryIndex[static_cast<unsigned char>(c)];
247 }
248
249 void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd,
250 size_t& i)
251 {
252 // boyer-moore derived algorithm to safely skip non-boundary data
253 while (i + boundary.size() <= len)
254 {
Ed Tanousca45aa32022-01-07 09:28:45 -0800255 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700256 if (isBoundaryChar(buffer[i + boundaryEnd]))
257 {
258 break;
259 }
260 i += boundary.size();
261 }
262 }
263
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200264 ParserError processPartData(const char* buffer, size_t& i, char c)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700265 {
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200266 size_t prevIndex = index;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700267
268 if (index < boundary.size())
269 {
270 if (boundary[index] == c)
271 {
272 if (index == 0)
273 {
Ed Tanousca45aa32022-01-07 09:28:45 -0800274 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
275 const char* start = buffer + partDataMark;
276 size_t size = i - partDataMark;
277 mime_fields.rbegin()->content +=
278 std::string_view(start, size);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700279 }
280 index++;
281 }
282 else
283 {
284 index = 0;
285 }
286 }
287 else if (index == boundary.size())
288 {
289 index++;
290 if (c == cr)
291 {
292 // cr = part boundary
293 flags = Boundary::PART_BOUNDARY;
294 }
295 else if (c == hyphen)
296 {
297 // hyphen = end boundary
298 flags = Boundary::END_BOUNDARY;
299 }
300 else
301 {
302 index = 0;
303 }
304 }
305 else
306 {
307 if (flags == Boundary::PART_BOUNDARY)
308 {
309 index = 0;
310 if (c == lf)
311 {
312 // unset the PART_BOUNDARY flag
313 flags = Boundary::NON_BOUNDARY;
314 mime_fields.push_back({});
315 state = State::HEADER_FIELD_START;
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200316 return ParserError::PARSER_SUCCESS;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700317 }
318 }
319 if (flags == Boundary::END_BOUNDARY)
320 {
321 if (c == hyphen)
322 {
323 state = State::END;
324 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200325 else
326 {
327 flags = Boundary::NON_BOUNDARY;
328 index = 0;
329 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700330 }
331 }
332
333 if (index > 0)
334 {
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200335 if ((index - 1) >= lookbehind.size())
336 {
337 // Should never happen, but when it does it won't cause crash
338 return ParserError::ERROR_OUT_OF_RANGE;
339 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700340 lookbehind[index - 1] = c;
341 }
342 else if (prevIndex > 0)
343 {
344 // if our boundary turned out to be rubbish, the captured
345 // lookbehind belongs to partData
346
347 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700348 partDataMark = i;
349
350 // reconsider the current character even so it interrupted
351 // the sequence it could be the beginning of a new sequence
352 i--;
353 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200354 return ParserError::PARSER_SUCCESS;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700355 }
356
357 std::string currentHeaderName;
358 std::string currentHeaderValue;
359
360 static constexpr char cr = '\r';
361 static constexpr char lf = '\n';
362 static constexpr char space = ' ';
363 static constexpr char hyphen = '-';
364 static constexpr char colon = ':';
365
Ed Tanousd3a9e082022-01-07 09:30:41 -0800366 std::array<bool, 256> boundaryIndex{};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700367 std::string lookbehind;
Ed Tanousd3a9e082022-01-07 09:30:41 -0800368 State state{State::START};
369 Boundary flags{Boundary::NON_BOUNDARY};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700370 size_t index = 0;
371 size_t partDataMark = 0;
372 size_t headerFieldMark = 0;
373 size_t headerValueMark = 0;
374};