blob: a2f63bd167b946c852ebbf63b8839490e2ff7ae7 [file] [log] [blame]
Ed Tanousaf4edf62020-07-21 08:46:25 -07001#pragma once
2
#include <boost/beast/http/fields.hpp>
#include <http_request.hpp>

#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>
8
/// Result codes returned by MultipartParser::parse().
enum class ParserError
{
    /// The whole body was consumed and the closing boundary was reached.
    PARSER_SUCCESS,
    /// Content-Type does not start with "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,
    /// The opening boundary line is not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    /// The carriage return after the opening boundary is not followed by a
    /// line feed.
    ERROR_BOUNDARY_LF,
    /// The body does not begin with "--<boundary>".
    ERROR_BOUNDARY_DATA,
    /// A ':' was found with no header name in front of it.
    ERROR_EMPTY_HEADER,
    /// A header name contains a character other than a letter or '-'.
    ERROR_HEADER_NAME,
    /// The carriage return ending a header value is not followed by a line
    /// feed.
    ERROR_HEADER_VALUE,
    /// The carriage return seen while reading a header name is not followed
    /// by a line feed.
    ERROR_HEADER_ENDING,
    /// A header line ended (CR LF) after some name characters but before
    /// any ':' was seen.
    ERROR_UNEXPECTED_END_OF_HEADER,
    /// The body ended before the closing boundary was found.
    ERROR_UNEXPECTED_END_OF_INPUT,
    /// Internal guard: the boundary match position ran past the lookbehind
    /// buffer.
    ERROR_OUT_OF_RANGE
};
24
/// States of the MultipartParser byte-by-byte state machine.
enum class State
{
    /// Nothing consumed yet; resets the match index and moves on to
    /// START_BOUNDARY on the first byte.
    START,
    /// Matching the opening "--<boundary>" followed by CR LF.
    START_BOUNDARY,
    /// About to read the first byte of a header name (or the CR of the
    /// blank line that ends the header section).
    HEADER_FIELD_START,
    /// Reading a header name up to ':'.
    HEADER_FIELD,
    /// Skipping spaces between ':' and the header value.
    HEADER_VALUE_START,
    /// Reading a header value up to CR.
    HEADER_VALUE,
    /// CR seen after a header value; LF expected.
    HEADER_VALUE_ALMOST_DONE,
    /// CR seen in header-name position; LF expected.
    HEADERS_ALMOST_DONE,
    /// First byte of a part's content.
    PART_DATA_START,
    /// Reading part content while looking for the next boundary.
    PART_DATA,
    /// Closing boundary seen; remaining bytes are ignored.
    END
};
39
/// Kind of boundary the parser believes it is looking at once the full
/// boundary string has been matched inside part data.
enum class Boundary
{
    /// No boundary candidate is pending.
    NON_BOUNDARY,
    /// Boundary was followed by CR: another part is expected to follow.
    PART_BOUNDARY,
    /// Boundary was followed by '-': this may be the closing boundary.
    END_BOUNDARY,
};
46
47struct FormPart
48{
49 boost::beast::http::fields fields;
50 std::string content;
51};
52
53class MultipartParser
54{
55 public:
56 MultipartParser() = default;
57
58 [[nodiscard]] ParserError parse(const crow::Request& req)
59 {
60 std::string_view contentType = req.getHeaderValue("content-type");
61
62 const std::string boundaryFormat = "multipart/form-data; boundary=";
Ed Tanous11ba3972022-07-11 09:50:41 -070063 if (!contentType.starts_with(boundaryFormat))
Ed Tanousaf4edf62020-07-21 08:46:25 -070064 {
65 return ParserError::ERROR_BOUNDARY_FORMAT;
66 }
67
68 std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
69
70 boundary = "\r\n--";
71 boundary += ctBoundary;
72 indexBoundary();
73 lookbehind.resize(boundary.size() + 8);
74 state = State::START;
75
76 const char* buffer = req.body.data();
77 size_t len = req.body.size();
Ed Tanousaf4edf62020-07-21 08:46:25 -070078 char cl = 0;
79
80 for (size_t i = 0; i < len; i++)
81 {
Ed Tanousca45aa32022-01-07 09:28:45 -080082 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -070083 char c = buffer[i];
84 switch (state)
85 {
86 case State::START:
87 index = 0;
88 state = State::START_BOUNDARY;
89 [[fallthrough]];
90 case State::START_BOUNDARY:
91 if (index == boundary.size() - 2)
92 {
93 if (c != cr)
94 {
95 return ParserError::ERROR_BOUNDARY_CR;
96 }
97 index++;
98 break;
99 }
100 else if (index - 1 == boundary.size() - 2)
101 {
102 if (c != lf)
103 {
104 return ParserError::ERROR_BOUNDARY_LF;
105 }
106 index = 0;
107 mime_fields.push_back({});
108 state = State::HEADER_FIELD_START;
109 break;
110 }
111 if (c != boundary[index + 2])
112 {
113 return ParserError::ERROR_BOUNDARY_DATA;
114 }
115 index++;
116 break;
117 case State::HEADER_FIELD_START:
118 currentHeaderName.resize(0);
119 state = State::HEADER_FIELD;
120 headerFieldMark = i;
121 index = 0;
122 [[fallthrough]];
123 case State::HEADER_FIELD:
124 if (c == cr)
125 {
126 headerFieldMark = 0;
127 state = State::HEADERS_ALMOST_DONE;
128 break;
129 }
130
131 index++;
132 if (c == hyphen)
133 {
134 break;
135 }
136
137 if (c == colon)
138 {
139 if (index == 1)
140 {
141 return ParserError::ERROR_EMPTY_HEADER;
142 }
Ed Tanousca45aa32022-01-07 09:28:45 -0800143
144 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700145 currentHeaderName.append(buffer + headerFieldMark,
146 i - headerFieldMark);
147 state = State::HEADER_VALUE_START;
148 break;
149 }
150 cl = lower(c);
151 if (cl < 'a' || cl > 'z')
152 {
153 return ParserError::ERROR_HEADER_NAME;
154 }
155 break;
156 case State::HEADER_VALUE_START:
157 if (c == space)
158 {
159 break;
160 }
161 headerValueMark = i;
162 state = State::HEADER_VALUE;
163 [[fallthrough]];
164 case State::HEADER_VALUE:
165 if (c == cr)
166 {
Ed Tanousca45aa32022-01-07 09:28:45 -0800167 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700168 std::string_view value(buffer + headerValueMark,
169 i - headerValueMark);
170 mime_fields.rbegin()->fields.set(currentHeaderName,
171 value);
172 state = State::HEADER_VALUE_ALMOST_DONE;
173 }
174 break;
175 case State::HEADER_VALUE_ALMOST_DONE:
176 if (c != lf)
177 {
178 return ParserError::ERROR_HEADER_VALUE;
179 }
180 state = State::HEADER_FIELD_START;
181 break;
182 case State::HEADERS_ALMOST_DONE:
183 if (c != lf)
184 {
185 return ParserError::ERROR_HEADER_ENDING;
186 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200187 if (index > 0)
188 {
189 return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
190 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700191 state = State::PART_DATA_START;
192 break;
193 case State::PART_DATA_START:
194 state = State::PART_DATA;
195 partDataMark = i;
196 [[fallthrough]];
197 case State::PART_DATA:
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200198 {
Ed Tanousaf4edf62020-07-21 08:46:25 -0700199 if (index == 0)
200 {
201 skipNonBoundary(buffer, len, boundary.size() - 1, i);
Ed Tanousca45aa32022-01-07 09:28:45 -0800202
203 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700204 c = buffer[i];
205 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200206 const ParserError ec = processPartData(buffer, i, c);
207 if (ec != ParserError::PARSER_SUCCESS)
208 {
209 return ec;
210 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700211 break;
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200212 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700213 case State::END:
214 break;
215 }
216 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200217
218 if (state != State::END)
219 {
220 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
221 }
222
Ed Tanousaf4edf62020-07-21 08:46:25 -0700223 return ParserError::PARSER_SUCCESS;
224 }
225 std::vector<FormPart> mime_fields;
226 std::string boundary;
227
228 private:
229 void indexBoundary()
230 {
231 std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0);
232 for (const char current : boundary)
233 {
234 boundaryIndex[static_cast<unsigned char>(current)] = true;
235 }
236 }
237
Ed Tanous56d23962022-02-14 20:42:02 -0800238 static char lower(char c)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700239 {
240 return static_cast<char>(c | 0x20);
241 }
242
243 inline bool isBoundaryChar(char c) const
244 {
245 return boundaryIndex[static_cast<unsigned char>(c)];
246 }
247
248 void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd,
249 size_t& i)
250 {
251 // boyer-moore derived algorithm to safely skip non-boundary data
252 while (i + boundary.size() <= len)
253 {
Ed Tanousca45aa32022-01-07 09:28:45 -0800254 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700255 if (isBoundaryChar(buffer[i + boundaryEnd]))
256 {
257 break;
258 }
259 i += boundary.size();
260 }
261 }
262
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200263 ParserError processPartData(const char* buffer, size_t& i, char c)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700264 {
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200265 size_t prevIndex = index;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700266
267 if (index < boundary.size())
268 {
269 if (boundary[index] == c)
270 {
271 if (index == 0)
272 {
Ed Tanousca45aa32022-01-07 09:28:45 -0800273 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
274 const char* start = buffer + partDataMark;
275 size_t size = i - partDataMark;
276 mime_fields.rbegin()->content +=
277 std::string_view(start, size);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700278 }
279 index++;
280 }
281 else
282 {
283 index = 0;
284 }
285 }
286 else if (index == boundary.size())
287 {
288 index++;
289 if (c == cr)
290 {
291 // cr = part boundary
292 flags = Boundary::PART_BOUNDARY;
293 }
294 else if (c == hyphen)
295 {
296 // hyphen = end boundary
297 flags = Boundary::END_BOUNDARY;
298 }
299 else
300 {
301 index = 0;
302 }
303 }
304 else
305 {
306 if (flags == Boundary::PART_BOUNDARY)
307 {
308 index = 0;
309 if (c == lf)
310 {
311 // unset the PART_BOUNDARY flag
312 flags = Boundary::NON_BOUNDARY;
313 mime_fields.push_back({});
314 state = State::HEADER_FIELD_START;
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200315 return ParserError::PARSER_SUCCESS;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700316 }
317 }
318 if (flags == Boundary::END_BOUNDARY)
319 {
320 if (c == hyphen)
321 {
322 state = State::END;
323 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200324 else
325 {
326 flags = Boundary::NON_BOUNDARY;
327 index = 0;
328 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700329 }
330 }
331
332 if (index > 0)
333 {
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200334 if ((index - 1) >= lookbehind.size())
335 {
336 // Should never happen, but when it does it won't cause crash
337 return ParserError::ERROR_OUT_OF_RANGE;
338 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700339 lookbehind[index - 1] = c;
340 }
341 else if (prevIndex > 0)
342 {
343 // if our boundary turned out to be rubbish, the captured
344 // lookbehind belongs to partData
345
346 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700347 partDataMark = i;
348
349 // reconsider the current character even so it interrupted
350 // the sequence it could be the beginning of a new sequence
351 i--;
352 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200353 return ParserError::PARSER_SUCCESS;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700354 }
355
356 std::string currentHeaderName;
357 std::string currentHeaderValue;
358
359 static constexpr char cr = '\r';
360 static constexpr char lf = '\n';
361 static constexpr char space = ' ';
362 static constexpr char hyphen = '-';
363 static constexpr char colon = ':';
364
Ed Tanousd3a9e082022-01-07 09:30:41 -0800365 std::array<bool, 256> boundaryIndex{};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700366 std::string lookbehind;
Ed Tanousd3a9e082022-01-07 09:30:41 -0800367 State state{State::START};
368 Boundary flags{Boundary::NON_BOUNDARY};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700369 size_t index = 0;
370 size_t partDataMark = 0;
371 size_t headerFieldMark = 0;
372 size_t headerValueMark = 0;
373};