blob: 3c6099521e2c332d0db09a05aeb18270ad4d6390 [file] [log] [blame]
#pragma once

#include "http_request.hpp"

#include <boost/beast/http/fields.hpp>

#include <algorithm>
#include <array>
#include <cstddef>
#include <ranges>
#include <string>
#include <string_view>
#include <vector>
10
/**
 * @brief Result codes returned by MultipartParser::parse().
 *
 * The enumerator order (and therefore each underlying value) is part of the
 * interface and must not be changed.
 */
enum class ParserError
{
    /// The body was parsed up to and including the closing boundary.
    PARSER_SUCCESS,
    /// The Content-Type header does not carry a usable
    /// "multipart/form-data; boundary=" value.
    ERROR_BOUNDARY_FORMAT,
    /// The opening boundary line is not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    /// The carriage return after the opening boundary is not followed by a
    /// line feed.
    ERROR_BOUNDARY_LF,
    /// The body does not begin with the expected boundary text.
    ERROR_BOUNDARY_DATA,
    /// A header line starts with ':' (the header name is empty).
    ERROR_EMPTY_HEADER,
    /// A header name contains a character other than a letter or '-'.
    ERROR_HEADER_NAME,
    /// The carriage return ending a header value is not followed by a line
    /// feed.
    ERROR_HEADER_VALUE,
    /// A carriage return met while reading a header name is not followed by
    /// a line feed.
    ERROR_HEADER_ENDING,
    /// A header line ended before its ':' separator was seen.
    ERROR_UNEXPECTED_END_OF_HEADER,
    /// The body ended before the closing boundary was reached.
    ERROR_UNEXPECTED_END_OF_INPUT,
    /// Internal safeguard: the boundary look-behind buffer index was out of
    /// range.
    ERROR_OUT_OF_RANGE
};
26
/**
 * @brief States of the MultipartParser state machine.
 *
 * The "*_START" states perform one-time setup for the state that follows and
 * then fall through into it while still looking at the same input byte.
 */
enum class State
{
    /// Initial state; resets the match index before the opening boundary.
    START,
    /// Matching the opening boundary line at the very start of the body.
    START_BOUNDARY,
    /// About to read a header name (or the blank line ending the headers).
    HEADER_FIELD_START,
    /// Reading a header name, up to the ':' separator.
    HEADER_FIELD,
    /// Skipping spaces between ':' and the header value.
    HEADER_VALUE_START,
    /// Reading a header value, up to the terminating carriage return.
    HEADER_VALUE,
    /// Saw the carriage return after a header value; expecting a line feed.
    HEADER_VALUE_ALMOST_DONE,
    /// Saw a carriage return in header-name position; expecting a line feed.
    HEADERS_ALMOST_DONE,
    /// About to read the payload of a part.
    PART_DATA_START,
    /// Reading payload while watching for the next boundary.
    PART_DATA,
    /// The closing boundary was seen; remaining input is ignored.
    END
};
41
/**
 * @brief Kind of boundary the parser believes it is in the middle of, decided
 *        by the first byte that follows a fully matched boundary string.
 */
enum class Boundary
{
    /// Not inside a boundary terminator.
    NON_BOUNDARY,
    /// Boundary followed by a carriage return: another part follows.
    PART_BOUNDARY,
    /// Boundary followed by '-': candidate for the closing boundary.
    END_BOUNDARY,
};
48
49struct FormPart
50{
51 boost::beast::http::fields fields;
52 std::string content;
53};
54
55class MultipartParser
56{
57 public:
58 MultipartParser() = default;
59
60 [[nodiscard]] ParserError parse(const crow::Request& req)
61 {
62 std::string_view contentType = req.getHeaderValue("content-type");
63
64 const std::string boundaryFormat = "multipart/form-data; boundary=";
Ed Tanous11ba3972022-07-11 09:50:41 -070065 if (!contentType.starts_with(boundaryFormat))
Ed Tanousaf4edf62020-07-21 08:46:25 -070066 {
67 return ParserError::ERROR_BOUNDARY_FORMAT;
68 }
69
70 std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
71
72 boundary = "\r\n--";
73 boundary += ctBoundary;
74 indexBoundary();
75 lookbehind.resize(boundary.size() + 8);
76 state = State::START;
77
Patrick Williams0e31e952023-05-10 19:40:27 -050078 const std::string& buffer = req.body();
79 size_t len = buffer.size();
Ed Tanousaf4edf62020-07-21 08:46:25 -070080 char cl = 0;
81
82 for (size_t i = 0; i < len; i++)
83 {
84 char c = buffer[i];
85 switch (state)
86 {
87 case State::START:
88 index = 0;
89 state = State::START_BOUNDARY;
90 [[fallthrough]];
91 case State::START_BOUNDARY:
92 if (index == boundary.size() - 2)
93 {
94 if (c != cr)
95 {
96 return ParserError::ERROR_BOUNDARY_CR;
97 }
98 index++;
99 break;
100 }
101 else if (index - 1 == boundary.size() - 2)
102 {
103 if (c != lf)
104 {
105 return ParserError::ERROR_BOUNDARY_LF;
106 }
107 index = 0;
Patrick Williams26eee3a2023-10-20 20:54:01 -0500108 mime_fields.emplace_back();
Ed Tanousaf4edf62020-07-21 08:46:25 -0700109 state = State::HEADER_FIELD_START;
110 break;
111 }
112 if (c != boundary[index + 2])
113 {
114 return ParserError::ERROR_BOUNDARY_DATA;
115 }
116 index++;
117 break;
118 case State::HEADER_FIELD_START:
119 currentHeaderName.resize(0);
120 state = State::HEADER_FIELD;
121 headerFieldMark = i;
122 index = 0;
123 [[fallthrough]];
124 case State::HEADER_FIELD:
125 if (c == cr)
126 {
127 headerFieldMark = 0;
128 state = State::HEADERS_ALMOST_DONE;
129 break;
130 }
131
132 index++;
133 if (c == hyphen)
134 {
135 break;
136 }
137
138 if (c == colon)
139 {
140 if (index == 1)
141 {
142 return ParserError::ERROR_EMPTY_HEADER;
143 }
Ed Tanousca45aa32022-01-07 09:28:45 -0800144
Patrick Williams0e31e952023-05-10 19:40:27 -0500145 currentHeaderName.append(&buffer[headerFieldMark],
Ed Tanousaf4edf62020-07-21 08:46:25 -0700146 i - headerFieldMark);
147 state = State::HEADER_VALUE_START;
148 break;
149 }
150 cl = lower(c);
151 if (cl < 'a' || cl > 'z')
152 {
153 return ParserError::ERROR_HEADER_NAME;
154 }
155 break;
156 case State::HEADER_VALUE_START:
157 if (c == space)
158 {
159 break;
160 }
161 headerValueMark = i;
162 state = State::HEADER_VALUE;
163 [[fallthrough]];
164 case State::HEADER_VALUE:
165 if (c == cr)
166 {
Patrick Williams0e31e952023-05-10 19:40:27 -0500167 std::string_view value(&buffer[headerValueMark],
Ed Tanousaf4edf62020-07-21 08:46:25 -0700168 i - headerValueMark);
169 mime_fields.rbegin()->fields.set(currentHeaderName,
170 value);
171 state = State::HEADER_VALUE_ALMOST_DONE;
172 }
173 break;
174 case State::HEADER_VALUE_ALMOST_DONE:
175 if (c != lf)
176 {
177 return ParserError::ERROR_HEADER_VALUE;
178 }
179 state = State::HEADER_FIELD_START;
180 break;
181 case State::HEADERS_ALMOST_DONE:
182 if (c != lf)
183 {
184 return ParserError::ERROR_HEADER_ENDING;
185 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200186 if (index > 0)
187 {
188 return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
189 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700190 state = State::PART_DATA_START;
191 break;
192 case State::PART_DATA_START:
193 state = State::PART_DATA;
194 partDataMark = i;
195 [[fallthrough]];
196 case State::PART_DATA:
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200197 {
Ed Tanousaf4edf62020-07-21 08:46:25 -0700198 if (index == 0)
199 {
Patrick Williams0e31e952023-05-10 19:40:27 -0500200 skipNonBoundary(buffer, boundary.size() - 1, i);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700201 c = buffer[i];
202 }
Patrick Williams0e31e952023-05-10 19:40:27 -0500203 if (auto ec = processPartData(buffer, i, c);
204 ec != ParserError::PARSER_SUCCESS)
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200205 {
206 return ec;
207 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700208 break;
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200209 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700210 case State::END:
211 break;
Ed Tanous4da04902024-03-19 11:32:44 -0700212 default:
213 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700214 }
215 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200216
217 if (state != State::END)
218 {
219 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
220 }
221
Ed Tanousaf4edf62020-07-21 08:46:25 -0700222 return ParserError::PARSER_SUCCESS;
223 }
224 std::vector<FormPart> mime_fields;
225 std::string boundary;
226
227 private:
228 void indexBoundary()
229 {
Ed Tanous3544d2a2023-08-06 18:12:20 -0700230 std::ranges::fill(boundaryIndex, 0);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700231 for (const char current : boundary)
232 {
233 boundaryIndex[static_cast<unsigned char>(current)] = true;
234 }
235 }
236
Ed Tanous56d23962022-02-14 20:42:02 -0800237 static char lower(char c)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700238 {
239 return static_cast<char>(c | 0x20);
240 }
241
Ed Tanous9de65b32024-03-27 13:34:40 -0700242 bool isBoundaryChar(char c) const
Ed Tanousaf4edf62020-07-21 08:46:25 -0700243 {
244 return boundaryIndex[static_cast<unsigned char>(c)];
245 }
246
Patrick Williams0e31e952023-05-10 19:40:27 -0500247 void skipNonBoundary(const std::string& buffer, size_t boundaryEnd,
Ed Tanousaf4edf62020-07-21 08:46:25 -0700248 size_t& i)
249 {
250 // boyer-moore derived algorithm to safely skip non-boundary data
Patrick Williams0e31e952023-05-10 19:40:27 -0500251 while (i + boundary.size() <= buffer.length())
Ed Tanousaf4edf62020-07-21 08:46:25 -0700252 {
253 if (isBoundaryChar(buffer[i + boundaryEnd]))
254 {
255 break;
256 }
257 i += boundary.size();
258 }
259 }
260
Patrick Williams0e31e952023-05-10 19:40:27 -0500261 ParserError processPartData(const std::string& buffer, size_t& i, char c)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700262 {
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200263 size_t prevIndex = index;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700264
265 if (index < boundary.size())
266 {
267 if (boundary[index] == c)
268 {
269 if (index == 0)
270 {
Patrick Williams0e31e952023-05-10 19:40:27 -0500271 const char* start = &buffer[partDataMark];
Ed Tanousca45aa32022-01-07 09:28:45 -0800272 size_t size = i - partDataMark;
Patrick Williamsbd79bce2024-08-16 15:22:20 -0400273 mime_fields.rbegin()->content +=
274 std::string_view(start, size);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700275 }
276 index++;
277 }
278 else
279 {
280 index = 0;
281 }
282 }
283 else if (index == boundary.size())
284 {
285 index++;
286 if (c == cr)
287 {
288 // cr = part boundary
289 flags = Boundary::PART_BOUNDARY;
290 }
291 else if (c == hyphen)
292 {
293 // hyphen = end boundary
294 flags = Boundary::END_BOUNDARY;
295 }
296 else
297 {
298 index = 0;
299 }
300 }
301 else
302 {
303 if (flags == Boundary::PART_BOUNDARY)
304 {
305 index = 0;
306 if (c == lf)
307 {
308 // unset the PART_BOUNDARY flag
309 flags = Boundary::NON_BOUNDARY;
Patrick Williams26eee3a2023-10-20 20:54:01 -0500310 mime_fields.emplace_back();
Ed Tanousaf4edf62020-07-21 08:46:25 -0700311 state = State::HEADER_FIELD_START;
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200312 return ParserError::PARSER_SUCCESS;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700313 }
314 }
315 if (flags == Boundary::END_BOUNDARY)
316 {
317 if (c == hyphen)
318 {
319 state = State::END;
320 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200321 else
322 {
323 flags = Boundary::NON_BOUNDARY;
324 index = 0;
325 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700326 }
327 }
328
329 if (index > 0)
330 {
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200331 if ((index - 1) >= lookbehind.size())
332 {
333 // Should never happen, but when it does it won't cause crash
334 return ParserError::ERROR_OUT_OF_RANGE;
335 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700336 lookbehind[index - 1] = c;
337 }
338 else if (prevIndex > 0)
339 {
340 // if our boundary turned out to be rubbish, the captured
341 // lookbehind belongs to partData
342
343 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700344 partDataMark = i;
345
346 // reconsider the current character even so it interrupted
347 // the sequence it could be the beginning of a new sequence
348 i--;
349 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200350 return ParserError::PARSER_SUCCESS;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700351 }
352
353 std::string currentHeaderName;
354 std::string currentHeaderValue;
355
356 static constexpr char cr = '\r';
357 static constexpr char lf = '\n';
358 static constexpr char space = ' ';
359 static constexpr char hyphen = '-';
360 static constexpr char colon = ':';
361
Ed Tanousd3a9e082022-01-07 09:30:41 -0800362 std::array<bool, 256> boundaryIndex{};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700363 std::string lookbehind;
Ed Tanousd3a9e082022-01-07 09:30:41 -0800364 State state{State::START};
365 Boundary flags{Boundary::NON_BOUNDARY};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700366 size_t index = 0;
367 size_t partDataMark = 0;
368 size_t headerFieldMark = 0;
369 size_t headerValueMark = 0;
370};