blob: 5ac196dd314ec406b97e224c231700471261d78d [file] [log] [blame]
Ed Tanousaf4edf62020-07-21 08:46:25 -07001#pragma once
2
#include "http_request.hpp"

#include <boost/beast/http/fields.hpp>

#include <algorithm>
#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>
9
/**
 * @brief Result codes returned by MultipartParser::parse().
 */
enum class ParserError
{
    /// The body was consumed and the closing boundary was reached.
    PARSER_SUCCESS,
    /// The content-type header does not begin with
    /// "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,
    /// The opening boundary is not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    /// The carriage return after the opening boundary is not followed by a
    /// line feed.
    ERROR_BOUNDARY_LF,
    /// The body does not start with the expected opening boundary bytes.
    ERROR_BOUNDARY_DATA,
    /// A header line starts with ':' (the header name is empty).
    ERROR_EMPTY_HEADER,
    /// A header name contains a character other than an ASCII letter or '-'.
    ERROR_HEADER_NAME,
    /// The carriage return ending a header value is not followed by a line
    /// feed.
    ERROR_HEADER_VALUE,
    /// The carriage return ending the header section is not followed by a
    /// line feed.
    ERROR_HEADER_ENDING,
    /// A header line ended before its ':' separator was seen.
    ERROR_UNEXPECTED_END_OF_HEADER,
    /// The body ended before the closing boundary was found.
    ERROR_UNEXPECTED_END_OF_INPUT,
    /// The boundary match position ran past the lookbehind buffer.
    ERROR_OUT_OF_RANGE
};
25
/**
 * @brief States of the MultipartParser byte-by-byte state machine.
 */
enum class State
{
    /// Initial state; resets the match index and moves to START_BOUNDARY.
    START,
    /// Matching the opening boundary line at the very start of the body.
    START_BOUNDARY,
    /// About to read the first character of a header line.
    HEADER_FIELD_START,
    /// Reading a header name up to the ':' separator.
    HEADER_FIELD,
    /// Skipping spaces between ':' and the header value.
    HEADER_VALUE_START,
    /// Reading a header value up to the terminating carriage return.
    HEADER_VALUE,
    /// Saw the carriage return ending a header value; expecting line feed.
    HEADER_VALUE_ALMOST_DONE,
    /// Saw the carriage return ending the header section; expecting line
    /// feed.
    HEADERS_ALMOST_DONE,
    /// About to read the first byte of a part's content.
    PART_DATA_START,
    /// Reading part content while watching for the next boundary.
    PART_DATA,
    /// The closing boundary has been seen; remaining bytes are ignored.
    END
};
40
/**
 * @brief Kind of boundary the parser believes it is currently finishing,
 *        decided by the character that follows a fully matched boundary.
 */
enum class Boundary
{
    /// No boundary terminator is pending.
    NON_BOUNDARY,
    /// Boundary was followed by a carriage return: another part follows.
    PART_BOUNDARY,
    /// Boundary was followed by a hyphen: this may be the closing boundary.
    END_BOUNDARY,
};
47
48struct FormPart
49{
50 boost::beast::http::fields fields;
51 std::string content;
52};
53
54class MultipartParser
55{
56 public:
57 MultipartParser() = default;
58
59 [[nodiscard]] ParserError parse(const crow::Request& req)
60 {
61 std::string_view contentType = req.getHeaderValue("content-type");
62
63 const std::string boundaryFormat = "multipart/form-data; boundary=";
Ed Tanous11ba3972022-07-11 09:50:41 -070064 if (!contentType.starts_with(boundaryFormat))
Ed Tanousaf4edf62020-07-21 08:46:25 -070065 {
66 return ParserError::ERROR_BOUNDARY_FORMAT;
67 }
68
69 std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
70
71 boundary = "\r\n--";
72 boundary += ctBoundary;
73 indexBoundary();
74 lookbehind.resize(boundary.size() + 8);
75 state = State::START;
76
Patrick Williams0e31e952023-05-10 19:40:27 -050077 const std::string& buffer = req.body();
78 size_t len = buffer.size();
Ed Tanousaf4edf62020-07-21 08:46:25 -070079 char cl = 0;
80
81 for (size_t i = 0; i < len; i++)
82 {
83 char c = buffer[i];
84 switch (state)
85 {
86 case State::START:
87 index = 0;
88 state = State::START_BOUNDARY;
89 [[fallthrough]];
90 case State::START_BOUNDARY:
91 if (index == boundary.size() - 2)
92 {
93 if (c != cr)
94 {
95 return ParserError::ERROR_BOUNDARY_CR;
96 }
97 index++;
98 break;
99 }
100 else if (index - 1 == boundary.size() - 2)
101 {
102 if (c != lf)
103 {
104 return ParserError::ERROR_BOUNDARY_LF;
105 }
106 index = 0;
107 mime_fields.push_back({});
108 state = State::HEADER_FIELD_START;
109 break;
110 }
111 if (c != boundary[index + 2])
112 {
113 return ParserError::ERROR_BOUNDARY_DATA;
114 }
115 index++;
116 break;
117 case State::HEADER_FIELD_START:
118 currentHeaderName.resize(0);
119 state = State::HEADER_FIELD;
120 headerFieldMark = i;
121 index = 0;
122 [[fallthrough]];
123 case State::HEADER_FIELD:
124 if (c == cr)
125 {
126 headerFieldMark = 0;
127 state = State::HEADERS_ALMOST_DONE;
128 break;
129 }
130
131 index++;
132 if (c == hyphen)
133 {
134 break;
135 }
136
137 if (c == colon)
138 {
139 if (index == 1)
140 {
141 return ParserError::ERROR_EMPTY_HEADER;
142 }
Ed Tanousca45aa32022-01-07 09:28:45 -0800143
Patrick Williams0e31e952023-05-10 19:40:27 -0500144 currentHeaderName.append(&buffer[headerFieldMark],
Ed Tanousaf4edf62020-07-21 08:46:25 -0700145 i - headerFieldMark);
146 state = State::HEADER_VALUE_START;
147 break;
148 }
149 cl = lower(c);
150 if (cl < 'a' || cl > 'z')
151 {
152 return ParserError::ERROR_HEADER_NAME;
153 }
154 break;
155 case State::HEADER_VALUE_START:
156 if (c == space)
157 {
158 break;
159 }
160 headerValueMark = i;
161 state = State::HEADER_VALUE;
162 [[fallthrough]];
163 case State::HEADER_VALUE:
164 if (c == cr)
165 {
Patrick Williams0e31e952023-05-10 19:40:27 -0500166 std::string_view value(&buffer[headerValueMark],
Ed Tanousaf4edf62020-07-21 08:46:25 -0700167 i - headerValueMark);
168 mime_fields.rbegin()->fields.set(currentHeaderName,
169 value);
170 state = State::HEADER_VALUE_ALMOST_DONE;
171 }
172 break;
173 case State::HEADER_VALUE_ALMOST_DONE:
174 if (c != lf)
175 {
176 return ParserError::ERROR_HEADER_VALUE;
177 }
178 state = State::HEADER_FIELD_START;
179 break;
180 case State::HEADERS_ALMOST_DONE:
181 if (c != lf)
182 {
183 return ParserError::ERROR_HEADER_ENDING;
184 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200185 if (index > 0)
186 {
187 return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
188 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700189 state = State::PART_DATA_START;
190 break;
191 case State::PART_DATA_START:
192 state = State::PART_DATA;
193 partDataMark = i;
194 [[fallthrough]];
195 case State::PART_DATA:
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200196 {
Ed Tanousaf4edf62020-07-21 08:46:25 -0700197 if (index == 0)
198 {
Patrick Williams0e31e952023-05-10 19:40:27 -0500199 skipNonBoundary(buffer, boundary.size() - 1, i);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700200 c = buffer[i];
201 }
Patrick Williams0e31e952023-05-10 19:40:27 -0500202 if (auto ec = processPartData(buffer, i, c);
203 ec != ParserError::PARSER_SUCCESS)
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200204 {
205 return ec;
206 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700207 break;
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200208 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700209 case State::END:
210 break;
211 }
212 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200213
214 if (state != State::END)
215 {
216 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
217 }
218
Ed Tanousaf4edf62020-07-21 08:46:25 -0700219 return ParserError::PARSER_SUCCESS;
220 }
221 std::vector<FormPart> mime_fields;
222 std::string boundary;
223
224 private:
225 void indexBoundary()
226 {
227 std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0);
228 for (const char current : boundary)
229 {
230 boundaryIndex[static_cast<unsigned char>(current)] = true;
231 }
232 }
233
Ed Tanous56d23962022-02-14 20:42:02 -0800234 static char lower(char c)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700235 {
236 return static_cast<char>(c | 0x20);
237 }
238
239 inline bool isBoundaryChar(char c) const
240 {
241 return boundaryIndex[static_cast<unsigned char>(c)];
242 }
243
Patrick Williams0e31e952023-05-10 19:40:27 -0500244 void skipNonBoundary(const std::string& buffer, size_t boundaryEnd,
Ed Tanousaf4edf62020-07-21 08:46:25 -0700245 size_t& i)
246 {
247 // boyer-moore derived algorithm to safely skip non-boundary data
Patrick Williams0e31e952023-05-10 19:40:27 -0500248 while (i + boundary.size() <= buffer.length())
Ed Tanousaf4edf62020-07-21 08:46:25 -0700249 {
250 if (isBoundaryChar(buffer[i + boundaryEnd]))
251 {
252 break;
253 }
254 i += boundary.size();
255 }
256 }
257
Patrick Williams0e31e952023-05-10 19:40:27 -0500258 ParserError processPartData(const std::string& buffer, size_t& i, char c)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700259 {
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200260 size_t prevIndex = index;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700261
262 if (index < boundary.size())
263 {
264 if (boundary[index] == c)
265 {
266 if (index == 0)
267 {
Patrick Williams0e31e952023-05-10 19:40:27 -0500268 const char* start = &buffer[partDataMark];
Ed Tanousca45aa32022-01-07 09:28:45 -0800269 size_t size = i - partDataMark;
Patrick Williams89492a12023-05-10 07:51:34 -0500270 mime_fields.rbegin()->content += std::string_view(start,
271 size);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700272 }
273 index++;
274 }
275 else
276 {
277 index = 0;
278 }
279 }
280 else if (index == boundary.size())
281 {
282 index++;
283 if (c == cr)
284 {
285 // cr = part boundary
286 flags = Boundary::PART_BOUNDARY;
287 }
288 else if (c == hyphen)
289 {
290 // hyphen = end boundary
291 flags = Boundary::END_BOUNDARY;
292 }
293 else
294 {
295 index = 0;
296 }
297 }
298 else
299 {
300 if (flags == Boundary::PART_BOUNDARY)
301 {
302 index = 0;
303 if (c == lf)
304 {
305 // unset the PART_BOUNDARY flag
306 flags = Boundary::NON_BOUNDARY;
307 mime_fields.push_back({});
308 state = State::HEADER_FIELD_START;
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200309 return ParserError::PARSER_SUCCESS;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700310 }
311 }
312 if (flags == Boundary::END_BOUNDARY)
313 {
314 if (c == hyphen)
315 {
316 state = State::END;
317 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200318 else
319 {
320 flags = Boundary::NON_BOUNDARY;
321 index = 0;
322 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700323 }
324 }
325
326 if (index > 0)
327 {
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200328 if ((index - 1) >= lookbehind.size())
329 {
330 // Should never happen, but when it does it won't cause crash
331 return ParserError::ERROR_OUT_OF_RANGE;
332 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700333 lookbehind[index - 1] = c;
334 }
335 else if (prevIndex > 0)
336 {
337 // if our boundary turned out to be rubbish, the captured
338 // lookbehind belongs to partData
339
340 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700341 partDataMark = i;
342
343 // reconsider the current character even so it interrupted
344 // the sequence it could be the beginning of a new sequence
345 i--;
346 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200347 return ParserError::PARSER_SUCCESS;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700348 }
349
350 std::string currentHeaderName;
351 std::string currentHeaderValue;
352
353 static constexpr char cr = '\r';
354 static constexpr char lf = '\n';
355 static constexpr char space = ' ';
356 static constexpr char hyphen = '-';
357 static constexpr char colon = ':';
358
Ed Tanousd3a9e082022-01-07 09:30:41 -0800359 std::array<bool, 256> boundaryIndex{};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700360 std::string lookbehind;
Ed Tanousd3a9e082022-01-07 09:30:41 -0800361 State state{State::START};
362 Boundary flags{Boundary::NON_BOUNDARY};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700363 size_t index = 0;
364 size_t partDataMark = 0;
365 size_t headerFieldMark = 0;
366 size_t headerValueMark = 0;
367};