#pragma once

#include <boost/beast/http/fields.hpp>
#include <http_request.hpp>

#include <algorithm>
#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>

/// Result codes returned by MultipartParser::parse().
enum class ParserError
{
    /// The body was consumed without hitting a syntax error.
    PARSER_SUCCESS,
    /// content-type does not start with "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,
    /// The opening boundary line is not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    /// The carriage return after the opening boundary is not followed by a
    /// line feed.
    ERROR_BOUNDARY_LF,
    /// The body does not begin with "--" followed by the boundary token.
    ERROR_BOUNDARY_DATA,
    /// A header line starts with ':' (empty header name).
    ERROR_EMPTY_HEADER,
    /// A header name contains a character other than a letter or '-'.
    ERROR_HEADER_NAME,
    /// The carriage return ending a header value is not followed by a line
    /// feed.
    ERROR_HEADER_VALUE,
    /// The carriage return ending the header section is not followed by a
    /// line feed.
    ERROR_HEADER_ENDING
};

/// States of the MultipartParser state machine.
enum class State
{
    /// Before the first body byte; resets the match index.
    START,
    /// Matching the opening "--<boundary>\r\n" line.
    START_BOUNDARY,
    /// At the first byte of a header line (or of the blank line that ends the
    /// headers).
    HEADER_FIELD_START,
    /// Inside a header name, up to the ':'.
    HEADER_FIELD,
    /// After the ':', skipping spaces that precede the value.
    HEADER_VALUE_START,
    /// Inside a header value, up to the carriage return.
    HEADER_VALUE,
    /// Carriage return seen after a header value; a line feed must follow.
    HEADER_VALUE_ALMOST_DONE,
    /// Carriage return seen where a header name would start; a line feed must
    /// follow.
    HEADERS_ALMOST_DONE,
    /// At the first byte of a part's content.
    PART_DATA_START,
    /// Inside a part's content, scanning for the next boundary.
    PART_DATA,
    /// The closing boundary was seen; remaining input is ignored.
    END
};

/// Kind of boundary recognised from the byte that follows a matched boundary
/// string inside part data.
enum class Boundary
{
    /// No boundary terminator is pending.
    NON_BOUNDARY,
    /// Boundary followed by a carriage return: another part follows.
    PART_BOUNDARY,
    /// Boundary followed by a hyphen: start of the closing "--" marker.
    END_BOUNDARY,
};

44struct FormPart
45{
46 boost::beast::http::fields fields;
47 std::string content;
48};
49
50class MultipartParser
51{
52 public:
53 MultipartParser() = default;
54
55 [[nodiscard]] ParserError parse(const crow::Request& req)
56 {
57 std::string_view contentType = req.getHeaderValue("content-type");
58
59 const std::string boundaryFormat = "multipart/form-data; boundary=";
Ed Tanous11ba3972022-07-11 09:50:41 -070060 if (!contentType.starts_with(boundaryFormat))
Ed Tanousaf4edf62020-07-21 08:46:25 -070061 {
62 return ParserError::ERROR_BOUNDARY_FORMAT;
63 }
64
65 std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
66
67 boundary = "\r\n--";
68 boundary += ctBoundary;
69 indexBoundary();
70 lookbehind.resize(boundary.size() + 8);
71 state = State::START;
72
73 const char* buffer = req.body.data();
74 size_t len = req.body.size();
75 size_t prevIndex = index;
76 char cl = 0;
77
78 for (size_t i = 0; i < len; i++)
79 {
Ed Tanousca45aa32022-01-07 09:28:45 -080080 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -070081 char c = buffer[i];
82 switch (state)
83 {
84 case State::START:
85 index = 0;
86 state = State::START_BOUNDARY;
87 [[fallthrough]];
88 case State::START_BOUNDARY:
89 if (index == boundary.size() - 2)
90 {
91 if (c != cr)
92 {
93 return ParserError::ERROR_BOUNDARY_CR;
94 }
95 index++;
96 break;
97 }
98 else if (index - 1 == boundary.size() - 2)
99 {
100 if (c != lf)
101 {
102 return ParserError::ERROR_BOUNDARY_LF;
103 }
104 index = 0;
105 mime_fields.push_back({});
106 state = State::HEADER_FIELD_START;
107 break;
108 }
109 if (c != boundary[index + 2])
110 {
111 return ParserError::ERROR_BOUNDARY_DATA;
112 }
113 index++;
114 break;
115 case State::HEADER_FIELD_START:
116 currentHeaderName.resize(0);
117 state = State::HEADER_FIELD;
118 headerFieldMark = i;
119 index = 0;
120 [[fallthrough]];
121 case State::HEADER_FIELD:
122 if (c == cr)
123 {
124 headerFieldMark = 0;
125 state = State::HEADERS_ALMOST_DONE;
126 break;
127 }
128
129 index++;
130 if (c == hyphen)
131 {
132 break;
133 }
134
135 if (c == colon)
136 {
137 if (index == 1)
138 {
139 return ParserError::ERROR_EMPTY_HEADER;
140 }
Ed Tanousca45aa32022-01-07 09:28:45 -0800141
142 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700143 currentHeaderName.append(buffer + headerFieldMark,
144 i - headerFieldMark);
145 state = State::HEADER_VALUE_START;
146 break;
147 }
148 cl = lower(c);
149 if (cl < 'a' || cl > 'z')
150 {
151 return ParserError::ERROR_HEADER_NAME;
152 }
153 break;
154 case State::HEADER_VALUE_START:
155 if (c == space)
156 {
157 break;
158 }
159 headerValueMark = i;
160 state = State::HEADER_VALUE;
161 [[fallthrough]];
162 case State::HEADER_VALUE:
163 if (c == cr)
164 {
Ed Tanousca45aa32022-01-07 09:28:45 -0800165 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700166 std::string_view value(buffer + headerValueMark,
167 i - headerValueMark);
168 mime_fields.rbegin()->fields.set(currentHeaderName,
169 value);
170 state = State::HEADER_VALUE_ALMOST_DONE;
171 }
172 break;
173 case State::HEADER_VALUE_ALMOST_DONE:
174 if (c != lf)
175 {
176 return ParserError::ERROR_HEADER_VALUE;
177 }
178 state = State::HEADER_FIELD_START;
179 break;
180 case State::HEADERS_ALMOST_DONE:
181 if (c != lf)
182 {
183 return ParserError::ERROR_HEADER_ENDING;
184 }
185 state = State::PART_DATA_START;
186 break;
187 case State::PART_DATA_START:
188 state = State::PART_DATA;
189 partDataMark = i;
190 [[fallthrough]];
191 case State::PART_DATA:
192 if (index == 0)
193 {
194 skipNonBoundary(buffer, len, boundary.size() - 1, i);
Ed Tanousca45aa32022-01-07 09:28:45 -0800195
196 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700197 c = buffer[i];
198 }
Ed Tanous8a592812022-06-04 09:06:59 -0700199 processPartData(prevIndex, buffer, i, c);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700200 break;
201 case State::END:
202 break;
203 }
204 }
205 return ParserError::PARSER_SUCCESS;
206 }
207 std::vector<FormPart> mime_fields;
208 std::string boundary;
209
210 private:
211 void indexBoundary()
212 {
213 std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0);
214 for (const char current : boundary)
215 {
216 boundaryIndex[static_cast<unsigned char>(current)] = true;
217 }
218 }
219
Ed Tanous56d23962022-02-14 20:42:02 -0800220 static char lower(char c)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700221 {
222 return static_cast<char>(c | 0x20);
223 }
224
225 inline bool isBoundaryChar(char c) const
226 {
227 return boundaryIndex[static_cast<unsigned char>(c)];
228 }
229
230 void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd,
231 size_t& i)
232 {
233 // boyer-moore derived algorithm to safely skip non-boundary data
234 while (i + boundary.size() <= len)
235 {
Ed Tanousca45aa32022-01-07 09:28:45 -0800236 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700237 if (isBoundaryChar(buffer[i + boundaryEnd]))
238 {
239 break;
240 }
241 i += boundary.size();
242 }
243 }
244
Ed Tanous8a592812022-06-04 09:06:59 -0700245 void processPartData(size_t& prevIndex, const char* buffer, size_t& i,
246 char c)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700247 {
248 prevIndex = index;
249
250 if (index < boundary.size())
251 {
252 if (boundary[index] == c)
253 {
254 if (index == 0)
255 {
Ed Tanousca45aa32022-01-07 09:28:45 -0800256 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
257 const char* start = buffer + partDataMark;
258 size_t size = i - partDataMark;
259 mime_fields.rbegin()->content +=
260 std::string_view(start, size);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700261 }
262 index++;
263 }
264 else
265 {
266 index = 0;
267 }
268 }
269 else if (index == boundary.size())
270 {
271 index++;
272 if (c == cr)
273 {
274 // cr = part boundary
275 flags = Boundary::PART_BOUNDARY;
276 }
277 else if (c == hyphen)
278 {
279 // hyphen = end boundary
280 flags = Boundary::END_BOUNDARY;
281 }
282 else
283 {
284 index = 0;
285 }
286 }
287 else
288 {
289 if (flags == Boundary::PART_BOUNDARY)
290 {
291 index = 0;
292 if (c == lf)
293 {
294 // unset the PART_BOUNDARY flag
295 flags = Boundary::NON_BOUNDARY;
296 mime_fields.push_back({});
297 state = State::HEADER_FIELD_START;
298 return;
299 }
300 }
301 if (flags == Boundary::END_BOUNDARY)
302 {
303 if (c == hyphen)
304 {
305 state = State::END;
306 }
307 }
308 }
309
310 if (index > 0)
311 {
312 lookbehind[index - 1] = c;
313 }
314 else if (prevIndex > 0)
315 {
316 // if our boundary turned out to be rubbish, the captured
317 // lookbehind belongs to partData
318
319 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
320 prevIndex = 0;
321 partDataMark = i;
322
323 // reconsider the current character even so it interrupted
324 // the sequence it could be the beginning of a new sequence
325 i--;
326 }
327 }
328
329 std::string currentHeaderName;
330 std::string currentHeaderValue;
331
332 static constexpr char cr = '\r';
333 static constexpr char lf = '\n';
334 static constexpr char space = ' ';
335 static constexpr char hyphen = '-';
336 static constexpr char colon = ':';
337
Ed Tanousd3a9e082022-01-07 09:30:41 -0800338 std::array<bool, 256> boundaryIndex{};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700339 std::string lookbehind;
Ed Tanousd3a9e082022-01-07 09:30:41 -0800340 State state{State::START};
341 Boundary flags{Boundary::NON_BOUNDARY};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700342 size_t index = 0;
343 size_t partDataMark = 0;
344 size_t headerFieldMark = 0;
345 size_t headerValueMark = 0;
346};