blob: 3728311fbedea2c8ed3d411f3271474325f9e446 [file] [log] [blame]
Ed Tanousaf4edf62020-07-21 08:46:25 -07001#pragma once
2
3#include <boost/algorithm/string/predicate.hpp>
4#include <boost/beast/http/fields.hpp>
5#include <http_request.hpp>
6
7#include <string>
8#include <string_view>
9
/// Result codes returned by MultipartParser::parse().
enum class ParserError
{
    /// The loop over the request body finished without hitting an error.
    PARSER_SUCCESS,

    /// The content-type header does not start with
    /// "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,

    /// The opening delimiter was not followed by a carriage return.
    ERROR_BOUNDARY_CR,

    /// The carriage return after the opening delimiter was not followed by a
    /// line feed.
    ERROR_BOUNDARY_LF,

    /// The start of the body does not match the expected opening delimiter.
    ERROR_BOUNDARY_DATA,

    /// A colon was found as the very first character of a header line.
    ERROR_EMPTY_HEADER,

    /// A header name contains a character that is neither a letter, a hyphen
    /// nor the terminating colon.
    ERROR_HEADER_NAME,

    /// The carriage return ending a header value was not followed by a line
    /// feed.
    ERROR_HEADER_VALUE,

    /// The carriage return ending the header section was not followed by a
    /// line feed.
    ERROR_HEADER_ENDING
};
22
/// States of the byte-by-byte state machine driven by MultipartParser::parse().
enum class State
{
    /// Initial state; resets the match index and moves on to START_BOUNDARY.
    START,

    /// Matching the opening delimiter and the CRLF that follows it.
    START_BOUNDARY,

    /// About to read the first character of a header line.
    HEADER_FIELD_START,

    /// Reading a header name, up to the colon.
    HEADER_FIELD,

    /// Skipping spaces between the colon and the header value.
    HEADER_VALUE_START,

    /// Reading a header value, up to the carriage return.
    HEADER_VALUE,

    /// Saw the carriage return ending a header value; a line feed must follow.
    HEADER_VALUE_ALMOST_DONE,

    /// Saw the carriage return ending the header section; a line feed must
    /// follow.
    HEADERS_ALMOST_DONE,

    /// About to read the first byte of a part's content.
    PART_DATA_START,

    /// Reading part content while watching for the next delimiter.
    PART_DATA,

    /// The closing delimiter has been seen; remaining bytes are ignored.
    END
};
37
/// Kind of delimiter recognised while scanning part content; set from the
/// character that follows a fully matched boundary string.
enum class Boundary
{
    /// No delimiter is currently being completed.
    NON_BOUNDARY,

    /// The boundary string was followed by a carriage return: another part
    /// follows.
    PART_BOUNDARY,

    /// The boundary string was followed by a hyphen: this is the closing
    /// delimiter.
    END_BOUNDARY,
};
44
45struct FormPart
46{
47 boost::beast::http::fields fields;
48 std::string content;
49};
50
51class MultipartParser
52{
53 public:
54 MultipartParser() = default;
55
56 [[nodiscard]] ParserError parse(const crow::Request& req)
57 {
58 std::string_view contentType = req.getHeaderValue("content-type");
59
60 const std::string boundaryFormat = "multipart/form-data; boundary=";
61 if (!boost::starts_with(req.getHeaderValue("content-type"),
62 boundaryFormat))
63 {
64 return ParserError::ERROR_BOUNDARY_FORMAT;
65 }
66
67 std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
68
69 boundary = "\r\n--";
70 boundary += ctBoundary;
71 indexBoundary();
72 lookbehind.resize(boundary.size() + 8);
73 state = State::START;
74
75 const char* buffer = req.body.data();
76 size_t len = req.body.size();
77 size_t prevIndex = index;
78 char cl = 0;
79
80 for (size_t i = 0; i < len; i++)
81 {
82 char c = buffer[i];
83 switch (state)
84 {
85 case State::START:
86 index = 0;
87 state = State::START_BOUNDARY;
88 [[fallthrough]];
89 case State::START_BOUNDARY:
90 if (index == boundary.size() - 2)
91 {
92 if (c != cr)
93 {
94 return ParserError::ERROR_BOUNDARY_CR;
95 }
96 index++;
97 break;
98 }
99 else if (index - 1 == boundary.size() - 2)
100 {
101 if (c != lf)
102 {
103 return ParserError::ERROR_BOUNDARY_LF;
104 }
105 index = 0;
106 mime_fields.push_back({});
107 state = State::HEADER_FIELD_START;
108 break;
109 }
110 if (c != boundary[index + 2])
111 {
112 return ParserError::ERROR_BOUNDARY_DATA;
113 }
114 index++;
115 break;
116 case State::HEADER_FIELD_START:
117 currentHeaderName.resize(0);
118 state = State::HEADER_FIELD;
119 headerFieldMark = i;
120 index = 0;
121 [[fallthrough]];
122 case State::HEADER_FIELD:
123 if (c == cr)
124 {
125 headerFieldMark = 0;
126 state = State::HEADERS_ALMOST_DONE;
127 break;
128 }
129
130 index++;
131 if (c == hyphen)
132 {
133 break;
134 }
135
136 if (c == colon)
137 {
138 if (index == 1)
139 {
140 return ParserError::ERROR_EMPTY_HEADER;
141 }
142 currentHeaderName.append(buffer + headerFieldMark,
143 i - headerFieldMark);
144 state = State::HEADER_VALUE_START;
145 break;
146 }
147 cl = lower(c);
148 if (cl < 'a' || cl > 'z')
149 {
150 return ParserError::ERROR_HEADER_NAME;
151 }
152 break;
153 case State::HEADER_VALUE_START:
154 if (c == space)
155 {
156 break;
157 }
158 headerValueMark = i;
159 state = State::HEADER_VALUE;
160 [[fallthrough]];
161 case State::HEADER_VALUE:
162 if (c == cr)
163 {
164 std::string_view value(buffer + headerValueMark,
165 i - headerValueMark);
166 mime_fields.rbegin()->fields.set(currentHeaderName,
167 value);
168 state = State::HEADER_VALUE_ALMOST_DONE;
169 }
170 break;
171 case State::HEADER_VALUE_ALMOST_DONE:
172 if (c != lf)
173 {
174 return ParserError::ERROR_HEADER_VALUE;
175 }
176 state = State::HEADER_FIELD_START;
177 break;
178 case State::HEADERS_ALMOST_DONE:
179 if (c != lf)
180 {
181 return ParserError::ERROR_HEADER_ENDING;
182 }
183 state = State::PART_DATA_START;
184 break;
185 case State::PART_DATA_START:
186 state = State::PART_DATA;
187 partDataMark = i;
188 [[fallthrough]];
189 case State::PART_DATA:
190 if (index == 0)
191 {
192 skipNonBoundary(buffer, len, boundary.size() - 1, i);
193 c = buffer[i];
194 }
195 processPartData(prevIndex, index, buffer, i, c, state);
196 break;
197 case State::END:
198 break;
199 }
200 }
201 return ParserError::PARSER_SUCCESS;
202 }
203 std::vector<FormPart> mime_fields;
204 std::string boundary;
205
206 private:
207 void indexBoundary()
208 {
209 std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0);
210 for (const char current : boundary)
211 {
212 boundaryIndex[static_cast<unsigned char>(current)] = true;
213 }
214 }
215
216 char lower(char c) const
217 {
218 return static_cast<char>(c | 0x20);
219 }
220
221 inline bool isBoundaryChar(char c) const
222 {
223 return boundaryIndex[static_cast<unsigned char>(c)];
224 }
225
226 void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd,
227 size_t& i)
228 {
229 // boyer-moore derived algorithm to safely skip non-boundary data
230 while (i + boundary.size() <= len)
231 {
232 if (isBoundaryChar(buffer[i + boundaryEnd]))
233 {
234 break;
235 }
236 i += boundary.size();
237 }
238 }
239
240 void processPartData(size_t& prevIndex, size_t& index, const char* buffer,
241 size_t& i, char c, State& state)
242 {
243 prevIndex = index;
244
245 if (index < boundary.size())
246 {
247 if (boundary[index] == c)
248 {
249 if (index == 0)
250 {
251 mime_fields.rbegin()->content += std::string_view(
252 buffer + partDataMark, i - partDataMark);
253 }
254 index++;
255 }
256 else
257 {
258 index = 0;
259 }
260 }
261 else if (index == boundary.size())
262 {
263 index++;
264 if (c == cr)
265 {
266 // cr = part boundary
267 flags = Boundary::PART_BOUNDARY;
268 }
269 else if (c == hyphen)
270 {
271 // hyphen = end boundary
272 flags = Boundary::END_BOUNDARY;
273 }
274 else
275 {
276 index = 0;
277 }
278 }
279 else
280 {
281 if (flags == Boundary::PART_BOUNDARY)
282 {
283 index = 0;
284 if (c == lf)
285 {
286 // unset the PART_BOUNDARY flag
287 flags = Boundary::NON_BOUNDARY;
288 mime_fields.push_back({});
289 state = State::HEADER_FIELD_START;
290 return;
291 }
292 }
293 if (flags == Boundary::END_BOUNDARY)
294 {
295 if (c == hyphen)
296 {
297 state = State::END;
298 }
299 }
300 }
301
302 if (index > 0)
303 {
304 lookbehind[index - 1] = c;
305 }
306 else if (prevIndex > 0)
307 {
308 // if our boundary turned out to be rubbish, the captured
309 // lookbehind belongs to partData
310
311 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
312 prevIndex = 0;
313 partDataMark = i;
314
315 // reconsider the current character even so it interrupted
316 // the sequence it could be the beginning of a new sequence
317 i--;
318 }
319 }
320
321 std::string currentHeaderName;
322 std::string currentHeaderValue;
323
324 static constexpr char cr = '\r';
325 static constexpr char lf = '\n';
326 static constexpr char space = ' ';
327 static constexpr char hyphen = '-';
328 static constexpr char colon = ':';
329
330 std::array<bool, 256> boundaryIndex;
331 std::string lookbehind;
332 State state;
333 Boundary flags;
334 size_t index = 0;
335 size_t partDataMark = 0;
336 size_t headerFieldMark = 0;
337 size_t headerValueMark = 0;
338};