// Source blob: ee028f1d3e39a3b75241599c0f33b2a9ccec9357
#pragma once

#include <boost/algorithm/string/predicate.hpp>
#include <boost/beast/http/fields.hpp>
#include <http_request.hpp>

#include <algorithm>
#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>
9
// Result codes returned by MultipartParser::parse().
enum class ParserError
{
    // The whole body was consumed without hitting a syntax error.
    PARSER_SUCCESS,
    // Content-Type does not start with "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,
    // The opening boundary line is not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    // The carriage return after the opening boundary is not followed by a
    // line feed.
    ERROR_BOUNDARY_LF,
    // The body does not begin with "--" followed by the boundary string.
    ERROR_BOUNDARY_DATA,
    // A header line starts with ':' (no header name).
    ERROR_EMPTY_HEADER,
    // A header name contains a character other than a letter or '-'.
    ERROR_HEADER_NAME,
    // The carriage return ending a header value is not followed by a line
    // feed.
    ERROR_HEADER_VALUE,
    // The carriage return ending the header section is not followed by a
    // line feed.
    ERROR_HEADER_ENDING
};
22
// States of the byte-by-byte state machine driven by MultipartParser::parse().
enum class State
{
    // Initial state; resets the match index and falls into START_BOUNDARY.
    START,
    // Matching the opening "--<boundary>\r\n" line.
    START_BOUNDARY,
    // First byte of a header line; records where the header name begins.
    HEADER_FIELD_START,
    // Inside a header name, up to the ':' (or a '\r' that ends the headers).
    HEADER_FIELD,
    // Skipping spaces between ':' and the header value.
    HEADER_VALUE_START,
    // Inside a header value, up to the terminating '\r'.
    HEADER_VALUE,
    // Saw the '\r' that ends a header value; expecting '\n'.
    HEADER_VALUE_ALMOST_DONE,
    // Saw the '\r' of the blank line that ends the headers; expecting '\n'.
    HEADERS_ALMOST_DONE,
    // First byte of a part body; records where the body begins.
    PART_DATA_START,
    // Inside a part body, scanning for the next boundary.
    PART_DATA,
    // The closing boundary has been seen; remaining bytes are ignored.
    END
};
37
// Kind of boundary candidate currently being matched inside part data.
enum class Boundary
{
    // No boundary candidate is pending.
    NON_BOUNDARY,
    // The boundary string was followed by '\r': another part should follow.
    PART_BOUNDARY,
    // The boundary string was followed by '-': this may be the closing
    // boundary.
    END_BOUNDARY,
};
44
45struct FormPart
46{
47 boost::beast::http::fields fields;
48 std::string content;
49};
50
51class MultipartParser
52{
53 public:
54 MultipartParser() = default;
55
56 [[nodiscard]] ParserError parse(const crow::Request& req)
57 {
58 std::string_view contentType = req.getHeaderValue("content-type");
59
60 const std::string boundaryFormat = "multipart/form-data; boundary=";
61 if (!boost::starts_with(req.getHeaderValue("content-type"),
62 boundaryFormat))
63 {
64 return ParserError::ERROR_BOUNDARY_FORMAT;
65 }
66
67 std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
68
69 boundary = "\r\n--";
70 boundary += ctBoundary;
71 indexBoundary();
72 lookbehind.resize(boundary.size() + 8);
73 state = State::START;
74
75 const char* buffer = req.body.data();
76 size_t len = req.body.size();
77 size_t prevIndex = index;
78 char cl = 0;
79
80 for (size_t i = 0; i < len; i++)
81 {
Ed Tanousca45aa32022-01-07 09:28:45 -080082 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -070083 char c = buffer[i];
84 switch (state)
85 {
86 case State::START:
87 index = 0;
88 state = State::START_BOUNDARY;
89 [[fallthrough]];
90 case State::START_BOUNDARY:
91 if (index == boundary.size() - 2)
92 {
93 if (c != cr)
94 {
95 return ParserError::ERROR_BOUNDARY_CR;
96 }
97 index++;
98 break;
99 }
100 else if (index - 1 == boundary.size() - 2)
101 {
102 if (c != lf)
103 {
104 return ParserError::ERROR_BOUNDARY_LF;
105 }
106 index = 0;
107 mime_fields.push_back({});
108 state = State::HEADER_FIELD_START;
109 break;
110 }
111 if (c != boundary[index + 2])
112 {
113 return ParserError::ERROR_BOUNDARY_DATA;
114 }
115 index++;
116 break;
117 case State::HEADER_FIELD_START:
118 currentHeaderName.resize(0);
119 state = State::HEADER_FIELD;
120 headerFieldMark = i;
121 index = 0;
122 [[fallthrough]];
123 case State::HEADER_FIELD:
124 if (c == cr)
125 {
126 headerFieldMark = 0;
127 state = State::HEADERS_ALMOST_DONE;
128 break;
129 }
130
131 index++;
132 if (c == hyphen)
133 {
134 break;
135 }
136
137 if (c == colon)
138 {
139 if (index == 1)
140 {
141 return ParserError::ERROR_EMPTY_HEADER;
142 }
Ed Tanousca45aa32022-01-07 09:28:45 -0800143
144 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700145 currentHeaderName.append(buffer + headerFieldMark,
146 i - headerFieldMark);
147 state = State::HEADER_VALUE_START;
148 break;
149 }
150 cl = lower(c);
151 if (cl < 'a' || cl > 'z')
152 {
153 return ParserError::ERROR_HEADER_NAME;
154 }
155 break;
156 case State::HEADER_VALUE_START:
157 if (c == space)
158 {
159 break;
160 }
161 headerValueMark = i;
162 state = State::HEADER_VALUE;
163 [[fallthrough]];
164 case State::HEADER_VALUE:
165 if (c == cr)
166 {
Ed Tanousca45aa32022-01-07 09:28:45 -0800167 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700168 std::string_view value(buffer + headerValueMark,
169 i - headerValueMark);
170 mime_fields.rbegin()->fields.set(currentHeaderName,
171 value);
172 state = State::HEADER_VALUE_ALMOST_DONE;
173 }
174 break;
175 case State::HEADER_VALUE_ALMOST_DONE:
176 if (c != lf)
177 {
178 return ParserError::ERROR_HEADER_VALUE;
179 }
180 state = State::HEADER_FIELD_START;
181 break;
182 case State::HEADERS_ALMOST_DONE:
183 if (c != lf)
184 {
185 return ParserError::ERROR_HEADER_ENDING;
186 }
187 state = State::PART_DATA_START;
188 break;
189 case State::PART_DATA_START:
190 state = State::PART_DATA;
191 partDataMark = i;
192 [[fallthrough]];
193 case State::PART_DATA:
194 if (index == 0)
195 {
196 skipNonBoundary(buffer, len, boundary.size() - 1, i);
Ed Tanousca45aa32022-01-07 09:28:45 -0800197
198 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700199 c = buffer[i];
200 }
201 processPartData(prevIndex, index, buffer, i, c, state);
202 break;
203 case State::END:
204 break;
205 }
206 }
207 return ParserError::PARSER_SUCCESS;
208 }
209 std::vector<FormPart> mime_fields;
210 std::string boundary;
211
212 private:
213 void indexBoundary()
214 {
215 std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0);
216 for (const char current : boundary)
217 {
218 boundaryIndex[static_cast<unsigned char>(current)] = true;
219 }
220 }
221
222 char lower(char c) const
223 {
224 return static_cast<char>(c | 0x20);
225 }
226
227 inline bool isBoundaryChar(char c) const
228 {
229 return boundaryIndex[static_cast<unsigned char>(c)];
230 }
231
232 void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd,
233 size_t& i)
234 {
235 // boyer-moore derived algorithm to safely skip non-boundary data
236 while (i + boundary.size() <= len)
237 {
Ed Tanousca45aa32022-01-07 09:28:45 -0800238 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700239 if (isBoundaryChar(buffer[i + boundaryEnd]))
240 {
241 break;
242 }
243 i += boundary.size();
244 }
245 }
246
247 void processPartData(size_t& prevIndex, size_t& index, const char* buffer,
248 size_t& i, char c, State& state)
249 {
250 prevIndex = index;
251
252 if (index < boundary.size())
253 {
254 if (boundary[index] == c)
255 {
256 if (index == 0)
257 {
Ed Tanousca45aa32022-01-07 09:28:45 -0800258 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
259 const char* start = buffer + partDataMark;
260 size_t size = i - partDataMark;
261 mime_fields.rbegin()->content +=
262 std::string_view(start, size);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700263 }
264 index++;
265 }
266 else
267 {
268 index = 0;
269 }
270 }
271 else if (index == boundary.size())
272 {
273 index++;
274 if (c == cr)
275 {
276 // cr = part boundary
277 flags = Boundary::PART_BOUNDARY;
278 }
279 else if (c == hyphen)
280 {
281 // hyphen = end boundary
282 flags = Boundary::END_BOUNDARY;
283 }
284 else
285 {
286 index = 0;
287 }
288 }
289 else
290 {
291 if (flags == Boundary::PART_BOUNDARY)
292 {
293 index = 0;
294 if (c == lf)
295 {
296 // unset the PART_BOUNDARY flag
297 flags = Boundary::NON_BOUNDARY;
298 mime_fields.push_back({});
299 state = State::HEADER_FIELD_START;
300 return;
301 }
302 }
303 if (flags == Boundary::END_BOUNDARY)
304 {
305 if (c == hyphen)
306 {
307 state = State::END;
308 }
309 }
310 }
311
312 if (index > 0)
313 {
314 lookbehind[index - 1] = c;
315 }
316 else if (prevIndex > 0)
317 {
318 // if our boundary turned out to be rubbish, the captured
319 // lookbehind belongs to partData
320
321 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
322 prevIndex = 0;
323 partDataMark = i;
324
325 // reconsider the current character even so it interrupted
326 // the sequence it could be the beginning of a new sequence
327 i--;
328 }
329 }
330
331 std::string currentHeaderName;
332 std::string currentHeaderValue;
333
334 static constexpr char cr = '\r';
335 static constexpr char lf = '\n';
336 static constexpr char space = ' ';
337 static constexpr char hyphen = '-';
338 static constexpr char colon = ':';
339
Ed Tanousd3a9e082022-01-07 09:30:41 -0800340 std::array<bool, 256> boundaryIndex{};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700341 std::string lookbehind;
Ed Tanousd3a9e082022-01-07 09:30:41 -0800342 State state{State::START};
343 Boundary flags{Boundary::NON_BOUNDARY};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700344 size_t index = 0;
345 size_t partDataMark = 0;
346 size_t headerFieldMark = 0;
347 size_t headerValueMark = 0;
348};