blob: 19d09f43cc46b9c229292c36116cc44625c5e8b9 [file] [log] [blame]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright OpenBMC Authors
#pragma once

#include "http_request.hpp"

#include <boost/beast/http/fields.hpp>

#include <array>
#include <cstddef>
#include <ranges>
#include <string>
#include <string_view>
#include <vector>

12
/**
 * @brief Result codes returned by MultipartParser::parse().
 *
 * The enumerator order (and therefore the underlying values) is part of the
 * interface and must not be changed.
 */
enum class ParserError
{
    /// The whole body was consumed and the closing boundary was seen.
    PARSER_SUCCESS,
    /// The content-type header is not of the form
    /// "multipart/form-data; boundary=<value>".
    ERROR_BOUNDARY_FORMAT,
    /// The opening boundary line is not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    /// The carriage return after the opening boundary is not followed by a
    /// line feed.
    ERROR_BOUNDARY_LF,
    /// The body does not start with "--" followed by the boundary value.
    ERROR_BOUNDARY_DATA,
    /// A ':' was found where a header name was expected (empty name).
    ERROR_EMPTY_HEADER,
    /// A header name contains a byte other than an ASCII letter or '-'.
    ERROR_HEADER_NAME,
    /// The carriage return ending a header value is not followed by a line
    /// feed.
    ERROR_HEADER_VALUE,
    /// The carriage return found in header-name position is not followed by
    /// a line feed.
    ERROR_HEADER_ENDING,
    /// A header line ended (CR LF) after some name bytes but before a ':'.
    ERROR_UNEXPECTED_END_OF_HEADER,
    /// The body ended before the closing boundary was reached.
    ERROR_UNEXPECTED_END_OF_INPUT,
    /// Internal guard: the boundary look-behind index left its buffer.
    ERROR_OUT_OF_RANGE
};
28
/**
 * @brief States of the byte-wise state machine driven by
 *        MultipartParser::parse().
 *
 * The "*_START" states perform one-time setup for the current byte and then
 * fall through into the state that follows them.
 */
enum class State
{
    /// Initial state; resets the match index before the opening boundary.
    START,
    /// Matching the opening "--<boundary>" line and its CR LF.
    START_BOUNDARY,
    /// First byte of a header line; records where the name begins.
    HEADER_FIELD_START,
    /// Inside a header name, up to the ':' (or a CR ending the headers).
    HEADER_FIELD,
    /// After ':'; skipping spaces until the value begins.
    HEADER_VALUE_START,
    /// Inside a header value, up to the terminating CR.
    HEADER_VALUE,
    /// CR after a header value was seen; LF expected next.
    HEADER_VALUE_ALMOST_DONE,
    /// CR in header-name position was seen; LF expected next.
    HEADERS_ALMOST_DONE,
    /// First byte of a part's content; records where the content begins.
    PART_DATA_START,
    /// Inside a part's content, scanning for the next boundary.
    PART_DATA,
    /// The closing boundary was seen; remaining bytes are ignored.
    END
};
43
/**
 * @brief Kind of boundary the parser believes it is currently finishing
 *        while scanning part content.
 */
enum class Boundary
{
    /// No boundary terminator is pending.
    NON_BOUNDARY,
    /// The boundary text was followed by CR: another part should follow.
    PART_BOUNDARY,
    /// The boundary text was followed by '-': this may be the closing
    /// boundary.
    END_BOUNDARY,
};
50
51struct FormPart
52{
53 boost::beast::http::fields fields;
54 std::string content;
55};
56
57class MultipartParser
58{
59 public:
60 MultipartParser() = default;
61
62 [[nodiscard]] ParserError parse(const crow::Request& req)
63 {
64 std::string_view contentType = req.getHeaderValue("content-type");
65
66 const std::string boundaryFormat = "multipart/form-data; boundary=";
Ed Tanous11ba3972022-07-11 09:50:41 -070067 if (!contentType.starts_with(boundaryFormat))
Ed Tanousaf4edf62020-07-21 08:46:25 -070068 {
69 return ParserError::ERROR_BOUNDARY_FORMAT;
70 }
71
72 std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
73
74 boundary = "\r\n--";
75 boundary += ctBoundary;
76 indexBoundary();
77 lookbehind.resize(boundary.size() + 8);
78 state = State::START;
79
Patrick Williams0e31e952023-05-10 19:40:27 -050080 const std::string& buffer = req.body();
81 size_t len = buffer.size();
Ed Tanousaf4edf62020-07-21 08:46:25 -070082 char cl = 0;
83
84 for (size_t i = 0; i < len; i++)
85 {
86 char c = buffer[i];
87 switch (state)
88 {
89 case State::START:
90 index = 0;
91 state = State::START_BOUNDARY;
92 [[fallthrough]];
93 case State::START_BOUNDARY:
94 if (index == boundary.size() - 2)
95 {
96 if (c != cr)
97 {
98 return ParserError::ERROR_BOUNDARY_CR;
99 }
100 index++;
101 break;
102 }
103 else if (index - 1 == boundary.size() - 2)
104 {
105 if (c != lf)
106 {
107 return ParserError::ERROR_BOUNDARY_LF;
108 }
109 index = 0;
Patrick Williams26eee3a2023-10-20 20:54:01 -0500110 mime_fields.emplace_back();
Ed Tanousaf4edf62020-07-21 08:46:25 -0700111 state = State::HEADER_FIELD_START;
112 break;
113 }
114 if (c != boundary[index + 2])
115 {
116 return ParserError::ERROR_BOUNDARY_DATA;
117 }
118 index++;
119 break;
120 case State::HEADER_FIELD_START:
121 currentHeaderName.resize(0);
122 state = State::HEADER_FIELD;
123 headerFieldMark = i;
124 index = 0;
125 [[fallthrough]];
126 case State::HEADER_FIELD:
127 if (c == cr)
128 {
129 headerFieldMark = 0;
130 state = State::HEADERS_ALMOST_DONE;
131 break;
132 }
133
134 index++;
135 if (c == hyphen)
136 {
137 break;
138 }
139
140 if (c == colon)
141 {
142 if (index == 1)
143 {
144 return ParserError::ERROR_EMPTY_HEADER;
145 }
Ed Tanousca45aa32022-01-07 09:28:45 -0800146
Patrick Williams0e31e952023-05-10 19:40:27 -0500147 currentHeaderName.append(&buffer[headerFieldMark],
Ed Tanousaf4edf62020-07-21 08:46:25 -0700148 i - headerFieldMark);
149 state = State::HEADER_VALUE_START;
150 break;
151 }
152 cl = lower(c);
153 if (cl < 'a' || cl > 'z')
154 {
155 return ParserError::ERROR_HEADER_NAME;
156 }
157 break;
158 case State::HEADER_VALUE_START:
159 if (c == space)
160 {
161 break;
162 }
163 headerValueMark = i;
164 state = State::HEADER_VALUE;
165 [[fallthrough]];
166 case State::HEADER_VALUE:
167 if (c == cr)
168 {
Patrick Williams0e31e952023-05-10 19:40:27 -0500169 std::string_view value(&buffer[headerValueMark],
Ed Tanousaf4edf62020-07-21 08:46:25 -0700170 i - headerValueMark);
171 mime_fields.rbegin()->fields.set(currentHeaderName,
172 value);
173 state = State::HEADER_VALUE_ALMOST_DONE;
174 }
175 break;
176 case State::HEADER_VALUE_ALMOST_DONE:
177 if (c != lf)
178 {
179 return ParserError::ERROR_HEADER_VALUE;
180 }
181 state = State::HEADER_FIELD_START;
182 break;
183 case State::HEADERS_ALMOST_DONE:
184 if (c != lf)
185 {
186 return ParserError::ERROR_HEADER_ENDING;
187 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200188 if (index > 0)
189 {
190 return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
191 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700192 state = State::PART_DATA_START;
193 break;
194 case State::PART_DATA_START:
195 state = State::PART_DATA;
196 partDataMark = i;
197 [[fallthrough]];
198 case State::PART_DATA:
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200199 {
Ed Tanousaf4edf62020-07-21 08:46:25 -0700200 if (index == 0)
201 {
Patrick Williams0e31e952023-05-10 19:40:27 -0500202 skipNonBoundary(buffer, boundary.size() - 1, i);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700203 c = buffer[i];
204 }
Patrick Williams0e31e952023-05-10 19:40:27 -0500205 if (auto ec = processPartData(buffer, i, c);
206 ec != ParserError::PARSER_SUCCESS)
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200207 {
208 return ec;
209 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700210 break;
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200211 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700212 case State::END:
213 break;
Ed Tanous4da04902024-03-19 11:32:44 -0700214 default:
215 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700216 }
217 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200218
219 if (state != State::END)
220 {
221 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
222 }
223
Ed Tanousaf4edf62020-07-21 08:46:25 -0700224 return ParserError::PARSER_SUCCESS;
225 }
226 std::vector<FormPart> mime_fields;
227 std::string boundary;
228
229 private:
230 void indexBoundary()
231 {
Ed Tanous3544d2a2023-08-06 18:12:20 -0700232 std::ranges::fill(boundaryIndex, 0);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700233 for (const char current : boundary)
234 {
235 boundaryIndex[static_cast<unsigned char>(current)] = true;
236 }
237 }
238
Ed Tanous56d23962022-02-14 20:42:02 -0800239 static char lower(char c)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700240 {
241 return static_cast<char>(c | 0x20);
242 }
243
Ed Tanous9de65b32024-03-27 13:34:40 -0700244 bool isBoundaryChar(char c) const
Ed Tanousaf4edf62020-07-21 08:46:25 -0700245 {
246 return boundaryIndex[static_cast<unsigned char>(c)];
247 }
248
Patrick Williams0e31e952023-05-10 19:40:27 -0500249 void skipNonBoundary(const std::string& buffer, size_t boundaryEnd,
Ed Tanousaf4edf62020-07-21 08:46:25 -0700250 size_t& i)
251 {
252 // boyer-moore derived algorithm to safely skip non-boundary data
Patrick Williams0e31e952023-05-10 19:40:27 -0500253 while (i + boundary.size() <= buffer.length())
Ed Tanousaf4edf62020-07-21 08:46:25 -0700254 {
255 if (isBoundaryChar(buffer[i + boundaryEnd]))
256 {
257 break;
258 }
259 i += boundary.size();
260 }
261 }
262
Patrick Williams0e31e952023-05-10 19:40:27 -0500263 ParserError processPartData(const std::string& buffer, size_t& i, char c)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700264 {
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200265 size_t prevIndex = index;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700266
267 if (index < boundary.size())
268 {
269 if (boundary[index] == c)
270 {
271 if (index == 0)
272 {
Patrick Williams0e31e952023-05-10 19:40:27 -0500273 const char* start = &buffer[partDataMark];
Ed Tanousca45aa32022-01-07 09:28:45 -0800274 size_t size = i - partDataMark;
Patrick Williamsbd79bce2024-08-16 15:22:20 -0400275 mime_fields.rbegin()->content +=
276 std::string_view(start, size);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700277 }
278 index++;
279 }
280 else
281 {
282 index = 0;
283 }
284 }
285 else if (index == boundary.size())
286 {
287 index++;
288 if (c == cr)
289 {
290 // cr = part boundary
291 flags = Boundary::PART_BOUNDARY;
292 }
293 else if (c == hyphen)
294 {
295 // hyphen = end boundary
296 flags = Boundary::END_BOUNDARY;
297 }
298 else
299 {
300 index = 0;
301 }
302 }
303 else
304 {
305 if (flags == Boundary::PART_BOUNDARY)
306 {
307 index = 0;
308 if (c == lf)
309 {
310 // unset the PART_BOUNDARY flag
311 flags = Boundary::NON_BOUNDARY;
Patrick Williams26eee3a2023-10-20 20:54:01 -0500312 mime_fields.emplace_back();
Ed Tanousaf4edf62020-07-21 08:46:25 -0700313 state = State::HEADER_FIELD_START;
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200314 return ParserError::PARSER_SUCCESS;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700315 }
316 }
317 if (flags == Boundary::END_BOUNDARY)
318 {
319 if (c == hyphen)
320 {
321 state = State::END;
322 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200323 else
324 {
325 flags = Boundary::NON_BOUNDARY;
326 index = 0;
327 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700328 }
329 }
330
331 if (index > 0)
332 {
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200333 if ((index - 1) >= lookbehind.size())
334 {
335 // Should never happen, but when it does it won't cause crash
336 return ParserError::ERROR_OUT_OF_RANGE;
337 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700338 lookbehind[index - 1] = c;
339 }
340 else if (prevIndex > 0)
341 {
342 // if our boundary turned out to be rubbish, the captured
343 // lookbehind belongs to partData
344
345 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700346 partDataMark = i;
347
348 // reconsider the current character even so it interrupted
349 // the sequence it could be the beginning of a new sequence
350 i--;
351 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200352 return ParserError::PARSER_SUCCESS;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700353 }
354
355 std::string currentHeaderName;
356 std::string currentHeaderValue;
357
358 static constexpr char cr = '\r';
359 static constexpr char lf = '\n';
360 static constexpr char space = ' ';
361 static constexpr char hyphen = '-';
362 static constexpr char colon = ':';
363
Ed Tanousd3a9e082022-01-07 09:30:41 -0800364 std::array<bool, 256> boundaryIndex{};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700365 std::string lookbehind;
Ed Tanousd3a9e082022-01-07 09:30:41 -0800366 State state{State::START};
367 Boundary flags{Boundary::NON_BOUNDARY};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700368 size_t index = 0;
369 size_t partDataMark = 0;
370 size_t headerFieldMark = 0;
371 size_t headerValueMark = 0;
372};