blob: 4dc79730b052927386f18cd641aec3706a1f285f [file] [log] [blame]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright OpenBMC Authors
Ed Tanousaf4edf62020-07-21 08:46:25 -07003#pragma once
4
Ed Tanous3ccb3ad2023-01-13 17:40:03 -08005#include "http_request.hpp"
6
Ed Tanousaf4edf62020-07-21 08:46:25 -07007#include <boost/beast/http/fields.hpp>
Ed Tanousaf4edf62020-07-21 08:46:25 -07008
Ed Tanousd7857202025-01-28 15:32:26 -08009#include <algorithm>
10#include <array>
11#include <cstddef>
Ed Tanous3544d2a2023-08-06 18:12:20 -070012#include <ranges>
Ed Tanousaf4edf62020-07-21 08:46:25 -070013#include <string>
14#include <string_view>
Ed Tanousd7857202025-01-28 15:32:26 -080015#include <vector>
Ed Tanousaf4edf62020-07-21 08:46:25 -070016
/// Result codes returned by MultipartParser::parse().
enum class ParserError
{
    /// The closing boundary was reached without any error.
    PARSER_SUCCESS,
    /// Content-Type does not start with "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,
    /// The opening boundary line is not followed by CR.
    ERROR_BOUNDARY_CR,
    /// The CR after the opening boundary is not followed by LF.
    ERROR_BOUNDARY_LF,
    /// The body does not begin with "--<boundary>".
    ERROR_BOUNDARY_DATA,
    /// A ':' was found before any header name character.
    ERROR_EMPTY_HEADER,
    /// A header name contains something other than ASCII letters or '-'.
    ERROR_HEADER_NAME,
    /// The CR that ends a header value is not followed by LF.
    ERROR_HEADER_VALUE,
    /// The CR that ends the header section is not followed by LF.
    ERROR_HEADER_ENDING,
    /// A header line ended (CR LF) before its ':' separator was seen.
    ERROR_UNEXPECTED_END_OF_HEADER,
    /// The body ended before the closing boundary, or the parser was in an
    /// unknown state.
    ERROR_UNEXPECTED_END_OF_INPUT,
    /// Internal bounds check on the look-behind buffer failed.
    ERROR_OUT_OF_RANGE
};
32
/// States of the character-by-character machine driven by
/// MultipartParser::parse().
enum class State
{
    /// Initial state; resets the match index and falls into START_BOUNDARY.
    START,
    /// Matching the opening "--<boundary>" line and its CR LF.
    START_BOUNDARY,
    /// About to read the first character of a header name (or the CR of the
    /// empty line that ends the header section).
    HEADER_FIELD_START,
    /// Reading a header name up to ':'.
    HEADER_FIELD,
    /// Skipping spaces between ':' and the header value.
    HEADER_VALUE_START,
    /// Reading a header value up to CR.
    HEADER_VALUE,
    /// CR seen after a header value; LF expected.
    HEADER_VALUE_ALMOST_DONE,
    /// CR seen where a header name would start; LF expected.
    HEADERS_ALMOST_DONE,
    /// About to read the first character of the part content.
    PART_DATA_START,
    /// Reading part content while looking for the next boundary.
    PART_DATA,
    /// Closing boundary seen; remaining input is ignored.
    END
};
47
/// Kind of delimiter suggested by the character that follows a fully matched
/// boundary string.
enum class Boundary
{
    /// No delimiter is currently being confirmed.
    NON_BOUNDARY,
    /// Boundary followed by CR: a delimiter between two parts.
    PART_BOUNDARY,
    /// Boundary followed by '-': the closing delimiter.
    END_BOUNDARY,
};
54
55struct FormPart
56{
57 boost::beast::http::fields fields;
58 std::string content;
59};
60
61class MultipartParser
62{
63 public:
64 MultipartParser() = default;
65
66 [[nodiscard]] ParserError parse(const crow::Request& req)
67 {
68 std::string_view contentType = req.getHeaderValue("content-type");
69
70 const std::string boundaryFormat = "multipart/form-data; boundary=";
Ed Tanous11ba3972022-07-11 09:50:41 -070071 if (!contentType.starts_with(boundaryFormat))
Ed Tanousaf4edf62020-07-21 08:46:25 -070072 {
73 return ParserError::ERROR_BOUNDARY_FORMAT;
74 }
75
76 std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
77
78 boundary = "\r\n--";
79 boundary += ctBoundary;
80 indexBoundary();
81 lookbehind.resize(boundary.size() + 8);
82 state = State::START;
83
Patrick Williams0e31e952023-05-10 19:40:27 -050084 const std::string& buffer = req.body();
85 size_t len = buffer.size();
Ed Tanousaf4edf62020-07-21 08:46:25 -070086 char cl = 0;
87
88 for (size_t i = 0; i < len; i++)
89 {
90 char c = buffer[i];
91 switch (state)
92 {
93 case State::START:
94 index = 0;
95 state = State::START_BOUNDARY;
96 [[fallthrough]];
97 case State::START_BOUNDARY:
98 if (index == boundary.size() - 2)
99 {
100 if (c != cr)
101 {
102 return ParserError::ERROR_BOUNDARY_CR;
103 }
104 index++;
105 break;
106 }
107 else if (index - 1 == boundary.size() - 2)
108 {
109 if (c != lf)
110 {
111 return ParserError::ERROR_BOUNDARY_LF;
112 }
113 index = 0;
Patrick Williams26eee3a2023-10-20 20:54:01 -0500114 mime_fields.emplace_back();
Ed Tanousaf4edf62020-07-21 08:46:25 -0700115 state = State::HEADER_FIELD_START;
116 break;
117 }
118 if (c != boundary[index + 2])
119 {
120 return ParserError::ERROR_BOUNDARY_DATA;
121 }
122 index++;
123 break;
124 case State::HEADER_FIELD_START:
125 currentHeaderName.resize(0);
126 state = State::HEADER_FIELD;
127 headerFieldMark = i;
128 index = 0;
129 [[fallthrough]];
130 case State::HEADER_FIELD:
131 if (c == cr)
132 {
133 headerFieldMark = 0;
134 state = State::HEADERS_ALMOST_DONE;
135 break;
136 }
137
138 index++;
139 if (c == hyphen)
140 {
141 break;
142 }
143
144 if (c == colon)
145 {
146 if (index == 1)
147 {
148 return ParserError::ERROR_EMPTY_HEADER;
149 }
Ed Tanousca45aa32022-01-07 09:28:45 -0800150
Patrick Williams0e31e952023-05-10 19:40:27 -0500151 currentHeaderName.append(&buffer[headerFieldMark],
Ed Tanousaf4edf62020-07-21 08:46:25 -0700152 i - headerFieldMark);
153 state = State::HEADER_VALUE_START;
154 break;
155 }
156 cl = lower(c);
157 if (cl < 'a' || cl > 'z')
158 {
159 return ParserError::ERROR_HEADER_NAME;
160 }
161 break;
162 case State::HEADER_VALUE_START:
163 if (c == space)
164 {
165 break;
166 }
167 headerValueMark = i;
168 state = State::HEADER_VALUE;
169 [[fallthrough]];
170 case State::HEADER_VALUE:
171 if (c == cr)
172 {
Patrick Williams0e31e952023-05-10 19:40:27 -0500173 std::string_view value(&buffer[headerValueMark],
Ed Tanousaf4edf62020-07-21 08:46:25 -0700174 i - headerValueMark);
175 mime_fields.rbegin()->fields.set(currentHeaderName,
176 value);
177 state = State::HEADER_VALUE_ALMOST_DONE;
178 }
179 break;
180 case State::HEADER_VALUE_ALMOST_DONE:
181 if (c != lf)
182 {
183 return ParserError::ERROR_HEADER_VALUE;
184 }
185 state = State::HEADER_FIELD_START;
186 break;
187 case State::HEADERS_ALMOST_DONE:
188 if (c != lf)
189 {
190 return ParserError::ERROR_HEADER_ENDING;
191 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200192 if (index > 0)
193 {
194 return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
195 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700196 state = State::PART_DATA_START;
197 break;
198 case State::PART_DATA_START:
199 state = State::PART_DATA;
200 partDataMark = i;
201 [[fallthrough]];
202 case State::PART_DATA:
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200203 {
Ed Tanousaf4edf62020-07-21 08:46:25 -0700204 if (index == 0)
205 {
Patrick Williams0e31e952023-05-10 19:40:27 -0500206 skipNonBoundary(buffer, boundary.size() - 1, i);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700207 c = buffer[i];
208 }
Patrick Williams0e31e952023-05-10 19:40:27 -0500209 if (auto ec = processPartData(buffer, i, c);
210 ec != ParserError::PARSER_SUCCESS)
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200211 {
212 return ec;
213 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700214 break;
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200215 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700216 case State::END:
217 break;
Ed Tanous4da04902024-03-19 11:32:44 -0700218 default:
219 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700220 }
221 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200222
223 if (state != State::END)
224 {
225 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
226 }
227
Ed Tanousaf4edf62020-07-21 08:46:25 -0700228 return ParserError::PARSER_SUCCESS;
229 }
230 std::vector<FormPart> mime_fields;
231 std::string boundary;
232
233 private:
234 void indexBoundary()
235 {
Ed Tanous3544d2a2023-08-06 18:12:20 -0700236 std::ranges::fill(boundaryIndex, 0);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700237 for (const char current : boundary)
238 {
239 boundaryIndex[static_cast<unsigned char>(current)] = true;
240 }
241 }
242
Ed Tanous56d23962022-02-14 20:42:02 -0800243 static char lower(char c)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700244 {
245 return static_cast<char>(c | 0x20);
246 }
247
Ed Tanous9de65b32024-03-27 13:34:40 -0700248 bool isBoundaryChar(char c) const
Ed Tanousaf4edf62020-07-21 08:46:25 -0700249 {
250 return boundaryIndex[static_cast<unsigned char>(c)];
251 }
252
Patrick Williams0e31e952023-05-10 19:40:27 -0500253 void skipNonBoundary(const std::string& buffer, size_t boundaryEnd,
Ed Tanousaf4edf62020-07-21 08:46:25 -0700254 size_t& i)
255 {
256 // boyer-moore derived algorithm to safely skip non-boundary data
Patrick Williams0e31e952023-05-10 19:40:27 -0500257 while (i + boundary.size() <= buffer.length())
Ed Tanousaf4edf62020-07-21 08:46:25 -0700258 {
259 if (isBoundaryChar(buffer[i + boundaryEnd]))
260 {
261 break;
262 }
263 i += boundary.size();
264 }
265 }
266
Patrick Williams0e31e952023-05-10 19:40:27 -0500267 ParserError processPartData(const std::string& buffer, size_t& i, char c)
Ed Tanousaf4edf62020-07-21 08:46:25 -0700268 {
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200269 size_t prevIndex = index;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700270
271 if (index < boundary.size())
272 {
273 if (boundary[index] == c)
274 {
275 if (index == 0)
276 {
Patrick Williams0e31e952023-05-10 19:40:27 -0500277 const char* start = &buffer[partDataMark];
Ed Tanousca45aa32022-01-07 09:28:45 -0800278 size_t size = i - partDataMark;
Patrick Williamsbd79bce2024-08-16 15:22:20 -0400279 mime_fields.rbegin()->content +=
280 std::string_view(start, size);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700281 }
282 index++;
283 }
284 else
285 {
286 index = 0;
287 }
288 }
289 else if (index == boundary.size())
290 {
291 index++;
292 if (c == cr)
293 {
294 // cr = part boundary
295 flags = Boundary::PART_BOUNDARY;
296 }
297 else if (c == hyphen)
298 {
299 // hyphen = end boundary
300 flags = Boundary::END_BOUNDARY;
301 }
302 else
303 {
304 index = 0;
305 }
306 }
307 else
308 {
309 if (flags == Boundary::PART_BOUNDARY)
310 {
311 index = 0;
312 if (c == lf)
313 {
314 // unset the PART_BOUNDARY flag
315 flags = Boundary::NON_BOUNDARY;
Patrick Williams26eee3a2023-10-20 20:54:01 -0500316 mime_fields.emplace_back();
Ed Tanousaf4edf62020-07-21 08:46:25 -0700317 state = State::HEADER_FIELD_START;
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200318 return ParserError::PARSER_SUCCESS;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700319 }
320 }
321 if (flags == Boundary::END_BOUNDARY)
322 {
323 if (c == hyphen)
324 {
325 state = State::END;
326 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200327 else
328 {
329 flags = Boundary::NON_BOUNDARY;
330 index = 0;
331 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700332 }
333 }
334
335 if (index > 0)
336 {
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200337 if ((index - 1) >= lookbehind.size())
338 {
339 // Should never happen, but when it does it won't cause crash
340 return ParserError::ERROR_OUT_OF_RANGE;
341 }
Ed Tanousaf4edf62020-07-21 08:46:25 -0700342 lookbehind[index - 1] = c;
343 }
344 else if (prevIndex > 0)
345 {
346 // if our boundary turned out to be rubbish, the captured
347 // lookbehind belongs to partData
348
349 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
Ed Tanousaf4edf62020-07-21 08:46:25 -0700350 partDataMark = i;
351
352 // reconsider the current character even so it interrupted
353 // the sequence it could be the beginning of a new sequence
354 i--;
355 }
Krzysztof Grobelny18e3f7f2022-08-24 09:24:33 +0200356 return ParserError::PARSER_SUCCESS;
Ed Tanousaf4edf62020-07-21 08:46:25 -0700357 }
358
359 std::string currentHeaderName;
360 std::string currentHeaderValue;
361
362 static constexpr char cr = '\r';
363 static constexpr char lf = '\n';
364 static constexpr char space = ' ';
365 static constexpr char hyphen = '-';
366 static constexpr char colon = ':';
367
Ed Tanousd3a9e082022-01-07 09:30:41 -0800368 std::array<bool, 256> boundaryIndex{};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700369 std::string lookbehind;
Ed Tanousd3a9e082022-01-07 09:30:41 -0800370 State state{State::START};
371 Boundary flags{Boundary::NON_BOUNDARY};
Ed Tanousaf4edf62020-07-21 08:46:25 -0700372 size_t index = 0;
373 size_t partDataMark = 0;
374 size_t headerFieldMark = 0;
375 size_t headerValueMark = 0;
376};