Blame - include/multipart_parser.hpp - openbmc/bmcweb

blob: 3728311fbedea2c8ed3d411f3271474325f9e446 [file] [log] [blame]

Ed Tanous	af4edf6	2020-07-21 08:46:25 -0700	[diff] [blame^]	1	#pragma once
				2
#include <boost/algorithm/string/predicate.hpp>
#include <boost/beast/http/fields.hpp>
#include <http_request.hpp>

#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>
				9
				10	enum class ParserError
				11	{
				12	PARSER_SUCCESS,
				13	ERROR_BOUNDARY_FORMAT,
				14	ERROR_BOUNDARY_CR,
				15	ERROR_BOUNDARY_LF,
				16	ERROR_BOUNDARY_DATA,
				17	ERROR_EMPTY_HEADER,
				18	ERROR_HEADER_NAME,
				19	ERROR_HEADER_VALUE,
				20	ERROR_HEADER_ENDING
				21	};
				22
				23	enum class State
				24	{
				25	START,
				26	START_BOUNDARY,
				27	HEADER_FIELD_START,
				28	HEADER_FIELD,
				29	HEADER_VALUE_START,
				30	HEADER_VALUE,
				31	HEADER_VALUE_ALMOST_DONE,
				32	HEADERS_ALMOST_DONE,
				33	PART_DATA_START,
				34	PART_DATA,
				35	END
				36	};
				37
				38	enum class Boundary
				39	{
				40	NON_BOUNDARY,
				41	PART_BOUNDARY,
				42	END_BOUNDARY,
				43	};
				44
				45	struct FormPart
				46	{
				47	boost::beast::http::fields fields;
				48	std::string content;
				49	};
				50
				51	class MultipartParser
				52	{
				53	public:
				54	MultipartParser() = default;
				55
				56	[[nodiscard]] ParserError parse(const crow::Request& req)
				57	{
				58	std::string_view contentType = req.getHeaderValue("content-type");
				59
				60	const std::string boundaryFormat = "multipart/form-data; boundary=";
				61	if (!boost::starts_with(req.getHeaderValue("content-type"),
				62	boundaryFormat))
				63	{
				64	return ParserError::ERROR_BOUNDARY_FORMAT;
				65	}
				66
				67	std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
				68
				69	boundary = "\r\n--";
				70	boundary += ctBoundary;
				71	indexBoundary();
				72	lookbehind.resize(boundary.size() + 8);
				73	state = State::START;
				74
				75	const char* buffer = req.body.data();
				76	size_t len = req.body.size();
				77	size_t prevIndex = index;
				78	char cl = 0;
				79
				80	for (size_t i = 0; i < len; i++)
				81	{
				82	char c = buffer[i];
				83	switch (state)
				84	{
				85	case State::START:
				86	index = 0;
				87	state = State::START_BOUNDARY;
				88	[[fallthrough]];
				89	case State::START_BOUNDARY:
				90	if (index == boundary.size() - 2)
				91	{
				92	if (c != cr)
				93	{
				94	return ParserError::ERROR_BOUNDARY_CR;
				95	}
				96	index++;
				97	break;
				98	}
				99	else if (index - 1 == boundary.size() - 2)
				100	{
				101	if (c != lf)
				102	{
				103	return ParserError::ERROR_BOUNDARY_LF;
				104	}
				105	index = 0;
				106	mime_fields.push_back({});
				107	state = State::HEADER_FIELD_START;
				108	break;
				109	}
				110	if (c != boundary[index + 2])
				111	{
				112	return ParserError::ERROR_BOUNDARY_DATA;
				113	}
				114	index++;
				115	break;
				116	case State::HEADER_FIELD_START:
				117	currentHeaderName.resize(0);
				118	state = State::HEADER_FIELD;
				119	headerFieldMark = i;
				120	index = 0;
				121	[[fallthrough]];
				122	case State::HEADER_FIELD:
				123	if (c == cr)
				124	{
				125	headerFieldMark = 0;
				126	state = State::HEADERS_ALMOST_DONE;
				127	break;
				128	}
				129
				130	index++;
				131	if (c == hyphen)
				132	{
				133	break;
				134	}
				135
				136	if (c == colon)
				137	{
				138	if (index == 1)
				139	{
				140	return ParserError::ERROR_EMPTY_HEADER;
				141	}
				142	currentHeaderName.append(buffer + headerFieldMark,
				143	i - headerFieldMark);
				144	state = State::HEADER_VALUE_START;
				145	break;
				146	}
				147	cl = lower(c);
				148	if (cl < 'a' \|\| cl > 'z')
				149	{
				150	return ParserError::ERROR_HEADER_NAME;
				151	}
				152	break;
				153	case State::HEADER_VALUE_START:
				154	if (c == space)
				155	{
				156	break;
				157	}
				158	headerValueMark = i;
				159	state = State::HEADER_VALUE;
				160	[[fallthrough]];
				161	case State::HEADER_VALUE:
				162	if (c == cr)
				163	{
				164	std::string_view value(buffer + headerValueMark,
				165	i - headerValueMark);
				166	mime_fields.rbegin()->fields.set(currentHeaderName,
				167	value);
				168	state = State::HEADER_VALUE_ALMOST_DONE;
				169	}
				170	break;
				171	case State::HEADER_VALUE_ALMOST_DONE:
				172	if (c != lf)
				173	{
				174	return ParserError::ERROR_HEADER_VALUE;
				175	}
				176	state = State::HEADER_FIELD_START;
				177	break;
				178	case State::HEADERS_ALMOST_DONE:
				179	if (c != lf)
				180	{
				181	return ParserError::ERROR_HEADER_ENDING;
				182	}
				183	state = State::PART_DATA_START;
				184	break;
				185	case State::PART_DATA_START:
				186	state = State::PART_DATA;
				187	partDataMark = i;
				188	[[fallthrough]];
				189	case State::PART_DATA:
				190	if (index == 0)
				191	{
				192	skipNonBoundary(buffer, len, boundary.size() - 1, i);
				193	c = buffer[i];
				194	}
				195	processPartData(prevIndex, index, buffer, i, c, state);
				196	break;
				197	case State::END:
				198	break;
				199	}
				200	}
				201	return ParserError::PARSER_SUCCESS;
				202	}
				203	std::vector<FormPart> mime_fields;
				204	std::string boundary;
				205
				206	private:
				207	void indexBoundary()
				208	{
				209	std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0);
				210	for (const char current : boundary)
				211	{
				212	boundaryIndex[static_cast<unsigned char>(current)] = true;
				213	}
				214	}
				215
				216	char lower(char c) const
				217	{
				218	return static_cast<char>(c \| 0x20);
				219	}
				220
				221	inline bool isBoundaryChar(char c) const
				222	{
				223	return boundaryIndex[static_cast<unsigned char>(c)];
				224	}
				225
				226	void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd,
				227	size_t& i)
				228	{
				229	// boyer-moore derived algorithm to safely skip non-boundary data
				230	while (i + boundary.size() <= len)
				231	{
				232	if (isBoundaryChar(buffer[i + boundaryEnd]))
				233	{
				234	break;
				235	}
				236	i += boundary.size();
				237	}
				238	}
				239
				240	void processPartData(size_t& prevIndex, size_t& index, const char* buffer,
				241	size_t& i, char c, State& state)
				242	{
				243	prevIndex = index;
				244
				245	if (index < boundary.size())
				246	{
				247	if (boundary[index] == c)
				248	{
				249	if (index == 0)
				250	{
				251	mime_fields.rbegin()->content += std::string_view(
				252	buffer + partDataMark, i - partDataMark);
				253	}
				254	index++;
				255	}
				256	else
				257	{
				258	index = 0;
				259	}
				260	}
				261	else if (index == boundary.size())
				262	{
				263	index++;
				264	if (c == cr)
				265	{
				266	// cr = part boundary
				267	flags = Boundary::PART_BOUNDARY;
				268	}
				269	else if (c == hyphen)
				270	{
				271	// hyphen = end boundary
				272	flags = Boundary::END_BOUNDARY;
				273	}
				274	else
				275	{
				276	index = 0;
				277	}
				278	}
				279	else
				280	{
				281	if (flags == Boundary::PART_BOUNDARY)
				282	{
				283	index = 0;
				284	if (c == lf)
				285	{
				286	// unset the PART_BOUNDARY flag
				287	flags = Boundary::NON_BOUNDARY;
				288	mime_fields.push_back({});
				289	state = State::HEADER_FIELD_START;
				290	return;
				291	}
				292	}
				293	if (flags == Boundary::END_BOUNDARY)
				294	{
				295	if (c == hyphen)
				296	{
				297	state = State::END;
				298	}
				299	}
				300	}
				301
				302	if (index > 0)
				303	{
				304	lookbehind[index - 1] = c;
				305	}
				306	else if (prevIndex > 0)
				307	{
				308	// if our boundary turned out to be rubbish, the captured
				309	// lookbehind belongs to partData
				310
				311	mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
				312	prevIndex = 0;
				313	partDataMark = i;
				314
				315	// reconsider the current character even so it interrupted
				316	// the sequence it could be the beginning of a new sequence
				317	i--;
				318	}
				319	}
				320
				321	std::string currentHeaderName;
				322	std::string currentHeaderValue;
				323
				324	static constexpr char cr = '\r';
				325	static constexpr char lf = '\n';
				326	static constexpr char space = ' ';
				327	static constexpr char hyphen = '-';
				328	static constexpr char colon = ':';
				329
				330	std::array<bool, 256> boundaryIndex;
				331	std::string lookbehind;
				332	State state;
				333	Boundary flags;
				334	size_t index = 0;
				335	size_t partDataMark = 0;
				336	size_t headerFieldMark = 0;
				337	size_t headerValueMark = 0;
				338	};