1 /* Copyright Joyent, Inc. and other Node contributors.
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to
5 * deal in the Software without restriction, including without limitation the
6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 * sell copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 #include "http_parser.h"
/* Ceiling on the running header byte count (nread): COUNT_HEADER_SIZE()
 * raises HPE_HEADER_OVERFLOW once nread exceeds it. It is also used to cap
 * the look-ahead scans in the header field/value states. Initialized from
 * the compile-time HTTP_MAX_HEADER_SIZE. */
28 static uint32_t max_header_size = HTTP_MAX_HEADER_SIZE;
/* All-ones 64-bit value. The parser stores it in parser->content_length at
 * the start of every message, before any Content-Length header is parsed.
 * NOTE(review): presumably guarded by a conditional that is outside the
 * visible lines - confirm. */
31 # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
/* Smaller of a and b. Function-like macro: the chosen argument is evaluated
 * twice, so do not pass expressions with side effects. */
35 # define MIN(a,b) ((a) < (b) ? (a) : (b))
/* Element count of a: total size divided by the size of element 0. Only
 * meaningful when a is a real array, not a pointer. */
39 # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
/* Test bit i of the byte-array bitmap a: the byte index is i >> 3 and the
 * bit position inside that byte is i & 7. Evaluates to 1 when the bit is
 * set and 0 otherwise (the !! normalizes the masked value). */
43 # define BIT_AT(a, i) \
44 (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
45 (1 << ((unsigned int) (i) & 7))))
/* Bounds-checked read: yields a[i] when i, converted to unsigned int, is
 * below ARRAY_SIZE(a); otherwise yields the fallback value v. a must be a
 * real array so that ARRAY_SIZE() is valid. */
49 # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
52 #define SET_ERRNO(e) \
54 parser->nread = nread; \
55 parser->http_errno = (e); \
/* State accessors for http_parser_execute(): the machine state is held in
 * the local variable p_state and only copied to parser->state at specific
 * points (e.g. before a callback runs).
 * Note that UPDATE_STATE() already ends with its own semicolon. */
58 #define CURRENT_STATE() p_state
59 #define UPDATE_STATE(V) p_state = (enum state) (V);
62 parser->nread = nread; \
63 parser->state = CURRENT_STATE(); \
/* Branch-prediction hints. The first pair wraps __builtin_expect (the
 * condition is normalized to 0/1 with !!); the second pair is a plain
 * pass-through. The preprocessor conditional that chooses between the two
 * pairs lies outside the visible lines. */
71 # define LIKELY(X) __builtin_expect(!!(X), 1)
72 # define UNLIKELY(X) __builtin_expect(!!(X), 0)
74 # define LIKELY(X) (X)
75 # define UNLIKELY(X) (X)
79 /* Run the notify callback FOR, returning ER if it fails */
80 #define CALLBACK_NOTIFY_(FOR, ER) \
82 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
84 if (LIKELY(settings->on_##FOR)) { \
85 parser->state = CURRENT_STATE(); \
86 if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
87 SET_ERRNO(HPE_CB_##FOR); \
89 UPDATE_STATE(parser->state); \
91 /* We either errored above or got paused; get out */ \
92 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
98 /* Run the notify callback FOR and consume the current byte: the ER value
 * handed to CALLBACK_NOTIFY_ is p - data + 1, which counts the byte at p. */
99 #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
101 /* Run the notify callback FOR and don't consume the current byte: the ER
 * value is p - data, which stops just before the byte at p. */
102 #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
104 /* Run data callback FOR with LEN bytes, returning ER if it fails */
105 #define CALLBACK_DATA_(FOR, LEN, ER) \
107 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
110 if (LIKELY(settings->on_##FOR)) { \
111 parser->state = CURRENT_STATE(); \
113 settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
114 SET_ERRNO(HPE_CB_##FOR); \
116 UPDATE_STATE(parser->state); \
118 /* We either errored above or got paused; get out */ \
119 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
127 /* Run the data callback FOR and consume the current byte. The data length
 * is p - FOR##_mark (from the mark up to, but not including, p); the ER
 * value is p - data + 1, which counts the byte at p. */
128 #define CALLBACK_DATA(FOR) \
129 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
131 /* Run the data callback FOR and don't consume the current byte. Same data
 * length as above; the ER value is p - data, which excludes the byte at p. */
132 #define CALLBACK_DATA_NOADVANCE(FOR) \
133 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
135 /* Set the mark FOR; non-destructive if mark is already set */
143 /* Don't allow the total size of the HTTP headers (including the status
144 * line) to exceed max_header_size. This check is here to protect
145 * embedders against denial-of-service attacks where the attacker feeds
146 * us a never-ending header that the embedder keeps buffering.
148 * This check is arguably the responsibility of embedders but we're doing
149 * it on the embedder's behalf because most won't bother and this way we
150 * make the web a little safer. max_header_size is still far bigger
151 * than any reasonable request or response so this should never affect
152 * day-to-day operation.
154 #define COUNT_HEADER_SIZE(V) \
156 nread += (uint32_t)(V); \
157 if (UNLIKELY(nread > max_header_size)) { \
158 SET_ERRNO(HPE_HEADER_OVERFLOW); \
/* Lower-case header names and header values that the header-matching
 * states compare against one character at a time, indexed by
 * parser->index. sizeof(X)-1 is the string length without the trailing NUL
 * and sizeof(X)-2 is the index of the last character. */
164 #define PROXY_CONNECTION "proxy-connection"
165 #define CONNECTION "connection"
166 #define CONTENT_LENGTH "content-length"
167 #define TRANSFER_ENCODING "transfer-encoding"
168 #define UPGRADE "upgrade"
169 #define CHUNKED "chunked"
170 #define KEEP_ALIVE "keep-alive"
171 #define CLOSE "close"
174 static const char *method_strings[] =
176 #define XX(num, name, string) #string,
182 /* Tokens as defined by rfc 2616. Also lowercases them.
183 * token = 1*<any CHAR except CTLs or separators>
184 * separators = "(" | ")" | "<" | ">" | "@"
185 * | "," | ";" | ":" | "\" | <">
186 * | "/" | "[" | "]" | "?" | "="
187 * | "{" | "}" | SP | HT
/* Lookup table indexed by unsigned byte value. A non-zero entry is the
 * lower-cased form of a character that is allowed in a token; 0 marks a
 * character that is not allowed. Only entries 0-127 are listed, so bytes
 * 128-255 are implicitly 0. Entry 32 (space) is ' ': a raw tokens[] lookup
 * accepts a space, while STRICT_TOKEN() filters it out before indexing. */
189 static const char tokens[256] = {
190 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
191 0, 0, 0, 0, 0, 0, 0, 0,
192 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
193 0, 0, 0, 0, 0, 0, 0, 0,
194 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
195 0, 0, 0, 0, 0, 0, 0, 0,
196 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
197 0, 0, 0, 0, 0, 0, 0, 0,
198 /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
199 ' ', '!', 0, '#', '$', '%', '&', '\'',
200 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
201 0, 0, '*', '+', 0, '-', '.', 0,
202 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
203 '0', '1', '2', '3', '4', '5', '6', '7',
204 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
205 '8', '9', 0, 0, 0, 0, 0, 0,
206 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
207 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
208 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
209 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
210 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
211 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
212 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
213 'x', 'y', 'z', 0, 0, 0, '^', '_',
214 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
215 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
216 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
217 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
218 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
219 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
220 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
221 'x', 'y', 'z', 0, '|', 0, '~', 0 };
224 static const int8_t unhex[256] =
225 {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
226 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
227 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
228 , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
229 ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
230 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
231 ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
232 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
236 #if HTTP_PARSER_STRICT
/* 256-bit bitmap (32 bytes, eight characters per byte) read through
 * BIT_AT() by IS_URL_CHAR(): bit (c & 7) of byte (c >> 3) is set when
 * character c is accepted as an ordinary URL character. Each row below is
 * one byte; within a row the values 1, 2, 4, ..., 128 are the bits of the
 * eight characters named in the comment above it, and a 0 leaves that
 * character out (space, '#', '?' and DEL among them). Only 16 bytes are
 * initialized, so characters 128-255 have no bit set in this table.
 * NOTE(review): T() is defined outside the visible lines - presumably it
 * decides whether tab (9) and form feed (12) are accepted; confirm against
 * its definition. */
243 static const uint8_t normal_url_char[32] = {
244 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
245 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
246 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
247 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
248 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
249 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
250 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
251 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
252 /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
253 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
254 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
255 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
256 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
257 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
258 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
259 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
260 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
261 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
262 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
263 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
264 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
265 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
266 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
267 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
268 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
269 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
270 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
271 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
272 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
273 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
274 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
275 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
280 { s_dead = 1 /* important that this is > 0 */
293 , s_res_first_status_code
297 , s_res_line_almost_done
302 , s_req_spaces_before_url
305 , s_req_schema_slash_slash
308 , s_req_server_with_at
310 , s_req_query_string_start
312 , s_req_fragment_start
325 , s_req_line_almost_done
327 , s_header_field_start
329 , s_header_value_discard_ws
330 , s_header_value_discard_ws_almost_done
331 , s_header_value_discard_lws
332 , s_header_value_start
336 , s_header_almost_done
341 , s_chunk_size_almost_done
343 , s_headers_almost_done
346 /* Important: 's_headers_done' must be the last 'header' state. All
347 * states beyond this must be 'body' states. It is used for overflow
348 * checking. See the PARSING_HEADER() macro.
352 , s_chunk_data_almost_done
356 , s_body_identity_eof
/* True while `state` is at or before s_headers_done in enum state, so the
 * result depends on the declaration order of the enumerators. The execute
 * loop uses it to decide whether a byte counts toward the header size limit.
 * The argument is not parenthesized in the expansion; pass a simple
 * expression. */
362 #define PARSING_HEADER(state) (state <= s_headers_done)
371 , h_matching_connection
372 , h_matching_proxy_connection
373 , h_matching_content_length
374 , h_matching_transfer_encoding
379 , h_content_length_num
380 , h_content_length_ws
381 , h_transfer_encoding
384 , h_matching_transfer_encoding_token_start
385 , h_matching_transfer_encoding_chunked
386 , h_matching_transfer_encoding_token
388 , h_matching_connection_token_start
389 , h_matching_connection_keep_alive
390 , h_matching_connection_close
391 , h_matching_connection_upgrade
392 , h_matching_connection_token
394 , h_transfer_encoding_chunked
395 , h_connection_keep_alive
397 , h_connection_upgrade
403 , s_http_userinfo_start
406 , s_http_host_v6_start
410 , s_http_host_v6_zone_start
411 , s_http_host_v6_zone
412 , s_http_host_port_start
416 /* Macros for character classes; depends on strict-mode */
/* LOWER sets bit 0x20, which maps 'A'-'Z' onto 'a'-'z'; other bytes are
 * altered as well, so it is only meaningful together with a range check
 * as done below. Its argument is not parenthesized in the expansion, so
 * pass a plain identifier.
 * IS_ALPHA / IS_NUM / IS_ALPHANUM / IS_HEX test for an ASCII letter, a
 * decimal digit, either of those, and a hexadecimal digit (either case).
 * The argument may be evaluated more than once. */
419 #define LOWER(c) (unsigned char)(c | 0x20)
420 #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
421 #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
422 #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
423 #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
424 #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
425 (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
/* True when c is alphanumeric, an IS_MARK() character, or one of
 * % ; : & = + $ ,  - used by parse_url_char() while scanning the server
 * part of a URL. c is evaluated multiple times. */
427 #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
428 (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
429 (c) == '$' || (c) == ',')
/* tokens[] lookup that rejects the space character: yields 0 for ' ' and
 * the table entry for c otherwise. c is not parenthesized in the expansion
 * and is evaluated twice. */
431 #define STRICT_TOKEN(c) ((c == ' ') ? 0 : tokens[(unsigned char)c])
433 #if HTTP_PARSER_STRICT
/* Definitions used when HTTP_PARSER_STRICT is non-zero: TOKEN rejects
 * spaces, IS_URL_CHAR consults only the normal_url_char bitmap, and host
 * characters are limited to alphanumerics, '.' and '-'. */
434 #define TOKEN(c) STRICT_TOKEN(c)
435 #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
436 #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
/* Alternative definitions: TOKEN is a raw tokens[] lookup (so a space is
 * accepted), IS_URL_CHAR additionally accepts any byte with the high bit
 * set, and IS_HOST_CHAR additionally accepts '_'.
 * NOTE(review): the directive separating these from the strict variants is
 * outside the visible lines - presumably this is the #else branch; confirm. */
438 #define TOKEN(c) tokens[(unsigned char)c]
439 #define IS_URL_CHAR(c) \
440 (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
441 #define IS_HOST_CHAR(c) \
442 (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
446 * Verify that a char is a valid visible (printable) US-ASCII
447 * character or %x80-FF
/* Non-zero when ch is CR, LF, horizontal tab (9), or any byte above 31
 * other than DEL (127) - so the space character and bytes 0x80-0xFF pass
 * as well. ch is expanded unparenthesized and evaluated several times. */
449 #define IS_HEADER_CHAR(ch) \
450 (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))
/* Initial state for a new message, chosen from parser->type: s_start_req
 * for HTTP_REQUEST and s_start_res for any other type. Object-like macro
 * that reads the `parser` variable in scope at the point of use. */
452 #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
455 #if HTTP_PARSER_STRICT
456 # define STRICT_CHECK(cond) \
459 SET_ERRNO(HPE_STRICT); \
463 # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
465 # define STRICT_CHECK(cond)
466 # define NEW_MESSAGE() start_state
470 /* Map errno values to strings for human-readable output */
471 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
474 const char *description;
475 } http_strerror_tab[] = {
476 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
478 #undef HTTP_STRERROR_GEN
/* Forward declaration; the definition is not part of the visible lines.
 * Takes the parser by const pointer, so it only inspects parser state. */
480 int http_message_needs_eof(const http_parser *parser);
484 * This is designed to be shared by http_parser_execute() for URL validation,
485 * hence it has a state transition + byte-for-byte interface. In addition, it
486 * is meant to be embedded in http_parser_parse_url(), which does the dirty
487 * work of turning state transitions into URL components for its API.
489 * This function should only be invoked with non-space characters. It is
490 * assumed that the caller cares about (and can detect) the transition between
491 * URL and non-URL states by looking for these.
494 parse_url_char(enum state s, const char ch)
496 if (ch == ' ' || ch == '\r' || ch == '\n') {
500 #if HTTP_PARSER_STRICT
501 if (ch == '\t' || ch == '\f') {
507 case s_req_spaces_before_url:
508 /* Proxied requests are followed by scheme of an absolute URI (alpha).
509 * All methods except CONNECT are followed by '/' or '*'.
512 if (ch == '/' || ch == '*') {
528 return s_req_schema_slash;
533 case s_req_schema_slash:
535 return s_req_schema_slash_slash;
540 case s_req_schema_slash_slash:
542 return s_req_server_start;
547 case s_req_server_with_at:
553 case s_req_server_start:
560 return s_req_query_string_start;
564 return s_req_server_with_at;
567 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
574 if (IS_URL_CHAR(ch)) {
580 return s_req_query_string_start;
583 return s_req_fragment_start;
588 case s_req_query_string_start:
589 case s_req_query_string:
590 if (IS_URL_CHAR(ch)) {
591 return s_req_query_string;
596 /* allow extra '?' in query string */
597 return s_req_query_string;
600 return s_req_fragment_start;
605 case s_req_fragment_start:
606 if (IS_URL_CHAR(ch)) {
607 return s_req_fragment;
612 return s_req_fragment;
621 if (IS_URL_CHAR(ch)) {
637 /* We should never fall out of the switch above unless there's an error */
641 size_t http_parser_execute (http_parser *parser,
642 const http_parser_settings *settings,
648 const char *p = data;
649 const char *header_field_mark = 0;
650 const char *header_value_mark = 0;
651 const char *url_mark = 0;
652 const char *body_mark = 0;
653 const char *status_mark = 0;
654 enum state p_state = (enum state) parser->state;
655 const unsigned int lenient = parser->lenient_http_headers;
656 uint32_t nread = parser->nread;
658 /* We're in an error state. Don't bother doing anything. */
659 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
664 switch (CURRENT_STATE()) {
665 case s_body_identity_eof:
666 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
669 CALLBACK_NOTIFY_NOADVANCE(message_complete);
673 case s_start_req_or_res:
679 SET_ERRNO(HPE_INVALID_EOF_STATE);
685 if (CURRENT_STATE() == s_header_field)
686 header_field_mark = data;
687 if (CURRENT_STATE() == s_header_value)
688 header_value_mark = data;
689 switch (CURRENT_STATE()) {
692 case s_req_schema_slash:
693 case s_req_schema_slash_slash:
694 case s_req_server_start:
696 case s_req_server_with_at:
697 case s_req_query_string_start:
698 case s_req_query_string:
699 case s_req_fragment_start:
710 for (p=data; p != data + len; p++) {
713 if (PARSING_HEADER(CURRENT_STATE()))
714 COUNT_HEADER_SIZE(1);
717 switch (CURRENT_STATE()) {
720 /* this state is used after a 'Connection: close' message
721 * the parser will error out if it reads another message
723 if (LIKELY(ch == CR || ch == LF))
726 SET_ERRNO(HPE_CLOSED_CONNECTION);
729 case s_start_req_or_res:
731 if (ch == CR || ch == LF)
734 parser->extra_flags = 0;
735 parser->content_length = ULLONG_MAX;
738 UPDATE_STATE(s_res_or_resp_H);
740 CALLBACK_NOTIFY(message_begin);
742 parser->type = HTTP_REQUEST;
743 UPDATE_STATE(s_start_req);
750 case s_res_or_resp_H:
752 parser->type = HTTP_RESPONSE;
753 UPDATE_STATE(s_res_HT);
755 if (UNLIKELY(ch != 'E')) {
756 SET_ERRNO(HPE_INVALID_CONSTANT);
760 parser->type = HTTP_REQUEST;
761 parser->method = HTTP_HEAD;
763 UPDATE_STATE(s_req_method);
769 if (ch == CR || ch == LF)
772 parser->extra_flags = 0;
773 parser->content_length = ULLONG_MAX;
776 UPDATE_STATE(s_res_H);
778 SET_ERRNO(HPE_INVALID_CONSTANT);
782 CALLBACK_NOTIFY(message_begin);
787 STRICT_CHECK(ch != 'T');
788 UPDATE_STATE(s_res_HT);
792 STRICT_CHECK(ch != 'T');
793 UPDATE_STATE(s_res_HTT);
797 STRICT_CHECK(ch != 'P');
798 UPDATE_STATE(s_res_HTTP);
802 STRICT_CHECK(ch != '/');
803 UPDATE_STATE(s_res_http_major);
806 case s_res_http_major:
807 if (UNLIKELY(!IS_NUM(ch))) {
808 SET_ERRNO(HPE_INVALID_VERSION);
812 parser->http_major = ch - '0';
813 UPDATE_STATE(s_res_http_dot);
818 if (UNLIKELY(ch != '.')) {
819 SET_ERRNO(HPE_INVALID_VERSION);
823 UPDATE_STATE(s_res_http_minor);
827 case s_res_http_minor:
828 if (UNLIKELY(!IS_NUM(ch))) {
829 SET_ERRNO(HPE_INVALID_VERSION);
833 parser->http_minor = ch - '0';
834 UPDATE_STATE(s_res_http_end);
839 if (UNLIKELY(ch != ' ')) {
840 SET_ERRNO(HPE_INVALID_VERSION);
844 UPDATE_STATE(s_res_first_status_code);
848 case s_res_first_status_code:
855 SET_ERRNO(HPE_INVALID_STATUS);
858 parser->status_code = ch - '0';
859 UPDATE_STATE(s_res_status_code);
863 case s_res_status_code:
868 UPDATE_STATE(s_res_status_start);
872 UPDATE_STATE(s_res_status_start);
876 SET_ERRNO(HPE_INVALID_STATUS);
882 parser->status_code *= 10;
883 parser->status_code += ch - '0';
885 if (UNLIKELY(parser->status_code > 999)) {
886 SET_ERRNO(HPE_INVALID_STATUS);
893 case s_res_status_start:
896 UPDATE_STATE(s_res_status);
899 if (ch == CR || ch == LF)
907 UPDATE_STATE(s_res_line_almost_done);
908 CALLBACK_DATA(status);
913 UPDATE_STATE(s_header_field_start);
914 CALLBACK_DATA(status);
920 case s_res_line_almost_done:
921 STRICT_CHECK(ch != LF);
922 UPDATE_STATE(s_header_field_start);
927 if (ch == CR || ch == LF)
930 parser->extra_flags = 0;
931 parser->content_length = ULLONG_MAX;
933 if (UNLIKELY(!IS_ALPHA(ch))) {
934 SET_ERRNO(HPE_INVALID_METHOD);
938 parser->method = (enum http_method) 0;
941 case 'A': parser->method = HTTP_ACL; break;
942 case 'B': parser->method = HTTP_BIND; break;
943 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
944 case 'D': parser->method = HTTP_DELETE; break;
945 case 'G': parser->method = HTTP_GET; break;
946 case 'H': parser->method = HTTP_HEAD; break;
947 case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
948 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
949 case 'N': parser->method = HTTP_NOTIFY; break;
950 case 'O': parser->method = HTTP_OPTIONS; break;
951 case 'P': parser->method = HTTP_POST;
952 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
954 case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
955 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
956 case 'T': parser->method = HTTP_TRACE; break;
957 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
959 SET_ERRNO(HPE_INVALID_METHOD);
962 UPDATE_STATE(s_req_method);
964 CALLBACK_NOTIFY(message_begin);
972 if (UNLIKELY(ch == '\0')) {
973 SET_ERRNO(HPE_INVALID_METHOD);
977 matcher = method_strings[parser->method];
978 if (ch == ' ' && matcher[parser->index] == '\0') {
979 UPDATE_STATE(s_req_spaces_before_url);
980 } else if (ch == matcher[parser->index]) {
982 } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
984 switch (parser->method << 16 | parser->index << 8 | ch) {
985 #define XX(meth, pos, ch, new_meth) \
986 case (HTTP_##meth << 16 | pos << 8 | ch): \
987 parser->method = HTTP_##new_meth; break;
989 XX(POST, 1, 'U', PUT)
990 XX(POST, 1, 'A', PATCH)
991 XX(POST, 1, 'R', PROPFIND)
992 XX(PUT, 2, 'R', PURGE)
993 XX(CONNECT, 1, 'H', CHECKOUT)
994 XX(CONNECT, 2, 'P', COPY)
995 XX(MKCOL, 1, 'O', MOVE)
996 XX(MKCOL, 1, 'E', MERGE)
997 XX(MKCOL, 1, '-', MSEARCH)
998 XX(MKCOL, 2, 'A', MKACTIVITY)
999 XX(MKCOL, 3, 'A', MKCALENDAR)
1000 XX(SUBSCRIBE, 1, 'E', SEARCH)
1001 XX(SUBSCRIBE, 1, 'O', SOURCE)
1002 XX(REPORT, 2, 'B', REBIND)
1003 XX(PROPFIND, 4, 'P', PROPPATCH)
1004 XX(LOCK, 1, 'I', LINK)
1005 XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
1006 XX(UNLOCK, 2, 'B', UNBIND)
1007 XX(UNLOCK, 3, 'I', UNLINK)
1010 SET_ERRNO(HPE_INVALID_METHOD);
1014 SET_ERRNO(HPE_INVALID_METHOD);
1022 case s_req_spaces_before_url:
1024 if (ch == ' ') break;
1027 if (parser->method == HTTP_CONNECT) {
1028 UPDATE_STATE(s_req_server_start);
1031 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1032 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1033 SET_ERRNO(HPE_INVALID_URL);
1041 case s_req_schema_slash:
1042 case s_req_schema_slash_slash:
1043 case s_req_server_start:
1046 /* No whitespace allowed here */
1050 SET_ERRNO(HPE_INVALID_URL);
1053 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1054 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1055 SET_ERRNO(HPE_INVALID_URL);
1064 case s_req_server_with_at:
1066 case s_req_query_string_start:
1067 case s_req_query_string:
1068 case s_req_fragment_start:
1069 case s_req_fragment:
1073 UPDATE_STATE(s_req_http_start);
1078 parser->http_major = 0;
1079 parser->http_minor = 9;
1080 UPDATE_STATE((ch == CR) ?
1081 s_req_line_almost_done :
1082 s_header_field_start);
1086 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1087 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1088 SET_ERRNO(HPE_INVALID_URL);
1095 case s_req_http_start:
1100 UPDATE_STATE(s_req_http_H);
1103 if (parser->method == HTTP_SOURCE) {
1104 UPDATE_STATE(s_req_http_I);
1109 SET_ERRNO(HPE_INVALID_CONSTANT);
1115 STRICT_CHECK(ch != 'T');
1116 UPDATE_STATE(s_req_http_HT);
1120 STRICT_CHECK(ch != 'T');
1121 UPDATE_STATE(s_req_http_HTT);
1124 case s_req_http_HTT:
1125 STRICT_CHECK(ch != 'P');
1126 UPDATE_STATE(s_req_http_HTTP);
1130 STRICT_CHECK(ch != 'C');
1131 UPDATE_STATE(s_req_http_IC);
1135 STRICT_CHECK(ch != 'E');
1136 UPDATE_STATE(s_req_http_HTTP); /* Treat "ICE" as "HTTP". */
1139 case s_req_http_HTTP:
1140 STRICT_CHECK(ch != '/');
1141 UPDATE_STATE(s_req_http_major);
1144 case s_req_http_major:
1145 if (UNLIKELY(!IS_NUM(ch))) {
1146 SET_ERRNO(HPE_INVALID_VERSION);
1150 parser->http_major = ch - '0';
1151 UPDATE_STATE(s_req_http_dot);
1154 case s_req_http_dot:
1156 if (UNLIKELY(ch != '.')) {
1157 SET_ERRNO(HPE_INVALID_VERSION);
1161 UPDATE_STATE(s_req_http_minor);
1165 case s_req_http_minor:
1166 if (UNLIKELY(!IS_NUM(ch))) {
1167 SET_ERRNO(HPE_INVALID_VERSION);
1171 parser->http_minor = ch - '0';
1172 UPDATE_STATE(s_req_http_end);
1175 case s_req_http_end:
1178 UPDATE_STATE(s_req_line_almost_done);
1183 UPDATE_STATE(s_header_field_start);
1187 SET_ERRNO(HPE_INVALID_VERSION);
1192 /* end of request line */
1193 case s_req_line_almost_done:
1195 if (UNLIKELY(ch != LF)) {
1196 SET_ERRNO(HPE_LF_EXPECTED);
1200 UPDATE_STATE(s_header_field_start);
1204 case s_header_field_start:
1207 UPDATE_STATE(s_headers_almost_done);
1212 /* they might be just sending \n instead of \r\n so this would be
1213 * the second \n to denote the end of headers*/
1214 UPDATE_STATE(s_headers_almost_done);
1221 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1228 UPDATE_STATE(s_header_field);
1232 parser->header_state = h_C;
1236 parser->header_state = h_matching_proxy_connection;
1240 parser->header_state = h_matching_transfer_encoding;
1244 parser->header_state = h_matching_upgrade;
1248 parser->header_state = h_general;
1254 case s_header_field:
1256 const char* start = p;
1257 for (; p != data + len; p++) {
1264 switch (parser->header_state) {
1266 size_t left = data + len - p;
1267 const char* pe = p + MIN(left, max_header_size);
1268 while (p+1 < pe && TOKEN(p[1])) {
1276 parser->header_state = (c == 'o' ? h_CO : h_general);
1281 parser->header_state = (c == 'n' ? h_CON : h_general);
1288 parser->header_state = h_matching_connection;
1291 parser->header_state = h_matching_content_length;
1294 parser->header_state = h_general;
1301 case h_matching_connection:
1303 if (parser->index > sizeof(CONNECTION)-1
1304 || c != CONNECTION[parser->index]) {
1305 parser->header_state = h_general;
1306 } else if (parser->index == sizeof(CONNECTION)-2) {
1307 parser->header_state = h_connection;
1311 /* proxy-connection */
1313 case h_matching_proxy_connection:
1315 if (parser->index > sizeof(PROXY_CONNECTION)-1
1316 || c != PROXY_CONNECTION[parser->index]) {
1317 parser->header_state = h_general;
1318 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1319 parser->header_state = h_connection;
1323 /* content-length */
1325 case h_matching_content_length:
1327 if (parser->index > sizeof(CONTENT_LENGTH)-1
1328 || c != CONTENT_LENGTH[parser->index]) {
1329 parser->header_state = h_general;
1330 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1331 parser->header_state = h_content_length;
1335 /* transfer-encoding */
1337 case h_matching_transfer_encoding:
1339 if (parser->index > sizeof(TRANSFER_ENCODING)-1
1340 || c != TRANSFER_ENCODING[parser->index]) {
1341 parser->header_state = h_general;
1342 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1343 parser->header_state = h_transfer_encoding;
1344 parser->extra_flags |= F_TRANSFER_ENCODING >> 8;
1350 case h_matching_upgrade:
1352 if (parser->index > sizeof(UPGRADE)-1
1353 || c != UPGRADE[parser->index]) {
1354 parser->header_state = h_general;
1355 } else if (parser->index == sizeof(UPGRADE)-2) {
1356 parser->header_state = h_upgrade;
1361 case h_content_length:
1362 case h_transfer_encoding:
1364 if (ch != ' ') parser->header_state = h_general;
1368 assert(0 && "Unknown header_state");
1373 if (p == data + len) {
1375 COUNT_HEADER_SIZE(p - start);
1379 COUNT_HEADER_SIZE(p - start);
1382 UPDATE_STATE(s_header_value_discard_ws);
1383 CALLBACK_DATA(header_field);
1387 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1391 case s_header_value_discard_ws:
1392 if (ch == ' ' || ch == '\t') break;
1395 UPDATE_STATE(s_header_value_discard_ws_almost_done);
1400 UPDATE_STATE(s_header_value_discard_lws);
1406 case s_header_value_start:
1410 UPDATE_STATE(s_header_value);
1415 switch (parser->header_state) {
1417 parser->flags |= F_UPGRADE;
1418 parser->header_state = h_general;
1421 case h_transfer_encoding:
1422 /* looking for 'Transfer-Encoding: chunked' */
1424 parser->header_state = h_matching_transfer_encoding_chunked;
1426 parser->header_state = h_matching_transfer_encoding_token;
1430 /* Multi-value `Transfer-Encoding` header */
1431 case h_matching_transfer_encoding_token_start:
1434 case h_content_length:
1435 if (UNLIKELY(!IS_NUM(ch))) {
1436 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1440 if (parser->flags & F_CONTENTLENGTH) {
1441 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1445 parser->flags |= F_CONTENTLENGTH;
1446 parser->content_length = ch - '0';
1447 parser->header_state = h_content_length_num;
1450 /* when obsolete line folding is encountered for content length
1451 * continue to the s_header_value state */
1452 case h_content_length_ws:
1456 /* looking for 'Connection: keep-alive' */
1458 parser->header_state = h_matching_connection_keep_alive;
1459 /* looking for 'Connection: close' */
1460 } else if (c == 'c') {
1461 parser->header_state = h_matching_connection_close;
1462 } else if (c == 'u') {
1463 parser->header_state = h_matching_connection_upgrade;
1465 parser->header_state = h_matching_connection_token;
1469 /* Multi-value `Connection` header */
1470 case h_matching_connection_token_start:
1474 parser->header_state = h_general;
1480 case s_header_value:
1482 const char* start = p;
1483 enum header_states h_state = (enum header_states) parser->header_state;
1484 for (; p != data + len; p++) {
1487 UPDATE_STATE(s_header_almost_done);
1488 parser->header_state = h_state;
1489 CALLBACK_DATA(header_value);
1494 UPDATE_STATE(s_header_almost_done);
1495 COUNT_HEADER_SIZE(p - start);
1496 parser->header_state = h_state;
1497 CALLBACK_DATA_NOADVANCE(header_value);
1501 if (!lenient && !IS_HEADER_CHAR(ch)) {
1502 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1511 size_t left = data + len - p;
1512 const char* pe = p + MIN(left, max_header_size);
1514 for (; p != pe; p++) {
1516 if (ch == CR || ch == LF) {
1520 if (!lenient && !IS_HEADER_CHAR(ch)) {
1521 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1525 if (p == data + len)
1531 case h_transfer_encoding:
1532 assert(0 && "Shouldn't get here.");
1535 case h_content_length:
1536 if (ch == ' ') break;
1537 h_state = h_content_length_num;
1540 case h_content_length_num:
1545 h_state = h_content_length_ws;
1549 if (UNLIKELY(!IS_NUM(ch))) {
1550 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1551 parser->header_state = h_state;
1555 t = parser->content_length;
1559 /* Overflow? Test against a conservative limit for simplicity. */
1560 if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1561 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1562 parser->header_state = h_state;
1566 parser->content_length = t;
1570 case h_content_length_ws:
1571 if (ch == ' ') break;
1572 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1573 parser->header_state = h_state;
1576 /* Transfer-Encoding: chunked */
1577 case h_matching_transfer_encoding_token_start:
1578 /* looking for 'Transfer-Encoding: chunked' */
1580 h_state = h_matching_transfer_encoding_chunked;
1581 } else if (STRICT_TOKEN(c)) {
1582 /* TODO(indutny): similar code below does this, but why?
1583 * At the very least it seems to be inconsistent given that
1584 * h_matching_transfer_encoding_token does not check for
1587 h_state = h_matching_transfer_encoding_token;
1588 } else if (c == ' ' || c == '\t') {
1591 h_state = h_general;
1595 case h_matching_transfer_encoding_chunked:
1597 if (parser->index > sizeof(CHUNKED)-1
1598 || c != CHUNKED[parser->index]) {
1599 h_state = h_matching_transfer_encoding_token;
1600 } else if (parser->index == sizeof(CHUNKED)-2) {
1601 h_state = h_transfer_encoding_chunked;
1605 case h_matching_transfer_encoding_token:
1607 h_state = h_matching_transfer_encoding_token_start;
1612 case h_matching_connection_token_start:
1613 /* looking for 'Connection: keep-alive' */
1615 h_state = h_matching_connection_keep_alive;
1616 /* looking for 'Connection: close' */
1617 } else if (c == 'c') {
1618 h_state = h_matching_connection_close;
1619 } else if (c == 'u') {
1620 h_state = h_matching_connection_upgrade;
1621 } else if (STRICT_TOKEN(c)) {
1622 h_state = h_matching_connection_token;
1623 } else if (c == ' ' || c == '\t') {
1626 h_state = h_general;
1630 /* looking for 'Connection: keep-alive' */
1631 case h_matching_connection_keep_alive:
1633 if (parser->index > sizeof(KEEP_ALIVE)-1
1634 || c != KEEP_ALIVE[parser->index]) {
1635 h_state = h_matching_connection_token;
1636 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1637 h_state = h_connection_keep_alive;
1641 /* looking for 'Connection: close' */
1642 case h_matching_connection_close:
1644 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1645 h_state = h_matching_connection_token;
1646 } else if (parser->index == sizeof(CLOSE)-2) {
1647 h_state = h_connection_close;
1651 /* looking for 'Connection: upgrade' */
1652 case h_matching_connection_upgrade:
1654 if (parser->index > sizeof(UPGRADE) - 1 ||
1655 c != UPGRADE[parser->index]) {
1656 h_state = h_matching_connection_token;
1657 } else if (parser->index == sizeof(UPGRADE)-2) {
1658 h_state = h_connection_upgrade;
1662 case h_matching_connection_token:
1664 h_state = h_matching_connection_token_start;
1669 case h_transfer_encoding_chunked:
1670 if (ch != ' ') h_state = h_matching_transfer_encoding_token;
1673 case h_connection_keep_alive:
1674 case h_connection_close:
1675 case h_connection_upgrade:
1677 if (h_state == h_connection_keep_alive) {
1678 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1679 } else if (h_state == h_connection_close) {
1680 parser->flags |= F_CONNECTION_CLOSE;
1681 } else if (h_state == h_connection_upgrade) {
1682 parser->flags |= F_CONNECTION_UPGRADE;
1684 h_state = h_matching_connection_token_start;
1686 } else if (ch != ' ') {
1687 h_state = h_matching_connection_token;
1692 UPDATE_STATE(s_header_value);
1693 h_state = h_general;
1697 parser->header_state = h_state;
1699 if (p == data + len)
1702 COUNT_HEADER_SIZE(p - start);
1706 case s_header_almost_done:
1708 if (UNLIKELY(ch != LF)) {
1709 SET_ERRNO(HPE_LF_EXPECTED);
1713 UPDATE_STATE(s_header_value_lws);
1717 case s_header_value_lws:
1719 if (ch == ' ' || ch == '\t') {
1720 if (parser->header_state == h_content_length_num) {
1721 /* treat obsolete line folding as space */
1722 parser->header_state = h_content_length_ws;
1724 UPDATE_STATE(s_header_value_start);
1728 /* finished the header */
1729 switch (parser->header_state) {
1730 case h_connection_keep_alive:
1731 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1733 case h_connection_close:
1734 parser->flags |= F_CONNECTION_CLOSE;
1736 case h_transfer_encoding_chunked:
1737 parser->flags |= F_CHUNKED;
1739 case h_connection_upgrade:
1740 parser->flags |= F_CONNECTION_UPGRADE;
1746 UPDATE_STATE(s_header_field_start);
1750 case s_header_value_discard_ws_almost_done:
1752 STRICT_CHECK(ch != LF);
1753 UPDATE_STATE(s_header_value_discard_lws);
1757 case s_header_value_discard_lws:
1759 if (ch == ' ' || ch == '\t') {
1760 UPDATE_STATE(s_header_value_discard_ws);
1763 switch (parser->header_state) {
1764 case h_connection_keep_alive:
1765 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1767 case h_connection_close:
1768 parser->flags |= F_CONNECTION_CLOSE;
1770 case h_connection_upgrade:
1771 parser->flags |= F_CONNECTION_UPGRADE;
1773 case h_transfer_encoding_chunked:
1774 parser->flags |= F_CHUNKED;
1776 case h_content_length:
1777 /* do not allow empty content length */
1778 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1785 /* header value was empty */
1787 UPDATE_STATE(s_header_field_start);
1788 CALLBACK_DATA_NOADVANCE(header_value);
1793 case s_headers_almost_done:
1795 STRICT_CHECK(ch != LF);
1797 if (parser->flags & F_TRAILING) {
1798 /* End of a chunked request */
1799 UPDATE_STATE(s_message_done);
1800 CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1804 /* Cannot use transfer-encoding and a content-length header together
1805 per the HTTP specification. (RFC 7230 Section 3.3.3) */
1806 if ((parser->extra_flags & (F_TRANSFER_ENCODING >> 8)) &&
1807 (parser->flags & F_CONTENTLENGTH)) {
1808 /* Allow it for lenient parsing as long as `Transfer-Encoding` is
1811 if (!lenient || (parser->flags & F_CHUNKED)) {
1812 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1817 UPDATE_STATE(s_headers_done);
1819 /* Set this here so that on_headers_complete() callbacks can see it */
1820 if ((parser->flags & F_UPGRADE) &&
1821 (parser->flags & F_CONNECTION_UPGRADE)) {
1822 /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1823 * mandatory only when it is a 101 Switching Protocols response,
1824 * otherwise it is purely informational, to announce support.
1827 (parser->type == HTTP_REQUEST || parser->status_code == 101);
1829 parser->upgrade = (parser->method == HTTP_CONNECT);
1832 /* Here we call the headers_complete callback. This is somewhat
1833 * different than other callbacks because if the user returns 1, we
1834 * will interpret that as saying that this message has no body. This
1835 * is needed for the annoying case of receiving a response to a HEAD
1838 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1839 * we have to simulate it by handling a change in errno below.
1841 if (settings->on_headers_complete) {
1842 switch (settings->on_headers_complete(parser)) {
1847 parser->upgrade = 1;
1851 parser->flags |= F_SKIPBODY;
1855 SET_ERRNO(HPE_CB_headers_complete);
1856 RETURN(p - data); /* Error */
1860 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1867 case s_headers_done:
1870 STRICT_CHECK(ch != LF);
1875 hasBody = parser->flags & F_CHUNKED ||
1876 (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1877 if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1878 (parser->flags & F_SKIPBODY) || !hasBody)) {
1879 /* Exit, the rest of the message is in a different protocol. */
1880 UPDATE_STATE(NEW_MESSAGE());
1881 CALLBACK_NOTIFY(message_complete);
1882 RETURN((p - data) + 1);
1885 if (parser->flags & F_SKIPBODY) {
1886 UPDATE_STATE(NEW_MESSAGE());
1887 CALLBACK_NOTIFY(message_complete);
1888 } else if (parser->flags & F_CHUNKED) {
1889 /* chunked encoding - ignore Content-Length header,
1890 * prepare for a chunk */
1891 UPDATE_STATE(s_chunk_size_start);
1892 } else if (parser->extra_flags & (F_TRANSFER_ENCODING >> 8)) {
1893 if (parser->type == HTTP_REQUEST && !lenient) {
1894 /* RFC 7230 3.3.3 */
1896 /* If a Transfer-Encoding header field
1897 * is present in a request and the chunked transfer coding is not
1898 * the final encoding, the message body length cannot be determined
1899 * reliably; the server MUST respond with the 400 (Bad Request)
1900 * status code and then close the connection.
1902 SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
1903 RETURN(p - data); /* Error */
1905 /* RFC 7230 3.3.3 */
1907 /* If a Transfer-Encoding header field is present in a response and
1908 * the chunked transfer coding is not the final encoding, the
1909 * message body length is determined by reading the connection until
1910 * it is closed by the server.
1912 UPDATE_STATE(s_body_identity_eof);
1915 if (parser->content_length == 0) {
1916 /* Content-Length header given but zero: Content-Length: 0\r\n */
1917 UPDATE_STATE(NEW_MESSAGE());
1918 CALLBACK_NOTIFY(message_complete);
1919 } else if (parser->content_length != ULLONG_MAX) {
1920 /* Content-Length header given and non-zero */
1921 UPDATE_STATE(s_body_identity);
1923 if (!http_message_needs_eof(parser)) {
1924 /* Assume content-length 0 - read the next */
1925 UPDATE_STATE(NEW_MESSAGE());
1926 CALLBACK_NOTIFY(message_complete);
1928 /* Read body until EOF */
1929 UPDATE_STATE(s_body_identity_eof);
1937 case s_body_identity:
1939 uint64_t to_read = MIN(parser->content_length,
1940 (uint64_t) ((data + len) - p));
1942 assert(parser->content_length != 0
1943 && parser->content_length != ULLONG_MAX);
1945 /* The difference between advancing content_length and p is because
1946 * the latter will automatically advance on the next loop iteration.
1947 * Further, if content_length ends up at 0, we want to see the last
1948 * byte again for our message complete callback.
1951 parser->content_length -= to_read;
1954 if (parser->content_length == 0) {
1955 UPDATE_STATE(s_message_done);
1957 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1959 * The alternative to doing this is to wait for the next byte to
1960 * trigger the data callback, just as in every other case. The
1961 * problem with this is that this makes it difficult for the test
1962 * harness to distinguish between complete-on-EOF and
1963 * complete-on-length. It's not clear that this distinction is
1964 * important for applications, but let's keep it for now.
1966 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1973 /* read until EOF */
1974 case s_body_identity_eof:
1980 case s_message_done:
1981 UPDATE_STATE(NEW_MESSAGE());
1982 CALLBACK_NOTIFY(message_complete);
1983 if (parser->upgrade) {
1984 /* Exit, the rest of the message is in a different protocol. */
1985 RETURN((p - data) + 1);
1989 case s_chunk_size_start:
1992 assert(parser->flags & F_CHUNKED);
1994 unhex_val = unhex[(unsigned char)ch];
1995 if (UNLIKELY(unhex_val == -1)) {
1996 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
2000 parser->content_length = unhex_val;
2001 UPDATE_STATE(s_chunk_size);
2009 assert(parser->flags & F_CHUNKED);
2012 UPDATE_STATE(s_chunk_size_almost_done);
2016 unhex_val = unhex[(unsigned char)ch];
2018 if (unhex_val == -1) {
2019 if (ch == ';' || ch == ' ') {
2020 UPDATE_STATE(s_chunk_parameters);
2024 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
2028 t = parser->content_length;
2032 /* Overflow? Test against a conservative limit for simplicity. */
2033 if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
2034 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
2038 parser->content_length = t;
2042 case s_chunk_parameters:
2044 assert(parser->flags & F_CHUNKED);
2045 /* just ignore this shit. TODO check for overflow */
2047 UPDATE_STATE(s_chunk_size_almost_done);
2053 case s_chunk_size_almost_done:
2055 assert(parser->flags & F_CHUNKED);
2056 STRICT_CHECK(ch != LF);
2061 if (parser->content_length == 0) {
2062 parser->flags |= F_TRAILING;
2063 UPDATE_STATE(s_header_field_start);
2065 UPDATE_STATE(s_chunk_data);
2067 CALLBACK_NOTIFY(chunk_header);
2073 uint64_t to_read = MIN(parser->content_length,
2074 (uint64_t) ((data + len) - p));
2076 assert(parser->flags & F_CHUNKED);
2077 assert(parser->content_length != 0
2078 && parser->content_length != ULLONG_MAX);
2080 /* See the explanation in s_body_identity for why the content
2081 * length and data pointers are managed this way.
2084 parser->content_length -= to_read;
2087 if (parser->content_length == 0) {
2088 UPDATE_STATE(s_chunk_data_almost_done);
2094 case s_chunk_data_almost_done:
2095 assert(parser->flags & F_CHUNKED);
2096 assert(parser->content_length == 0);
2097 STRICT_CHECK(ch != CR);
2098 UPDATE_STATE(s_chunk_data_done);
2099 CALLBACK_DATA(body);
2102 case s_chunk_data_done:
2103 assert(parser->flags & F_CHUNKED);
2104 STRICT_CHECK(ch != LF);
2107 UPDATE_STATE(s_chunk_size_start);
2108 CALLBACK_NOTIFY(chunk_complete);
2112 assert(0 && "unhandled state");
2113 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2118 /* Run callbacks for any marks that we have leftover after we ran out of
2119 * bytes. There should be at most one of these set, so it's OK to invoke
2120 * them in series (unset marks will not result in callbacks).
2122 * We use the NOADVANCE() variety of callbacks here because 'p' has already
2123 * overflowed 'data' and this allows us to correct for the off-by-one that
2124 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2125 * value that's in-bounds).
2128 assert(((header_field_mark ? 1 : 0) +
2129 (header_value_mark ? 1 : 0) +
2130 (url_mark ? 1 : 0) +
2131 (body_mark ? 1 : 0) +
2132 (status_mark ? 1 : 0)) <= 1);
2134 CALLBACK_DATA_NOADVANCE(header_field);
2135 CALLBACK_DATA_NOADVANCE(header_value);
2136 CALLBACK_DATA_NOADVANCE(url);
2137 CALLBACK_DATA_NOADVANCE(body);
2138 CALLBACK_DATA_NOADVANCE(status);
2143 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2144 SET_ERRNO(HPE_UNKNOWN);
2151 /* Does the parser need to see an EOF to find the end of the message? */
2153 http_message_needs_eof (const http_parser *parser)
2155 if (parser->type == HTTP_REQUEST) {
2159 /* See RFC 2616 section 4.4 */
2160 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2161 parser->status_code == 204 || /* No Content */
2162 parser->status_code == 304 || /* Not Modified */
2163 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
2167 /* RFC 7230 3.3.3, see `s_headers_almost_done` */
2168 if ((parser->extra_flags & (F_TRANSFER_ENCODING >> 8)) &&
2169 (parser->flags & F_CHUNKED) == 0) {
2173 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2182 http_should_keep_alive (const http_parser *parser)
2184 if (parser->http_major > 0 && parser->http_minor > 0) {
2186 if (parser->flags & F_CONNECTION_CLOSE) {
2190 /* HTTP/1.0 or earlier */
2191 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2196 return !http_message_needs_eof(parser);
2201 http_method_str (enum http_method m)
2203 return ELEM_AT(method_strings, m, "<unknown>");
2207 http_status_str (enum http_status s)
2210 #define XX(num, name, string) case HTTP_STATUS_##name: return #string;
2213 default: return "<unknown>";
2218 http_parser_init (http_parser *parser, enum http_parser_type t)
2220 void *data = parser->data; /* preserve application data */
2221 memset(parser, 0, sizeof(*parser));
2222 parser->data = data;
2224 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2225 parser->http_errno = HPE_OK;
2229 http_parser_settings_init(http_parser_settings *settings)
2231 memset(settings, 0, sizeof(*settings));
2235 http_errno_name(enum http_errno err) {
2236 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2237 return http_strerror_tab[err].name;
2241 http_errno_description(enum http_errno err) {
2242 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2243 return http_strerror_tab[err].description;
2246 static enum http_host_state
2247 http_parse_host_char(enum http_host_state s, const char ch) {
2249 case s_http_userinfo:
2250 case s_http_userinfo_start:
2252 return s_http_host_start;
2255 if (IS_USERINFO_CHAR(ch)) {
2256 return s_http_userinfo;
2260 case s_http_host_start:
2262 return s_http_host_v6_start;
2265 if (IS_HOST_CHAR(ch)) {
2272 if (IS_HOST_CHAR(ch)) {
2277 case s_http_host_v6_end:
2279 return s_http_host_port_start;
2284 case s_http_host_v6:
2286 return s_http_host_v6_end;
2290 case s_http_host_v6_start:
2291 if (IS_HEX(ch) || ch == ':' || ch == '.') {
2292 return s_http_host_v6;
2295 if (s == s_http_host_v6 && ch == '%') {
2296 return s_http_host_v6_zone_start;
2300 case s_http_host_v6_zone:
2302 return s_http_host_v6_end;
2306 case s_http_host_v6_zone_start:
2307 /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2308 if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2310 return s_http_host_v6_zone;
2314 case s_http_host_port:
2315 case s_http_host_port_start:
2317 return s_http_host_port;
2325 return s_http_host_dead;
2329 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2330 enum http_host_state s;
2333 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2335 assert(u->field_set & (1 << UF_HOST));
2337 u->field_data[UF_HOST].len = 0;
2339 s = found_at ? s_http_userinfo_start : s_http_host_start;
2341 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2342 enum http_host_state new_s = http_parse_host_char(s, *p);
2344 if (new_s == s_http_host_dead) {
2350 if (s != s_http_host) {
2351 u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2353 u->field_data[UF_HOST].len++;
2356 case s_http_host_v6:
2357 if (s != s_http_host_v6) {
2358 u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2360 u->field_data[UF_HOST].len++;
2363 case s_http_host_v6_zone_start:
2364 case s_http_host_v6_zone:
2365 u->field_data[UF_HOST].len++;
2368 case s_http_host_port:
2369 if (s != s_http_host_port) {
2370 u->field_data[UF_PORT].off = (uint16_t)(p - buf);
2371 u->field_data[UF_PORT].len = 0;
2372 u->field_set |= (1 << UF_PORT);
2374 u->field_data[UF_PORT].len++;
2377 case s_http_userinfo:
2378 if (s != s_http_userinfo) {
2379 u->field_data[UF_USERINFO].off = (uint16_t)(p - buf);
2380 u->field_data[UF_USERINFO].len = 0;
2381 u->field_set |= (1 << UF_USERINFO);
2383 u->field_data[UF_USERINFO].len++;
2392 /* Make sure we don't end somewhere unexpected */
2394 case s_http_host_start:
2395 case s_http_host_v6_start:
2396 case s_http_host_v6:
2397 case s_http_host_v6_zone_start:
2398 case s_http_host_v6_zone:
2399 case s_http_host_port_start:
2400 case s_http_userinfo:
2401 case s_http_userinfo_start:
2411 http_parser_url_init(struct http_parser_url *u) {
2412 memset(u, 0, sizeof(*u));
2416 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2417 struct http_parser_url *u)
2421 enum http_parser_url_fields uf, old_uf;
2428 u->port = u->field_set = 0;
2429 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2432 for (p = buf; p < buf + buflen; p++) {
2433 s = parse_url_char(s, *p);
2435 /* Figure out the next field that we're operating on */
2440 /* Skip delimeters */
2441 case s_req_schema_slash:
2442 case s_req_schema_slash_slash:
2443 case s_req_server_start:
2444 case s_req_query_string_start:
2445 case s_req_fragment_start:
2452 case s_req_server_with_at:
2464 case s_req_query_string:
2468 case s_req_fragment:
2473 assert(!"Unexpected state");
2477 /* Nothing's changed; soldier on */
2479 u->field_data[uf].len++;
2483 u->field_data[uf].off = (uint16_t)(p - buf);
2484 u->field_data[uf].len = 1;
2486 u->field_set |= (1 << uf);
2490 /* host must be present if there is a schema */
2491 /* parsing http:///toto will fail */
2492 if ((u->field_set & (1 << UF_SCHEMA)) &&
2493 (u->field_set & (1 << UF_HOST)) == 0) {
2497 if (u->field_set & (1 << UF_HOST)) {
2498 if (http_parse_host(buf, u, found_at) != 0) {
2503 /* CONNECT requests can only contain "hostname:port" */
2504 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2508 if (u->field_set & (1 << UF_PORT)) {
2515 off = u->field_data[UF_PORT].off;
2516 len = u->field_data[UF_PORT].len;
2517 end = buf + off + len;
2519 /* NOTE: The characters are already validated and are in the [0-9] range */
2520 assert(off + len <= buflen && "Port number overflow");
2522 for (p = buf + off; p < end; p++) {
2526 /* Ports have a max value of 2^16 */
2532 u->port = (uint16_t) v;
2539 http_parser_pause(http_parser *parser, int paused) {
2540 /* Users should only be pausing/unpausing a parser that is not in an error
2541 * state. In non-debug builds, there's not much that we can do about this
2542 * other than ignore it.
2544 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2545 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2546 uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
2547 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2549 assert(0 && "Attempting to pause parser in error state");
2554 http_body_is_final(const struct http_parser *parser) {
2555 return parser->state == s_message_done;
2559 http_parser_version(void) {
2560 return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2561 HTTP_PARSER_VERSION_MINOR * 0x00100 |
2562 HTTP_PARSER_VERSION_PATCH * 0x00001;
2566 http_parser_set_max_header_size(uint32_t size) {
2567 max_header_size = size;