1 /* Copyright Joyent, Inc. and other Node contributors.
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to
5 * deal in the Software without restriction, including without limitation the
6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 * sell copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 #include "http_parser.h"
28 static uint32_t max_header_size = HTTP_MAX_HEADER_SIZE;
/* All-ones 64-bit value, i.e. 2^64-1. */
31 # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
/* Smaller of a and b. Each argument is expanded twice, so callers must pass
 * expressions without side effects. */
35 # define MIN(a,b) ((a) < (b) ? (a) : (b))
/* Element count of a. Only meaningful when a is a real array, not a pointer. */
39 # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
/* Test bit i of the byte array a used as a bitmap: byte index is i >> 3,
 * bit within the byte is i & 7. Evaluates to 0 or 1. */
43 # define BIT_AT(a, i) \
44 (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
45 (1 << ((unsigned int) (i) & 7))))
/* Bounds-checked read: a[i] when i is below ARRAY_SIZE(a), otherwise v.
 * i is cast to unsigned, so negative indices also fall back to v. */
49 # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
52 #define SET_ERRNO(e) \
54 parser->nread = nread; \
55 parser->http_errno = (e); \
/* The parser state is cached in the local variable p_state, which
 * http_parser_execute() initializes from parser->state. */
58 #define CURRENT_STATE() p_state
/* NOTE(review): the expansion already ends in ';', so `UPDATE_STATE(x);`
 * produces an extra empty statement. Do not use it as the unbraced body of
 * an `if` that is followed by `else`. */
59 #define UPDATE_STATE(V) p_state = (enum state) (V);
62 parser->nread = nread; \
63 parser->state = CURRENT_STATE(); \
71 # define LIKELY(X) __builtin_expect(!!(X), 1)
72 # define UNLIKELY(X) __builtin_expect(!!(X), 0)
74 # define LIKELY(X) (X)
75 # define UNLIKELY(X) (X)
79 /* Run the notify callback FOR, returning ER if it fails */
80 #define CALLBACK_NOTIFY_(FOR, ER) \
82 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
84 if (LIKELY(settings->on_##FOR)) { \
85 parser->state = CURRENT_STATE(); \
86 if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
87 SET_ERRNO(HPE_CB_##FOR); \
89 UPDATE_STATE(parser->state); \
91 /* We either errored above or got paused; get out */ \
92 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
98 /* Run the notify callback FOR and consume the current byte */
99 #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
101 /* Run the notify callback FOR and don't consume the current byte */
102 #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
104 /* Run data callback FOR with LEN bytes, returning ER if it fails */
105 #define CALLBACK_DATA_(FOR, LEN, ER) \
107 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
110 if (LIKELY(settings->on_##FOR)) { \
111 parser->state = CURRENT_STATE(); \
113 settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
114 SET_ERRNO(HPE_CB_##FOR); \
116 UPDATE_STATE(parser->state); \
118 /* We either errored above or got paused; get out */ \
119 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
127 /* Run the data callback FOR and consume the current byte */
128 #define CALLBACK_DATA(FOR) \
129 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
131 /* Run the data callback FOR and don't consume the current byte */
132 #define CALLBACK_DATA_NOADVANCE(FOR) \
133 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
135 /* Set the mark FOR; non-destructive if mark is already set */
143 /* Don't allow the total size of the HTTP headers (including the status
144 * line) to exceed max_header_size. This check is here to protect
145 * embedders against denial-of-service attacks where the attacker feeds
146 * us a never-ending header that the embedder keeps buffering.
148 * This check is arguably the responsibility of embedders but we're doing
149 * it on the embedder's behalf because most won't bother and this way we
150 * make the web a little safer. max_header_size is still far bigger
151 * than any reasonable request or response so this should never affect
152 * day-to-day operation.
154 #define COUNT_HEADER_SIZE(V) \
156 nread += (uint32_t)(V); \
157 if (UNLIKELY(nread > max_header_size)) { \
158 SET_ERRNO(HPE_HEADER_OVERFLOW); \
/* Header names and header values that the parser matches specially.
 * The matching code compares them one byte at a time against `c`, using
 * parser->index as the position (e.g. `c != CONNECTION[parser->index]`) and
 * treating index sizeof(X)-2 as the final character of the literal. */
164 #define PROXY_CONNECTION "proxy-connection"
165 #define CONNECTION "connection"
166 #define CONTENT_LENGTH "content-length"
167 #define TRANSFER_ENCODING "transfer-encoding"
168 #define UPGRADE "upgrade"
169 #define CHUNKED "chunked"
170 #define KEEP_ALIVE "keep-alive"
171 #define CLOSE "close"
174 static const char *method_strings[] =
176 #define XX(num, name, string) #string,
182 /* Tokens as defined by rfc 2616. Also lowercases them.
183 * token = 1*<any CHAR except CTLs or separators>
184 * separators = "(" | ")" | "<" | ">" | "@"
185 * | "," | ";" | ":" | "\" | <">
186 * | "/" | "[" | "]" | "?" | "="
187 * | "{" | "}" | SP | HT
/* Lookup table indexed by byte value. A nonzero entry means the byte is
 * accepted as a token character and holds the value to use for it: 'A'-'Z'
 * map to 'a'-'z', every other accepted byte maps to itself. A zero entry
 * means the byte is not a token character. Only the first 128 entries are
 * written out; entries 128-255 are left to zero-initialization. Note that
 * index 32 (space) is nonzero in this table; STRICT_TOKEN() filters it out. */
189 static const char tokens[256] = {
190 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
191 0, 0, 0, 0, 0, 0, 0, 0,
192 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
193 0, 0, 0, 0, 0, 0, 0, 0,
194 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
195 0, 0, 0, 0, 0, 0, 0, 0,
196 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
197 0, 0, 0, 0, 0, 0, 0, 0,
198 /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
199 ' ', '!', 0, '#', '$', '%', '&', '\'',
200 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
201 0, 0, '*', '+', 0, '-', '.', 0,
202 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
203 '0', '1', '2', '3', '4', '5', '6', '7',
204 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
205 '8', '9', 0, 0, 0, 0, 0, 0,
206 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
207 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
208 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
209 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
210 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
211 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
212 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
213 'x', 'y', 'z', 0, 0, 0, '^', '_',
214 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
215 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
216 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
217 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
218 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
219 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
220 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
221 'x', 'y', 'z', 0, '|', 0, '~', 0 };
224 static const int8_t unhex[256] =
225 {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
226 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
227 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
228 , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
229 ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
230 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
231 ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
232 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
236 #if HTTP_PARSER_STRICT
/* Bitmap with one bit per byte value, packed eight to a byte: bit k of
 * element r describes byte value 8*r + k. It is read through BIT_AT() by
 * IS_URL_CHAR(). In the rows below the cleared bits are the control
 * characters, space (32), '#' (35), '?' (63) and del (127). The bits for
 * ht (9) and np (12) are produced by T(), which is defined outside this
 * view -- presumably it switches them on or off with HTTP_PARSER_STRICT;
 * TODO confirm against the macro definition. */
243 static const uint8_t normal_url_char[32] = {
244 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
245 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
246 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
247 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
248 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
249 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
250 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
251 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
252 /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
253 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
254 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
255 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
256 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
257 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
258 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
259 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
260 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
261 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
262 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
263 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
264 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
265 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
266 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
267 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
268 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
269 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
270 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
271 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
272 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
273 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
274 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
275 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
280 { s_dead = 1 /* important that this is > 0 */
293 , s_res_first_status_code
297 , s_res_line_almost_done
302 , s_req_spaces_before_url
305 , s_req_schema_slash_slash
308 , s_req_server_with_at
310 , s_req_query_string_start
312 , s_req_fragment_start
325 , s_req_line_almost_done
327 , s_header_field_start
329 , s_header_value_discard_ws
330 , s_header_value_discard_ws_almost_done
331 , s_header_value_discard_lws
332 , s_header_value_start
336 , s_header_almost_done
341 , s_chunk_size_almost_done
343 , s_headers_almost_done
346 /* Important: 's_headers_done' must be the last 'header' state. All
347 * states beyond this must be 'body' states. It is used for overflow
348 * checking. See the PARSING_HEADER() macro.
352 , s_chunk_data_almost_done
356 , s_body_identity_eof
/* Nonzero while `state` is a header state, i.e. it compares at or before
 * s_headers_done in enum state; used to decide whether a byte counts towards
 * the header-size limit. The argument is parenthesized so that a compound
 * expression (ternary, bitwise op, ...) is compared as a whole instead of
 * being split by operator precedence. */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
371 , h_matching_connection
372 , h_matching_proxy_connection
373 , h_matching_content_length
374 , h_matching_transfer_encoding
379 , h_content_length_num
380 , h_content_length_ws
381 , h_transfer_encoding
384 , h_matching_transfer_encoding_token_start
385 , h_matching_transfer_encoding_chunked
386 , h_matching_transfer_encoding_token
388 , h_matching_connection_token_start
389 , h_matching_connection_keep_alive
390 , h_matching_connection_close
391 , h_matching_connection_upgrade
392 , h_matching_connection_token
394 , h_transfer_encoding_chunked
395 , h_connection_keep_alive
397 , h_connection_upgrade
403 , s_http_userinfo_start
406 , s_http_host_v6_start
410 , s_http_host_v6_zone_start
411 , s_http_host_v6_zone
412 , s_http_host_port_start
416 /* Macros for character classes; depends on strict-mode */
/* ASCII character-class helpers.
 *
 * Every macro fully parenthesizes its parameter and its expansion, so an
 * expression argument such as `flag ? 'A' : 'B'` is classified as a whole.
 * Arguments may still be expanded more than once: pass only expressions
 * without side effects. */

/* Set bit 0x20 of c and truncate to unsigned char; maps 'A'-'Z' onto
 * 'a'-'z'. Other bytes are changed too, so use it only for range checks. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
/* Nonzero for 'a'-'z' and 'A'-'Z'. */
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
/* Nonzero for '0'-'9'. */
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
/* Nonzero for ASCII letters and digits. */
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
/* Nonzero for hexadecimal digits in either case. */
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
424 #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
425 (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
427 #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
428 (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
429 (c) == '$' || (c) == ',')
/* tokens[] lookup that additionally rejects the space character, which the
 * table itself stores as nonzero. Evaluates to 0 for a byte that is not a
 * token character, otherwise to the table entry for it. The parameter is
 * parenthesized in both the comparison and the cast so expression arguments
 * are handled as a whole; it is expanded twice, so it must not have side
 * effects. */
#define STRICT_TOKEN(c)     (((c) == ' ') ? 0 : tokens[(unsigned char)(c)])
433 #if HTTP_PARSER_STRICT
/* Character classes used when HTTP_PARSER_STRICT is enabled. The parameter
 * is parenthesized inside every cast and comparison so that an expression
 * argument is converted as a whole rather than only its first operand. */
#define TOKEN(c)            STRICT_TOKEN(c)
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
/* Permissive character classes: TOKEN() uses the tokens[] table as-is (so a
 * space is accepted), IS_URL_CHAR() also accepts any byte with the high bit
 * set, and IS_HOST_CHAR() also accepts '_'.
 * NOTE(review): presumably the non-strict branch of HTTP_PARSER_STRICT; the
 * enclosing directive is outside this view -- confirm.
 * The parameter is parenthesized inside every cast so that an expression
 * argument such as `'a' + n` is truncated as a whole before indexing. */
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
446 * Verify that a char is a valid visible (printable) US-ASCII
447 * character or %x80-FF
/* Nonzero when ch may appear in a header value: CR, LF, horizontal tab (9),
 * or any byte above 31 other than DEL (127) -- which includes space and the
 * range 0x80-0xFF. The parameter is parenthesized everywhere so that an
 * expression argument is tested as a whole; it is expanded several times,
 * so it must not have side effects. */
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))
452 #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
455 #if HTTP_PARSER_STRICT
456 # define STRICT_CHECK(cond) \
459 SET_ERRNO(HPE_STRICT); \
463 # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
465 # define STRICT_CHECK(cond)
466 # define NEW_MESSAGE() start_state
470 /* Map errno values to strings for human-readable output */
471 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
474 const char *description;
475 } http_strerror_tab[] = {
476 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
478 #undef HTTP_STRERROR_GEN
480 int http_message_needs_eof(const http_parser *parser);
484 * This is designed to be shared by http_parser_execute() for URL validation,
485 * hence it has a state transition + byte-for-byte interface. In addition, it
486 * is meant to be embedded in http_parser_parse_url(), which does the dirty
487 * work of turning state transitions into URL components for its API.
489 * This function should only be invoked with non-space characters. It is
490 * assumed that the caller cares about (and can detect) the transition between
491 * URL and non-URL states by looking for these.
494 parse_url_char(enum state s, const char ch)
496 if (ch == ' ' || ch == '\r' || ch == '\n') {
500 #if HTTP_PARSER_STRICT
501 if (ch == '\t' || ch == '\f') {
507 case s_req_spaces_before_url:
508 /* Proxied requests are followed by scheme of an absolute URI (alpha).
509 * All methods except CONNECT are followed by '/' or '*'.
512 if (ch == '/' || ch == '*') {
528 return s_req_schema_slash;
533 case s_req_schema_slash:
535 return s_req_schema_slash_slash;
540 case s_req_schema_slash_slash:
542 return s_req_server_start;
547 case s_req_server_with_at:
553 case s_req_server_start:
560 return s_req_query_string_start;
564 return s_req_server_with_at;
567 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
574 if (IS_URL_CHAR(ch)) {
580 return s_req_query_string_start;
583 return s_req_fragment_start;
588 case s_req_query_string_start:
589 case s_req_query_string:
590 if (IS_URL_CHAR(ch)) {
591 return s_req_query_string;
596 /* allow extra '?' in query string */
597 return s_req_query_string;
600 return s_req_fragment_start;
605 case s_req_fragment_start:
606 if (IS_URL_CHAR(ch)) {
607 return s_req_fragment;
612 return s_req_fragment;
621 if (IS_URL_CHAR(ch)) {
637 /* We should never fall out of the switch above unless there's an error */
641 size_t http_parser_execute (http_parser *parser,
642 const http_parser_settings *settings,
648 const char *p = data;
649 const char *header_field_mark = 0;
650 const char *header_value_mark = 0;
651 const char *url_mark = 0;
652 const char *body_mark = 0;
653 const char *status_mark = 0;
654 enum state p_state = (enum state) parser->state;
655 const unsigned int lenient = parser->lenient_http_headers;
656 uint32_t nread = parser->nread;
658 /* We're in an error state. Don't bother doing anything. */
659 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
664 switch (CURRENT_STATE()) {
665 case s_body_identity_eof:
666 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
669 CALLBACK_NOTIFY_NOADVANCE(message_complete);
673 case s_start_req_or_res:
679 SET_ERRNO(HPE_INVALID_EOF_STATE);
685 if (CURRENT_STATE() == s_header_field)
686 header_field_mark = data;
687 if (CURRENT_STATE() == s_header_value)
688 header_value_mark = data;
689 switch (CURRENT_STATE()) {
692 case s_req_schema_slash:
693 case s_req_schema_slash_slash:
694 case s_req_server_start:
696 case s_req_server_with_at:
697 case s_req_query_string_start:
698 case s_req_query_string:
699 case s_req_fragment_start:
710 for (p=data; p != data + len; p++) {
713 if (PARSING_HEADER(CURRENT_STATE()))
714 COUNT_HEADER_SIZE(1);
717 switch (CURRENT_STATE()) {
720 /* this state is used after a 'Connection: close' message
721 * the parser will error out if it reads another message
723 if (LIKELY(ch == CR || ch == LF))
726 SET_ERRNO(HPE_CLOSED_CONNECTION);
729 case s_start_req_or_res:
731 if (ch == CR || ch == LF)
734 parser->content_length = ULLONG_MAX;
737 UPDATE_STATE(s_res_or_resp_H);
739 CALLBACK_NOTIFY(message_begin);
741 parser->type = HTTP_REQUEST;
742 UPDATE_STATE(s_start_req);
749 case s_res_or_resp_H:
751 parser->type = HTTP_RESPONSE;
752 UPDATE_STATE(s_res_HT);
754 if (UNLIKELY(ch != 'E')) {
755 SET_ERRNO(HPE_INVALID_CONSTANT);
759 parser->type = HTTP_REQUEST;
760 parser->method = HTTP_HEAD;
762 UPDATE_STATE(s_req_method);
768 if (ch == CR || ch == LF)
771 parser->content_length = ULLONG_MAX;
774 UPDATE_STATE(s_res_H);
776 SET_ERRNO(HPE_INVALID_CONSTANT);
780 CALLBACK_NOTIFY(message_begin);
785 STRICT_CHECK(ch != 'T');
786 UPDATE_STATE(s_res_HT);
790 STRICT_CHECK(ch != 'T');
791 UPDATE_STATE(s_res_HTT);
795 STRICT_CHECK(ch != 'P');
796 UPDATE_STATE(s_res_HTTP);
800 STRICT_CHECK(ch != '/');
801 UPDATE_STATE(s_res_http_major);
804 case s_res_http_major:
805 if (UNLIKELY(!IS_NUM(ch))) {
806 SET_ERRNO(HPE_INVALID_VERSION);
810 parser->http_major = ch - '0';
811 UPDATE_STATE(s_res_http_dot);
816 if (UNLIKELY(ch != '.')) {
817 SET_ERRNO(HPE_INVALID_VERSION);
821 UPDATE_STATE(s_res_http_minor);
825 case s_res_http_minor:
826 if (UNLIKELY(!IS_NUM(ch))) {
827 SET_ERRNO(HPE_INVALID_VERSION);
831 parser->http_minor = ch - '0';
832 UPDATE_STATE(s_res_http_end);
837 if (UNLIKELY(ch != ' ')) {
838 SET_ERRNO(HPE_INVALID_VERSION);
842 UPDATE_STATE(s_res_first_status_code);
846 case s_res_first_status_code:
853 SET_ERRNO(HPE_INVALID_STATUS);
856 parser->status_code = ch - '0';
857 UPDATE_STATE(s_res_status_code);
861 case s_res_status_code:
866 UPDATE_STATE(s_res_status_start);
870 UPDATE_STATE(s_res_status_start);
874 SET_ERRNO(HPE_INVALID_STATUS);
880 parser->status_code *= 10;
881 parser->status_code += ch - '0';
883 if (UNLIKELY(parser->status_code > 999)) {
884 SET_ERRNO(HPE_INVALID_STATUS);
891 case s_res_status_start:
894 UPDATE_STATE(s_res_status);
897 if (ch == CR || ch == LF)
905 UPDATE_STATE(s_res_line_almost_done);
906 CALLBACK_DATA(status);
911 UPDATE_STATE(s_header_field_start);
912 CALLBACK_DATA(status);
918 case s_res_line_almost_done:
919 STRICT_CHECK(ch != LF);
920 UPDATE_STATE(s_header_field_start);
925 if (ch == CR || ch == LF)
928 parser->content_length = ULLONG_MAX;
930 if (UNLIKELY(!IS_ALPHA(ch))) {
931 SET_ERRNO(HPE_INVALID_METHOD);
935 parser->method = (enum http_method) 0;
938 case 'A': parser->method = HTTP_ACL; break;
939 case 'B': parser->method = HTTP_BIND; break;
940 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
941 case 'D': parser->method = HTTP_DELETE; break;
942 case 'G': parser->method = HTTP_GET; break;
943 case 'H': parser->method = HTTP_HEAD; break;
944 case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
945 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
946 case 'N': parser->method = HTTP_NOTIFY; break;
947 case 'O': parser->method = HTTP_OPTIONS; break;
948 case 'P': parser->method = HTTP_POST;
949 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
951 case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
952 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
953 case 'T': parser->method = HTTP_TRACE; break;
954 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
956 SET_ERRNO(HPE_INVALID_METHOD);
959 UPDATE_STATE(s_req_method);
961 CALLBACK_NOTIFY(message_begin);
969 if (UNLIKELY(ch == '\0')) {
970 SET_ERRNO(HPE_INVALID_METHOD);
974 matcher = method_strings[parser->method];
975 if (ch == ' ' && matcher[parser->index] == '\0') {
976 UPDATE_STATE(s_req_spaces_before_url);
977 } else if (ch == matcher[parser->index]) {
979 } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
981 switch (parser->method << 16 | parser->index << 8 | ch) {
982 #define XX(meth, pos, ch, new_meth) \
983 case (HTTP_##meth << 16 | pos << 8 | ch): \
984 parser->method = HTTP_##new_meth; break;
986 XX(POST, 1, 'U', PUT)
987 XX(POST, 1, 'A', PATCH)
988 XX(POST, 1, 'R', PROPFIND)
989 XX(PUT, 2, 'R', PURGE)
990 XX(CONNECT, 1, 'H', CHECKOUT)
991 XX(CONNECT, 2, 'P', COPY)
992 XX(MKCOL, 1, 'O', MOVE)
993 XX(MKCOL, 1, 'E', MERGE)
994 XX(MKCOL, 1, '-', MSEARCH)
995 XX(MKCOL, 2, 'A', MKACTIVITY)
996 XX(MKCOL, 3, 'A', MKCALENDAR)
997 XX(SUBSCRIBE, 1, 'E', SEARCH)
998 XX(SUBSCRIBE, 1, 'O', SOURCE)
999 XX(REPORT, 2, 'B', REBIND)
1000 XX(PROPFIND, 4, 'P', PROPPATCH)
1001 XX(LOCK, 1, 'I', LINK)
1002 XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
1003 XX(UNLOCK, 2, 'B', UNBIND)
1004 XX(UNLOCK, 3, 'I', UNLINK)
1007 SET_ERRNO(HPE_INVALID_METHOD);
1011 SET_ERRNO(HPE_INVALID_METHOD);
1019 case s_req_spaces_before_url:
1021 if (ch == ' ') break;
1024 if (parser->method == HTTP_CONNECT) {
1025 UPDATE_STATE(s_req_server_start);
1028 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1029 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1030 SET_ERRNO(HPE_INVALID_URL);
1038 case s_req_schema_slash:
1039 case s_req_schema_slash_slash:
1040 case s_req_server_start:
1043 /* No whitespace allowed here */
1047 SET_ERRNO(HPE_INVALID_URL);
1050 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1051 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1052 SET_ERRNO(HPE_INVALID_URL);
1061 case s_req_server_with_at:
1063 case s_req_query_string_start:
1064 case s_req_query_string:
1065 case s_req_fragment_start:
1066 case s_req_fragment:
1070 UPDATE_STATE(s_req_http_start);
1075 parser->http_major = 0;
1076 parser->http_minor = 9;
1077 UPDATE_STATE((ch == CR) ?
1078 s_req_line_almost_done :
1079 s_header_field_start);
1083 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1084 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1085 SET_ERRNO(HPE_INVALID_URL);
1092 case s_req_http_start:
1097 UPDATE_STATE(s_req_http_H);
1100 if (parser->method == HTTP_SOURCE) {
1101 UPDATE_STATE(s_req_http_I);
1106 SET_ERRNO(HPE_INVALID_CONSTANT);
1112 STRICT_CHECK(ch != 'T');
1113 UPDATE_STATE(s_req_http_HT);
1117 STRICT_CHECK(ch != 'T');
1118 UPDATE_STATE(s_req_http_HTT);
1121 case s_req_http_HTT:
1122 STRICT_CHECK(ch != 'P');
1123 UPDATE_STATE(s_req_http_HTTP);
1127 STRICT_CHECK(ch != 'C');
1128 UPDATE_STATE(s_req_http_IC);
1132 STRICT_CHECK(ch != 'E');
1133 UPDATE_STATE(s_req_http_HTTP); /* Treat "ICE" as "HTTP". */
1136 case s_req_http_HTTP:
1137 STRICT_CHECK(ch != '/');
1138 UPDATE_STATE(s_req_http_major);
1141 case s_req_http_major:
1142 if (UNLIKELY(!IS_NUM(ch))) {
1143 SET_ERRNO(HPE_INVALID_VERSION);
1147 parser->http_major = ch - '0';
1148 UPDATE_STATE(s_req_http_dot);
1151 case s_req_http_dot:
1153 if (UNLIKELY(ch != '.')) {
1154 SET_ERRNO(HPE_INVALID_VERSION);
1158 UPDATE_STATE(s_req_http_minor);
1162 case s_req_http_minor:
1163 if (UNLIKELY(!IS_NUM(ch))) {
1164 SET_ERRNO(HPE_INVALID_VERSION);
1168 parser->http_minor = ch - '0';
1169 UPDATE_STATE(s_req_http_end);
1172 case s_req_http_end:
1175 UPDATE_STATE(s_req_line_almost_done);
1180 UPDATE_STATE(s_header_field_start);
1184 SET_ERRNO(HPE_INVALID_VERSION);
1189 /* end of request line */
1190 case s_req_line_almost_done:
1192 if (UNLIKELY(ch != LF)) {
1193 SET_ERRNO(HPE_LF_EXPECTED);
1197 UPDATE_STATE(s_header_field_start);
1201 case s_header_field_start:
1204 UPDATE_STATE(s_headers_almost_done);
1209 /* they might be just sending \n instead of \r\n so this would be
1210 * the second \n to denote the end of headers*/
1211 UPDATE_STATE(s_headers_almost_done);
1218 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1225 UPDATE_STATE(s_header_field);
1229 parser->header_state = h_C;
1233 parser->header_state = h_matching_proxy_connection;
1237 parser->header_state = h_matching_transfer_encoding;
1241 parser->header_state = h_matching_upgrade;
1245 parser->header_state = h_general;
1251 case s_header_field:
1253 const char* start = p;
1254 for (; p != data + len; p++) {
1261 switch (parser->header_state) {
1263 size_t left = data + len - p;
1264 const char* pe = p + MIN(left, max_header_size);
1265 while (p+1 < pe && TOKEN(p[1])) {
1273 parser->header_state = (c == 'o' ? h_CO : h_general);
1278 parser->header_state = (c == 'n' ? h_CON : h_general);
1285 parser->header_state = h_matching_connection;
1288 parser->header_state = h_matching_content_length;
1291 parser->header_state = h_general;
1298 case h_matching_connection:
1300 if (parser->index > sizeof(CONNECTION)-1
1301 || c != CONNECTION[parser->index]) {
1302 parser->header_state = h_general;
1303 } else if (parser->index == sizeof(CONNECTION)-2) {
1304 parser->header_state = h_connection;
1308 /* proxy-connection */
1310 case h_matching_proxy_connection:
1312 if (parser->index > sizeof(PROXY_CONNECTION)-1
1313 || c != PROXY_CONNECTION[parser->index]) {
1314 parser->header_state = h_general;
1315 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1316 parser->header_state = h_connection;
1320 /* content-length */
1322 case h_matching_content_length:
1324 if (parser->index > sizeof(CONTENT_LENGTH)-1
1325 || c != CONTENT_LENGTH[parser->index]) {
1326 parser->header_state = h_general;
1327 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1328 parser->header_state = h_content_length;
1332 /* transfer-encoding */
1334 case h_matching_transfer_encoding:
1336 if (parser->index > sizeof(TRANSFER_ENCODING)-1
1337 || c != TRANSFER_ENCODING[parser->index]) {
1338 parser->header_state = h_general;
1339 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1340 parser->header_state = h_transfer_encoding;
1341 parser->flags |= F_TRANSFER_ENCODING;
1347 case h_matching_upgrade:
1349 if (parser->index > sizeof(UPGRADE)-1
1350 || c != UPGRADE[parser->index]) {
1351 parser->header_state = h_general;
1352 } else if (parser->index == sizeof(UPGRADE)-2) {
1353 parser->header_state = h_upgrade;
1358 case h_content_length:
1359 case h_transfer_encoding:
1361 if (ch != ' ') parser->header_state = h_general;
1365 assert(0 && "Unknown header_state");
1370 if (p == data + len) {
1372 COUNT_HEADER_SIZE(p - start);
1376 COUNT_HEADER_SIZE(p - start);
1379 UPDATE_STATE(s_header_value_discard_ws);
1380 CALLBACK_DATA(header_field);
1384 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1388 case s_header_value_discard_ws:
1389 if (ch == ' ' || ch == '\t') break;
1392 UPDATE_STATE(s_header_value_discard_ws_almost_done);
1397 UPDATE_STATE(s_header_value_discard_lws);
1403 case s_header_value_start:
1407 UPDATE_STATE(s_header_value);
1412 switch (parser->header_state) {
1414 parser->flags |= F_UPGRADE;
1415 parser->header_state = h_general;
1418 case h_transfer_encoding:
1419 /* looking for 'Transfer-Encoding: chunked' */
1421 parser->header_state = h_matching_transfer_encoding_chunked;
1423 parser->header_state = h_matching_transfer_encoding_token;
1427 /* Multi-value `Transfer-Encoding` header */
1428 case h_matching_transfer_encoding_token_start:
1431 case h_content_length:
1432 if (UNLIKELY(!IS_NUM(ch))) {
1433 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1437 if (parser->flags & F_CONTENTLENGTH) {
1438 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1442 parser->flags |= F_CONTENTLENGTH;
1443 parser->content_length = ch - '0';
1444 parser->header_state = h_content_length_num;
1447 /* when obsolete line folding is encountered for content length
1448 * continue to the s_header_value state */
1449 case h_content_length_ws:
1453 /* looking for 'Connection: keep-alive' */
1455 parser->header_state = h_matching_connection_keep_alive;
1456 /* looking for 'Connection: close' */
1457 } else if (c == 'c') {
1458 parser->header_state = h_matching_connection_close;
1459 } else if (c == 'u') {
1460 parser->header_state = h_matching_connection_upgrade;
1462 parser->header_state = h_matching_connection_token;
1466 /* Multi-value `Connection` header */
1467 case h_matching_connection_token_start:
1471 parser->header_state = h_general;
1477 case s_header_value:
1479 const char* start = p;
1480 enum header_states h_state = (enum header_states) parser->header_state;
1481 for (; p != data + len; p++) {
1484 UPDATE_STATE(s_header_almost_done);
1485 parser->header_state = h_state;
1486 CALLBACK_DATA(header_value);
1491 UPDATE_STATE(s_header_almost_done);
1492 COUNT_HEADER_SIZE(p - start);
1493 parser->header_state = h_state;
1494 CALLBACK_DATA_NOADVANCE(header_value);
1498 if (!lenient && !IS_HEADER_CHAR(ch)) {
1499 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1508 size_t left = data + len - p;
1509 const char* pe = p + MIN(left, max_header_size);
1511 for (; p != pe; p++) {
1513 if (ch == CR || ch == LF) {
1517 if (!lenient && !IS_HEADER_CHAR(ch)) {
1518 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1522 if (p == data + len)
1528 case h_transfer_encoding:
1529 assert(0 && "Shouldn't get here.");
1532 case h_content_length:
1533 if (ch == ' ') break;
1534 h_state = h_content_length_num;
1537 case h_content_length_num:
1542 h_state = h_content_length_ws;
1546 if (UNLIKELY(!IS_NUM(ch))) {
1547 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1548 parser->header_state = h_state;
1552 t = parser->content_length;
1556 /* Overflow? Test against a conservative limit for simplicity. */
1557 if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1558 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1559 parser->header_state = h_state;
1563 parser->content_length = t;
1567 case h_content_length_ws:
1568 if (ch == ' ') break;
1569 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1570 parser->header_state = h_state;
1573 /* Transfer-Encoding: chunked */
1574 case h_matching_transfer_encoding_token_start:
1575 /* looking for 'Transfer-Encoding: chunked' */
1577 h_state = h_matching_transfer_encoding_chunked;
1578 } else if (STRICT_TOKEN(c)) {
1579 /* TODO(indutny): similar code below does this, but why?
1580 * At the very least it seems to be inconsistent given that
1581 * h_matching_transfer_encoding_token does not check for
1584 h_state = h_matching_transfer_encoding_token;
1585 } else if (c == ' ' || c == '\t') {
1588 h_state = h_general;
1592 case h_matching_transfer_encoding_chunked:
1594 if (parser->index > sizeof(CHUNKED)-1
1595 || c != CHUNKED[parser->index]) {
1596 h_state = h_matching_transfer_encoding_token;
1597 } else if (parser->index == sizeof(CHUNKED)-2) {
1598 h_state = h_transfer_encoding_chunked;
1602 case h_matching_transfer_encoding_token:
1604 h_state = h_matching_transfer_encoding_token_start;
1609 case h_matching_connection_token_start:
1610 /* looking for 'Connection: keep-alive' */
1612 h_state = h_matching_connection_keep_alive;
1613 /* looking for 'Connection: close' */
1614 } else if (c == 'c') {
1615 h_state = h_matching_connection_close;
1616 } else if (c == 'u') {
1617 h_state = h_matching_connection_upgrade;
1618 } else if (STRICT_TOKEN(c)) {
1619 h_state = h_matching_connection_token;
1620 } else if (c == ' ' || c == '\t') {
1623 h_state = h_general;
1627 /* looking for 'Connection: keep-alive' */
1628 case h_matching_connection_keep_alive:
1630 if (parser->index > sizeof(KEEP_ALIVE)-1
1631 || c != KEEP_ALIVE[parser->index]) {
1632 h_state = h_matching_connection_token;
1633 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1634 h_state = h_connection_keep_alive;
1638 /* looking for 'Connection: close' */
1639 case h_matching_connection_close:
1641 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1642 h_state = h_matching_connection_token;
1643 } else if (parser->index == sizeof(CLOSE)-2) {
1644 h_state = h_connection_close;
1648 /* looking for 'Connection: upgrade' */
1649 case h_matching_connection_upgrade:
1651 if (parser->index > sizeof(UPGRADE) - 1 ||
1652 c != UPGRADE[parser->index]) {
1653 h_state = h_matching_connection_token;
1654 } else if (parser->index == sizeof(UPGRADE)-2) {
1655 h_state = h_connection_upgrade;
1659 case h_matching_connection_token:
1661 h_state = h_matching_connection_token_start;
1666 case h_transfer_encoding_chunked:
1667 if (ch != ' ') h_state = h_matching_transfer_encoding_token;
1670 case h_connection_keep_alive:
1671 case h_connection_close:
1672 case h_connection_upgrade:
1674 if (h_state == h_connection_keep_alive) {
1675 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1676 } else if (h_state == h_connection_close) {
1677 parser->flags |= F_CONNECTION_CLOSE;
1678 } else if (h_state == h_connection_upgrade) {
1679 parser->flags |= F_CONNECTION_UPGRADE;
1681 h_state = h_matching_connection_token_start;
1683 } else if (ch != ' ') {
1684 h_state = h_matching_connection_token;
1689 UPDATE_STATE(s_header_value);
1690 h_state = h_general;
1694 parser->header_state = h_state;
1696 if (p == data + len)
1699 COUNT_HEADER_SIZE(p - start);
1703 case s_header_almost_done:
1705 if (UNLIKELY(ch != LF)) {
1706 SET_ERRNO(HPE_LF_EXPECTED);
1710 UPDATE_STATE(s_header_value_lws);
1714 case s_header_value_lws:
1716 if (ch == ' ' || ch == '\t') {
1717 if (parser->header_state == h_content_length_num) {
1718 /* treat obsolete line folding as space */
1719 parser->header_state = h_content_length_ws;
1721 UPDATE_STATE(s_header_value_start);
1725 /* finished the header */
1726 switch (parser->header_state) {
1727 case h_connection_keep_alive:
1728 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1730 case h_connection_close:
1731 parser->flags |= F_CONNECTION_CLOSE;
1733 case h_transfer_encoding_chunked:
1734 parser->flags |= F_CHUNKED;
1736 case h_connection_upgrade:
1737 parser->flags |= F_CONNECTION_UPGRADE;
1743 UPDATE_STATE(s_header_field_start);
1747 case s_header_value_discard_ws_almost_done:
1749 STRICT_CHECK(ch != LF);
1750 UPDATE_STATE(s_header_value_discard_lws);
1754 case s_header_value_discard_lws:
1756 if (ch == ' ' || ch == '\t') {
1757 UPDATE_STATE(s_header_value_discard_ws);
1760 switch (parser->header_state) {
1761 case h_connection_keep_alive:
1762 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1764 case h_connection_close:
1765 parser->flags |= F_CONNECTION_CLOSE;
1767 case h_connection_upgrade:
1768 parser->flags |= F_CONNECTION_UPGRADE;
1770 case h_transfer_encoding_chunked:
1771 parser->flags |= F_CHUNKED;
1773 case h_content_length:
1774 /* do not allow empty content length */
1775 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1782 /* header value was empty */
1784 UPDATE_STATE(s_header_field_start);
1785 CALLBACK_DATA_NOADVANCE(header_value);
1790 case s_headers_almost_done:
1792 STRICT_CHECK(ch != LF);
1794 if (parser->flags & F_TRAILING) {
1795 /* End of a chunked request */
1796 UPDATE_STATE(s_message_done);
1797 CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1801 /* Cannot us transfer-encoding and a content-length header together
1802 per the HTTP specification. (RFC 7230 Section 3.3.3) */
1803 if ((parser->flags & F_TRANSFER_ENCODING) &&
1804 (parser->flags & F_CONTENTLENGTH)) {
1805 /* Allow it for lenient parsing as long as `Transfer-Encoding` is
1808 if (!lenient || (parser->flags & F_CHUNKED)) {
1809 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1814 UPDATE_STATE(s_headers_done);
1816 /* Set this here so that on_headers_complete() callbacks can see it */
1817 if ((parser->flags & F_UPGRADE) &&
1818 (parser->flags & F_CONNECTION_UPGRADE)) {
1819 /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1820 * mandatory only when it is a 101 Switching Protocols response,
1821 * otherwise it is purely informational, to announce support.
1824 (parser->type == HTTP_REQUEST || parser->status_code == 101);
1826 parser->upgrade = (parser->method == HTTP_CONNECT);
1829 /* Here we call the headers_complete callback. This is somewhat
1830 * different than other callbacks because if the user returns 1, we
1831 * will interpret that as saying that this message has no body. This
1832 * is needed for the annoying case of recieving a response to a HEAD
1835 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1836 * we have to simulate it by handling a change in errno below.
1838 if (settings->on_headers_complete) {
1839 switch (settings->on_headers_complete(parser)) {
1844 parser->upgrade = 1;
1848 parser->flags |= F_SKIPBODY;
1852 SET_ERRNO(HPE_CB_headers_complete);
1853 RETURN(p - data); /* Error */
1857 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1864 case s_headers_done:
1867 STRICT_CHECK(ch != LF);
1872 hasBody = parser->flags & F_CHUNKED ||
1873 (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1874 if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1875 (parser->flags & F_SKIPBODY) || !hasBody)) {
1876 /* Exit, the rest of the message is in a different protocol. */
1877 UPDATE_STATE(NEW_MESSAGE());
1878 CALLBACK_NOTIFY(message_complete);
1879 RETURN((p - data) + 1);
1882 if (parser->flags & F_SKIPBODY) {
1883 UPDATE_STATE(NEW_MESSAGE());
1884 CALLBACK_NOTIFY(message_complete);
1885 } else if (parser->flags & F_CHUNKED) {
1886 /* chunked encoding - ignore Content-Length header,
1887 * prepare for a chunk */
1888 UPDATE_STATE(s_chunk_size_start);
1889 } else if (parser->flags & F_TRANSFER_ENCODING) {
1890 if (parser->type == HTTP_REQUEST && !lenient) {
1891 /* RFC 7230 3.3.3 */
1893 /* If a Transfer-Encoding header field
1894 * is present in a request and the chunked transfer coding is not
1895 * the final encoding, the message body length cannot be determined
1896 * reliably; the server MUST respond with the 400 (Bad Request)
1897 * status code and then close the connection.
1899 SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
1900 RETURN(p - data); /* Error */
1902 /* RFC 7230 3.3.3 */
1904 /* If a Transfer-Encoding header field is present in a response and
1905 * the chunked transfer coding is not the final encoding, the
1906 * message body length is determined by reading the connection until
1907 * it is closed by the server.
1909 UPDATE_STATE(s_body_identity_eof);
1912 if (parser->content_length == 0) {
1913 /* Content-Length header given but zero: Content-Length: 0\r\n */
1914 UPDATE_STATE(NEW_MESSAGE());
1915 CALLBACK_NOTIFY(message_complete);
1916 } else if (parser->content_length != ULLONG_MAX) {
1917 /* Content-Length header given and non-zero */
1918 UPDATE_STATE(s_body_identity);
1920 if (!http_message_needs_eof(parser)) {
1921 /* Assume content-length 0 - read the next */
1922 UPDATE_STATE(NEW_MESSAGE());
1923 CALLBACK_NOTIFY(message_complete);
1925 /* Read body until EOF */
1926 UPDATE_STATE(s_body_identity_eof);
1934 case s_body_identity:
1936 uint64_t to_read = MIN(parser->content_length,
1937 (uint64_t) ((data + len) - p));
1939 assert(parser->content_length != 0
1940 && parser->content_length != ULLONG_MAX);
1942 /* The difference between advancing content_length and p is because
1943 * the latter will automaticaly advance on the next loop iteration.
1944 * Further, if content_length ends up at 0, we want to see the last
1945 * byte again for our message complete callback.
1948 parser->content_length -= to_read;
1951 if (parser->content_length == 0) {
1952 UPDATE_STATE(s_message_done);
1954 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1956 * The alternative to doing this is to wait for the next byte to
1957 * trigger the data callback, just as in every other case. The
1958 * problem with this is that this makes it difficult for the test
1959 * harness to distinguish between complete-on-EOF and
1960 * complete-on-length. It's not clear that this distinction is
1961 * important for applications, but let's keep it for now.
1963 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1970 /* read until EOF */
1971 case s_body_identity_eof:
1977 case s_message_done:
1978 UPDATE_STATE(NEW_MESSAGE());
1979 CALLBACK_NOTIFY(message_complete);
1980 if (parser->upgrade) {
1981 /* Exit, the rest of the message is in a different protocol. */
1982 RETURN((p - data) + 1);
1986 case s_chunk_size_start:
1989 assert(parser->flags & F_CHUNKED);
1991 unhex_val = unhex[(unsigned char)ch];
1992 if (UNLIKELY(unhex_val == -1)) {
1993 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1997 parser->content_length = unhex_val;
1998 UPDATE_STATE(s_chunk_size);
2006 assert(parser->flags & F_CHUNKED);
2009 UPDATE_STATE(s_chunk_size_almost_done);
2013 unhex_val = unhex[(unsigned char)ch];
2015 if (unhex_val == -1) {
2016 if (ch == ';' || ch == ' ') {
2017 UPDATE_STATE(s_chunk_parameters);
2021 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
2025 t = parser->content_length;
2029 /* Overflow? Test against a conservative limit for simplicity. */
2030 if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
2031 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
2035 parser->content_length = t;
2039 case s_chunk_parameters:
2041 assert(parser->flags & F_CHUNKED);
2042 /* just ignore this shit. TODO check for overflow */
2044 UPDATE_STATE(s_chunk_size_almost_done);
2050 case s_chunk_size_almost_done:
2052 assert(parser->flags & F_CHUNKED);
2053 STRICT_CHECK(ch != LF);
2058 if (parser->content_length == 0) {
2059 parser->flags |= F_TRAILING;
2060 UPDATE_STATE(s_header_field_start);
2062 UPDATE_STATE(s_chunk_data);
2064 CALLBACK_NOTIFY(chunk_header);
2070 uint64_t to_read = MIN(parser->content_length,
2071 (uint64_t) ((data + len) - p));
2073 assert(parser->flags & F_CHUNKED);
2074 assert(parser->content_length != 0
2075 && parser->content_length != ULLONG_MAX);
2077 /* See the explanation in s_body_identity for why the content
2078 * length and data pointers are managed this way.
2081 parser->content_length -= to_read;
2084 if (parser->content_length == 0) {
2085 UPDATE_STATE(s_chunk_data_almost_done);
2091 case s_chunk_data_almost_done:
2092 assert(parser->flags & F_CHUNKED);
2093 assert(parser->content_length == 0);
2094 STRICT_CHECK(ch != CR);
2095 UPDATE_STATE(s_chunk_data_done);
2096 CALLBACK_DATA(body);
2099 case s_chunk_data_done:
2100 assert(parser->flags & F_CHUNKED);
2101 STRICT_CHECK(ch != LF);
2104 UPDATE_STATE(s_chunk_size_start);
2105 CALLBACK_NOTIFY(chunk_complete);
2109 assert(0 && "unhandled state");
2110 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2115 /* Run callbacks for any marks that we have leftover after we ran out of
2116 * bytes. There should be at most one of these set, so it's OK to invoke
2117 * them in series (unset marks will not result in callbacks).
2119 * We use the NOADVANCE() variety of callbacks here because 'p' has already
2120 * overflowed 'data' and this allows us to correct for the off-by-one that
2121 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2122 * value that's in-bounds).
2125 assert(((header_field_mark ? 1 : 0) +
2126 (header_value_mark ? 1 : 0) +
2127 (url_mark ? 1 : 0) +
2128 (body_mark ? 1 : 0) +
2129 (status_mark ? 1 : 0)) <= 1);
2131 CALLBACK_DATA_NOADVANCE(header_field);
2132 CALLBACK_DATA_NOADVANCE(header_value);
2133 CALLBACK_DATA_NOADVANCE(url);
2134 CALLBACK_DATA_NOADVANCE(body);
2135 CALLBACK_DATA_NOADVANCE(status);
2140 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2141 SET_ERRNO(HPE_UNKNOWN);
2148 /* Does the parser need to see an EOF to find the end of the message? */
2150 http_message_needs_eof (const http_parser *parser)
2152 if (parser->type == HTTP_REQUEST) {
2156 /* See RFC 2616 section 4.4 */
2157 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2158 parser->status_code == 204 || /* No Content */
2159 parser->status_code == 304 || /* Not Modified */
2160 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
2164 /* RFC 7230 3.3.3, see `s_headers_almost_done` */
2165 if ((parser->flags & F_TRANSFER_ENCODING) &&
2166 (parser->flags & F_CHUNKED) == 0) {
2170 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2179 http_should_keep_alive (const http_parser *parser)
2181 if (parser->http_major > 0 && parser->http_minor > 0) {
2183 if (parser->flags & F_CONNECTION_CLOSE) {
2187 /* HTTP/1.0 or earlier */
2188 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2193 return !http_message_needs_eof(parser);
2198 http_method_str (enum http_method m)
2200 return ELEM_AT(method_strings, m, "<unknown>");
2204 http_status_str (enum http_status s)
2207 #define XX(num, name, string) case HTTP_STATUS_##name: return #string;
2210 default: return "<unknown>";
2215 http_parser_init (http_parser *parser, enum http_parser_type t)
2217 void *data = parser->data; /* preserve application data */
2218 memset(parser, 0, sizeof(*parser));
2219 parser->data = data;
2221 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2222 parser->http_errno = HPE_OK;
2226 http_parser_settings_init(http_parser_settings *settings)
2228 memset(settings, 0, sizeof(*settings));
2232 http_errno_name(enum http_errno err) {
2233 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2234 return http_strerror_tab[err].name;
2238 http_errno_description(enum http_errno err) {
2239 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2240 return http_strerror_tab[err].description;
2243 static enum http_host_state
2244 http_parse_host_char(enum http_host_state s, const char ch) {
2246 case s_http_userinfo:
2247 case s_http_userinfo_start:
2249 return s_http_host_start;
2252 if (IS_USERINFO_CHAR(ch)) {
2253 return s_http_userinfo;
2257 case s_http_host_start:
2259 return s_http_host_v6_start;
2262 if (IS_HOST_CHAR(ch)) {
2269 if (IS_HOST_CHAR(ch)) {
2274 case s_http_host_v6_end:
2276 return s_http_host_port_start;
2281 case s_http_host_v6:
2283 return s_http_host_v6_end;
2287 case s_http_host_v6_start:
2288 if (IS_HEX(ch) || ch == ':' || ch == '.') {
2289 return s_http_host_v6;
2292 if (s == s_http_host_v6 && ch == '%') {
2293 return s_http_host_v6_zone_start;
2297 case s_http_host_v6_zone:
2299 return s_http_host_v6_end;
2303 case s_http_host_v6_zone_start:
2304 /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2305 if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2307 return s_http_host_v6_zone;
2311 case s_http_host_port:
2312 case s_http_host_port_start:
2314 return s_http_host_port;
2322 return s_http_host_dead;
2326 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2327 enum http_host_state s;
2330 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2332 assert(u->field_set & (1 << UF_HOST));
2334 u->field_data[UF_HOST].len = 0;
2336 s = found_at ? s_http_userinfo_start : s_http_host_start;
2338 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2339 enum http_host_state new_s = http_parse_host_char(s, *p);
2341 if (new_s == s_http_host_dead) {
2347 if (s != s_http_host) {
2348 u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2350 u->field_data[UF_HOST].len++;
2353 case s_http_host_v6:
2354 if (s != s_http_host_v6) {
2355 u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2357 u->field_data[UF_HOST].len++;
2360 case s_http_host_v6_zone_start:
2361 case s_http_host_v6_zone:
2362 u->field_data[UF_HOST].len++;
2365 case s_http_host_port:
2366 if (s != s_http_host_port) {
2367 u->field_data[UF_PORT].off = (uint16_t)(p - buf);
2368 u->field_data[UF_PORT].len = 0;
2369 u->field_set |= (1 << UF_PORT);
2371 u->field_data[UF_PORT].len++;
2374 case s_http_userinfo:
2375 if (s != s_http_userinfo) {
2376 u->field_data[UF_USERINFO].off = (uint16_t)(p - buf);
2377 u->field_data[UF_USERINFO].len = 0;
2378 u->field_set |= (1 << UF_USERINFO);
2380 u->field_data[UF_USERINFO].len++;
2389 /* Make sure we don't end somewhere unexpected */
2391 case s_http_host_start:
2392 case s_http_host_v6_start:
2393 case s_http_host_v6:
2394 case s_http_host_v6_zone_start:
2395 case s_http_host_v6_zone:
2396 case s_http_host_port_start:
2397 case s_http_userinfo:
2398 case s_http_userinfo_start:
2408 http_parser_url_init(struct http_parser_url *u) {
2409 memset(u, 0, sizeof(*u));
2413 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2414 struct http_parser_url *u)
2418 enum http_parser_url_fields uf, old_uf;
2425 u->port = u->field_set = 0;
2426 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2429 for (p = buf; p < buf + buflen; p++) {
2430 s = parse_url_char(s, *p);
2432 /* Figure out the next field that we're operating on */
2437 /* Skip delimeters */
2438 case s_req_schema_slash:
2439 case s_req_schema_slash_slash:
2440 case s_req_server_start:
2441 case s_req_query_string_start:
2442 case s_req_fragment_start:
2449 case s_req_server_with_at:
2461 case s_req_query_string:
2465 case s_req_fragment:
2470 assert(!"Unexpected state");
2474 /* Nothing's changed; soldier on */
2476 u->field_data[uf].len++;
2480 u->field_data[uf].off = (uint16_t)(p - buf);
2481 u->field_data[uf].len = 1;
2483 u->field_set |= (1 << uf);
2487 /* host must be present if there is a schema */
2488 /* parsing http:///toto will fail */
2489 if ((u->field_set & (1 << UF_SCHEMA)) &&
2490 (u->field_set & (1 << UF_HOST)) == 0) {
2494 if (u->field_set & (1 << UF_HOST)) {
2495 if (http_parse_host(buf, u, found_at) != 0) {
2500 /* CONNECT requests can only contain "hostname:port" */
2501 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2505 if (u->field_set & (1 << UF_PORT)) {
2512 off = u->field_data[UF_PORT].off;
2513 len = u->field_data[UF_PORT].len;
2514 end = buf + off + len;
2516 /* NOTE: The characters are already validated and are in the [0-9] range */
2517 assert(off + len <= buflen && "Port number overflow");
2519 for (p = buf + off; p < end; p++) {
2523 /* Ports have a max value of 2^16 */
2529 u->port = (uint16_t) v;
2536 http_parser_pause(http_parser *parser, int paused) {
2537 /* Users should only be pausing/unpausing a parser that is not in an error
2538 * state. In non-debug builds, there's not much that we can do about this
2539 * other than ignore it.
2541 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2542 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2543 uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
2544 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2546 assert(0 && "Attempting to pause parser in error state");
2551 http_body_is_final(const struct http_parser *parser) {
2552 return parser->state == s_message_done;
2556 http_parser_version(void) {
2557 return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2558 HTTP_PARSER_VERSION_MINOR * 0x00100 |
2559 HTTP_PARSER_VERSION_PATCH * 0x00001;
2563 http_parser_set_max_header_size(uint32_t size) {
2564 max_header_size = size;