1 /* Copyright Joyent, Inc. and other Node contributors.
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to
5 * deal in the Software without restriction, including without limitation the
6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 * sell copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 #include "http_parser.h"
28 static uint32_t max_header_size = HTTP_MAX_HEADER_SIZE;
31 # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
35 # define MIN(a,b) ((a) < (b) ? (a) : (b))
39 # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
43 # define BIT_AT(a, i) \
44 (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
45 (1 << ((unsigned int) (i) & 7))))
49 # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
52 #define SET_ERRNO(e) \
54 parser->nread = nread; \
55 parser->http_errno = (e); \
58 #define CURRENT_STATE() p_state
59 #define UPDATE_STATE(V) p_state = (enum state) (V);
62 parser->nread = nread; \
63 parser->state = CURRENT_STATE(); \
71 # define LIKELY(X) __builtin_expect(!!(X), 1)
72 # define UNLIKELY(X) __builtin_expect(!!(X), 0)
74 # define LIKELY(X) (X)
75 # define UNLIKELY(X) (X)
79 /* Run the notify callback FOR, returning ER if it fails */
80 #define CALLBACK_NOTIFY_(FOR, ER) \
82 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
84 if (LIKELY(settings->on_##FOR)) { \
85 parser->state = CURRENT_STATE(); \
86 if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
87 SET_ERRNO(HPE_CB_##FOR); \
89 UPDATE_STATE(parser->state); \
91 /* We either errored above or got paused; get out */ \
92 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
98 /* Run the notify callback FOR and consume the current byte */
99 #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
101 /* Run the notify callback FOR and don't consume the current byte */
102 #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
104 /* Run data callback FOR with LEN bytes, returning ER if it fails */
105 #define CALLBACK_DATA_(FOR, LEN, ER) \
107 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
110 if (LIKELY(settings->on_##FOR)) { \
111 parser->state = CURRENT_STATE(); \
113 settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
114 SET_ERRNO(HPE_CB_##FOR); \
116 UPDATE_STATE(parser->state); \
118 /* We either errored above or got paused; get out */ \
119 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
127 /* Run the data callback FOR and consume the current byte */
128 #define CALLBACK_DATA(FOR) \
129 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
131 /* Run the data callback FOR and don't consume the current byte */
132 #define CALLBACK_DATA_NOADVANCE(FOR) \
133 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
135 /* Set the mark FOR; non-destructive if mark is already set */
143 /* Don't allow the total size of the HTTP headers (including the status
144 * line) to exceed max_header_size. This check is here to protect
145 * embedders against denial-of-service attacks where the attacker feeds
146 * us a never-ending header that the embedder keeps buffering.
148 * This check is arguably the responsibility of embedders but we're doing
149 * it on the embedder's behalf because most won't bother and this way we
150 * make the web a little safer. max_header_size is still far bigger
151 * than any reasonable request or response so this should never affect
152 * day-to-day operation.
154 #define COUNT_HEADER_SIZE(V) \
156 nread += (uint32_t)(V); \
157 if (UNLIKELY(nread > max_header_size)) { \
158 SET_ERRNO(HPE_HEADER_OVERFLOW); \
164 #define PROXY_CONNECTION "proxy-connection"
165 #define CONNECTION "connection"
166 #define CONTENT_LENGTH "content-length"
167 #define TRANSFER_ENCODING "transfer-encoding"
168 #define UPGRADE "upgrade"
169 #define CHUNKED "chunked"
170 #define KEEP_ALIVE "keep-alive"
171 #define CLOSE "close"
174 static const char *method_strings[] =
176 #define XX(num, name, string) #string,
182 /* Tokens as defined by rfc 2616. Also lowercases them.
183 * token = 1*<any CHAR except CTLs or separators>
184 * separators = "(" | ")" | "<" | ">" | "@"
185 * | "," | ";" | ":" | "\" | <">
186 * | "/" | "[" | "]" | "?" | "="
187 * | "{" | "}" | SP | HT
189 static const char tokens[256] = {
190 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
191 0, 0, 0, 0, 0, 0, 0, 0,
192 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
193 0, 0, 0, 0, 0, 0, 0, 0,
194 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
195 0, 0, 0, 0, 0, 0, 0, 0,
196 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
197 0, 0, 0, 0, 0, 0, 0, 0,
198 /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
199 ' ', '!', 0, '#', '$', '%', '&', '\'',
200 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
201 0, 0, '*', '+', 0, '-', '.', 0,
202 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
203 '0', '1', '2', '3', '4', '5', '6', '7',
204 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
205 '8', '9', 0, 0, 0, 0, 0, 0,
206 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
207 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
208 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
209 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
210 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
211 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
212 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
213 'x', 'y', 'z', 0, 0, 0, '^', '_',
214 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
215 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
216 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
217 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
218 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
219 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
220 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
221 'x', 'y', 'z', 0, '|', 0, '~', 0 };
224 static const int8_t unhex[256] =
225 {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
226 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
227 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
228 , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
229 ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
230 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
231 ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
232 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
236 #if HTTP_PARSER_STRICT
243 static const uint8_t normal_url_char[32] = {
244 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
245 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
246 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
247 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
248 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
249 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
250 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
251 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
252 /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
253 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
254 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
255 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
256 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
257 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
258 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
259 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
260 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
261 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
262 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
263 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
264 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
265 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
266 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
267 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
268 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
269 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
270 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
271 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
272 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
273 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
274 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
275 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
280 { s_dead = 1 /* important that this is > 0 */
293 , s_res_first_status_code
297 , s_res_line_almost_done
302 , s_req_spaces_before_url
305 , s_req_schema_slash_slash
308 , s_req_server_with_at
310 , s_req_query_string_start
312 , s_req_fragment_start
325 , s_req_line_almost_done
327 , s_header_field_start
329 , s_header_value_discard_ws
330 , s_header_value_discard_ws_almost_done
331 , s_header_value_discard_lws
332 , s_header_value_start
336 , s_header_almost_done
341 , s_chunk_size_almost_done
343 , s_headers_almost_done
346 /* Important: 's_headers_done' must be the last 'header' state. All
347 * states beyond this must be 'body' states. It is used for overflow
348 * checking. See the PARSING_HEADER() macro.
352 , s_chunk_data_almost_done
356 , s_body_identity_eof
/* Non-zero while `state` is a header-phase state, i.e. it compares at or
 * before s_headers_done in enum state. The argument is parenthesized so
 * that any expression (including a conditional) can be passed safely. */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
371 , h_matching_connection
372 , h_matching_proxy_connection
373 , h_matching_content_length
374 , h_matching_transfer_encoding
379 , h_content_length_num
380 , h_content_length_ws
381 , h_transfer_encoding
384 , h_matching_transfer_encoding_chunked
385 , h_matching_connection_token_start
386 , h_matching_connection_keep_alive
387 , h_matching_connection_close
388 , h_matching_connection_upgrade
389 , h_matching_connection_token
391 , h_transfer_encoding_chunked
392 , h_connection_keep_alive
394 , h_connection_upgrade
400 , s_http_userinfo_start
403 , s_http_host_v6_start
407 , s_http_host_v6_zone_start
408 , s_http_host_v6_zone
409 , s_http_host_port_start
413 /* Macros for character classes; depends on strict-mode */
/* Character-class helpers. Each macro may evaluate its argument more than
 * once, so never pass an expression with side effects (e.g. *p++). */

/* Fold an ASCII letter to lower case by setting bit 0x20. Non-letters are
 * altered too, so callers must range-check the result (see IS_ALPHA). Both
 * the argument and the whole expansion are parenthesized so the macro is
 * safe inside larger expressions. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
/* Non-zero when c is an ASCII letter of either case. */
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
/* Non-zero when c is an ASCII decimal digit. */
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
/* Non-zero when c is an ASCII letter or decimal digit. */
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
/* Non-zero when c is a hexadecimal digit (0-9, a-f, A-F). */
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
421 #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
422 (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
424 #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
425 (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
426 (c) == '$' || (c) == ',')
/* Look up c in the tokens[] table, except that a space is never accepted:
 * yields 0 for ' ', otherwise the table entry for c (0 when c is not a
 * token character). The argument is evaluated twice, so it must be free of
 * side effects. */
#define STRICT_TOKEN(c)     (((c) == ' ') ? 0 : tokens[(unsigned char)(c)])
430 #if HTTP_PARSER_STRICT
431 #define TOKEN(c) STRICT_TOKEN(c)
432 #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
433 #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
435 #define TOKEN(c) tokens[(unsigned char)c]
436 #define IS_URL_CHAR(c) \
437 (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
438 #define IS_HOST_CHAR(c) \
439 (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
443 * Verify that a char is a valid visible (printable) US-ASCII
444 * character or %x80-FF
/* Non-zero when ch is acceptable inside a header value: CR, LF, horizontal
 * tab (9), or any byte above 31 other than DEL (127). The unsigned char
 * cast makes bytes 0x80-0xFF compare as > 31 even where plain char is
 * signed. ch is evaluated several times; pass no side effects. */
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))
449 #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
452 #if HTTP_PARSER_STRICT
453 # define STRICT_CHECK(cond) \
456 SET_ERRNO(HPE_STRICT); \
460 # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
462 # define STRICT_CHECK(cond)
463 # define NEW_MESSAGE() start_state
467 /* Map errno values to strings for human-readable output */
468 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
471 const char *description;
472 } http_strerror_tab[] = {
473 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
475 #undef HTTP_STRERROR_GEN
477 int http_message_needs_eof(const http_parser *parser);
481 * This is designed to be shared by http_parser_execute() for URL validation,
482 * hence it has a state transition + byte-for-byte interface. In addition, it
483 * is meant to be embedded in http_parser_parse_url(), which does the dirty
484 * work of turning state transitions into URL components for its API.
486 * This function should only be invoked with non-space characters. It is
487 * assumed that the caller cares about (and can detect) the transition between
488 * URL and non-URL states by looking for these.
491 parse_url_char(enum state s, const char ch)
493 if (ch == ' ' || ch == '\r' || ch == '\n') {
497 #if HTTP_PARSER_STRICT
498 if (ch == '\t' || ch == '\f') {
504 case s_req_spaces_before_url:
505 /* Proxied requests are followed by scheme of an absolute URI (alpha).
506 * All methods except CONNECT are followed by '/' or '*'.
509 if (ch == '/' || ch == '*') {
525 return s_req_schema_slash;
530 case s_req_schema_slash:
532 return s_req_schema_slash_slash;
537 case s_req_schema_slash_slash:
539 return s_req_server_start;
544 case s_req_server_with_at:
550 case s_req_server_start:
557 return s_req_query_string_start;
561 return s_req_server_with_at;
564 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
571 if (IS_URL_CHAR(ch)) {
577 return s_req_query_string_start;
580 return s_req_fragment_start;
585 case s_req_query_string_start:
586 case s_req_query_string:
587 if (IS_URL_CHAR(ch)) {
588 return s_req_query_string;
593 /* allow extra '?' in query string */
594 return s_req_query_string;
597 return s_req_fragment_start;
602 case s_req_fragment_start:
603 if (IS_URL_CHAR(ch)) {
604 return s_req_fragment;
609 return s_req_fragment;
618 if (IS_URL_CHAR(ch)) {
634 /* We should never fall out of the switch above unless there's an error */
638 size_t http_parser_execute (http_parser *parser,
639 const http_parser_settings *settings,
645 const char *p = data;
646 const char *header_field_mark = 0;
647 const char *header_value_mark = 0;
648 const char *url_mark = 0;
649 const char *body_mark = 0;
650 const char *status_mark = 0;
651 enum state p_state = (enum state) parser->state;
652 const unsigned int lenient = parser->lenient_http_headers;
653 uint32_t nread = parser->nread;
655 /* We're in an error state. Don't bother doing anything. */
656 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
661 switch (CURRENT_STATE()) {
662 case s_body_identity_eof:
663 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
666 CALLBACK_NOTIFY_NOADVANCE(message_complete);
670 case s_start_req_or_res:
676 SET_ERRNO(HPE_INVALID_EOF_STATE);
682 if (CURRENT_STATE() == s_header_field)
683 header_field_mark = data;
684 if (CURRENT_STATE() == s_header_value)
685 header_value_mark = data;
686 switch (CURRENT_STATE()) {
689 case s_req_schema_slash:
690 case s_req_schema_slash_slash:
691 case s_req_server_start:
693 case s_req_server_with_at:
694 case s_req_query_string_start:
695 case s_req_query_string:
696 case s_req_fragment_start:
707 for (p=data; p != data + len; p++) {
710 if (PARSING_HEADER(CURRENT_STATE()))
711 COUNT_HEADER_SIZE(1);
714 switch (CURRENT_STATE()) {
717 /* this state is used after a 'Connection: close' message
718 * the parser will error out if it reads another message
720 if (LIKELY(ch == CR || ch == LF))
723 SET_ERRNO(HPE_CLOSED_CONNECTION);
726 case s_start_req_or_res:
728 if (ch == CR || ch == LF)
731 parser->content_length = ULLONG_MAX;
734 UPDATE_STATE(s_res_or_resp_H);
736 CALLBACK_NOTIFY(message_begin);
738 parser->type = HTTP_REQUEST;
739 UPDATE_STATE(s_start_req);
746 case s_res_or_resp_H:
748 parser->type = HTTP_RESPONSE;
749 UPDATE_STATE(s_res_HT);
751 if (UNLIKELY(ch != 'E')) {
752 SET_ERRNO(HPE_INVALID_CONSTANT);
756 parser->type = HTTP_REQUEST;
757 parser->method = HTTP_HEAD;
759 UPDATE_STATE(s_req_method);
765 if (ch == CR || ch == LF)
768 parser->content_length = ULLONG_MAX;
771 UPDATE_STATE(s_res_H);
773 SET_ERRNO(HPE_INVALID_CONSTANT);
777 CALLBACK_NOTIFY(message_begin);
782 STRICT_CHECK(ch != 'T');
783 UPDATE_STATE(s_res_HT);
787 STRICT_CHECK(ch != 'T');
788 UPDATE_STATE(s_res_HTT);
792 STRICT_CHECK(ch != 'P');
793 UPDATE_STATE(s_res_HTTP);
797 STRICT_CHECK(ch != '/');
798 UPDATE_STATE(s_res_http_major);
801 case s_res_http_major:
802 if (UNLIKELY(!IS_NUM(ch))) {
803 SET_ERRNO(HPE_INVALID_VERSION);
807 parser->http_major = ch - '0';
808 UPDATE_STATE(s_res_http_dot);
813 if (UNLIKELY(ch != '.')) {
814 SET_ERRNO(HPE_INVALID_VERSION);
818 UPDATE_STATE(s_res_http_minor);
822 case s_res_http_minor:
823 if (UNLIKELY(!IS_NUM(ch))) {
824 SET_ERRNO(HPE_INVALID_VERSION);
828 parser->http_minor = ch - '0';
829 UPDATE_STATE(s_res_http_end);
834 if (UNLIKELY(ch != ' ')) {
835 SET_ERRNO(HPE_INVALID_VERSION);
839 UPDATE_STATE(s_res_first_status_code);
843 case s_res_first_status_code:
850 SET_ERRNO(HPE_INVALID_STATUS);
853 parser->status_code = ch - '0';
854 UPDATE_STATE(s_res_status_code);
858 case s_res_status_code:
863 UPDATE_STATE(s_res_status_start);
867 UPDATE_STATE(s_res_status_start);
871 SET_ERRNO(HPE_INVALID_STATUS);
877 parser->status_code *= 10;
878 parser->status_code += ch - '0';
880 if (UNLIKELY(parser->status_code > 999)) {
881 SET_ERRNO(HPE_INVALID_STATUS);
888 case s_res_status_start:
891 UPDATE_STATE(s_res_status);
894 if (ch == CR || ch == LF)
902 UPDATE_STATE(s_res_line_almost_done);
903 CALLBACK_DATA(status);
908 UPDATE_STATE(s_header_field_start);
909 CALLBACK_DATA(status);
915 case s_res_line_almost_done:
916 STRICT_CHECK(ch != LF);
917 UPDATE_STATE(s_header_field_start);
922 if (ch == CR || ch == LF)
925 parser->content_length = ULLONG_MAX;
927 if (UNLIKELY(!IS_ALPHA(ch))) {
928 SET_ERRNO(HPE_INVALID_METHOD);
932 parser->method = (enum http_method) 0;
935 case 'A': parser->method = HTTP_ACL; break;
936 case 'B': parser->method = HTTP_BIND; break;
937 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
938 case 'D': parser->method = HTTP_DELETE; break;
939 case 'G': parser->method = HTTP_GET; break;
940 case 'H': parser->method = HTTP_HEAD; break;
941 case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
942 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
943 case 'N': parser->method = HTTP_NOTIFY; break;
944 case 'O': parser->method = HTTP_OPTIONS; break;
945 case 'P': parser->method = HTTP_POST;
946 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
948 case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
949 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
950 case 'T': parser->method = HTTP_TRACE; break;
951 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
953 SET_ERRNO(HPE_INVALID_METHOD);
956 UPDATE_STATE(s_req_method);
958 CALLBACK_NOTIFY(message_begin);
966 if (UNLIKELY(ch == '\0')) {
967 SET_ERRNO(HPE_INVALID_METHOD);
971 matcher = method_strings[parser->method];
972 if (ch == ' ' && matcher[parser->index] == '\0') {
973 UPDATE_STATE(s_req_spaces_before_url);
974 } else if (ch == matcher[parser->index]) {
976 } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
978 switch (parser->method << 16 | parser->index << 8 | ch) {
979 #define XX(meth, pos, ch, new_meth) \
980 case (HTTP_##meth << 16 | pos << 8 | ch): \
981 parser->method = HTTP_##new_meth; break;
983 XX(POST, 1, 'U', PUT)
984 XX(POST, 1, 'A', PATCH)
985 XX(POST, 1, 'R', PROPFIND)
986 XX(PUT, 2, 'R', PURGE)
987 XX(CONNECT, 1, 'H', CHECKOUT)
988 XX(CONNECT, 2, 'P', COPY)
989 XX(MKCOL, 1, 'O', MOVE)
990 XX(MKCOL, 1, 'E', MERGE)
991 XX(MKCOL, 1, '-', MSEARCH)
992 XX(MKCOL, 2, 'A', MKACTIVITY)
993 XX(MKCOL, 3, 'A', MKCALENDAR)
994 XX(SUBSCRIBE, 1, 'E', SEARCH)
995 XX(SUBSCRIBE, 1, 'O', SOURCE)
996 XX(REPORT, 2, 'B', REBIND)
997 XX(PROPFIND, 4, 'P', PROPPATCH)
998 XX(LOCK, 1, 'I', LINK)
999 XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
1000 XX(UNLOCK, 2, 'B', UNBIND)
1001 XX(UNLOCK, 3, 'I', UNLINK)
1004 SET_ERRNO(HPE_INVALID_METHOD);
1008 SET_ERRNO(HPE_INVALID_METHOD);
1016 case s_req_spaces_before_url:
1018 if (ch == ' ') break;
1021 if (parser->method == HTTP_CONNECT) {
1022 UPDATE_STATE(s_req_server_start);
1025 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1026 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1027 SET_ERRNO(HPE_INVALID_URL);
1035 case s_req_schema_slash:
1036 case s_req_schema_slash_slash:
1037 case s_req_server_start:
1040 /* No whitespace allowed here */
1044 SET_ERRNO(HPE_INVALID_URL);
1047 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1048 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1049 SET_ERRNO(HPE_INVALID_URL);
1058 case s_req_server_with_at:
1060 case s_req_query_string_start:
1061 case s_req_query_string:
1062 case s_req_fragment_start:
1063 case s_req_fragment:
1067 UPDATE_STATE(s_req_http_start);
1072 parser->http_major = 0;
1073 parser->http_minor = 9;
1074 UPDATE_STATE((ch == CR) ?
1075 s_req_line_almost_done :
1076 s_header_field_start);
1080 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1081 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1082 SET_ERRNO(HPE_INVALID_URL);
1089 case s_req_http_start:
1094 UPDATE_STATE(s_req_http_H);
1097 if (parser->method == HTTP_SOURCE) {
1098 UPDATE_STATE(s_req_http_I);
1103 SET_ERRNO(HPE_INVALID_CONSTANT);
1109 STRICT_CHECK(ch != 'T');
1110 UPDATE_STATE(s_req_http_HT);
1114 STRICT_CHECK(ch != 'T');
1115 UPDATE_STATE(s_req_http_HTT);
1118 case s_req_http_HTT:
1119 STRICT_CHECK(ch != 'P');
1120 UPDATE_STATE(s_req_http_HTTP);
1124 STRICT_CHECK(ch != 'C');
1125 UPDATE_STATE(s_req_http_IC);
1129 STRICT_CHECK(ch != 'E');
1130 UPDATE_STATE(s_req_http_HTTP); /* Treat "ICE" as "HTTP". */
1133 case s_req_http_HTTP:
1134 STRICT_CHECK(ch != '/');
1135 UPDATE_STATE(s_req_http_major);
1138 case s_req_http_major:
1139 if (UNLIKELY(!IS_NUM(ch))) {
1140 SET_ERRNO(HPE_INVALID_VERSION);
1144 parser->http_major = ch - '0';
1145 UPDATE_STATE(s_req_http_dot);
1148 case s_req_http_dot:
1150 if (UNLIKELY(ch != '.')) {
1151 SET_ERRNO(HPE_INVALID_VERSION);
1155 UPDATE_STATE(s_req_http_minor);
1159 case s_req_http_minor:
1160 if (UNLIKELY(!IS_NUM(ch))) {
1161 SET_ERRNO(HPE_INVALID_VERSION);
1165 parser->http_minor = ch - '0';
1166 UPDATE_STATE(s_req_http_end);
1169 case s_req_http_end:
1172 UPDATE_STATE(s_req_line_almost_done);
1177 UPDATE_STATE(s_header_field_start);
1181 SET_ERRNO(HPE_INVALID_VERSION);
1186 /* end of request line */
1187 case s_req_line_almost_done:
1189 if (UNLIKELY(ch != LF)) {
1190 SET_ERRNO(HPE_LF_EXPECTED);
1194 UPDATE_STATE(s_header_field_start);
1198 case s_header_field_start:
1201 UPDATE_STATE(s_headers_almost_done);
1206 /* they might be just sending \n instead of \r\n so this would be
1207 * the second \n to denote the end of headers*/
1208 UPDATE_STATE(s_headers_almost_done);
1215 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1222 UPDATE_STATE(s_header_field);
1226 parser->header_state = h_C;
1230 parser->header_state = h_matching_proxy_connection;
1234 parser->header_state = h_matching_transfer_encoding;
1238 parser->header_state = h_matching_upgrade;
1242 parser->header_state = h_general;
1248 case s_header_field:
1250 const char* start = p;
1251 for (; p != data + len; p++) {
1258 switch (parser->header_state) {
1260 size_t limit = data + len - p;
1261 limit = MIN(limit, max_header_size);
1262 while (p+1 < data + limit && TOKEN(p[1])) {
1270 parser->header_state = (c == 'o' ? h_CO : h_general);
1275 parser->header_state = (c == 'n' ? h_CON : h_general);
1282 parser->header_state = h_matching_connection;
1285 parser->header_state = h_matching_content_length;
1288 parser->header_state = h_general;
1295 case h_matching_connection:
1297 if (parser->index > sizeof(CONNECTION)-1
1298 || c != CONNECTION[parser->index]) {
1299 parser->header_state = h_general;
1300 } else if (parser->index == sizeof(CONNECTION)-2) {
1301 parser->header_state = h_connection;
1305 /* proxy-connection */
1307 case h_matching_proxy_connection:
1309 if (parser->index > sizeof(PROXY_CONNECTION)-1
1310 || c != PROXY_CONNECTION[parser->index]) {
1311 parser->header_state = h_general;
1312 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1313 parser->header_state = h_connection;
1317 /* content-length */
1319 case h_matching_content_length:
1321 if (parser->index > sizeof(CONTENT_LENGTH)-1
1322 || c != CONTENT_LENGTH[parser->index]) {
1323 parser->header_state = h_general;
1324 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1325 parser->header_state = h_content_length;
1329 /* transfer-encoding */
1331 case h_matching_transfer_encoding:
1333 if (parser->index > sizeof(TRANSFER_ENCODING)-1
1334 || c != TRANSFER_ENCODING[parser->index]) {
1335 parser->header_state = h_general;
1336 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1337 parser->header_state = h_transfer_encoding;
1343 case h_matching_upgrade:
1345 if (parser->index > sizeof(UPGRADE)-1
1346 || c != UPGRADE[parser->index]) {
1347 parser->header_state = h_general;
1348 } else if (parser->index == sizeof(UPGRADE)-2) {
1349 parser->header_state = h_upgrade;
1354 case h_content_length:
1355 case h_transfer_encoding:
1357 if (ch != ' ') parser->header_state = h_general;
1361 assert(0 && "Unknown header_state");
1366 if (p == data + len) {
1368 COUNT_HEADER_SIZE(p - start);
1372 COUNT_HEADER_SIZE(p - start);
1375 UPDATE_STATE(s_header_value_discard_ws);
1376 CALLBACK_DATA(header_field);
1380 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1384 case s_header_value_discard_ws:
1385 if (ch == ' ' || ch == '\t') break;
1388 UPDATE_STATE(s_header_value_discard_ws_almost_done);
1393 UPDATE_STATE(s_header_value_discard_lws);
1399 case s_header_value_start:
1403 UPDATE_STATE(s_header_value);
1408 switch (parser->header_state) {
1410 parser->flags |= F_UPGRADE;
1411 parser->header_state = h_general;
1414 case h_transfer_encoding:
1415 /* looking for 'Transfer-Encoding: chunked' */
1417 parser->header_state = h_matching_transfer_encoding_chunked;
1419 parser->header_state = h_general;
1423 case h_content_length:
1424 if (UNLIKELY(!IS_NUM(ch))) {
1425 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1429 if (parser->flags & F_CONTENTLENGTH) {
1430 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1434 parser->flags |= F_CONTENTLENGTH;
1435 parser->content_length = ch - '0';
1436 parser->header_state = h_content_length_num;
1439 /* when obsolete line folding is encountered for content length
1440 * continue to the s_header_value state */
1441 case h_content_length_ws:
1445 /* looking for 'Connection: keep-alive' */
1447 parser->header_state = h_matching_connection_keep_alive;
1448 /* looking for 'Connection: close' */
1449 } else if (c == 'c') {
1450 parser->header_state = h_matching_connection_close;
1451 } else if (c == 'u') {
1452 parser->header_state = h_matching_connection_upgrade;
1454 parser->header_state = h_matching_connection_token;
1458 /* Multi-value `Connection` header */
1459 case h_matching_connection_token_start:
1463 parser->header_state = h_general;
1469 case s_header_value:
1471 const char* start = p;
1472 enum header_states h_state = (enum header_states) parser->header_state;
1473 for (; p != data + len; p++) {
1476 UPDATE_STATE(s_header_almost_done);
1477 parser->header_state = h_state;
1478 CALLBACK_DATA(header_value);
1483 UPDATE_STATE(s_header_almost_done);
1484 COUNT_HEADER_SIZE(p - start);
1485 parser->header_state = h_state;
1486 CALLBACK_DATA_NOADVANCE(header_value);
1490 if (!lenient && !IS_HEADER_CHAR(ch)) {
1491 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1502 size_t limit = data + len - p;
1504 limit = MIN(limit, max_header_size);
1506 p_cr = (const char*) memchr(p, CR, limit);
1507 p_lf = (const char*) memchr(p, LF, limit);
1509 if (p_lf != NULL && p_cr >= p_lf)
1513 } else if (UNLIKELY(p_lf != NULL)) {
1523 case h_transfer_encoding:
1524 assert(0 && "Shouldn't get here.");
1527 case h_content_length:
1528 if (ch == ' ') break;
1529 h_state = h_content_length_num;
1532 case h_content_length_num:
1537 h_state = h_content_length_ws;
1541 if (UNLIKELY(!IS_NUM(ch))) {
1542 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1543 parser->header_state = h_state;
1547 t = parser->content_length;
1551 /* Overflow? Test against a conservative limit for simplicity. */
1552 if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1553 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1554 parser->header_state = h_state;
1558 parser->content_length = t;
1562 case h_content_length_ws:
1563 if (ch == ' ') break;
1564 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1565 parser->header_state = h_state;
1568 /* Transfer-Encoding: chunked */
1569 case h_matching_transfer_encoding_chunked:
1571 if (parser->index > sizeof(CHUNKED)-1
1572 || c != CHUNKED[parser->index]) {
1573 h_state = h_general;
1574 } else if (parser->index == sizeof(CHUNKED)-2) {
1575 h_state = h_transfer_encoding_chunked;
1579 case h_matching_connection_token_start:
1580 /* looking for 'Connection: keep-alive' */
1582 h_state = h_matching_connection_keep_alive;
1583 /* looking for 'Connection: close' */
1584 } else if (c == 'c') {
1585 h_state = h_matching_connection_close;
1586 } else if (c == 'u') {
1587 h_state = h_matching_connection_upgrade;
1588 } else if (STRICT_TOKEN(c)) {
1589 h_state = h_matching_connection_token;
1590 } else if (c == ' ' || c == '\t') {
1593 h_state = h_general;
1597 /* looking for 'Connection: keep-alive' */
1598 case h_matching_connection_keep_alive:
1600 if (parser->index > sizeof(KEEP_ALIVE)-1
1601 || c != KEEP_ALIVE[parser->index]) {
1602 h_state = h_matching_connection_token;
1603 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1604 h_state = h_connection_keep_alive;
1608 /* looking for 'Connection: close' */
1609 case h_matching_connection_close:
1611 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1612 h_state = h_matching_connection_token;
1613 } else if (parser->index == sizeof(CLOSE)-2) {
1614 h_state = h_connection_close;
1618 /* looking for 'Connection: upgrade' */
1619 case h_matching_connection_upgrade:
1621 if (parser->index > sizeof(UPGRADE) - 1 ||
1622 c != UPGRADE[parser->index]) {
1623 h_state = h_matching_connection_token;
1624 } else if (parser->index == sizeof(UPGRADE)-2) {
1625 h_state = h_connection_upgrade;
1629 case h_matching_connection_token:
1631 h_state = h_matching_connection_token_start;
1636 case h_transfer_encoding_chunked:
1637 if (ch != ' ') h_state = h_general;
1640 case h_connection_keep_alive:
1641 case h_connection_close:
1642 case h_connection_upgrade:
1644 if (h_state == h_connection_keep_alive) {
1645 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1646 } else if (h_state == h_connection_close) {
1647 parser->flags |= F_CONNECTION_CLOSE;
1648 } else if (h_state == h_connection_upgrade) {
1649 parser->flags |= F_CONNECTION_UPGRADE;
1651 h_state = h_matching_connection_token_start;
1653 } else if (ch != ' ') {
1654 h_state = h_matching_connection_token;
1659 UPDATE_STATE(s_header_value);
1660 h_state = h_general;
1664 parser->header_state = h_state;
1666 if (p == data + len)
1669 COUNT_HEADER_SIZE(p - start);
1673 case s_header_almost_done:
1675 if (UNLIKELY(ch != LF)) {
1676 SET_ERRNO(HPE_LF_EXPECTED);
1680 UPDATE_STATE(s_header_value_lws);
1684 case s_header_value_lws:
1686 if (ch == ' ' || ch == '\t') {
1687 if (parser->header_state == h_content_length_num) {
1688 /* treat obsolete line folding as space */
1689 parser->header_state = h_content_length_ws;
1691 UPDATE_STATE(s_header_value_start);
1695 /* finished the header */
1696 switch (parser->header_state) {
1697 case h_connection_keep_alive:
1698 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1700 case h_connection_close:
1701 parser->flags |= F_CONNECTION_CLOSE;
1703 case h_transfer_encoding_chunked:
1704 parser->flags |= F_CHUNKED;
1706 case h_connection_upgrade:
1707 parser->flags |= F_CONNECTION_UPGRADE;
1713 UPDATE_STATE(s_header_field_start);
1717 case s_header_value_discard_ws_almost_done:
1719 STRICT_CHECK(ch != LF);
1720 UPDATE_STATE(s_header_value_discard_lws);
1724 case s_header_value_discard_lws:
1726 if (ch == ' ' || ch == '\t') {
1727 UPDATE_STATE(s_header_value_discard_ws);
1730 switch (parser->header_state) {
1731 case h_connection_keep_alive:
1732 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1734 case h_connection_close:
1735 parser->flags |= F_CONNECTION_CLOSE;
1737 case h_connection_upgrade:
1738 parser->flags |= F_CONNECTION_UPGRADE;
1740 case h_transfer_encoding_chunked:
1741 parser->flags |= F_CHUNKED;
1743 case h_content_length:
1744 /* do not allow empty content length */
1745 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1752 /* header value was empty */
1754 UPDATE_STATE(s_header_field_start);
1755 CALLBACK_DATA_NOADVANCE(header_value);
1760 case s_headers_almost_done:
1762 STRICT_CHECK(ch != LF);
1764 if (parser->flags & F_TRAILING) {
1765 /* End of a chunked request */
1766 UPDATE_STATE(s_message_done);
1767 CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1771 /* Cannot use chunked encoding and a content-length header together
1772 per the HTTP specification. */
1773 if ((parser->flags & F_CHUNKED) &&
1774 (parser->flags & F_CONTENTLENGTH)) {
1775 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1779 UPDATE_STATE(s_headers_done);
1781 /* Set this here so that on_headers_complete() callbacks can see it */
1782 if ((parser->flags & F_UPGRADE) &&
1783 (parser->flags & F_CONNECTION_UPGRADE)) {
1784 /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1785 * mandatory only when it is a 101 Switching Protocols response,
1786 * otherwise it is purely informational, to announce support.
1789 (parser->type == HTTP_REQUEST || parser->status_code == 101);
1791 parser->upgrade = (parser->method == HTTP_CONNECT);
1794 /* Here we call the headers_complete callback. This is somewhat
1795 * different than other callbacks because if the user returns 1, we
1796 * will interpret that as saying that this message has no body. This
1797 * is needed for the annoying case of receiving a response to a HEAD
1800 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1801 * we have to simulate it by handling a change in errno below.
1803 if (settings->on_headers_complete) {
1804 switch (settings->on_headers_complete(parser)) {
1809 parser->upgrade = 1;
1813 parser->flags |= F_SKIPBODY;
1817 SET_ERRNO(HPE_CB_headers_complete);
1818 RETURN(p - data); /* Error */
1822 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1829 case s_headers_done:
1832 STRICT_CHECK(ch != LF);
1837 hasBody = parser->flags & F_CHUNKED ||
1838 (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1839 if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1840 (parser->flags & F_SKIPBODY) || !hasBody)) {
1841 /* Exit, the rest of the message is in a different protocol. */
1842 UPDATE_STATE(NEW_MESSAGE());
1843 CALLBACK_NOTIFY(message_complete);
1844 RETURN((p - data) + 1);
1847 if (parser->flags & F_SKIPBODY) {
1848 UPDATE_STATE(NEW_MESSAGE());
1849 CALLBACK_NOTIFY(message_complete);
1850 } else if (parser->flags & F_CHUNKED) {
1851 /* chunked encoding - ignore Content-Length header */
1852 UPDATE_STATE(s_chunk_size_start);
1854 if (parser->content_length == 0) {
1855 /* Content-Length header given but zero: Content-Length: 0\r\n */
1856 UPDATE_STATE(NEW_MESSAGE());
1857 CALLBACK_NOTIFY(message_complete);
1858 } else if (parser->content_length != ULLONG_MAX) {
1859 /* Content-Length header given and non-zero */
1860 UPDATE_STATE(s_body_identity);
1862 if (!http_message_needs_eof(parser)) {
1863 /* Assume content-length 0 - read the next */
1864 UPDATE_STATE(NEW_MESSAGE());
1865 CALLBACK_NOTIFY(message_complete);
1867 /* Read body until EOF */
1868 UPDATE_STATE(s_body_identity_eof);
1876 case s_body_identity:
1878 uint64_t to_read = MIN(parser->content_length,
1879 (uint64_t) ((data + len) - p));
1881 assert(parser->content_length != 0
1882 && parser->content_length != ULLONG_MAX);
1884 /* The difference between advancing content_length and p is because
1885 * the latter will automatically advance on the next loop iteration.
1886 * Further, if content_length ends up at 0, we want to see the last
1887 * byte again for our message complete callback.
1890 parser->content_length -= to_read;
1893 if (parser->content_length == 0) {
1894 UPDATE_STATE(s_message_done);
1896 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1898 * The alternative to doing this is to wait for the next byte to
1899 * trigger the data callback, just as in every other case. The
1900 * problem with this is that this makes it difficult for the test
1901 * harness to distinguish between complete-on-EOF and
1902 * complete-on-length. It's not clear that this distinction is
1903 * important for applications, but let's keep it for now.
1905 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1912 /* read until EOF */
1913 case s_body_identity_eof:
1919 case s_message_done:
1920 UPDATE_STATE(NEW_MESSAGE());
1921 CALLBACK_NOTIFY(message_complete);
1922 if (parser->upgrade) {
1923 /* Exit, the rest of the message is in a different protocol. */
1924 RETURN((p - data) + 1);
1928 case s_chunk_size_start:
1931 assert(parser->flags & F_CHUNKED);
1933 unhex_val = unhex[(unsigned char)ch];
1934 if (UNLIKELY(unhex_val == -1)) {
1935 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1939 parser->content_length = unhex_val;
1940 UPDATE_STATE(s_chunk_size);
1948 assert(parser->flags & F_CHUNKED);
1951 UPDATE_STATE(s_chunk_size_almost_done);
1955 unhex_val = unhex[(unsigned char)ch];
1957 if (unhex_val == -1) {
1958 if (ch == ';' || ch == ' ') {
1959 UPDATE_STATE(s_chunk_parameters);
1963 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1967 t = parser->content_length;
1971 /* Overflow? Test against a conservative limit for simplicity. */
1972 if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1973 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1977 parser->content_length = t;
1981 case s_chunk_parameters:
1983 assert(parser->flags & F_CHUNKED);
1984 /* Chunk parameters are ignored. TODO check for overflow */
1986 UPDATE_STATE(s_chunk_size_almost_done);
1992 case s_chunk_size_almost_done:
1994 assert(parser->flags & F_CHUNKED);
1995 STRICT_CHECK(ch != LF);
2000 if (parser->content_length == 0) {
2001 parser->flags |= F_TRAILING;
2002 UPDATE_STATE(s_header_field_start);
2004 UPDATE_STATE(s_chunk_data);
2006 CALLBACK_NOTIFY(chunk_header);
2012 uint64_t to_read = MIN(parser->content_length,
2013 (uint64_t) ((data + len) - p));
2015 assert(parser->flags & F_CHUNKED);
2016 assert(parser->content_length != 0
2017 && parser->content_length != ULLONG_MAX);
2019 /* See the explanation in s_body_identity for why the content
2020 * length and data pointers are managed this way.
2023 parser->content_length -= to_read;
2026 if (parser->content_length == 0) {
2027 UPDATE_STATE(s_chunk_data_almost_done);
2033 case s_chunk_data_almost_done:
2034 assert(parser->flags & F_CHUNKED);
2035 assert(parser->content_length == 0);
2036 STRICT_CHECK(ch != CR);
2037 UPDATE_STATE(s_chunk_data_done);
2038 CALLBACK_DATA(body);
2041 case s_chunk_data_done:
2042 assert(parser->flags & F_CHUNKED);
2043 STRICT_CHECK(ch != LF);
2046 UPDATE_STATE(s_chunk_size_start);
2047 CALLBACK_NOTIFY(chunk_complete);
2051 assert(0 && "unhandled state");
2052 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2057 /* Run callbacks for any marks that we have leftover after we ran out of
2058 * bytes. There should be at most one of these set, so it's OK to invoke
2059 * them in series (unset marks will not result in callbacks).
2061 * We use the NOADVANCE() variety of callbacks here because 'p' has already
2062 * overflowed 'data' and this allows us to correct for the off-by-one that
2063 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2064 * value that's in-bounds).
2067 assert(((header_field_mark ? 1 : 0) +
2068 (header_value_mark ? 1 : 0) +
2069 (url_mark ? 1 : 0) +
2070 (body_mark ? 1 : 0) +
2071 (status_mark ? 1 : 0)) <= 1);
2073 CALLBACK_DATA_NOADVANCE(header_field);
2074 CALLBACK_DATA_NOADVANCE(header_value);
2075 CALLBACK_DATA_NOADVANCE(url);
2076 CALLBACK_DATA_NOADVANCE(body);
2077 CALLBACK_DATA_NOADVANCE(status);
2082 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2083 SET_ERRNO(HPE_UNKNOWN);
2090 /* Does the parser need to see an EOF to find the end of the message? */
/* The result is used as a truth value elsewhere in this file: it is negated
 * with ! by http_should_keep_alive and by the s_headers_done handling.
 *
 * Three groups of conditions are tested in order:
 *   1. the parser type is HTTP_REQUEST;
 *   2. the status code is 1xx, 204 or 304, or F_SKIPBODY is set;
 *   3. F_CHUNKED is set, or content_length differs from ULLONG_MAX.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * Presumably each matching group answers false (no EOF needed) and falling
 * through all three answers true -- confirm against the complete source.
 */
2092 http_message_needs_eof (const http_parser *parser)
/* The request check comes before any status-code based rule. */
2094 if (parser->type == HTTP_REQUEST) {
2098 /* See RFC 2616 section 4.4 */
2099 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2100 parser->status_code == 204 || /* No Content */
2101 parser->status_code == 304 || /* Not Modified */
2102 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
/* Framing is known: chunked encoding, or a content length that is not the
 * ULLONG_MAX value (which the s_headers_done handling treats as meaning
 * that no Content-Length was given).
 */
2106 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
/* Reports whether the connection may be reused after the current message.
 *
 * The Connection-header flags are consulted first, with the flag that is
 * examined depending on the HTTP version. If that check does not decide the
 * matter, the answer is the negation of http_message_needs_eof().
 *
 * NOTE(review): the bodies of the two inner if statements are not visible
 * in this listing; presumably both return 0 (do not keep alive) -- confirm.
 */
2115 http_should_keep_alive (const http_parser *parser)
/* Both http_major and http_minor must be non-zero to take this branch, so
 * any version with minor == 0 is handled by the other branch below.
 * NOTE(review): confirm that this is intended for versions such as 2.0.
 */
2117 if (parser->http_major > 0 && parser->http_minor > 0) {
2119 if (parser->flags & F_CONNECTION_CLOSE) {
2123 /* HTTP/1.0 or earlier */
2124 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
/* A message that is only delimited by EOF rules out connection reuse. */
2129 return !http_message_needs_eof(parser);
/* Returns the string stored in method_strings for method m.
 *
 * ELEM_AT casts m to unsigned int and compares it with the array size, so a
 * value outside the table (including a negative one, which becomes a large
 * unsigned number) yields the literal "<unknown>" instead of an
 * out-of-bounds read.
 */
2134 http_method_str (enum http_method m)
2136 return ELEM_AT(method_strings, m, "<unknown>");
/* Returns a textual form of status s, or "<unknown>" when no case matches.
 *
 * NOTE(review): the switch header and the macro list that expands XX are
 * not visible in this listing; presumably a status map macro is expanded
 * with XX to generate one case per known status -- confirm.
 */
2140 http_status_str (enum http_status s)
/* Each expansion yields a case label HTTP_STATUS_<name> that returns the
 * third macro argument turned into a string literal by the # operator.
 */
2143 #define XX(num, name, string) case HTTP_STATUS_##name: return #string;
2146 default: return "<unknown>";
/* Resets *parser to its initial state for a parser of type t.
 *
 * The whole structure is zeroed, with the single exception of the data
 * pointer, which is saved before the memset and restored afterwards.
 * Because parser->data is read before anything is written, the caller is
 * expected to hand in a structure whose data member already holds a
 * meaningful value (or at least an initialised one).
 */
2151 http_parser_init (http_parser *parser, enum http_parser_type t)
2153 void *data = parser->data; /* preserve application data */
2154 memset(parser, 0, sizeof(*parser));
2155 parser->data = data;
/* Starting state: s_start_req for HTTP_REQUEST, s_start_res for
 * HTTP_RESPONSE, and s_start_req_or_res for any other value of t.
 */
2157 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2158 parser->http_errno = HPE_OK;
/* Clears every byte of *settings.
 *
 * The parse loop in this file tests settings->on_headers_complete for
 * non-zero before calling it, so a cleared entry means that callback is
 * not invoked.
 */
2162 http_parser_settings_init(http_parser_settings *settings)
2164 memset(settings, 0, sizeof(*settings));
/* Returns the name member of the http_strerror_tab entry indexed by err.
 *
 * NOTE(review): the range check is an assert only. When assertions are
 * compiled out (NDEBUG), an err value outside the table indexes the array
 * unchecked, so callers must pass a valid enum http_errno value.
 */
2168 http_errno_name(enum http_errno err) {
2169 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2170 return http_strerror_tab[err].name;
/* Returns the description member of the http_strerror_tab entry indexed by
 * err.
 *
 * NOTE(review): the range check is an assert only. When assertions are
 * compiled out (NDEBUG), an err value outside the table indexes the array
 * unchecked, so callers must pass a valid enum http_errno value.
 */
2174 http_errno_description(enum http_errno err) {
2175 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2176 return http_strerror_tab[err].description;
/* Single-character transition function for the host state machine.
 *
 * Given the current state s and the next input character ch, returns the
 * state to move to. When no visible branch accepts the character, the final
 * statement returns s_http_host_dead, which http_parse_host tests for.
 *
 * NOTE(review): several of the conditions guarding the return statements
 * below are not visible in this listing (only the returns are). The
 * per-state remarks describe the visible lines only.
 */
2179 static enum http_host_state
2180 http_parse_host_char(enum http_host_state s, const char ch) {
/* Userinfo states: one visible exit goes to s_http_host_start, and any
 * IS_USERINFO_CHAR character keeps the machine in s_http_userinfo.
 */
2182 case s_http_userinfo:
2183 case s_http_userinfo_start:
2185 return s_http_host_start;
2188 if (IS_USERINFO_CHAR(ch)) {
2189 return s_http_userinfo;
/* Start of the host proper: may switch to the IPv6 literal states. */
2193 case s_http_host_start:
2195 return s_http_host_v6_start;
2198 if (IS_HOST_CHAR(ch)) {
2205 if (IS_HOST_CHAR(ch)) {
/* After the end of an IPv6 literal the only visible exit is to the port. */
2210 case s_http_host_v6_end:
2212 return s_http_host_port_start;
/* s_http_host_v6 shares the character test of s_http_host_v6_start below,
 * after its own visible exit to s_http_host_v6_end.
 */
2217 case s_http_host_v6:
2219 return s_http_host_v6_end;
2223 case s_http_host_v6_start:
2224 if (IS_HEX(ch) || ch == ':' || ch == '.') {
2225 return s_http_host_v6;
/* A zone identifier may only begin once the machine is in s_http_host_v6,
 * hence the explicit test on s in addition to the character.
 */
2228 if (s == s_http_host_v6 && ch == '%') {
2229 return s_http_host_v6_zone_start;
2233 case s_http_host_v6_zone:
2235 return s_http_host_v6_end;
2239 case s_http_host_v6_zone_start:
2240 /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2241 if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2243 return s_http_host_v6_zone;
/* Port states: the visible exit stays in or enters s_http_host_port. */
2247 case s_http_host_port:
2248 case s_http_host_port_start:
2250 return s_http_host_port;
/* No transition matched: the character is not valid in this position. */
2258 return s_http_host_dead;
/* Re-scans the span previously recorded as UF_HOST in u and splits it into
 * userinfo, host and port fields.
 *
 * buf       start of the URL buffer that the offsets in u refer to.
 * u         must already have UF_HOST set (asserted). Its UF_HOST entry is
 *           rewritten, and UF_PORT and UF_USERINFO are filled in and
 *           flagged in field_set when those parts are encountered.
 * found_at  non-zero starts the scan in s_http_userinfo_start, zero starts
 *           it in s_http_host_start.
 *
 * The caller in this file treats a non-zero result as failure.
 * NOTE(review): the return statements and the switch headers are not
 * visible in this listing -- confirm the exact return values.
 */
2262 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2263 enum http_host_state s;
/* Despite its name, buflen here is the end offset of the UF_HOST span
 * (off + len), not the length of the whole buffer.
 */
2266 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2268 assert(u->field_set & (1 << UF_HOST));
/* The host length is recounted from zero while scanning. */
2270 u->field_data[UF_HOST].len = 0;
2272 s = found_at ? s_http_userinfo_start : s_http_host_start;
2274 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2275 enum http_host_state new_s = http_parse_host_char(s, *p);
2277 if (new_s == s_http_host_dead) {
/* In each case below, a change of state (s differs from the case label)
 * marks the first character of a field, so its offset is recorded there.
 * NOTE(review): offsets are narrowed to uint16_t; positions beyond 65535
 * would not fit -- confirm that callers bound the URL length.
 */
2283 if (s != s_http_host) {
2284 u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2286 u->field_data[UF_HOST].len++;
2289 case s_http_host_v6:
2290 if (s != s_http_host_v6) {
2291 u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2293 u->field_data[UF_HOST].len++;
/* Zone identifier characters are counted as part of the host field. */
2296 case s_http_host_v6_zone_start:
2297 case s_http_host_v6_zone:
2298 u->field_data[UF_HOST].len++;
2301 case s_http_host_port:
2302 if (s != s_http_host_port) {
2303 u->field_data[UF_PORT].off = (uint16_t)(p - buf);
2304 u->field_data[UF_PORT].len = 0;
2305 u->field_set |= (1 << UF_PORT);
2307 u->field_data[UF_PORT].len++;
2310 case s_http_userinfo:
2311 if (s != s_http_userinfo) {
2312 u->field_data[UF_USERINFO].off = (uint16_t)(p - buf);
2313 u->field_data[UF_USERINFO].len = 0;
2314 u->field_set |= (1 << UF_USERINFO);
2316 u->field_data[UF_USERINFO].len++;
2325 /* Make sure we don't end somewhere unexpected */
/* The states listed here are the ones in which the input must not end. */
2327 case s_http_host_start:
2328 case s_http_host_v6_start:
2329 case s_http_host_v6:
2330 case s_http_host_v6_zone_start:
2331 case s_http_host_v6_zone:
2332 case s_http_host_port_start:
2333 case s_http_userinfo:
2334 case s_http_userinfo_start:
/* Clears every byte of *u, so field_set, port and all field_data entries
 * start out as zero.
 */
2344 http_parser_url_init(struct http_parser_url *u) {
2345 memset(u, 0, sizeof(*u));
/* Splits the URL in buf[0 .. buflen) into fields and records them in u.
 *
 * buf, buflen  the URL text; it is scanned by explicit length.
 * is_connect   non-zero selects the CONNECT form: scanning starts in
 *              s_req_server_start and the result must consist of exactly
 *              the host and port fields.
 * u            output; port and field_set are reset at the start, then
 *              field_data offsets and lengths plus field_set bits are
 *              filled in as fields are found.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the function returns non-zero on failure and 0 on success --
 * confirm against the complete source.
 */
2349 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2350 struct http_parser_url *u)
2354 enum http_parser_url_fields uf, old_uf;
2361 u->port = u->field_set = 0;
2362 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2365 for (p = buf; p < buf + buflen; p++) {
2366 s = parse_url_char(s, *p);
2368 /* Figure out the next field that we're operating on */
2373 /* Skip delimiters */
2374 case s_req_schema_slash:
2375 case s_req_schema_slash_slash:
2376 case s_req_server_start:
2377 case s_req_query_string_start:
2378 case s_req_fragment_start:
2385 case s_req_server_with_at:
2397 case s_req_query_string:
2401 case s_req_fragment:
2406 assert(!"Unexpected state");
2410 /* Nothing's changed; soldier on */
2412 u->field_data[uf].len++;
/* A new field starts at this character: record its offset, count the
 * character itself, and flag the field as present.
 */
2416 u->field_data[uf].off = (uint16_t)(p - buf);
2417 u->field_data[uf].len = 1;
2419 u->field_set |= (1 << uf);
2423 /* host must be present if there is a schema */
2424 /* parsing http:///toto will fail */
2425 if ((u->field_set & (1 << UF_SCHEMA)) &&
2426 (u->field_set & (1 << UF_HOST)) == 0) {
/* The host span found above is re-scanned by http_parse_host to separate
 * userinfo, host and port.
 */
2430 if (u->field_set & (1 << UF_HOST)) {
2431 if (http_parse_host(buf, u, found_at) != 0) {
2436 /* CONNECT requests can only contain "hostname:port" */
2437 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
/* Convert the textual port into the numeric u->port member. */
2441 if (u->field_set & (1 << UF_PORT)) {
2448 off = u->field_data[UF_PORT].off;
2449 len = u->field_data[UF_PORT].len;
2450 end = buf + off + len;
2452 /* NOTE: The characters are already validated and are in the [0-9] range */
2453 assert(off + len <= buflen && "Port number overflow");
2455 for (p = buf + off; p < end; p++) {
2459 /* Ports have a max value of 2^16 - 1 (the result is stored as uint16_t) */
2465 u->port = (uint16_t) v;
/* Pauses or resumes the parser by rewriting its error code.
 *
 * When the current error code is HPE_OK or HPE_PAUSED, it is set to
 * HPE_PAUSED for a non-zero paused argument and to HPE_OK otherwise. The
 * local variable nread exists only because the SET_ERRNO macro stores a
 * variable of that name back into parser->nread; initialising it from
 * parser->nread leaves that member unchanged.
 *
 * For any other error code the visible code only asserts, so with
 * assertions compiled out the call has no effect.
 */
2472 http_parser_pause(http_parser *parser, int paused) {
2473 /* Users should only be pausing/unpausing a parser that is not in an error
2474 * state. In non-debug builds, there's not much that we can do about this
2475 * other than ignore it.
2477 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2478 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2479 uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
2480 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2482 assert(0 && "Attempting to pause parser in error state");
/* Returns non-zero when the stored parser state is s_message_done.
 *
 * In the body handling of this file the state is switched to
 * s_message_done once content_length reaches zero, just before the body
 * data callback is issued.
 * NOTE(review): presumably this is meant to be called from inside the body
 * callback to detect the last piece of the body -- confirm with the header
 * documentation.
 */
2487 http_body_is_final(const struct http_parser *parser) {
2488 return parser->state == s_message_done;
/* Returns the library version packed into one integer:
 * major * 0x10000 | minor * 0x100 | patch.
 *
 * The components occupy separate bit ranges (major from bit 16 up, minor in
 * bits 8-15, patch in bits 0-7) as long as minor and patch are below 256.
 */
2492 http_parser_version(void) {
2493 return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2494 HTTP_PARSER_VERSION_MINOR * 0x00100 |
2495 HTTP_PARSER_VERSION_PATCH * 0x00001;
/* Replaces the header size limit used by this file.
 *
 * max_header_size is a single file-scope static (initialised to
 * HTTP_MAX_HEADER_SIZE), so the new value applies to every parser in the
 * program rather than to one parser instance.
 * NOTE(review): the store is a plain assignment with no synchronisation,
 * and the place where the limit is enforced is not visible in this
 * listing -- confirm before changing it while parsers are running.
 */
2499 http_parser_set_max_header_size(uint32_t size) {
2500 max_header_size = size;