1 /* Copyright Joyent, Inc. and other Node contributors.
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to
5 * deal in the Software without restriction, including without limitation the
6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 * sell copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 #include "http_parser.h"
29 # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
33 # define MIN(a,b) ((a) < (b) ? (a) : (b))
37 # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
41 # define BIT_AT(a, i) \
42 (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
43 (1 << ((unsigned int) (i) & 7))))
47 # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
50 #define SET_ERRNO(e) \
52 parser->nread = nread; \
53 parser->http_errno = (e); \
56 #define CURRENT_STATE() p_state
57 #define UPDATE_STATE(V) p_state = (enum state) (V);
60 parser->nread = nread; \
61 parser->state = CURRENT_STATE(); \
69 # define LIKELY(X) __builtin_expect(!!(X), 1)
70 # define UNLIKELY(X) __builtin_expect(!!(X), 0)
72 # define LIKELY(X) (X)
73 # define UNLIKELY(X) (X)
77 /* Run the notify callback FOR, returning ER if it fails */
78 #define CALLBACK_NOTIFY_(FOR, ER) \
80 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
82 if (LIKELY(settings->on_##FOR)) { \
83 parser->state = CURRENT_STATE(); \
84 if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
85 SET_ERRNO(HPE_CB_##FOR); \
87 UPDATE_STATE(parser->state); \
89 /* We either errored above or got paused; get out */ \
90 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
96 /* Run the notify callback FOR and consume the current byte */
97 #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
99 /* Run the notify callback FOR and don't consume the current byte */
100 #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
102 /* Run data callback FOR with LEN bytes, returning ER if it fails */
103 #define CALLBACK_DATA_(FOR, LEN, ER) \
105 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
108 if (LIKELY(settings->on_##FOR)) { \
109 parser->state = CURRENT_STATE(); \
111 settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
112 SET_ERRNO(HPE_CB_##FOR); \
114 UPDATE_STATE(parser->state); \
116 /* We either errored above or got paused; get out */ \
117 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
125 /* Run the data callback FOR and consume the current byte */
126 #define CALLBACK_DATA(FOR) \
127 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
129 /* Run the data callback FOR and don't consume the current byte */
130 #define CALLBACK_DATA_NOADVANCE(FOR) \
131 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
133 /* Set the mark FOR; non-destructive if mark is already set */
141 /* Don't allow the total size of the HTTP headers (including the status
142 * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
143 * embedders against denial-of-service attacks where the attacker feeds
144 * us a never-ending header that the embedder keeps buffering.
146 * This check is arguably the responsibility of embedders but we're doing
147 * it on the embedder's behalf because most won't bother and this way we
148 * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
149 * than any reasonable request or response so this should never affect
150 * day-to-day operation.
152 #define COUNT_HEADER_SIZE(V) \
155 if (UNLIKELY(nread > (HTTP_MAX_HEADER_SIZE))) { \
156 SET_ERRNO(HPE_HEADER_OVERFLOW); \
162 #define PROXY_CONNECTION "proxy-connection"
163 #define CONNECTION "connection"
164 #define CONTENT_LENGTH "content-length"
165 #define TRANSFER_ENCODING "transfer-encoding"
166 #define UPGRADE "upgrade"
167 #define CHUNKED "chunked"
168 #define KEEP_ALIVE "keep-alive"
169 #define CLOSE "close"
172 static const char *method_strings[] =
174 #define XX(num, name, string) #string,
180 /* Tokens as defined by rfc 2616. Also lowercases them.
181 * token = 1*<any CHAR except CTLs or separators>
182 * separators = "(" | ")" | "<" | ">" | "@"
183 * | "," | ";" | ":" | "\" | <">
184 * | "/" | "[" | "]" | "?" | "="
185 * | "{" | "}" | SP | HT
187 static const char tokens[256] = {
188 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
189 0, 0, 0, 0, 0, 0, 0, 0,
190 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
191 0, 0, 0, 0, 0, 0, 0, 0,
192 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
193 0, 0, 0, 0, 0, 0, 0, 0,
194 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
195 0, 0, 0, 0, 0, 0, 0, 0,
196 /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
197 ' ', '!', 0, '#', '$', '%', '&', '\'',
198 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
199 0, 0, '*', '+', 0, '-', '.', 0,
200 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
201 '0', '1', '2', '3', '4', '5', '6', '7',
202 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
203 '8', '9', 0, 0, 0, 0, 0, 0,
204 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
205 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
206 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
207 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
208 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
209 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
210 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
211 'x', 'y', 'z', 0, 0, 0, '^', '_',
212 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
213 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
214 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
215 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
216 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
217 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
218 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
219 'x', 'y', 'z', 0, '|', 0, '~', 0 };
222 static const int8_t unhex[256] =
223 {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
224 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
225 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
226 , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
227 ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
228 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
229 ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
230 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
234 #if HTTP_PARSER_STRICT
241 static const uint8_t normal_url_char[32] = {
242 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
243 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
244 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
245 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
246 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
247 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
248 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
249 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
250 /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
251 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
252 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
253 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
254 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
255 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
256 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
257 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
258 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
259 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
260 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
261 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
262 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
263 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
264 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
265 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
266 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
267 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
268 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
269 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
270 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
271 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
272 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
273 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
278 { s_dead = 1 /* important that this is > 0 */
291 , s_res_first_status_code
295 , s_res_line_almost_done
300 , s_req_spaces_before_url
303 , s_req_schema_slash_slash
306 , s_req_server_with_at
308 , s_req_query_string_start
310 , s_req_fragment_start
321 , s_req_line_almost_done
323 , s_header_field_start
325 , s_header_value_discard_ws
326 , s_header_value_discard_ws_almost_done
327 , s_header_value_discard_lws
328 , s_header_value_start
332 , s_header_almost_done
337 , s_chunk_size_almost_done
339 , s_headers_almost_done
342 /* Important: 's_headers_done' must be the last 'header' state. All
343 * states beyond this must be 'body' states. It is used for overflow
344 * checking. See the PARSING_HEADER() macro.
348 , s_chunk_data_almost_done
352 , s_body_identity_eof
/* True while `state` is a header-section state, i.e. it sorts at or before
 * s_headers_done in enum state. Used to decide whether the current byte
 * counts toward the header-size limit. The argument is parenthesized so that
 * any expression (not just a plain identifier) can be passed safely. */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
367 , h_matching_connection
368 , h_matching_proxy_connection
369 , h_matching_content_length
370 , h_matching_transfer_encoding
375 , h_content_length_num
376 , h_content_length_ws
377 , h_transfer_encoding
380 , h_matching_transfer_encoding_chunked
381 , h_matching_connection_token_start
382 , h_matching_connection_keep_alive
383 , h_matching_connection_close
384 , h_matching_connection_upgrade
385 , h_matching_connection_token
387 , h_transfer_encoding_chunked
388 , h_connection_keep_alive
390 , h_connection_upgrade
396 , s_http_userinfo_start
399 , s_http_host_v6_start
403 , s_http_host_v6_zone_start
404 , s_http_host_v6_zone
405 , s_http_host_port_start
409 /* Macros for character classes; depends on strict-mode */
/* ASCII character-class helpers.
 *
 * LOWER() folds a letter to lower case by setting bit 0x20; for non-letters
 * the result is only useful for the range comparisons below.
 *
 * NOTE: several of these macros evaluate their argument more than once, so
 * never pass an expression with side effects (e.g. *p++). All parameters are
 * parenthesized so that compound expressions are handled correctly. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
417 #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
418 (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
420 #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
421 (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
422 (c) == '$' || (c) == ',')
/* Strict token lookup: returns the lowercased token character from tokens[],
 * or 0 if `c` is not a token character. Unlike a raw tokens[] lookup (where
 * the entry for ' ' is ' '), a space is never accepted here.
 * Evaluates `c` twice; do not pass expressions with side effects. */
#define STRICT_TOKEN(c)     (((c) == ' ') ? 0 : tokens[(unsigned char)(c)])
426 #if HTTP_PARSER_STRICT
/* Strict-mode character classes:
 *  - TOKEN:        token lookup that rejects ' ' (see STRICT_TOKEN).
 *  - IS_URL_CHAR:  only bytes whose bit is set in the normal_url_char bitmap.
 *  - IS_HOST_CHAR: alphanumerics, '.' and '-'.
 * The cast in IS_URL_CHAR wraps the whole argument so that expression
 * arguments are converted as a unit. */
#define TOKEN(c)            STRICT_TOKEN(c)
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
/* Lenient character classes:
 *  - TOKEN:        raw tokens[] lookup (a space maps to ' ' and is accepted).
 *  - IS_URL_CHAR:  normal_url_char bitmap, plus any byte with the high bit
 *                  (0x80) set.
 *  - IS_HOST_CHAR: alphanumerics, '.', '-' and additionally '_'.
 * The unsigned-char casts wrap the whole argument so that expression
 * arguments are converted as a unit before indexing. */
#define TOKEN(c)            tokens[(unsigned char)(c)]
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
439 * Verify that a char is a valid visible (printable) US-ASCII
440 * character or %x80-FF
/* Accepts CR, LF, horizontal tab (9), and every byte above 31 except DEL
 * (127); bytes 0x80-0xFF pass because the range test is done on the
 * unsigned value. Evaluates `ch` several times, so do not pass expressions
 * with side effects. */
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))
445 #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
448 #if HTTP_PARSER_STRICT
449 # define STRICT_CHECK(cond) \
452 SET_ERRNO(HPE_STRICT); \
456 # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
458 # define STRICT_CHECK(cond)
459 # define NEW_MESSAGE() start_state
463 /* Map errno values to strings for human-readable output */
464 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
467 const char *description;
468 } http_strerror_tab[] = {
469 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
471 #undef HTTP_STRERROR_GEN
473 int http_message_needs_eof(const http_parser *parser);
477 * This is designed to be shared by http_parser_execute() for URL validation,
478 * hence it has a state transition + byte-for-byte interface. In addition, it
479 * is meant to be embedded in http_parser_parse_url(), which does the dirty
480 * work of turning state transitions into URL components for its API.
482 * This function should only be invoked with non-space characters. It is
483 * assumed that the caller cares about (and can detect) the transition between
484 * URL and non-URL states by looking for these.
487 parse_url_char(enum state s, const char ch)
489 if (ch == ' ' || ch == '\r' || ch == '\n') {
493 #if HTTP_PARSER_STRICT
494 if (ch == '\t' || ch == '\f') {
500 case s_req_spaces_before_url:
501 /* Proxied requests are followed by scheme of an absolute URI (alpha).
502 * All methods except CONNECT are followed by '/' or '*'.
505 if (ch == '/' || ch == '*') {
521 return s_req_schema_slash;
526 case s_req_schema_slash:
528 return s_req_schema_slash_slash;
533 case s_req_schema_slash_slash:
535 return s_req_server_start;
540 case s_req_server_with_at:
546 case s_req_server_start:
553 return s_req_query_string_start;
557 return s_req_server_with_at;
560 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
567 if (IS_URL_CHAR(ch)) {
573 return s_req_query_string_start;
576 return s_req_fragment_start;
581 case s_req_query_string_start:
582 case s_req_query_string:
583 if (IS_URL_CHAR(ch)) {
584 return s_req_query_string;
589 /* allow extra '?' in query string */
590 return s_req_query_string;
593 return s_req_fragment_start;
598 case s_req_fragment_start:
599 if (IS_URL_CHAR(ch)) {
600 return s_req_fragment;
605 return s_req_fragment;
614 if (IS_URL_CHAR(ch)) {
630 /* We should never fall out of the switch above unless there's an error */
634 size_t http_parser_execute (http_parser *parser,
635 const http_parser_settings *settings,
641 const char *p = data;
642 const char *header_field_mark = 0;
643 const char *header_value_mark = 0;
644 const char *url_mark = 0;
645 const char *body_mark = 0;
646 const char *status_mark = 0;
647 enum state p_state = (enum state) parser->state;
648 const unsigned int lenient = parser->lenient_http_headers;
649 uint32_t nread = parser->nread;
651 /* We're in an error state. Don't bother doing anything. */
652 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
657 switch (CURRENT_STATE()) {
658 case s_body_identity_eof:
659 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
662 CALLBACK_NOTIFY_NOADVANCE(message_complete);
666 case s_start_req_or_res:
672 SET_ERRNO(HPE_INVALID_EOF_STATE);
678 if (CURRENT_STATE() == s_header_field)
679 header_field_mark = data;
680 if (CURRENT_STATE() == s_header_value)
681 header_value_mark = data;
682 switch (CURRENT_STATE()) {
685 case s_req_schema_slash:
686 case s_req_schema_slash_slash:
687 case s_req_server_start:
689 case s_req_server_with_at:
690 case s_req_query_string_start:
691 case s_req_query_string:
692 case s_req_fragment_start:
703 for (p=data; p != data + len; p++) {
706 if (PARSING_HEADER(CURRENT_STATE()))
707 COUNT_HEADER_SIZE(1);
710 switch (CURRENT_STATE()) {
713 /* this state is used after a 'Connection: close' message
714 * the parser will error out if it reads another message
716 if (LIKELY(ch == CR || ch == LF))
719 SET_ERRNO(HPE_CLOSED_CONNECTION);
722 case s_start_req_or_res:
724 if (ch == CR || ch == LF)
727 parser->content_length = ULLONG_MAX;
730 UPDATE_STATE(s_res_or_resp_H);
732 CALLBACK_NOTIFY(message_begin);
734 parser->type = HTTP_REQUEST;
735 UPDATE_STATE(s_start_req);
742 case s_res_or_resp_H:
744 parser->type = HTTP_RESPONSE;
745 UPDATE_STATE(s_res_HT);
747 if (UNLIKELY(ch != 'E')) {
748 SET_ERRNO(HPE_INVALID_CONSTANT);
752 parser->type = HTTP_REQUEST;
753 parser->method = HTTP_HEAD;
755 UPDATE_STATE(s_req_method);
761 if (ch == CR || ch == LF)
764 parser->content_length = ULLONG_MAX;
767 UPDATE_STATE(s_res_H);
769 SET_ERRNO(HPE_INVALID_CONSTANT);
773 CALLBACK_NOTIFY(message_begin);
778 STRICT_CHECK(ch != 'T');
779 UPDATE_STATE(s_res_HT);
783 STRICT_CHECK(ch != 'T');
784 UPDATE_STATE(s_res_HTT);
788 STRICT_CHECK(ch != 'P');
789 UPDATE_STATE(s_res_HTTP);
793 STRICT_CHECK(ch != '/');
794 UPDATE_STATE(s_res_http_major);
797 case s_res_http_major:
798 if (UNLIKELY(!IS_NUM(ch))) {
799 SET_ERRNO(HPE_INVALID_VERSION);
803 parser->http_major = ch - '0';
804 UPDATE_STATE(s_res_http_dot);
809 if (UNLIKELY(ch != '.')) {
810 SET_ERRNO(HPE_INVALID_VERSION);
814 UPDATE_STATE(s_res_http_minor);
818 case s_res_http_minor:
819 if (UNLIKELY(!IS_NUM(ch))) {
820 SET_ERRNO(HPE_INVALID_VERSION);
824 parser->http_minor = ch - '0';
825 UPDATE_STATE(s_res_http_end);
830 if (UNLIKELY(ch != ' ')) {
831 SET_ERRNO(HPE_INVALID_VERSION);
835 UPDATE_STATE(s_res_first_status_code);
839 case s_res_first_status_code:
846 SET_ERRNO(HPE_INVALID_STATUS);
849 parser->status_code = ch - '0';
850 UPDATE_STATE(s_res_status_code);
854 case s_res_status_code:
859 UPDATE_STATE(s_res_status_start);
863 UPDATE_STATE(s_res_status_start);
867 SET_ERRNO(HPE_INVALID_STATUS);
873 parser->status_code *= 10;
874 parser->status_code += ch - '0';
876 if (UNLIKELY(parser->status_code > 999)) {
877 SET_ERRNO(HPE_INVALID_STATUS);
884 case s_res_status_start:
887 UPDATE_STATE(s_res_status);
890 if (ch == CR || ch == LF)
898 UPDATE_STATE(s_res_line_almost_done);
899 CALLBACK_DATA(status);
904 UPDATE_STATE(s_header_field_start);
905 CALLBACK_DATA(status);
911 case s_res_line_almost_done:
912 STRICT_CHECK(ch != LF);
913 UPDATE_STATE(s_header_field_start);
918 if (ch == CR || ch == LF)
921 parser->content_length = ULLONG_MAX;
923 if (UNLIKELY(!IS_ALPHA(ch))) {
924 SET_ERRNO(HPE_INVALID_METHOD);
928 parser->method = (enum http_method) 0;
931 case 'A': parser->method = HTTP_ACL; break;
932 case 'B': parser->method = HTTP_BIND; break;
933 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
934 case 'D': parser->method = HTTP_DELETE; break;
935 case 'G': parser->method = HTTP_GET; break;
936 case 'H': parser->method = HTTP_HEAD; break;
937 case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
938 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
939 case 'N': parser->method = HTTP_NOTIFY; break;
940 case 'O': parser->method = HTTP_OPTIONS; break;
941 case 'P': parser->method = HTTP_POST;
942 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
944 case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
945 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
946 case 'T': parser->method = HTTP_TRACE; break;
947 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
949 SET_ERRNO(HPE_INVALID_METHOD);
952 UPDATE_STATE(s_req_method);
954 CALLBACK_NOTIFY(message_begin);
962 if (UNLIKELY(ch == '\0')) {
963 SET_ERRNO(HPE_INVALID_METHOD);
967 matcher = method_strings[parser->method];
968 if (ch == ' ' && matcher[parser->index] == '\0') {
969 UPDATE_STATE(s_req_spaces_before_url);
970 } else if (ch == matcher[parser->index]) {
972 } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
974 switch (parser->method << 16 | parser->index << 8 | ch) {
975 #define XX(meth, pos, ch, new_meth) \
976 case (HTTP_##meth << 16 | pos << 8 | ch): \
977 parser->method = HTTP_##new_meth; break;
979 XX(POST, 1, 'U', PUT)
980 XX(POST, 1, 'A', PATCH)
981 XX(POST, 1, 'R', PROPFIND)
982 XX(PUT, 2, 'R', PURGE)
983 XX(CONNECT, 1, 'H', CHECKOUT)
984 XX(CONNECT, 2, 'P', COPY)
985 XX(MKCOL, 1, 'O', MOVE)
986 XX(MKCOL, 1, 'E', MERGE)
987 XX(MKCOL, 1, '-', MSEARCH)
988 XX(MKCOL, 2, 'A', MKACTIVITY)
989 XX(MKCOL, 3, 'A', MKCALENDAR)
990 XX(SUBSCRIBE, 1, 'E', SEARCH)
991 XX(SUBSCRIBE, 1, 'O', SOURCE)
992 XX(REPORT, 2, 'B', REBIND)
993 XX(PROPFIND, 4, 'P', PROPPATCH)
994 XX(LOCK, 1, 'I', LINK)
995 XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
996 XX(UNLOCK, 2, 'B', UNBIND)
997 XX(UNLOCK, 3, 'I', UNLINK)
1000 SET_ERRNO(HPE_INVALID_METHOD);
1004 SET_ERRNO(HPE_INVALID_METHOD);
1012 case s_req_spaces_before_url:
1014 if (ch == ' ') break;
1017 if (parser->method == HTTP_CONNECT) {
1018 UPDATE_STATE(s_req_server_start);
1021 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1022 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1023 SET_ERRNO(HPE_INVALID_URL);
1031 case s_req_schema_slash:
1032 case s_req_schema_slash_slash:
1033 case s_req_server_start:
1036 /* No whitespace allowed here */
1040 SET_ERRNO(HPE_INVALID_URL);
1043 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1044 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1045 SET_ERRNO(HPE_INVALID_URL);
1054 case s_req_server_with_at:
1056 case s_req_query_string_start:
1057 case s_req_query_string:
1058 case s_req_fragment_start:
1059 case s_req_fragment:
1063 UPDATE_STATE(s_req_http_start);
1068 parser->http_major = 0;
1069 parser->http_minor = 9;
1070 UPDATE_STATE((ch == CR) ?
1071 s_req_line_almost_done :
1072 s_header_field_start);
1076 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1077 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1078 SET_ERRNO(HPE_INVALID_URL);
1085 case s_req_http_start:
1088 UPDATE_STATE(s_req_http_H);
1093 SET_ERRNO(HPE_INVALID_CONSTANT);
1099 STRICT_CHECK(ch != 'T');
1100 UPDATE_STATE(s_req_http_HT);
1104 STRICT_CHECK(ch != 'T');
1105 UPDATE_STATE(s_req_http_HTT);
1108 case s_req_http_HTT:
1109 STRICT_CHECK(ch != 'P');
1110 UPDATE_STATE(s_req_http_HTTP);
1113 case s_req_http_HTTP:
1114 STRICT_CHECK(ch != '/');
1115 UPDATE_STATE(s_req_http_major);
1118 case s_req_http_major:
1119 if (UNLIKELY(!IS_NUM(ch))) {
1120 SET_ERRNO(HPE_INVALID_VERSION);
1124 parser->http_major = ch - '0';
1125 UPDATE_STATE(s_req_http_dot);
1128 case s_req_http_dot:
1130 if (UNLIKELY(ch != '.')) {
1131 SET_ERRNO(HPE_INVALID_VERSION);
1135 UPDATE_STATE(s_req_http_minor);
1139 case s_req_http_minor:
1140 if (UNLIKELY(!IS_NUM(ch))) {
1141 SET_ERRNO(HPE_INVALID_VERSION);
1145 parser->http_minor = ch - '0';
1146 UPDATE_STATE(s_req_http_end);
1149 case s_req_http_end:
1152 UPDATE_STATE(s_req_line_almost_done);
1157 UPDATE_STATE(s_header_field_start);
1161 SET_ERRNO(HPE_INVALID_VERSION);
1166 /* end of request line */
1167 case s_req_line_almost_done:
1169 if (UNLIKELY(ch != LF)) {
1170 SET_ERRNO(HPE_LF_EXPECTED);
1174 UPDATE_STATE(s_header_field_start);
1178 case s_header_field_start:
1181 UPDATE_STATE(s_headers_almost_done);
1186 /* they might be just sending \n instead of \r\n so this would be
1187 * the second \n to denote the end of headers*/
1188 UPDATE_STATE(s_headers_almost_done);
1195 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1202 UPDATE_STATE(s_header_field);
1206 parser->header_state = h_C;
1210 parser->header_state = h_matching_proxy_connection;
1214 parser->header_state = h_matching_transfer_encoding;
1218 parser->header_state = h_matching_upgrade;
1222 parser->header_state = h_general;
1228 case s_header_field:
1230 const char* start = p;
1231 for (; p != data + len; p++) {
1238 switch (parser->header_state) {
1240 size_t limit = data + len - p;
1241 limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1242 while (p+1 < data + limit && TOKEN(p[1])) {
1250 parser->header_state = (c == 'o' ? h_CO : h_general);
1255 parser->header_state = (c == 'n' ? h_CON : h_general);
1262 parser->header_state = h_matching_connection;
1265 parser->header_state = h_matching_content_length;
1268 parser->header_state = h_general;
1275 case h_matching_connection:
1277 if (parser->index > sizeof(CONNECTION)-1
1278 || c != CONNECTION[parser->index]) {
1279 parser->header_state = h_general;
1280 } else if (parser->index == sizeof(CONNECTION)-2) {
1281 parser->header_state = h_connection;
1285 /* proxy-connection */
1287 case h_matching_proxy_connection:
1289 if (parser->index > sizeof(PROXY_CONNECTION)-1
1290 || c != PROXY_CONNECTION[parser->index]) {
1291 parser->header_state = h_general;
1292 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1293 parser->header_state = h_connection;
1297 /* content-length */
1299 case h_matching_content_length:
1301 if (parser->index > sizeof(CONTENT_LENGTH)-1
1302 || c != CONTENT_LENGTH[parser->index]) {
1303 parser->header_state = h_general;
1304 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1305 parser->header_state = h_content_length;
1309 /* transfer-encoding */
1311 case h_matching_transfer_encoding:
1313 if (parser->index > sizeof(TRANSFER_ENCODING)-1
1314 || c != TRANSFER_ENCODING[parser->index]) {
1315 parser->header_state = h_general;
1316 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1317 parser->header_state = h_transfer_encoding;
1323 case h_matching_upgrade:
1325 if (parser->index > sizeof(UPGRADE)-1
1326 || c != UPGRADE[parser->index]) {
1327 parser->header_state = h_general;
1328 } else if (parser->index == sizeof(UPGRADE)-2) {
1329 parser->header_state = h_upgrade;
1334 case h_content_length:
1335 case h_transfer_encoding:
1337 if (ch != ' ') parser->header_state = h_general;
1341 assert(0 && "Unknown header_state");
1346 if (p == data + len) {
1348 COUNT_HEADER_SIZE(p - start);
1352 COUNT_HEADER_SIZE(p - start);
1355 UPDATE_STATE(s_header_value_discard_ws);
1356 CALLBACK_DATA(header_field);
1360 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1364 case s_header_value_discard_ws:
1365 if (ch == ' ' || ch == '\t') break;
1368 UPDATE_STATE(s_header_value_discard_ws_almost_done);
1373 UPDATE_STATE(s_header_value_discard_lws);
1379 case s_header_value_start:
1383 UPDATE_STATE(s_header_value);
1388 switch (parser->header_state) {
1390 parser->flags |= F_UPGRADE;
1391 parser->header_state = h_general;
1394 case h_transfer_encoding:
1395 /* looking for 'Transfer-Encoding: chunked' */
1397 parser->header_state = h_matching_transfer_encoding_chunked;
1399 parser->header_state = h_general;
1403 case h_content_length:
1404 if (UNLIKELY(!IS_NUM(ch))) {
1405 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1409 if (parser->flags & F_CONTENTLENGTH) {
1410 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1414 parser->flags |= F_CONTENTLENGTH;
1415 parser->content_length = ch - '0';
1416 parser->header_state = h_content_length_num;
1420 /* looking for 'Connection: keep-alive' */
1422 parser->header_state = h_matching_connection_keep_alive;
1423 /* looking for 'Connection: close' */
1424 } else if (c == 'c') {
1425 parser->header_state = h_matching_connection_close;
1426 } else if (c == 'u') {
1427 parser->header_state = h_matching_connection_upgrade;
1429 parser->header_state = h_matching_connection_token;
1433 /* Multi-value `Connection` header */
1434 case h_matching_connection_token_start:
1438 parser->header_state = h_general;
1444 case s_header_value:
1446 const char* start = p;
1447 enum header_states h_state = (enum header_states) parser->header_state;
1448 for (; p != data + len; p++) {
1451 UPDATE_STATE(s_header_almost_done);
1452 parser->header_state = h_state;
1453 CALLBACK_DATA(header_value);
1458 UPDATE_STATE(s_header_almost_done);
1459 COUNT_HEADER_SIZE(p - start);
1460 parser->header_state = h_state;
1461 CALLBACK_DATA_NOADVANCE(header_value);
1465 if (!lenient && !IS_HEADER_CHAR(ch)) {
1466 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1477 size_t limit = data + len - p;
1479 limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1481 p_cr = (const char*) memchr(p, CR, limit);
1482 p_lf = (const char*) memchr(p, LF, limit);
1484 if (p_lf != NULL && p_cr >= p_lf)
1488 } else if (UNLIKELY(p_lf != NULL)) {
1498 case h_transfer_encoding:
1499 assert(0 && "Shouldn't get here.");
1502 case h_content_length:
1503 if (ch == ' ') break;
1504 h_state = h_content_length_num;
1507 case h_content_length_num:
1512 h_state = h_content_length_ws;
1516 if (UNLIKELY(!IS_NUM(ch))) {
1517 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1518 parser->header_state = h_state;
1522 t = parser->content_length;
1526 /* Overflow? Test against a conservative limit for simplicity. */
1527 if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1528 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1529 parser->header_state = h_state;
1533 parser->content_length = t;
1537 case h_content_length_ws:
1538 if (ch == ' ') break;
1539 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1540 parser->header_state = h_state;
1543 /* Transfer-Encoding: chunked */
1544 case h_matching_transfer_encoding_chunked:
1546 if (parser->index > sizeof(CHUNKED)-1
1547 || c != CHUNKED[parser->index]) {
1548 h_state = h_general;
1549 } else if (parser->index == sizeof(CHUNKED)-2) {
1550 h_state = h_transfer_encoding_chunked;
1554 case h_matching_connection_token_start:
1555 /* looking for 'Connection: keep-alive' */
1557 h_state = h_matching_connection_keep_alive;
1558 /* looking for 'Connection: close' */
1559 } else if (c == 'c') {
1560 h_state = h_matching_connection_close;
1561 } else if (c == 'u') {
1562 h_state = h_matching_connection_upgrade;
1563 } else if (STRICT_TOKEN(c)) {
1564 h_state = h_matching_connection_token;
1565 } else if (c == ' ' || c == '\t') {
1568 h_state = h_general;
1572 /* looking for 'Connection: keep-alive' */
1573 case h_matching_connection_keep_alive:
1575 if (parser->index > sizeof(KEEP_ALIVE)-1
1576 || c != KEEP_ALIVE[parser->index]) {
1577 h_state = h_matching_connection_token;
1578 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1579 h_state = h_connection_keep_alive;
1583 /* looking for 'Connection: close' */
1584 case h_matching_connection_close:
1586 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1587 h_state = h_matching_connection_token;
1588 } else if (parser->index == sizeof(CLOSE)-2) {
1589 h_state = h_connection_close;
1593 /* looking for 'Connection: upgrade' */
1594 case h_matching_connection_upgrade:
1596 if (parser->index > sizeof(UPGRADE) - 1 ||
1597 c != UPGRADE[parser->index]) {
1598 h_state = h_matching_connection_token;
1599 } else if (parser->index == sizeof(UPGRADE)-2) {
1600 h_state = h_connection_upgrade;
1604 case h_matching_connection_token:
1606 h_state = h_matching_connection_token_start;
1611 case h_transfer_encoding_chunked:
1612 if (ch != ' ') h_state = h_general;
1615 case h_connection_keep_alive:
1616 case h_connection_close:
1617 case h_connection_upgrade:
1619 if (h_state == h_connection_keep_alive) {
1620 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1621 } else if (h_state == h_connection_close) {
1622 parser->flags |= F_CONNECTION_CLOSE;
1623 } else if (h_state == h_connection_upgrade) {
1624 parser->flags |= F_CONNECTION_UPGRADE;
1626 h_state = h_matching_connection_token_start;
1628 } else if (ch != ' ') {
1629 h_state = h_matching_connection_token;
1634 UPDATE_STATE(s_header_value);
1635 h_state = h_general;
1639 parser->header_state = h_state;
1641 if (p == data + len)
1644 COUNT_HEADER_SIZE(p - start);
1648 case s_header_almost_done:
1650 if (UNLIKELY(ch != LF)) {
1651 SET_ERRNO(HPE_LF_EXPECTED);
1655 UPDATE_STATE(s_header_value_lws);
1659 case s_header_value_lws:
1661 if (ch == ' ' || ch == '\t') {
1662 UPDATE_STATE(s_header_value_start);
1666 /* finished the header */
1667 switch (parser->header_state) {
1668 case h_connection_keep_alive:
1669 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1671 case h_connection_close:
1672 parser->flags |= F_CONNECTION_CLOSE;
1674 case h_transfer_encoding_chunked:
1675 parser->flags |= F_CHUNKED;
1677 case h_connection_upgrade:
1678 parser->flags |= F_CONNECTION_UPGRADE;
1684 UPDATE_STATE(s_header_field_start);
1688 case s_header_value_discard_ws_almost_done:
1690 STRICT_CHECK(ch != LF);
1691 UPDATE_STATE(s_header_value_discard_lws);
1695 case s_header_value_discard_lws:
1697 if (ch == ' ' || ch == '\t') {
1698 UPDATE_STATE(s_header_value_discard_ws);
1701 switch (parser->header_state) {
1702 case h_connection_keep_alive:
1703 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1705 case h_connection_close:
1706 parser->flags |= F_CONNECTION_CLOSE;
1708 case h_connection_upgrade:
1709 parser->flags |= F_CONNECTION_UPGRADE;
1711 case h_transfer_encoding_chunked:
1712 parser->flags |= F_CHUNKED;
1718 /* header value was empty */
1720 UPDATE_STATE(s_header_field_start);
1721 CALLBACK_DATA_NOADVANCE(header_value);
1726 case s_headers_almost_done:
1728 STRICT_CHECK(ch != LF);
1730 if (parser->flags & F_TRAILING) {
1731 /* End of a chunked request */
1732 UPDATE_STATE(s_message_done);
1733 CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1737 /* Cannot use chunked encoding and a content-length header together
1738 per the HTTP specification. */
1739 if ((parser->flags & F_CHUNKED) &&
1740 (parser->flags & F_CONTENTLENGTH)) {
1741 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1745 UPDATE_STATE(s_headers_done);
1747 /* Set this here so that on_headers_complete() callbacks can see it */
1748 if ((parser->flags & F_UPGRADE) &&
1749 (parser->flags & F_CONNECTION_UPGRADE)) {
1750 /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1751 * mandatory only when it is a 101 Switching Protocols response,
1752 * otherwise it is purely informational, to announce support.
1755 (parser->type == HTTP_REQUEST || parser->status_code == 101);
1757 parser->upgrade = (parser->method == HTTP_CONNECT);
1760 /* Here we call the headers_complete callback. This is somewhat
1761 * different than other callbacks because if the user returns 1, we
1762 * will interpret that as saying that this message has no body. This
1763 * is needed for the annoying case of recieving a response to a HEAD
1766 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1767 * we have to simulate it by handling a change in errno below.
1769 if (settings->on_headers_complete) {
1770 switch (settings->on_headers_complete(parser)) {
1775 parser->upgrade = 1;
1779 parser->flags |= F_SKIPBODY;
1783 SET_ERRNO(HPE_CB_headers_complete);
1784 RETURN(p - data); /* Error */
1788 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1795 case s_headers_done:
1798 STRICT_CHECK(ch != LF);
1803 hasBody = parser->flags & F_CHUNKED ||
1804 (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1805 if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1806 (parser->flags & F_SKIPBODY) || !hasBody)) {
1807 /* Exit, the rest of the message is in a different protocol. */
1808 UPDATE_STATE(NEW_MESSAGE());
1809 CALLBACK_NOTIFY(message_complete);
1810 RETURN((p - data) + 1);
1813 if (parser->flags & F_SKIPBODY) {
1814 UPDATE_STATE(NEW_MESSAGE());
1815 CALLBACK_NOTIFY(message_complete);
1816 } else if (parser->flags & F_CHUNKED) {
1817 /* chunked encoding - ignore Content-Length header */
1818 UPDATE_STATE(s_chunk_size_start);
1820 if (parser->content_length == 0) {
1821 /* Content-Length header given but zero: Content-Length: 0\r\n */
1822 UPDATE_STATE(NEW_MESSAGE());
1823 CALLBACK_NOTIFY(message_complete);
1824 } else if (parser->content_length != ULLONG_MAX) {
1825 /* Content-Length header given and non-zero */
1826 UPDATE_STATE(s_body_identity);
1828 if (!http_message_needs_eof(parser)) {
1829 /* Assume content-length 0 - read the next */
1830 UPDATE_STATE(NEW_MESSAGE());
1831 CALLBACK_NOTIFY(message_complete);
1833 /* Read body until EOF */
1834 UPDATE_STATE(s_body_identity_eof);
1842 case s_body_identity:
1844 uint64_t to_read = MIN(parser->content_length,
1845 (uint64_t) ((data + len) - p));
1847 assert(parser->content_length != 0
1848 && parser->content_length != ULLONG_MAX);
1850 /* The difference between advancing content_length and p is because
1851 * the latter will automaticaly advance on the next loop iteration.
1852 * Further, if content_length ends up at 0, we want to see the last
1853 * byte again for our message complete callback.
1856 parser->content_length -= to_read;
1859 if (parser->content_length == 0) {
1860 UPDATE_STATE(s_message_done);
1862 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1864 * The alternative to doing this is to wait for the next byte to
1865 * trigger the data callback, just as in every other case. The
1866 * problem with this is that this makes it difficult for the test
1867 * harness to distinguish between complete-on-EOF and
1868 * complete-on-length. It's not clear that this distinction is
1869 * important for applications, but let's keep it for now.
1871 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1878 /* read until EOF */
1879 case s_body_identity_eof:
1885 case s_message_done:
1886 UPDATE_STATE(NEW_MESSAGE());
1887 CALLBACK_NOTIFY(message_complete);
1888 if (parser->upgrade) {
1889 /* Exit, the rest of the message is in a different protocol. */
1890 RETURN((p - data) + 1);
1894 case s_chunk_size_start:
1897 assert(parser->flags & F_CHUNKED);
1899 unhex_val = unhex[(unsigned char)ch];
1900 if (UNLIKELY(unhex_val == -1)) {
1901 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1905 parser->content_length = unhex_val;
1906 UPDATE_STATE(s_chunk_size);
1914 assert(parser->flags & F_CHUNKED);
1917 UPDATE_STATE(s_chunk_size_almost_done);
1921 unhex_val = unhex[(unsigned char)ch];
1923 if (unhex_val == -1) {
1924 if (ch == ';' || ch == ' ') {
1925 UPDATE_STATE(s_chunk_parameters);
1929 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1933 t = parser->content_length;
1937 /* Overflow? Test against a conservative limit for simplicity. */
1938 if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1939 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1943 parser->content_length = t;
1947 case s_chunk_parameters:
1949 assert(parser->flags & F_CHUNKED);
1950 /* just ignore this shit. TODO check for overflow */
1952 UPDATE_STATE(s_chunk_size_almost_done);
1958 case s_chunk_size_almost_done:
1960 assert(parser->flags & F_CHUNKED);
1961 STRICT_CHECK(ch != LF);
1966 if (parser->content_length == 0) {
1967 parser->flags |= F_TRAILING;
1968 UPDATE_STATE(s_header_field_start);
1970 UPDATE_STATE(s_chunk_data);
1972 CALLBACK_NOTIFY(chunk_header);
1978 uint64_t to_read = MIN(parser->content_length,
1979 (uint64_t) ((data + len) - p));
1981 assert(parser->flags & F_CHUNKED);
1982 assert(parser->content_length != 0
1983 && parser->content_length != ULLONG_MAX);
1985 /* See the explanation in s_body_identity for why the content
1986 * length and data pointers are managed this way.
1989 parser->content_length -= to_read;
1992 if (parser->content_length == 0) {
1993 UPDATE_STATE(s_chunk_data_almost_done);
1999 case s_chunk_data_almost_done:
2000 assert(parser->flags & F_CHUNKED);
2001 assert(parser->content_length == 0);
2002 STRICT_CHECK(ch != CR);
2003 UPDATE_STATE(s_chunk_data_done);
2004 CALLBACK_DATA(body);
2007 case s_chunk_data_done:
2008 assert(parser->flags & F_CHUNKED);
2009 STRICT_CHECK(ch != LF);
2012 UPDATE_STATE(s_chunk_size_start);
2013 CALLBACK_NOTIFY(chunk_complete);
2017 assert(0 && "unhandled state");
2018 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2023 /* Run callbacks for any marks that we have leftover after we ran out of
2024 * bytes. There should be at most one of these set, so it's OK to invoke
2025 * them in series (unset marks will not result in callbacks).
2027 * We use the NOADVANCE() variety of callbacks here because 'p' has already
2028 * overflowed 'data' and this allows us to correct for the off-by-one that
2029 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2030 * value that's in-bounds).
2033 assert(((header_field_mark ? 1 : 0) +
2034 (header_value_mark ? 1 : 0) +
2035 (url_mark ? 1 : 0) +
2036 (body_mark ? 1 : 0) +
2037 (status_mark ? 1 : 0)) <= 1);
2039 CALLBACK_DATA_NOADVANCE(header_field);
2040 CALLBACK_DATA_NOADVANCE(header_value);
2041 CALLBACK_DATA_NOADVANCE(url);
2042 CALLBACK_DATA_NOADVANCE(body);
2043 CALLBACK_DATA_NOADVANCE(status);
2048 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2049 SET_ERRNO(HPE_UNKNOWN);
2056 /* Does the parser need to see an EOF to find the end of the message? */
/* Reads parser->type, parser->status_code, parser->flags and
 * parser->content_length; the parser is taken as const and the visible
 * statements only read from it.
 *
 * Three conditions are tested in order (numbered below).
 * NOTE(review): the return type and the value returned from each branch are
 * not visible in this excerpt -- presumably every matching condition answers
 * "no EOF needed"; confirm against the complete file.
 */
2058 http_message_needs_eof (const http_parser *parser)
/* 1. The parser is of type HTTP_REQUEST. */
2060 if (parser->type == HTTP_REQUEST) {
2064 /* See RFC 2616 section 4.4 */
/* 2. Response status is 1xx, 204 or 304, or the F_SKIPBODY flag is set. */
2065 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2066 parser->status_code == 204 || /* No Content */
2067 parser->status_code == 304 || /* Not Modified */
2068 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
/* 3. The message is chunked, or content_length differs from ULLONG_MAX
 *    (elsewhere in this file content_length != ULLONG_MAX is treated as
 *    "a Content-Length header was given"). */
2072 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
/* Decides whether the connection can be reused once the current message is
 * finished. Reads parser->http_major, parser->http_minor and parser->flags;
 * the parser is taken as const.
 *
 * The last visible statement returns the negation of
 * http_message_needs_eof(parser): a message that can only be terminated by
 * EOF rules out keep-alive.
 * NOTE(review): the return type, the returns inside the two flag checks and
 * the keyword separating the two version branches are not visible in this
 * excerpt -- presumably both checks return 0; confirm against the full file.
 */
2081 http_should_keep_alive (const http_parser *parser)
/* Both version numbers non-zero: the F_CONNECTION_CLOSE flag is examined.
 * NOTE(review): a version whose minor number is 0 does not satisfy this
 * test and therefore takes the other path. */
2083 if (parser->http_major > 0 && parser->http_minor > 0) {
2085 if (parser->flags & F_CONNECTION_CLOSE) {
2089 /* HTTP/1.0 or earlier */
/* Here the absence of F_CONNECTION_KEEP_ALIVE is what is tested. */
2090 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2095 return !http_message_needs_eof(parser);
/* Returns the entry of method_strings indexed by m. ELEM_AT compares the
 * index (converted to unsigned int) against ARRAY_SIZE(method_strings) and
 * yields "<unknown>" when m is outside the table, so the table is never
 * indexed out of bounds.
 * NOTE(review): the return type is not visible in this excerpt.
 */
2100 http_method_str (enum http_method m)
2102 return ELEM_AT(method_strings, m, "<unknown>");
/* Maps the status value s to a string.
 * NOTE(review): the return type, the switch head and the list that expands
 * XX are not visible in this excerpt.
 */
2106 http_status_str (enum http_status s)
/* Each expansion of XX produces one case label HTTP_STATUS_<name> that
 * returns the stringified third macro argument (#string). The num argument
 * is not used by this expansion. */
2109 #define XX(num, name, string) case HTTP_STATUS_##name: return #string;
/* Any value without a generated case label. */
2112 default: return "<unknown>";
/* (Re)initialises *parser for messages of kind t.
 *
 * The whole struct is zero-filled, the previous parser->data value is put
 * back, the start state is chosen from t, and http_errno is set to HPE_OK.
 * NOTE(review): parser->data is read before the struct is cleared, so a
 * struct that was never initialised gets back whatever value was stored
 * there.
 * NOTE(review): the return type is not visible in this excerpt, and at
 * least one line between the data restore and the state assignment is
 * elided.
 */
2117 http_parser_init (http_parser *parser, enum http_parser_type t)
2119 void *data = parser->data; /* preserve application data */
2120 memset(parser, 0, sizeof(*parser));
2121 parser->data = data;
/* HTTP_REQUEST -> s_start_req, HTTP_RESPONSE -> s_start_res, any other
 * value of t -> s_start_req_or_res. */
2123 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2124 parser->http_errno = HPE_OK;
/* Zero-fills *settings: every byte of the structure is set to 0 with
 * memset. The caller must pass a valid, writable pointer; it is not
 * checked.
 * NOTE(review): the return type is not visible in this excerpt.
 */
2128 http_parser_settings_init(http_parser_settings *settings)
2130 memset(settings, 0, sizeof(*settings));
/* Returns the name field of the http_strerror_tab entry indexed by err.
 * NOTE(review): the return type is not visible in this excerpt.
 */
2134 http_errno_name(enum http_errno err) {
/* The range check is an assert only: when assertions are compiled out
 * (NDEBUG) an out-of-range err indexes past the end of the table. */
2135 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2136 return http_strerror_tab[err].name;
/* Returns the description field of the http_strerror_tab entry indexed by
 * err.
 * NOTE(review): the return type is not visible in this excerpt.
 */
2140 http_errno_description(enum http_errno err) {
/* The range check is an assert only: when assertions are compiled out
 * (NDEBUG) an out-of-range err indexes past the end of the table. */
2141 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2142 return http_strerror_tab[err].description;
/* Single step of the host-section state machine: given the current state s
 * and the next character ch, returns the following http_host_state.
 *
 * The last visible statement returns s_http_host_dead; presumably that is
 * the result for every character that no case accepts.
 * NOTE(review): several guards, returns and the statements separating the
 * cases are not visible in this excerpt, so which cases run into the next
 * one cannot be confirmed from here. The notes below describe only what is
 * visible.
 */
2145 static enum http_host_state
2146 http_parse_host_char(enum http_host_state s, const char ch) {
/* Userinfo: one visible path moves on to s_http_host_start (its guard is
 * elided); characters accepted by IS_USERINFO_CHAR stay in userinfo. */
2148 case s_http_userinfo:
2149 case s_http_userinfo_start:
2151 return s_http_host_start;
2154 if (IS_USERINFO_CHAR(ch)) {
2155 return s_http_userinfo;
/* Start of host: one visible path enters the IPv6 literal states (guard
 * elided). The two IS_HOST_CHAR tests below have their results elided. */
2159 case s_http_host_start:
2161 return s_http_host_v6_start;
2164 if (IS_HOST_CHAR(ch)) {
2171 if (IS_HOST_CHAR(ch)) {
/* After the end of an IPv6 literal the visible transition is to the start
 * of the port (guard elided). */
2176 case s_http_host_v6_end:
2178 return s_http_host_port_start;
2183 case s_http_host_v6:
2185 return s_http_host_v6_end;
/* Inside an IPv6 literal: hex digits, ':' and '.' are accepted. '%' starts
 * a zone id, but only when the current state is s_http_host_v6, i.e. not
 * as the first character of the literal. */
2189 case s_http_host_v6_start:
2190 if (IS_HEX(ch) || ch == ':' || ch == '.') {
2191 return s_http_host_v6;
2194 if (s == s_http_host_v6 && ch == '%') {
2195 return s_http_host_v6_zone_start;
2199 case s_http_host_v6_zone:
2201 return s_http_host_v6_end;
2205 case s_http_host_v6_zone_start:
2206 /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
/* NOTE(review): the remainder of this condition is on an elided line. */
2207 if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2209 return s_http_host_v6_zone;
/* Port: the visible result is s_http_host_port (guard elided). */
2213 case s_http_host_port:
2214 case s_http_host_port_start:
2216 return s_http_host_port;
2224 return s_http_host_dead;
/* Re-scans the span recorded in u->field_data[UF_HOST] and splits it into
 * userinfo, host and port sub-fields.
 *
 *   buf      - buffer that the offsets in u refer to
 *   u        - in: UF_HOST must be set (asserted); out: UF_HOST off/len are
 *              recomputed, UF_PORT and UF_USERINFO off/len/field_set bits
 *              are filled in when those parts are seen
 *   found_at - non-zero selects s_http_userinfo_start as the first state,
 *              zero selects s_http_host_start
 *
 * NOTE(review): the return type, the return statements, the switch heads
 * and the statement that carries new_s over into s are not visible in this
 * excerpt; the caller in this file treats a non-zero result as failure.
 */
2228 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2229 enum http_host_state s;
/* End of the span to scan, captured before the host length is reset. */
2232 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2234 assert(u->field_set & (1 << UF_HOST));
/* The host length is rebuilt character by character in the loop. */
2236 u->field_data[UF_HOST].len = 0;
2238 s = found_at ? s_http_userinfo_start : s_http_host_start;
2240 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2241 enum http_host_state new_s = http_parse_host_char(s, *p);
/* A rejected character ends the scan (the statement taken is elided). */
2243 if (new_s == s_http_host_dead) {
/* Host name: the offset is recorded on the first host character (previous
 * state differs), the length grows with every character. */
2249 if (s != s_http_host) {
2250 u->field_data[UF_HOST].off = p - buf;
2252 u->field_data[UF_HOST].len++;
/* IPv6 literal: same bookkeeping as for a host name. */
2255 case s_http_host_v6:
2256 if (s != s_http_host_v6) {
2257 u->field_data[UF_HOST].off = p - buf;
2259 u->field_data[UF_HOST].len++;
/* Zone id characters are counted as part of the host. */
2262 case s_http_host_v6_zone_start:
2263 case s_http_host_v6_zone:
2264 u->field_data[UF_HOST].len++;
/* Port: on its first character the field is started and marked as set. */
2267 case s_http_host_port:
2268 if (s != s_http_host_port) {
2269 u->field_data[UF_PORT].off = p - buf;
2270 u->field_data[UF_PORT].len = 0;
2271 u->field_set |= (1 << UF_PORT);
2273 u->field_data[UF_PORT].len++;
/* Userinfo: on its first character the field is started and marked as
 * set. */
2276 case s_http_userinfo:
2277 if (s != s_http_userinfo) {
2278 u->field_data[UF_USERINFO].off = p - buf ;
2279 u->field_data[UF_USERINFO].len = 0;
2280 u->field_set |= (1 << UF_USERINFO);
2282 u->field_data[UF_USERINFO].len++;
2291 /* Make sure we don't end somewhere unexpected */
/* The states listed here are the ones in which the input may not stop,
 * e.g. an unfinished IPv6 literal, an empty port or userinfo without a
 * host (the statement they lead to is elided). */
2293 case s_http_host_start:
2294 case s_http_host_v6_start:
2295 case s_http_host_v6:
2296 case s_http_host_v6_zone_start:
2297 case s_http_host_v6_zone:
2298 case s_http_host_port_start:
2299 case s_http_userinfo:
2300 case s_http_userinfo_start:
/* Zero-fills *u: every byte of the structure is set to 0 with memset. The
 * caller must pass a valid, writable pointer; it is not checked.
 * NOTE(review): the return type is not visible in this excerpt.
 */
2310 http_parser_url_init(struct http_parser_url *u) {
2311 memset(u, 0, sizeof(*u));
/* Splits the URL in buf[0..buflen) into fields and records them in *u.
 *
 *   buf, buflen - URL bytes; only buflen bytes are read
 *   is_connect  - non-zero: parsing starts in s_req_server_start and the
 *                 result must consist of exactly host and port
 *   u           - out: field_set bitmask, field_data[] off/len per field
 *                 (offsets are relative to buf) and the numeric port
 *
 * NOTE(review): the return type, all return statements, several local
 * declarations (including found_at) and the assignments that select uf are
 * not visible in this excerpt -- presumably 0 means success and non-zero
 * failure, matching how this function tests http_parse_host; confirm
 * against the full file.
 */
2315 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2316 struct http_parser_url *u)
2320 enum http_parser_url_fields uf, old_uf;
/* Start from an empty result. */
2327 u->port = u->field_set = 0;
2328 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2331 for (p = buf; p < buf + buflen; p++) {
2332 s = parse_url_char(s, *p);
2334 /* Figure out the next field that we're operating on */
2339 /* Skip delimiters */
2340 case s_req_schema_slash:
2341 case s_req_schema_slash_slash:
2342 case s_req_server_start:
2343 case s_req_query_string_start:
2344 case s_req_fragment_start:
2351 case s_req_server_with_at:
2363 case s_req_query_string:
2367 case s_req_fragment:
2372 assert(!"Unexpected state");
2376 /* Nothing's changed; soldier on */
2378 u->field_data[uf].len++;
/* First character of a field: record where it starts, give it length 1 and
 * mark the field as present. */
2382 u->field_data[uf].off = p - buf;
2383 u->field_data[uf].len = 1;
2385 u->field_set |= (1 << uf);
2389 /* host must be present if there is a schema */
2390 /* parsing http:///toto will fail */
2391 if ((u->field_set & (1 << UF_SCHEMA)) &&
2392 (u->field_set & (1 << UF_HOST)) == 0) {
/* Break the host span down further (userinfo / host / port); a non-zero
 * result from http_parse_host is handled as a failure. */
2396 if (u->field_set & (1 << UF_HOST)) {
2397 if (http_parse_host(buf, u, found_at) != 0) {
2402 /* CONNECT requests can only contain "hostname:port" */
2403 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
/* Convert the port digits into the numeric u->port. */
2407 if (u->field_set & (1 << UF_PORT)) {
2414 off = u->field_data[UF_PORT].off;
2415 len = u->field_data[UF_PORT].len;
2416 end = buf + off + len;
2418 /* NOTE: The characters are already validated and are in the [0-9] range */
2419 assert(off + len <= buflen && "Port number overflow");
2421 for (p = buf + off; p < end; p++) {
2425 /* Ports have a max value of 2^16 - 1 (u->port is stored as uint16_t) */
2431 u->port = (uint16_t) v;
/* Pauses (paused != 0) or resumes (paused == 0) the parser.
 *
 * When the parser's current errno is HPE_OK or HPE_PAUSED, SET_ERRNO stores
 * HPE_PAUSED or HPE_OK in parser->http_errno. SET_ERRNO also writes
 * parser->nread from a local named nread, which is why the local copy is
 * declared first; parser->nread therefore keeps its value.
 * For any other errno the visible statement is an assert that always fails
 * in builds with assertions enabled.
 * NOTE(review): the return type, the end of the comment below and the
 * keyword separating the two branches are not visible in this excerpt.
 */
2438 http_parser_pause(http_parser *parser, int paused) {
2439 /* Users should only be pausing/unpausing a parser that is not in an error
2440 * state. In non-debug builds, there's not much that we can do about this
2441 * other than ignore it.
2443 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2444 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2445 uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
2446 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2448 assert(0 && "Attempting to pause parser in error state");
/* Returns non-zero exactly when parser->state equals s_message_done, zero
 * otherwise. The parser is taken as const and only read.
 * NOTE(review): presumably meant to be called from a body callback to tell
 * whether the chunk being delivered is the last one -- confirm against the
 * public header. The return type is not visible in this excerpt.
 */
2453 http_body_is_final(const struct http_parser *parser) {
2454 return parser->state == s_message_done;
2459 return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2460 HTTP_PARSER_VERSION_MINOR * 0x00100 |
2461 HTTP_PARSER_VERSION_PATCH * 0x00001;