Commit 8114ee4c by Vicent Martí

Merge pull request #405 from carlosmn/http-ls

Implement ls-remote over HTTP
parents e1b86444 4ee8418a
......@@ -22,7 +22,10 @@ STRING(REGEX REPLACE "^.*LIBGIT2_VERSION \"[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1"
SET(LIBGIT2_VERSION_STRING "${LIBGIT2_VERSION_MAJOR}.${LIBGIT2_VERSION_MINOR}.${LIBGIT2_VERSION_REV}")
# Find required dependencies
INCLUDE_DIRECTORIES(src include)
INCLUDE_DIRECTORIES(src include deps/http-parser)
FILE(GLOB SRC_HTTP deps/http-parser/*.c)
IF (NOT WIN32)
FIND_PACKAGE(ZLIB)
ENDIF()
......@@ -91,7 +94,7 @@ ELSE()
ENDIF ()
# Compile and link libgit2
ADD_LIBRARY(git2 ${SRC} ${SRC_ZLIB})
ADD_LIBRARY(git2 ${SRC} ${SRC_ZLIB} ${SRC_HTTP})
IF (WIN32)
TARGET_LINK_LIBRARIES(git2 ws2_32)
......@@ -122,7 +125,7 @@ IF (BUILD_TESTS)
INCLUDE_DIRECTORIES(tests)
FILE(GLOB SRC_TEST tests/t??-*.c)
ADD_EXECUTABLE(libgit2_test tests/test_main.c tests/test_lib.c tests/test_helpers.c ${SRC} ${SRC_TEST} ${SRC_ZLIB})
ADD_EXECUTABLE(libgit2_test tests/test_main.c tests/test_lib.c tests/test_helpers.c ${SRC} ${SRC_TEST} ${SRC_ZLIB} ${SRC_HTTP})
TARGET_LINK_LIBRARIES(libgit2_test ${CMAKE_THREAD_LIBS_INIT})
IF (WIN32)
TARGET_LINK_LIBRARIES(libgit2_test ws2_32)
......
http_parser.c is based on src/http/ngx_http_parse.c from NGINX copyright
Igor Sysoev.
Additional changes are licensed under the same terms as NGINX and
copyright Joyent, Inc. and other Node contributors. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
*
* Additional changes are licensed under the same terms as NGINX and
* copyright Joyent, Inc. and other Node contributors. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <http_parser.h>
#include <assert.h>
#include <stddef.h>
#ifndef MIN
/* Smaller of two values. Each argument may be evaluated twice, so do not
 * pass expressions with side effects. */
# define MIN(a,b) ((b) > (a) ? (a) : (b))
#endif
/* SET_ERRNO(e): store error code `e` in parser->http_errno.
 *
 * Expands in place, so a variable named `parser` must be in scope at the
 * call site. When HTTP_PARSER_DEBUG is non-zero the source line of the
 * expansion is additionally stored in parser->error_lineno.
 */
#if HTTP_PARSER_DEBUG
#define SET_ERRNO(e) \
do { \
parser->http_errno = (e); \
parser->error_lineno = __LINE__; \
} while (0)
#else
#define SET_ERRNO(e) \
do { \
parser->http_errno = (e); \
} while(0)
#endif
/* CALLBACK2(FOR): invoke the notification callback settings->on_FOR(parser)
 * if one is installed.
 *
 * A non-zero result from the callback sets the error code HPE_CB_FOR and
 * RETURNS FROM THE ENCLOSING FUNCTION with the byte offset (p - data).
 * Requires `settings`, `parser`, `p` and `data` to be in scope.
 */
#define CALLBACK2(FOR) \
do { \
if (settings->on_##FOR) { \
if (0 != settings->on_##FOR(parser)) { \
SET_ERRNO(HPE_CB_##FOR); \
return (p - data); \
} \
} \
} while (0)
/* MARK(FOR): remember the current read position `p` in the local variable
 * FOR_mark; CALLBACK(FOR) later reports the span from that position. */
#define MARK(FOR) \
do { \
FOR##_mark = p; \
} while (0)
/* CALLBACK(FOR): if FOR_mark is set, invoke the data callback
 * settings->on_FOR(parser, FOR_mark, p - FOR_mark) (when installed) and
 * then clear FOR_mark.
 *
 * A non-zero result from the callback sets the error code HPE_CB_FOR and
 * RETURNS FROM THE ENCLOSING FUNCTION with the byte offset (p - data); in
 * that case the mark is not cleared. Does nothing when FOR_mark is NULL.
 * Requires `settings`, `parser`, `p`, `data` and FOR_mark to be in scope.
 */
#define CALLBACK(FOR) \
do { \
if (FOR##_mark) { \
if (settings->on_##FOR) { \
if (0 != settings->on_##FOR(parser, \
FOR##_mark, \
p - FOR##_mark)) \
{ \
SET_ERRNO(HPE_CB_##FOR); \
return (p - data); \
} \
} \
FOR##_mark = NULL; \
} \
} while (0)
/* Header field names the parser recognises. They are written in lower case
 * because http_parser_execute compares them against lower-cased input
 * bytes, one character at a time, using sizeof() for the length. */
#define PROXY_CONNECTION "proxy-connection"
#define CONNECTION "connection"
#define CONTENT_LENGTH "content-length"
#define TRANSFER_ENCODING "transfer-encoding"
#define UPGRADE "upgrade"
/* Header values the parser recognises (matched the same way): "chunked"
 * for Transfer-Encoding, "keep-alive" and "close" for Connection. */
#define CHUNKED "chunked"
#define KEEP_ALIVE "keep-alive"
#define CLOSE "close"
/* Request method names, indexed by the numeric value stored in
 * parser->method. http_parser_execute matches incoming method bytes against
 * these strings and http_method_str() returns them, so the order of the
 * entries must not change.
 */
static const char *method_strings[] = {
  "DELETE",
  "GET",
  "HEAD",
  "POST",
  "PUT",
  "CONNECT",
  "OPTIONS",
  "TRACE",
  "COPY",
  "LOCK",
  "MKCOL",
  "MOVE",
  "PROPFIND",
  "PROPPATCH",
  "UNLOCK",
  "REPORT",
  "MKACTIVITY",
  "CHECKOUT",
  "MERGE",
  "M-SEARCH",
  "NOTIFY",
  "SUBSCRIBE",
  "UNSUBSCRIBE",
  "PATCH"
};
/* Tokens as defined by rfc 2616. Also lowercases them.
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * tokens[b] is 0 when byte b may not appear in a token; otherwise it is the
 * lower-cased form of b. Only the first 128 entries are listed; the rest of
 * the 256-entry array is zero-initialised, so bytes >= 0x80 are never
 * tokens.
 *
 * The separators '"', '/' and '}' map to 0, as the grammar above requires.
 * SP is deliberately still accepted: the header-field matching code in
 * http_parser_execute tests for ' ' after a recognised header name, which
 * only works while space is treated as a token character.
 */
static const char tokens[256] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
       ' ',     '!',      0,      '#',     '$',     '%',     '&',    '\'',
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        0,       0,      '*',     '+',      0,      '-',     '.',      0,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
       '8',     '9',      0,       0,       0,       0,       0,       0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
       'x',     'y',     'z',      0,       0,       0,      '^',     '_',
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
       '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
       'x',     'y',     'z',      0,      '|',      0,      '~',      0 };
/* Value of a hexadecimal digit, indexed by byte: 0-15 for '0'-'9', 'A'-'F'
 * and 'a'-'f', and -1 for every other byte.
 *
 * All 256 entries are spelled out on purpose. Elements omitted from an
 * initialiser are zero-initialised, so a shorter list would make bytes
 * 0x80-0xFF look like the valid digit 0 instead of being rejected.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x00 - 0x0F */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x10 - 0x1F */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x20 - 0x2F */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1   /* 0x30 - 0x3F: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x40 - 0x4F: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x50 - 0x5F */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x60 - 0x6F: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x70 - 0x7F */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x80 - 0x8F */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x90 - 0x9F */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xA0 - 0xAF */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xB0 - 0xBF */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xC0 - 0xCF */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xD0 - 0xDF */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xE0 - 0xEF */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xF0 - 0xFF */
  };
/* Lookup table for the IS_URL_CHAR() macro: 1 when the byte may continue a
 * URL component without changing parser state, 0 otherwise. Control
 * characters, space, '#', '?' and DEL are 0. Sixteen entries per row; the
 * upper half of the array (bytes 0x80-0xFF) is left to zero-initialisation.
 */
static const uint8_t normal_url_char[256] = {
  /* 0x00 - 0x0F: control characters */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 - 0x1F: control characters */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 - 0x2F: sp ! " # $ % & ' ( ) * + , - . /   (sp and # excluded) */
  0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 - 0x3F: 0-9 : ; < = > ?                    (? excluded) */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
  /* 0x40 - 0x4F: @ A-O */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 - 0x5F: P-Z [ \ ] ^ _ */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 - 0x6F: ` a-o */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 - 0x7F: p-z { | } ~ del                    (del excluded) */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
/* States of the main parsing state machine in http_parser_execute.
 *
 * The numeric order matters: PARSING_HEADER() treats every state up to and
 * including s_headers_almost_done as a "header" state (counted against the
 * header size limit) and every later state as a "body" state.
 */
enum state {
  s_dead = 1, /* important that this is > 0 */

  s_start_req_or_res,

  /* response status line */
  s_res_or_resp_H,
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_first_http_major,
  s_res_http_major,
  s_res_first_http_minor,
  s_res_http_minor,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status,
  s_res_line_almost_done,

  /* request line */
  s_start_req,
  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_host,
  s_req_port,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_first_http_major,
  s_req_http_major,
  s_req_first_http_minor,
  s_req_http_minor,
  s_req_line_almost_done,

  /* header lines */
  s_header_field_start,
  s_header_field,
  s_header_value_start,
  s_header_value,
  s_header_value_lws,
  s_header_almost_done,

  /* chunk size line */
  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  /* Important: 's_headers_almost_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,
  s_body_identity,
  s_body_identity_eof
};

/* Non-zero while `state` is one of the header states (see enum state). The
 * argument is parenthesised so that any expression can be passed safely. */
#define PARSING_HEADER(state) ((state) <= s_headers_almost_done)
/* Sub-state used while parsing a header line. It tracks whether the field
 * name (and later the value) still matches one of the headers or values
 * the parser gives special treatment to; h_general means "no special
 * handling".
 */
enum header_states {
  h_general = 0,

  /* shared prefix of "connection" and "content-length" */
  h_C,
  h_CO,
  h_CON,

  /* field name partially matched */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* field name fully matched */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* field value partially matched */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  /* field value fully matched */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
/* Macros for character classes; depends on strict-mode.
 *
 * Every macro parenthesises its argument and its whole expansion so it can
 * be used with arbitrary expressions. Several of them evaluate the argument
 * more than once, so do not pass expressions with side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* ASCII lower-casing by setting bit 0x20; only meaningful for letters. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
/* Lower-cased token character, or 0 if `c` may not appear in a token. */
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))

#if HTTP_PARSER_STRICT
#define IS_URL_CHAR(c)      (normal_url_char[(unsigned char) (c)])
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Non-strict mode additionally accepts bytes with the high bit set in URLs
 * and '_' in host names. */
#define IS_URL_CHAR(c)                                                         \
  (normal_url_char[(unsigned char) (c)] || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
/* State in which the next message begins, chosen from parser->type: request
 * parsers restart at s_start_req, everything else at s_start_res. Requires
 * `parser` to be in scope. */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
#if HTTP_PARSER_STRICT
/* Strict mode: STRICT_CHECK(cond) fails the parse with HPE_STRICT (jumping
 * to the local `error` label) when `cond` is true. */
# define STRICT_CHECK(cond) \
do { \
if (cond) { \
SET_ERRNO(HPE_STRICT); \
goto error; \
} \
} while (0)
/* Strict mode: after a message completes, accept another one only if the
 * connection is to be kept alive; otherwise move to s_dead. */
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
/* Non-strict mode: STRICT_CHECK expands to nothing and a new message is
 * always accepted. */
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif
/* Map errno values to strings for human-readable output */
/* Each table entry is { "HPE_<name>", "<description>" }, generated by
 * expanding HTTP_ERRNO_MAP (defined outside this file) with the generator
 * macro below. http_errno_name() and http_errno_description() index the
 * table directly with the enum http_errno value. */
#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
static struct {
const char *name;
const char *description;
} http_strerror_tab[] = {
HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
};
#undef HTTP_STRERROR_GEN
/*
 * Run `len` bytes starting at `data` through the HTTP state machine.
 *
 * The parser is resumable: the state, header sub-state, match index and
 * header byte count are loaded from `parser` on entry and written back when
 * the whole buffer has been consumed. Callbacks in `settings` are invoked as
 * pieces of the message are recognised; data callbacks (url, header field,
 * header value) receive pointers into `data` and may be called several times
 * for one logical element when it spans multiple buffers.
 *
 * Calling with len == 0 signals end of input.
 *
 * Returns the number of bytes consumed:
 *   - `len` when the whole buffer was parsed without error;
 *   - the offset of the offending byte when an error is detected or a
 *     callback fails, with the reason stored in parser->http_errno;
 *   - the offset just past the end of the headers when parser->upgrade is
 *     set, since the remaining bytes belong to another protocol;
 *   - 0 immediately when the parser is already in an error state;
 *   - for len == 0: 0 when EOF is acceptable in the current state, 1 (with
 *     HPE_INVALID_EOF_STATE set) when it is not.
 *
 * All declarations sit at the start of their block so the function also
 * builds with compilers that only accept C89 declaration placement.
 */
size_t http_parser_execute (http_parser *parser,
                            const http_parser_settings *settings,
                            const char *data,
                            size_t len)
{
  char c, ch;
  int8_t unhex_val;
  const char *p = data, *pe;
  int64_t to_read;
  enum state state;
  enum header_states header_state;
  uint64_t index = parser->index;
  uint64_t nread = parser->nread;

  /* technically we could combine all of these (except for url_mark) into one
     variable, saving stack space, but it seems more clear to have them
     separated. */
  const char *header_field_mark = NULL;
  const char *header_value_mark = NULL;
  const char *url_mark = NULL;

  /* We're in an error state. Don't bother doing anything. */
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
    return 0;
  }

  state = (enum state) parser->state;
  header_state = (enum header_states) parser->header_state;

  if (len == 0) {
    switch (state) {
      case s_body_identity_eof:
        /* A body delimited by EOF ends here. */
        CALLBACK2(message_complete);
        return 0;

      case s_dead:
      case s_start_req_or_res:
      case s_start_res:
      case s_start_req:
        return 0;

      default:
        SET_ERRNO(HPE_INVALID_EOF_STATE);
        return 1;
    }
  }

  /* If the previous buffer ended in the middle of an element, that element
   * continues at the first byte of this buffer. */
  if (state == s_header_field)
    header_field_mark = data;
  if (state == s_header_value)
    header_value_mark = data;
  if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
      || state == s_req_schema_slash_slash || state == s_req_port
      || state == s_req_query_string_start || state == s_req_query_string
      || state == s_req_host
      || state == s_req_fragment_start || state == s_req_fragment)
    url_mark = data;

  for (p=data, pe=data+len; p != pe; p++) {
    ch = *p;

    if (PARSING_HEADER(state)) {
      ++nread;
      /* Buffer overflow attack */
      if (nread > HTTP_MAX_HEADER_SIZE) {
        SET_ERRNO(HPE_HEADER_OVERFLOW);
        goto error;
      }
    }

    switch (state) {

      case s_dead:
        /* this state is used after a 'Connection: close' message
         * the parser will error out if it reads another message
         */
        SET_ERRNO(HPE_CLOSED_CONNECTION);
        goto error;

      case s_start_req_or_res:
      {
        if (ch == CR || ch == LF)
          break;
        parser->flags = 0;
        parser->content_length = -1;

        CALLBACK2(message_begin);

        if (ch == 'H')
          state = s_res_or_resp_H;
        else {
          parser->type = HTTP_REQUEST;
          goto start_req_method_assign;
        }
        break;
      }

      case s_res_or_resp_H:
        if (ch == 'T') {
          parser->type = HTTP_RESPONSE;
          state = s_res_HT;
        } else {
          if (ch != 'E') {
            SET_ERRNO(HPE_INVALID_CONSTANT);
            goto error;
          }

          /* "HE" can only be the start of a HEAD request. */
          parser->type = HTTP_REQUEST;
          parser->method = HTTP_HEAD;
          index = 2;
          state = s_req_method;
        }
        break;

      case s_start_res:
      {
        parser->flags = 0;
        parser->content_length = -1;

        CALLBACK2(message_begin);

        switch (ch) {
          case 'H':
            state = s_res_H;
            break;

          case CR:
          case LF:
            break;

          default:
            SET_ERRNO(HPE_INVALID_CONSTANT);
            goto error;
        }
        break;
      }

      case s_res_H:
        STRICT_CHECK(ch != 'T');
        state = s_res_HT;
        break;

      case s_res_HT:
        STRICT_CHECK(ch != 'T');
        state = s_res_HTT;
        break;

      case s_res_HTT:
        STRICT_CHECK(ch != 'P');
        state = s_res_HTTP;
        break;

      case s_res_HTTP:
        STRICT_CHECK(ch != '/');
        state = s_res_first_http_major;
        break;

      case s_res_first_http_major:
        if (ch < '1' || ch > '9') {
          SET_ERRNO(HPE_INVALID_VERSION);
          goto error;
        }

        parser->http_major = ch - '0';
        state = s_res_http_major;
        break;

      /* major HTTP version or dot */
      case s_res_http_major:
      {
        if (ch == '.') {
          state = s_res_first_http_minor;
          break;
        }

        if (!IS_NUM(ch)) {
          SET_ERRNO(HPE_INVALID_VERSION);
          goto error;
        }

        parser->http_major *= 10;
        parser->http_major += ch - '0';

        if (parser->http_major > 999) {
          SET_ERRNO(HPE_INVALID_VERSION);
          goto error;
        }

        break;
      }

      /* first digit of minor HTTP version */
      case s_res_first_http_minor:
        if (!IS_NUM(ch)) {
          SET_ERRNO(HPE_INVALID_VERSION);
          goto error;
        }

        parser->http_minor = ch - '0';
        state = s_res_http_minor;
        break;

      /* minor HTTP version or end of request line */
      case s_res_http_minor:
      {
        if (ch == ' ') {
          state = s_res_first_status_code;
          break;
        }

        if (!IS_NUM(ch)) {
          SET_ERRNO(HPE_INVALID_VERSION);
          goto error;
        }

        parser->http_minor *= 10;
        parser->http_minor += ch - '0';

        if (parser->http_minor > 999) {
          SET_ERRNO(HPE_INVALID_VERSION);
          goto error;
        }

        break;
      }

      case s_res_first_status_code:
      {
        if (!IS_NUM(ch)) {
          if (ch == ' ') {
            break;
          }

          SET_ERRNO(HPE_INVALID_STATUS);
          goto error;
        }
        parser->status_code = ch - '0';
        state = s_res_status_code;
        break;
      }

      case s_res_status_code:
      {
        if (!IS_NUM(ch)) {
          switch (ch) {
            case ' ':
              state = s_res_status;
              break;
            case CR:
              state = s_res_line_almost_done;
              break;
            case LF:
              state = s_header_field_start;
              break;
            default:
              SET_ERRNO(HPE_INVALID_STATUS);
              goto error;
          }
          break;
        }

        parser->status_code *= 10;
        parser->status_code += ch - '0';

        if (parser->status_code > 999) {
          SET_ERRNO(HPE_INVALID_STATUS);
          goto error;
        }

        break;
      }

      case s_res_status:
        /* the human readable status. e.g. "NOT FOUND"
         * we are not humans so just ignore this */
        if (ch == CR) {
          state = s_res_line_almost_done;
          break;
        }

        if (ch == LF) {
          state = s_header_field_start;
          break;
        }
        break;

      case s_res_line_almost_done:
        STRICT_CHECK(ch != LF);
        state = s_header_field_start;
        break;

      case s_start_req:
      {
        if (ch == CR || ch == LF)
          break;
        parser->flags = 0;
        parser->content_length = -1;

        CALLBACK2(message_begin);

        if (!IS_ALPHA(ch)) {
          SET_ERRNO(HPE_INVALID_METHOD);
          goto error;
        }

      start_req_method_assign:
        /* Guess the method from its first letter; s_req_method corrects
         * the guess as more letters arrive. */
        parser->method = (enum http_method) 0;
        index = 1;
        switch (ch) {
          case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
          case 'D': parser->method = HTTP_DELETE; break;
          case 'G': parser->method = HTTP_GET; break;
          case 'H': parser->method = HTTP_HEAD; break;
          case 'L': parser->method = HTTP_LOCK; break;
          case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
          case 'N': parser->method = HTTP_NOTIFY; break;
          case 'O': parser->method = HTTP_OPTIONS; break;
          case 'P': parser->method = HTTP_POST;
            /* or PROPFIND or PROPPATCH or PUT or PATCH */
            break;
          case 'R': parser->method = HTTP_REPORT; break;
          case 'S': parser->method = HTTP_SUBSCRIBE; break;
          case 'T': parser->method = HTTP_TRACE; break;
          case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
          default:
            SET_ERRNO(HPE_INVALID_METHOD);
            goto error;
        }
        state = s_req_method;
        break;
      }

      case s_req_method:
      {
        const char *matcher;

        /* Reject NUL first so that `index` can never step past the
         * terminator of the method string being matched. */
        if (ch == '\0') {
          SET_ERRNO(HPE_INVALID_METHOD);
          goto error;
        }

        matcher = method_strings[parser->method];
        if (ch == ' ' && matcher[index] == '\0') {
          state = s_req_spaces_before_url;
        } else if (ch == matcher[index]) {
          ; /* nada */
        } else if (parser->method == HTTP_CONNECT) {
          if (index == 1 && ch == 'H') {
            parser->method = HTTP_CHECKOUT;
          } else if (index == 2  && ch == 'P') {
            parser->method = HTTP_COPY;
          } else {
            /* Report an unrecognised method as such rather than
             * falling through to the generic HPE_UNKNOWN. */
            SET_ERRNO(HPE_INVALID_METHOD);
            goto error;
          }
        } else if (parser->method == HTTP_MKCOL) {
          if (index == 1 && ch == 'O') {
            parser->method = HTTP_MOVE;
          } else if (index == 1 && ch == 'E') {
            parser->method = HTTP_MERGE;
          } else if (index == 1 && ch == '-') {
            parser->method = HTTP_MSEARCH;
          } else if (index == 2 && ch == 'A') {
            parser->method = HTTP_MKACTIVITY;
          } else {
            SET_ERRNO(HPE_INVALID_METHOD);
            goto error;
          }
        } else if (index == 1 && parser->method == HTTP_POST) {
          if (ch == 'R') {
            parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
          } else if (ch == 'U') {
            parser->method = HTTP_PUT;
          } else if (ch == 'A') {
            parser->method = HTTP_PATCH;
          } else {
            SET_ERRNO(HPE_INVALID_METHOD);
            goto error;
          }
        } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
          parser->method = HTTP_UNSUBSCRIBE;
        } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
          parser->method = HTTP_PROPPATCH;
        } else {
          SET_ERRNO(HPE_INVALID_METHOD);
          goto error;
        }

        ++index;
        break;
      }

      case s_req_spaces_before_url:
      {
        if (ch == ' ') break;

        if (ch == '/' || ch == '*') {
          MARK(url);
          state = s_req_path;
          break;
        }

        /* Proxied requests are followed by scheme of an absolute URI (alpha).
         * CONNECT is followed by a hostname, which begins with alphanum.
         * All other methods are followed by '/' or '*' (handled above).
         */
        if (IS_ALPHA(ch) || (parser->method == HTTP_CONNECT && IS_NUM(ch))) {
          MARK(url);
          state = (parser->method == HTTP_CONNECT) ? s_req_host : s_req_schema;
          break;
        }

        SET_ERRNO(HPE_INVALID_URL);
        goto error;
      }

      case s_req_schema:
      {
        if (IS_ALPHA(ch)) break;

        if (ch == ':') {
          state = s_req_schema_slash;
          break;
        }

        SET_ERRNO(HPE_INVALID_URL);
        goto error;
      }

      case s_req_schema_slash:
        STRICT_CHECK(ch != '/');
        state = s_req_schema_slash_slash;
        break;

      case s_req_schema_slash_slash:
        STRICT_CHECK(ch != '/');
        state = s_req_host;
        break;

      case s_req_host:
      {
        if (IS_HOST_CHAR(ch)) break;
        switch (ch) {
          case ':':
            state = s_req_port;
            break;
          case '/':
            state = s_req_path;
            break;
          case ' ':
            /* The request line looks like:
             *   "GET http://foo.bar.com HTTP/1.1"
             * That is, there is no path.
             */
            CALLBACK(url);
            state = s_req_http_start;
            break;
          case '?':
            state = s_req_query_string_start;
            break;
          default:
            SET_ERRNO(HPE_INVALID_HOST);
            goto error;
        }
        break;
      }

      case s_req_port:
      {
        if (IS_NUM(ch)) break;
        switch (ch) {
          case '/':
            state = s_req_path;
            break;
          case ' ':
            /* The request line looks like:
             *   "GET http://foo.bar.com:1234 HTTP/1.1"
             * That is, there is no path.
             */
            CALLBACK(url);
            state = s_req_http_start;
            break;
          case '?':
            state = s_req_query_string_start;
            break;
          default:
            SET_ERRNO(HPE_INVALID_PORT);
            goto error;
        }
        break;
      }

      case s_req_path:
      {
        if (IS_URL_CHAR(ch)) break;

        switch (ch) {
          case ' ':
            CALLBACK(url);
            state = s_req_http_start;
            break;
          case CR:
            /* A request line without a version is recorded as 0.9. */
            CALLBACK(url);
            parser->http_major = 0;
            parser->http_minor = 9;
            state = s_req_line_almost_done;
            break;
          case LF:
            CALLBACK(url);
            parser->http_major = 0;
            parser->http_minor = 9;
            state = s_header_field_start;
            break;
          case '?':
            state = s_req_query_string_start;
            break;
          case '#':
            state = s_req_fragment_start;
            break;
          default:
            SET_ERRNO(HPE_INVALID_PATH);
            goto error;
        }
        break;
      }

      case s_req_query_string_start:
      {
        if (IS_URL_CHAR(ch)) {
          state = s_req_query_string;
          break;
        }

        switch (ch) {
          case '?':
            break; /* XXX ignore extra '?' ... is this right? */
          case ' ':
            CALLBACK(url);
            state = s_req_http_start;
            break;
          case CR:
            CALLBACK(url);
            parser->http_major = 0;
            parser->http_minor = 9;
            state = s_req_line_almost_done;
            break;
          case LF:
            CALLBACK(url);
            parser->http_major = 0;
            parser->http_minor = 9;
            state = s_header_field_start;
            break;
          case '#':
            state = s_req_fragment_start;
            break;
          default:
            SET_ERRNO(HPE_INVALID_QUERY_STRING);
            goto error;
        }
        break;
      }

      case s_req_query_string:
      {
        if (IS_URL_CHAR(ch)) break;

        switch (ch) {
          case '?':
            /* allow extra '?' in query string */
            break;
          case ' ':
            CALLBACK(url);
            state = s_req_http_start;
            break;
          case CR:
            CALLBACK(url);
            parser->http_major = 0;
            parser->http_minor = 9;
            state = s_req_line_almost_done;
            break;
          case LF:
            CALLBACK(url);
            parser->http_major = 0;
            parser->http_minor = 9;
            state = s_header_field_start;
            break;
          case '#':
            state = s_req_fragment_start;
            break;
          default:
            SET_ERRNO(HPE_INVALID_QUERY_STRING);
            goto error;
        }
        break;
      }

      case s_req_fragment_start:
      {
        if (IS_URL_CHAR(ch)) {
          state = s_req_fragment;
          break;
        }

        switch (ch) {
          case ' ':
            CALLBACK(url);
            state = s_req_http_start;
            break;
          case CR:
            CALLBACK(url);
            parser->http_major = 0;
            parser->http_minor = 9;
            state = s_req_line_almost_done;
            break;
          case LF:
            CALLBACK(url);
            parser->http_major = 0;
            parser->http_minor = 9;
            state = s_header_field_start;
            break;
          case '?':
            state = s_req_fragment;
            break;
          case '#':
            break;
          default:
            SET_ERRNO(HPE_INVALID_FRAGMENT);
            goto error;
        }
        break;
      }

      case s_req_fragment:
      {
        if (IS_URL_CHAR(ch)) break;

        switch (ch) {
          case ' ':
            CALLBACK(url);
            state = s_req_http_start;
            break;
          case CR:
            CALLBACK(url);
            parser->http_major = 0;
            parser->http_minor = 9;
            state = s_req_line_almost_done;
            break;
          case LF:
            CALLBACK(url);
            parser->http_major = 0;
            parser->http_minor = 9;
            state = s_header_field_start;
            break;
          case '?':
          case '#':
            break;
          default:
            SET_ERRNO(HPE_INVALID_FRAGMENT);
            goto error;
        }
        break;
      }

      case s_req_http_start:
        switch (ch) {
          case 'H':
            state = s_req_http_H;
            break;
          case ' ':
            break;
          default:
            SET_ERRNO(HPE_INVALID_CONSTANT);
            goto error;
        }
        break;

      case s_req_http_H:
        STRICT_CHECK(ch != 'T');
        state = s_req_http_HT;
        break;

      case s_req_http_HT:
        STRICT_CHECK(ch != 'T');
        state = s_req_http_HTT;
        break;

      case s_req_http_HTT:
        STRICT_CHECK(ch != 'P');
        state = s_req_http_HTTP;
        break;

      case s_req_http_HTTP:
        STRICT_CHECK(ch != '/');
        state = s_req_first_http_major;
        break;

      /* first digit of major HTTP version */
      case s_req_first_http_major:
        if (ch < '1' || ch > '9') {
          SET_ERRNO(HPE_INVALID_VERSION);
          goto error;
        }

        parser->http_major = ch - '0';
        state = s_req_http_major;
        break;

      /* major HTTP version or dot */
      case s_req_http_major:
      {
        if (ch == '.') {
          state = s_req_first_http_minor;
          break;
        }

        if (!IS_NUM(ch)) {
          SET_ERRNO(HPE_INVALID_VERSION);
          goto error;
        }

        parser->http_major *= 10;
        parser->http_major += ch - '0';

        if (parser->http_major > 999) {
          SET_ERRNO(HPE_INVALID_VERSION);
          goto error;
        }

        break;
      }

      /* first digit of minor HTTP version */
      case s_req_first_http_minor:
        if (!IS_NUM(ch)) {
          SET_ERRNO(HPE_INVALID_VERSION);
          goto error;
        }

        parser->http_minor = ch - '0';
        state = s_req_http_minor;
        break;

      /* minor HTTP version or end of request line */
      case s_req_http_minor:
      {
        if (ch == CR) {
          state = s_req_line_almost_done;
          break;
        }

        if (ch == LF) {
          state = s_header_field_start;
          break;
        }

        /* XXX allow spaces after digit? */

        if (!IS_NUM(ch)) {
          SET_ERRNO(HPE_INVALID_VERSION);
          goto error;
        }

        parser->http_minor *= 10;
        parser->http_minor += ch - '0';

        if (parser->http_minor > 999) {
          SET_ERRNO(HPE_INVALID_VERSION);
          goto error;
        }

        break;
      }

      /* end of request line */
      case s_req_line_almost_done:
      {
        if (ch != LF) {
          SET_ERRNO(HPE_LF_EXPECTED);
          goto error;
        }

        state = s_header_field_start;
        break;
      }

      case s_header_field_start:
      header_field_start:
      {
        if (ch == CR) {
          state = s_headers_almost_done;
          break;
        }

        if (ch == LF) {
          /* they might be just sending \n instead of \r\n so this would be
           * the second \n to denote the end of headers*/
          state = s_headers_almost_done;
          goto headers_almost_done;
        }

        c = TOKEN(ch);

        if (!c) {
          SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
          goto error;
        }

        MARK(header_field);

        index = 0;
        state = s_header_field;

        /* Pick the candidate header names from the first letter. */
        switch (c) {
          case 'c':
            header_state = h_C;
            break;

          case 'p':
            header_state = h_matching_proxy_connection;
            break;

          case 't':
            header_state = h_matching_transfer_encoding;
            break;

          case 'u':
            header_state = h_matching_upgrade;
            break;

          default:
            header_state = h_general;
            break;
        }
        break;
      }

      case s_header_field:
      {
        c = TOKEN(ch);

        if (c) {
          switch (header_state) {
            case h_general:
              break;

            case h_C:
              index++;
              header_state = (c == 'o' ? h_CO : h_general);
              break;

            case h_CO:
              index++;
              header_state = (c == 'n' ? h_CON : h_general);
              break;

            case h_CON:
              index++;
              switch (c) {
                case 'n':
                  header_state = h_matching_connection;
                  break;
                case 't':
                  header_state = h_matching_content_length;
                  break;
                default:
                  header_state = h_general;
                  break;
              }
              break;

            /* connection */

            case h_matching_connection:
              index++;
              if (index > sizeof(CONNECTION)-1
                  || c != CONNECTION[index]) {
                header_state = h_general;
              } else if (index == sizeof(CONNECTION)-2) {
                header_state = h_connection;
              }
              break;

            /* proxy-connection */

            case h_matching_proxy_connection:
              index++;
              if (index > sizeof(PROXY_CONNECTION)-1
                  || c != PROXY_CONNECTION[index]) {
                header_state = h_general;
              } else if (index == sizeof(PROXY_CONNECTION)-2) {
                header_state = h_connection;
              }
              break;

            /* content-length */

            case h_matching_content_length:
              index++;
              if (index > sizeof(CONTENT_LENGTH)-1
                  || c != CONTENT_LENGTH[index]) {
                header_state = h_general;
              } else if (index == sizeof(CONTENT_LENGTH)-2) {
                header_state = h_content_length;
              }
              break;

            /* transfer-encoding */

            case h_matching_transfer_encoding:
              index++;
              if (index > sizeof(TRANSFER_ENCODING)-1
                  || c != TRANSFER_ENCODING[index]) {
                header_state = h_general;
              } else if (index == sizeof(TRANSFER_ENCODING)-2) {
                header_state = h_transfer_encoding;
              }
              break;

            /* upgrade */

            case h_matching_upgrade:
              index++;
              if (index > sizeof(UPGRADE)-1
                  || c != UPGRADE[index]) {
                header_state = h_general;
              } else if (index == sizeof(UPGRADE)-2) {
                header_state = h_upgrade;
              }
              break;

            case h_connection:
            case h_content_length:
            case h_transfer_encoding:
            case h_upgrade:
              /* Any further token byte other than a space means the
               * field name is longer than the recognised header. */
              if (ch != ' ') header_state = h_general;
              break;

            default:
              assert(0 && "Unknown header_state");
              break;
          }
          break;
        }

        if (ch == ':') {
          CALLBACK(header_field);
          state = s_header_value_start;
          break;
        }

        if (ch == CR) {
          state = s_header_almost_done;
          CALLBACK(header_field);
          break;
        }

        if (ch == LF) {
          CALLBACK(header_field);
          state = s_header_field_start;
          break;
        }

        SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
        goto error;
      }

      case s_header_value_start:
      {
        if (ch == ' ' || ch == '\t') break;

        MARK(header_value);

        state = s_header_value;
        index = 0;

        if (ch == CR) {
          CALLBACK(header_value);
          header_state = h_general;
          state = s_header_almost_done;
          break;
        }

        if (ch == LF) {
          CALLBACK(header_value);
          state = s_header_field_start;
          break;
        }

        c = LOWER(ch);

        switch (header_state) {
          case h_upgrade:
            parser->flags |= F_UPGRADE;
            header_state = h_general;
            break;

          case h_transfer_encoding:
            /* looking for 'Transfer-Encoding: chunked' */
            if ('c' == c) {
              header_state = h_matching_transfer_encoding_chunked;
            } else {
              header_state = h_general;
            }
            break;

          case h_content_length:
            if (!IS_NUM(ch)) {
              SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
              goto error;
            }

            parser->content_length = ch - '0';
            break;

          case h_connection:
            /* looking for 'Connection: keep-alive' */
            if (c == 'k') {
              header_state = h_matching_connection_keep_alive;
            /* looking for 'Connection: close' */
            } else if (c == 'c') {
              header_state = h_matching_connection_close;
            } else {
              header_state = h_general;
            }
            break;

          default:
            header_state = h_general;
            break;
        }
        break;
      }

      case s_header_value:
      {
        if (ch == CR) {
          CALLBACK(header_value);
          state = s_header_almost_done;
          break;
        }

        if (ch == LF) {
          CALLBACK(header_value);
          goto header_almost_done;
        }

        c = LOWER(ch);

        switch (header_state) {
          case h_general:
            break;

          case h_connection:
          case h_transfer_encoding:
            assert(0 && "Shouldn't get here.");
            break;

          case h_content_length:
            if (ch == ' ') break;
            if (!IS_NUM(ch)) {
              SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
              goto error;
            }

            parser->content_length *= 10;
            parser->content_length += ch - '0';
            break;

          /* Transfer-Encoding: chunked */
          case h_matching_transfer_encoding_chunked:
            index++;
            if (index > sizeof(CHUNKED)-1
                || c != CHUNKED[index]) {
              header_state = h_general;
            } else if (index == sizeof(CHUNKED)-2) {
              header_state = h_transfer_encoding_chunked;
            }
            break;

          /* looking for 'Connection: keep-alive' */
          case h_matching_connection_keep_alive:
            index++;
            if (index > sizeof(KEEP_ALIVE)-1
                || c != KEEP_ALIVE[index]) {
              header_state = h_general;
            } else if (index == sizeof(KEEP_ALIVE)-2) {
              header_state = h_connection_keep_alive;
            }
            break;

          /* looking for 'Connection: close' */
          case h_matching_connection_close:
            index++;
            if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
              header_state = h_general;
            } else if (index == sizeof(CLOSE)-2) {
              header_state = h_connection_close;
            }
            break;

          case h_transfer_encoding_chunked:
          case h_connection_keep_alive:
          case h_connection_close:
            if (ch != ' ') header_state = h_general;
            break;

          default:
            state = s_header_value;
            header_state = h_general;
            break;
        }
        break;
      }

      case s_header_almost_done:
      header_almost_done:
      {
        STRICT_CHECK(ch != LF);

        state = s_header_value_lws;

        /* The header line is complete: commit what its value matched. */
        switch (header_state) {
          case h_connection_keep_alive:
            parser->flags |= F_CONNECTION_KEEP_ALIVE;
            break;
          case h_connection_close:
            parser->flags |= F_CONNECTION_CLOSE;
            break;
          case h_transfer_encoding_chunked:
            parser->flags |= F_CHUNKED;
            break;
          default:
            break;
        }
        break;
      }

      case s_header_value_lws:
      {
        /* Leading whitespace on the next line continues the value. */
        if (ch == ' ' || ch == '\t')
          state = s_header_value_start;
        else
        {
          state = s_header_field_start;
          goto header_field_start;
        }
        break;
      }

      case s_headers_almost_done:
      headers_almost_done:
      {
        STRICT_CHECK(ch != LF);

        if (parser->flags & F_TRAILING) {
          /* End of a chunked request */
          CALLBACK2(message_complete);
          state = NEW_MESSAGE();
          break;
        }

        nread = 0;

        if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
          parser->upgrade = 1;
        }

        /* Here we call the headers_complete callback. This is somewhat
         * different than other callbacks because if the user returns 1, we
         * will interpret that as saying that this message has no body. This
         * is needed for the annoying case of receiving a response to a HEAD
         * request.
         */
        if (settings->on_headers_complete) {
          switch (settings->on_headers_complete(parser)) {
            case 0:
              break;

            case 1:
              parser->flags |= F_SKIPBODY;
              break;

            default:
              parser->state = state;
              SET_ERRNO(HPE_CB_headers_complete);
              return p - data; /* Error */
          }
        }

        /* Exit, the rest of the connect is in a different protocol. */
        if (parser->upgrade) {
          CALLBACK2(message_complete);
          return (p - data) + 1;
        }

        if (parser->flags & F_SKIPBODY) {
          CALLBACK2(message_complete);
          state = NEW_MESSAGE();
        } else if (parser->flags & F_CHUNKED) {
          /* chunked encoding - ignore Content-Length header */
          state = s_chunk_size_start;
        } else {
          if (parser->content_length == 0) {
            /* Content-Length header given but zero: Content-Length: 0\r\n */
            CALLBACK2(message_complete);
            state = NEW_MESSAGE();
          } else if (parser->content_length > 0) {
            /* Content-Length header given and non-zero */
            state = s_body_identity;
          } else {
            if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
              /* Assume content-length 0 - read the next */
              CALLBACK2(message_complete);
              state = NEW_MESSAGE();
            } else {
              /* Read body until EOF */
              state = s_body_identity_eof;
            }
          }
        }

        break;
      }

      case s_body_identity:
        to_read = MIN(pe - p, (int64_t)parser->content_length);
        if (to_read > 0) {
          /* NOTE(review): the on_body return value is ignored here and
           * in the other body states, unlike the other callbacks. */
          if (settings->on_body) settings->on_body(parser, p, to_read);
          /* -1 because the for loop advances p once more. */
          p += to_read - 1;
          parser->content_length -= to_read;
          if (parser->content_length == 0) {
            CALLBACK2(message_complete);
            state = NEW_MESSAGE();
          }
        }
        break;

      /* read until EOF */
      case s_body_identity_eof:
        to_read = pe - p;
        if (to_read > 0) {
          if (settings->on_body) settings->on_body(parser, p, to_read);
          p += to_read - 1;
        }
        break;

      case s_chunk_size_start:
      {
        assert(nread == 1);
        assert(parser->flags & F_CHUNKED);

        unhex_val = unhex[(unsigned char)ch];
        if (unhex_val == -1) {
          SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
          goto error;
        }

        parser->content_length = unhex_val;
        state = s_chunk_size;
        break;
      }

      case s_chunk_size:
      {
        assert(parser->flags & F_CHUNKED);

        if (ch == CR) {
          state = s_chunk_size_almost_done;
          break;
        }

        unhex_val = unhex[(unsigned char)ch];

        if (unhex_val == -1) {
          if (ch == ';' || ch == ' ') {
            state = s_chunk_parameters;
            break;
          }

          SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
          goto error;
        }

        parser->content_length *= 16;
        parser->content_length += unhex_val;
        break;
      }

      case s_chunk_parameters:
      {
        assert(parser->flags & F_CHUNKED);
        /* Chunk parameters are ignored. TODO check for overflow */
        if (ch == CR) {
          state = s_chunk_size_almost_done;
          break;
        }
        break;
      }

      case s_chunk_size_almost_done:
      {
        assert(parser->flags & F_CHUNKED);
        STRICT_CHECK(ch != LF);

        nread = 0;

        if (parser->content_length == 0) {
          /* A zero-sized chunk is the last one; trailers follow. */
          parser->flags |= F_TRAILING;
          state = s_header_field_start;
        } else {
          state = s_chunk_data;
        }
        break;
      }

      case s_chunk_data:
      {
        assert(parser->flags & F_CHUNKED);

        to_read = MIN(pe - p, (int64_t)(parser->content_length));

        if (to_read > 0) {
          if (settings->on_body) settings->on_body(parser, p, to_read);
          p += to_read - 1;
        }

        if (to_read == parser->content_length) {
          state = s_chunk_data_almost_done;
        }

        parser->content_length -= to_read;
        break;
      }

      case s_chunk_data_almost_done:
        assert(parser->flags & F_CHUNKED);
        STRICT_CHECK(ch != CR);
        state = s_chunk_data_done;
        break;

      case s_chunk_data_done:
        assert(parser->flags & F_CHUNKED);
        STRICT_CHECK(ch != LF);
        state = s_chunk_size_start;
        break;

      default:
        assert(0 && "unhandled state");
        SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
        goto error;
    }
  }

  /* Report whatever part of an element lies in this buffer; the rest is
   * reported by a later call. */
  CALLBACK(header_field);
  CALLBACK(header_value);
  CALLBACK(url);

  parser->state = state;
  parser->header_state = header_state;
  parser->index = index;
  parser->nread = nread;

  return len;

error:
  if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
    SET_ERRNO(HPE_UNKNOWN);
  }

  return (p - data);
}
/* Decide whether the connection should stay open after the current message.
 *
 * Returns 1 to keep the connection alive, 0 to close it. When both version
 * numbers are greater than zero (HTTP/1.1) the connection persists unless
 * the F_CONNECTION_CLOSE flag is set; otherwise (HTTP/1.0 or earlier) it
 * persists only when the F_CONNECTION_KEEP_ALIVE flag is set.
 */
int
http_should_keep_alive (http_parser *parser)
{
  if (parser->http_major > 0 && parser->http_minor > 0) {
    /* HTTP/1.1 */
    return (parser->flags & F_CONNECTION_CLOSE) ? 0 : 1;
  }

  /* HTTP/1.0 or earlier */
  return (parser->flags & F_CONNECTION_KEEP_ALIVE) ? 1 : 0;
}
/* Return the entry of the method_strings table for request method `m`.
 * The index is used as-is; no range check is performed here. */
const char * http_method_str (enum http_method m)
{
  const char *method_name = method_strings[m];

  return method_name;
}
/* Reset `parser` so that it is ready to parse a new stream of kind `t`.
 *
 * The start state depends on `t`: requests, responses, or (for any other
 * value, i.e. HTTP_BOTH) a state that accepts either. The byte counter,
 * upgrade marker, flags, method and error number are all cleared.
 */
void
http_parser_init (http_parser *parser, enum http_parser_type t)
{
  parser->type = t;

  if (t == HTTP_REQUEST) {
    parser->state = s_start_req;
  } else if (t == HTTP_RESPONSE) {
    parser->state = s_start_res;
  } else {
    parser->state = s_start_req_or_res;
  }

  parser->nread = 0;
  parser->upgrade = 0;
  parser->flags = 0;
  parser->method = 0;
  parser->http_errno = 0;
}
/* Return the symbolic name stored in http_strerror_tab for `err`.
 * `err` must index into the table; this is only enforced by an assert. */
const char *
http_errno_name(enum http_errno err) {
  assert((size_t) err < sizeof(http_strerror_tab) / sizeof(*http_strerror_tab));

  return http_strerror_tab[err].name;
}
/* Return the human-readable description stored in http_strerror_tab for
 * `err`. `err` must index into the table; this is only enforced by an
 * assert. */
const char *
http_errno_description(enum http_errno err) {
  assert((size_t) err < sizeof(http_strerror_tab) / sizeof(*http_strerror_tab));

  return http_strerror_tab[err].description;
}
/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef http_parser_h
#define http_parser_h
#ifdef __cplusplus
extern "C" {
#endif
#define HTTP_PARSER_VERSION_MAJOR 1
#define HTTP_PARSER_VERSION_MINOR 0
#include <sys/types.h>
#if defined(_WIN32) && !defined(__MINGW32__) && !defined(_MSC_VER)
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
typedef unsigned int size_t;
typedef int ssize_t;
#else
#include <stdint.h>
#endif
/* Compile with -DHTTP_PARSER_STRICT=0 to perform fewer checks, but run
* faster
*/
#ifndef HTTP_PARSER_STRICT
# define HTTP_PARSER_STRICT 1
#endif
/* Compile with -DHTTP_PARSER_DEBUG=1 to add extra debugging information to
* the error reporting facility.
*/
#ifndef HTTP_PARSER_DEBUG
# define HTTP_PARSER_DEBUG 0
#endif
/* Maximum header size allowed */
#define HTTP_MAX_HEADER_SIZE (80*1024)
typedef struct http_parser http_parser;
typedef struct http_parser_settings http_parser_settings;
typedef struct http_parser_result http_parser_result;
/* Callbacks should return non-zero to indicate an error. The parser will
* then halt execution.
*
* The one exception is on_headers_complete. In a HTTP_RESPONSE parser
* returning '1' from on_headers_complete will tell the parser that it
* should not expect a body. This is used when receiving a response to a
* HEAD request which may contain 'Content-Length' or 'Transfer-Encoding:
* chunked' headers that indicate the presence of a body.
*
* http_data_cb does not return data chunks. It will be called arbitrarily
* many times for each string. E.g. you might get 10 callbacks for "on_url",
* each providing just a few more characters of data.
*/
typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);
typedef int (*http_cb) (http_parser*);
/* Request Methods */
/* Request methods. Enumerators are numbered consecutively starting from
 * HTTP_DELETE = 0, so their order must not be changed. */
enum http_method {
  HTTP_DELETE = 0,
  HTTP_GET,
  HTTP_HEAD,
  HTTP_POST,
  HTTP_PUT,

  /* pathological */
  HTTP_CONNECT,
  HTTP_OPTIONS,
  HTTP_TRACE,

  /* webdav */
  HTTP_COPY,
  HTTP_LOCK,
  HTTP_MKCOL,
  HTTP_MOVE,
  HTTP_PROPFIND,
  HTTP_PROPPATCH,
  HTTP_UNLOCK,

  /* subversion */
  HTTP_REPORT,
  HTTP_MKACTIVITY,
  HTTP_CHECKOUT,
  HTTP_MERGE,

  /* upnp */
  HTTP_MSEARCH,
  HTTP_NOTIFY,
  HTTP_SUBSCRIBE,
  HTTP_UNSUBSCRIBE,

  /* RFC-5789 */
  HTTP_PATCH
};
/* Kind of message stream a parser is initialised for (see http_parser_init). */
enum http_parser_type {
  HTTP_REQUEST  = 0,
  HTTP_RESPONSE = 1,
  HTTP_BOTH     = 2
};
/* Flag values for http_parser.flags field */
/* Single-bit flags; six bits in total, one per enumerator. */
enum flags {
  F_CHUNKED               = 0x01,
  F_CONNECTION_KEEP_ALIVE = 0x02,
  F_CONNECTION_CLOSE      = 0x04,
  F_TRAILING              = 0x08,
  F_UPGRADE               = 0x10,
  F_SKIPBODY              = 0x20
};
/* Map for errno-related constants
*
* The provided argument should be a macro that takes 2 arguments: XX is
* invoked once per entry as XX(name, description), where `name` is the
* error identifier without its HPE_ prefix and `description` is a
* human-readable string literal. Entries are expanded in the order they
* are listed here, so anything generated from this map follows that order.
*/
#define HTTP_ERRNO_MAP(XX) \
/* No error */ \
XX(OK, "success") \
\
/* Callback-related errors */ \
XX(CB_message_begin, "the on_message_begin callback failed") \
XX(CB_path, "the on_path callback failed") \
XX(CB_query_string, "the on_query_string callback failed") \
XX(CB_url, "the on_url callback failed") \
XX(CB_fragment, "the on_fragment callback failed") \
XX(CB_header_field, "the on_header_field callback failed") \
XX(CB_header_value, "the on_header_value callback failed") \
XX(CB_headers_complete, "the on_headers_complete callback failed") \
XX(CB_body, "the on_body callback failed") \
XX(CB_message_complete, "the on_message_complete callback failed") \
\
/* Parsing-related errors */ \
XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \
XX(HEADER_OVERFLOW, \
"too many header bytes seen; overflow detected") \
XX(CLOSED_CONNECTION, \
"data received after completed connection: close message") \
XX(INVALID_VERSION, "invalid HTTP version") \
XX(INVALID_STATUS, "invalid HTTP status code") \
XX(INVALID_METHOD, "invalid HTTP method") \
XX(INVALID_URL, "invalid URL") \
XX(INVALID_HOST, "invalid host") \
XX(INVALID_PORT, "invalid port") \
XX(INVALID_PATH, "invalid path") \
XX(INVALID_QUERY_STRING, "invalid query string") \
XX(INVALID_FRAGMENT, "invalid fragment") \
XX(LF_EXPECTED, "LF character expected") \
XX(INVALID_HEADER_TOKEN, "invalid character in header") \
XX(INVALID_CONTENT_LENGTH, \
"invalid character in content-length header") \
XX(INVALID_CHUNK_SIZE, \
"invalid character in chunk size header") \
XX(INVALID_CONSTANT, "invalid constant string") \
XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\
XX(STRICT, "strict mode assertion failed") \
XX(UNKNOWN, "an unknown error occurred")
/* Define HPE_* values for each errno value above.
*
* HTTP_ERRNO_GEN turns every XX(name, description) entry of HTTP_ERRNO_MAP
* into an `HPE_<name>,` enumerator; the description argument is ignored
* here. Enumerators follow the order of the map, so the first entry (HPE_OK)
* has the value 0. The helper macro is undefined again right after use.
*/
#define HTTP_ERRNO_GEN(n, s) HPE_##n,
enum http_errno {
HTTP_ERRNO_MAP(HTTP_ERRNO_GEN)
};
#undef HTTP_ERRNO_GEN
/* Get an http_errno value from an http_parser */
#define HTTP_PARSER_ERRNO(p) ((enum http_errno) (p)->http_errno)
/* Get the line number that generated the current error */
#if HTTP_PARSER_DEBUG
#define HTTP_PARSER_ERRNO_LINE(p) ((p)->error_lineno)
#else
#define HTTP_PARSER_ERRNO_LINE(p) 0
#endif
/* Parser state for one HTTP message stream. Initialise it with
* http_parser_init() before use. Fields are grouped by who may touch them:
* PRIVATE (parser internals), READ-ONLY (results the caller may inspect) and
* PUBLIC (owned by the caller).
*/
struct http_parser {
/** PRIVATE **/
/* enum http_parser_type value passed to http_parser_init() */
unsigned char type : 2;
unsigned char flags : 6; /* F_* values from 'flags' enum; semi-public */
/* current state-machine state; set by http_parser_init() and saved back at
* the end of each http_parser_execute() call */
unsigned char state;
/* header-parsing sub-state, saved between http_parser_execute() calls */
unsigned char header_state;
/* saved between http_parser_execute() calls together with header_state */
unsigned char index;
/* byte counter saved between http_parser_execute() calls; reset to 0 by
* http_parser_init() */
uint32_t nread;
/* while parsing a chunked body this accumulates the hexadecimal chunk size
* and is then decremented as chunk data is consumed */
int64_t content_length;
/** READ-ONLY **/
unsigned short http_major;
unsigned short http_minor;
unsigned short status_code; /* responses only */
unsigned char method; /* requests only */
/* enum http_errno value; read it through HTTP_PARSER_ERRNO() */
unsigned char http_errno : 7;
/* 1 = Upgrade header was present and the parser has exited because of that.
* 0 = No upgrade header present.
* Should be checked when http_parser_execute() returns in addition to
* error checking.
*/
unsigned char upgrade : 1;
#if HTTP_PARSER_DEBUG
/* only present in debug builds; read it through HTTP_PARSER_ERRNO_LINE() */
uint32_t error_lineno;
#endif
/** PUBLIC **/
void *data; /* A pointer to get hook to the "connection" or "socket" object */
};
/* Set of callbacks handed to http_parser_execute().
*
* http_cb members receive only the parser; http_data_cb members additionally
* receive a pointer into the input buffer and a length. The parser checks
* on_body for NULL before calling it; presumably the other members may be
* left NULL as well -- TODO confirm against http_parser_execute().
*/
struct http_parser_settings {
http_cb on_message_begin;
http_data_cb on_url;
http_data_cb on_header_field;
http_data_cb on_header_value;
http_cb on_headers_complete;
http_data_cb on_body;
http_cb on_message_complete;
};
void http_parser_init(http_parser *parser, enum http_parser_type type);
size_t http_parser_execute(http_parser *parser,
const http_parser_settings *settings,
const char *data,
size_t len);
/* If http_should_keep_alive() in the on_headers_complete or
* on_message_complete callback returns 0, then this should be
* the last message on the connection.
* If you are the server, respond with the "Connection: close" header.
* If you are the client, close the connection.
*/
int http_should_keep_alive(http_parser *parser);
/* Returns a string version of the HTTP method. */
const char *http_method_str(enum http_method m);
/* Return a string name of the given error */
const char *http_errno_name(enum http_errno err);
/* Return a string description of the given error */
const char *http_errno_description(enum http_errno err);
#ifdef __cplusplus
}
#endif
#endif
......@@ -99,3 +99,15 @@ void git_buf_free(git_buf *buf)
{
free(buf->ptr);
}
/*
* Mark the buffer as empty by resetting its recorded size to zero.
* Only the size field is touched; buf->ptr is neither freed nor modified.
*/
void git_buf_clear(git_buf *buf)
{
buf->size = 0;
}
/*
 * Drop the leading part of the buffer up to (but not including) `end`.
 *
 * `end` must point into the data currently held by `buf`; the bytes from
 * `end` to the end of the buffer are moved to the front and the size is
 * reduced accordingly.
 *
 * A pointer outside (buf->ptr, buf->ptr + buf->size] is ignored: acting on
 * it would make the unsigned size computation wrap around and memmove read
 * far outside the allocation. `end == buf->ptr` consumes nothing and is
 * therefore also a no-op, which additionally covers an unallocated buffer.
 */
void git_buf_consume(git_buf *buf, const char *end)
{
	if (end > buf->ptr && end <= buf->ptr + buf->size) {
		size_t consumed = end - buf->ptr;

		/* regions overlap, so memmove rather than memcpy */
		memmove(buf->ptr, end, buf->size - consumed);
		buf->size -= consumed;
	}
}
......@@ -24,6 +24,8 @@ void git_buf_puts(git_buf *buf, const char *string);
void git_buf_printf(git_buf *buf, const char *format, ...) GIT_FORMAT_PRINTF(2, 3);
const char *git_buf_cstr(git_buf *buf);
void git_buf_free(git_buf *buf);
void git_buf_clear(git_buf *buf);
void git_buf_consume(git_buf *buf, const char *end);
#define git_buf_PUTS(buf, str) git_buf_put(buf, str, sizeof(str) - 1)
......
......@@ -27,7 +27,7 @@ void gitno_buffer_setup(gitno_buffer *buf, char *data, unsigned int len, int fd)
memset(buf, 0x0, sizeof(gitno_buffer));
memset(data, 0x0, len);
buf->data = data;
buf->len = len - 1;
buf->len = len;
buf->offset = 0;
buf->fd = fd;
}
......@@ -84,6 +84,7 @@ int gitno_connect(const char *host, const char *port)
ret = getaddrinfo(host, port, &hints, &info);
if (ret != 0) {
error = GIT_EOSERR;
info = NULL;
goto cleanup;
}
......@@ -121,7 +122,7 @@ int gitno_send(int s, const char *msg, size_t len, int flags)
while (off < len) {
ret = send(s, msg + off, len - off, flags);
if (ret < 0)
return GIT_EOSERR;
return git__throw(GIT_EOSERR, "Error sending data: %s", strerror(errno));
off += ret;
}
......@@ -143,3 +144,33 @@ int gitno_select_in(gitno_buffer *buf, long int sec, long int usec)
/* The select(2) interface is silly */
return select(buf->fd + 1, &fds, NULL, NULL, &tv);
}
/*
 * Split the leading "host[:port]" part of `url` into two newly allocated
 * strings.
 *
 * `url` must contain a '/' (callers in this code base strip the protocol
 * prefix before calling); otherwise GIT_EOBJCORRUPTED is thrown. When no
 * port is given, a copy of `default_port` is returned instead.
 *
 * On success, `*host` and `*port` are owned by the caller and must be freed.
 * On allocation failure GIT_ENOMEM is returned, `*host` is never left
 * pointing at allocated memory and `*port` is reset to NULL, so a caller
 * that unconditionally frees both in its cleanup path stays safe.
 */
int gitno_extract_host_and_port(char **host, char **port, const char *url, const char *default_port)
{
	const char *colon, *slash, *delim;

	slash = strchr(url, '/');
	if (slash == NULL)
		return git__throw(GIT_EOBJCORRUPTED, "Malformed URL: missing /");

	/*
	 * Only a ':' in front of the first '/' separates host and port. A ':'
	 * further along is part of the path; treating it as the port delimiter
	 * would yield a negative length below.
	 */
	colon = strchr(url, ':');
	if (colon != NULL && colon > slash)
		colon = NULL;

	if (colon == NULL)
		*port = git__strdup(default_port);
	else
		*port = git__strndup(colon + 1, slash - colon - 1);

	if (*port == NULL)
		return GIT_ENOMEM;

	/* the host ends at the ':' when a port was given, at the '/' otherwise */
	delim = colon == NULL ? slash : colon;
	*host = git__strndup(url, delim - url);
	if (*host == NULL) {
		free(*port);
		*port = NULL; /* do not hand a dangling pointer back to the caller */
		return GIT_ENOMEM;
	}

	return GIT_SUCCESS;
}
......@@ -29,4 +29,6 @@ int gitno_connect(const char *host, const char *port);
int gitno_send(int s, const char *msg, size_t len, int flags);
int gitno_select_in(gitno_buffer *buf, long int sec, long int usec);
int gitno_extract_host_and_port(char **host, char **port, const char *url, const char *default_port);
#endif
......@@ -80,13 +80,30 @@ static int pack_pkt(git_pkt **out)
return GIT_SUCCESS;
}
/*
 * Build a comment packet holding a NUL-terminated copy of the first `len`
 * bytes of `line`. The packet is allocated in one piece (header plus text)
 * and handed to the caller through `out`. Returns GIT_ENOMEM when the
 * allocation fails.
 */
static int comment_pkt(git_pkt **out, const char *line, size_t len)
{
	git_pkt_comment *comment = git__malloc(sizeof(*comment) + len + 1);

	if (!comment)
		return GIT_ENOMEM;

	comment->type = GIT_PKT_COMMENT;
	memcpy(comment->comment, line, len);
	comment->comment[len] = '\0';

	*out = (git_pkt *) comment;
	return GIT_SUCCESS;
}
/*
* Parse an other-ref line.
*/
static int ref_pkt(git_pkt **out, const char *line, size_t len)
{
git_pkt_ref *pkt;
int error, has_caps = 0;
int error;
pkt = git__malloc(sizeof(git_pkt_ref));
if (pkt == NULL)
......@@ -110,9 +127,6 @@ static int ref_pkt(git_pkt **out, const char *line, size_t len)
line += GIT_OID_HEXSZ + 1;
len -= (GIT_OID_HEXSZ + 1);
if (strlen(line) < len)
has_caps = 1;
if (line[len - 1] == '\n')
--len;
......@@ -124,7 +138,7 @@ static int ref_pkt(git_pkt **out, const char *line, size_t len)
memcpy(pkt->head.name, line, len);
pkt->head.name[len] = '\0';
if (has_caps) {
if (strlen(pkt->head.name) < len) {
pkt->capabilities = strchr(pkt->head.name, '\0') + 1;
}
......@@ -227,6 +241,8 @@ int git_pkt_parse_line(git_pkt **head, const char *line, const char **out, size_
error = ack_pkt(head, line, len);
else if (!git__prefixcmp(line, "NAK"))
error = nak_pkt(head);
else if (*line == '#')
error = comment_pkt(head, line, len);
else
error = ref_pkt(head, line, len);
......
......@@ -20,6 +20,7 @@ enum git_pkt_type {
GIT_PKT_ACK,
GIT_PKT_NAK,
GIT_PKT_PACK,
GIT_PKT_COMMENT,
};
/* Used for multi-ack */
......@@ -56,6 +57,11 @@ typedef struct {
enum git_ack_status status;
} git_pkt_ack;
/*
* Packet carrying a free-form comment line. `comment` is a trailing
* variable-length array, so the structure has to be allocated with room
* for the text (and its terminating NUL) behind the header.
*/
typedef struct {
enum git_pkt_type type;
char comment[GIT_FLEX_ARRAY];
} git_pkt_comment;
int git_pkt_parse_line(git_pkt **head, const char *line, const char **out, size_t len);
int git_pkt_send_flush(int s);
int git_pkt_send_done(int s);
......
/*
* This file is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2,
* as published by the Free Software Foundation.
*
* In addition to the permissions in the GNU General Public License,
* the authors give you unlimited permission to link the compiled
* version of this file into combinations with other programs,
* and to distribute those combinations without any restriction
* coming from the use of this file. (The General Public License
* restrictions do apply in other respects; for example, they cover
* modification of the file, and distribution when not linked into
* a combined executable.)
*
* This file is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include <stdlib.h>
#include "git2.h"
#include "http_parser.h"
#include "transport.h"
#include "common.h"
#include "netops.h"
#include "buffer.h"
#include "pkt.h"
/* Which header callback ran most recently; lets the streaming header
 * callbacks tell a continued field/value from the start of a new one. */
typedef enum {
	NONE  = 0,
	FIELD = 1,
	VALUE = 2
} last_cb_type;
/*
 * State of one HTTP transport instance.
 *
 * `parent` must stay the first member: the transport callbacks cast the
 * git_transport pointer they receive straight to transport_http.
 */
typedef struct {
	git_transport parent;
	git_vector refs;          /* packets parsed from the response body */
	int socket;               /* descriptor returned by gitno_connect() */
	git_buf buf;              /* scratch buffer shared by the parser callbacks */
	git_remote_head **heads;  /* array handed out by the ls callback; freed with the transport */
	int error;                /* error recorded by a parser callback, GIT_SUCCESS otherwise */
	/*
	 * The flags are declared unsigned on purpose: whether a plain `int`
	 * bit-field is signed is implementation-defined, and a signed 1-bit
	 * field can only represent 0 and -1, not the 1 that is stored in it.
	 */
	unsigned int transfer_finished :1, /* on_message_complete has run */
		ct_found :1,               /* currently collecting the Content-Type value */
		ct_finished :1,            /* Content-Type value has been stored */
		last_cb :3;                /* last_cb_type of the previous header callback */
	char *content_type;       /* owned copy of the Content-Type header value */
	char *service;            /* owned copy of the requested service name */
} transport_http;
/*
 * Write the HTTP GET request for the ref advertisement of `service` into
 * `buf`. `url` is expected without its protocol prefix; everything from its
 * first '/' on is used as the request path. Returns GIT_ENOMEM if the buffer
 * ran out of memory while the request was assembled, GIT_SUCCESS otherwise.
 */
static int gen_request(git_buf *buf, const char *url, const char *host, const char *service)
{
	const char *path = strchr(url, '/');

	if (path == NULL) /* Is 'git fetch http://host.com/' valid? */
		path = "/";

	git_buf_printf(buf, "GET %s/info/refs?service=git-%s HTTP/1.1\r\n", path, service);
	git_buf_puts(buf, "User-Agent: git/1.0 (libgit2 " LIBGIT2_VERSION ")\r\n");
	git_buf_printf(buf, "Host: %s\r\n", host);
	git_buf_puts(buf, "Accept: */*\r\n" "Pragma: no-cache\r\n\r\n");

	return git_buf_oom(buf) ? GIT_ENOMEM : GIT_SUCCESS;
}
/*
 * Connect to the host named in the transport's URL and send the initial
 * HTTP request for `service`.
 *
 * On success the connected socket is stored in t->socket and a copy of
 * `service` in t->service. Returns GIT_SUCCESS or a negative error code;
 * the temporary host/port strings and the request buffer are released on
 * every path.
 */
static int do_connect(transport_http *t, const char *service)
{
	int s = -1, error;
	const char *url = t->parent.url, *prefix = "http://";
	char *host = NULL, *port = NULL;
	git_buf request = GIT_BUF_INIT;

	if (!git__prefixcmp(url, prefix))
		url += strlen(prefix);

	error = gitno_extract_host_and_port(&host, &port, url, "80");
	if (error < GIT_SUCCESS)
		goto cleanup;

	t->service = git__strdup(service);
	if (t->service == NULL) {
		error = GIT_ENOMEM;
		goto cleanup;
	}

	s = gitno_connect(host, port);
	if (s < GIT_SUCCESS) {
		/*
		 * Report the code returned by gitno_connect() (`error` still holds
		 * GIT_SUCCESS at this point) and stop: there is no socket to send
		 * the request on.
		 */
		error = git__throw(s, "Failed to connect to host");
		goto cleanup;
	}
	t->socket = s;

	/* Generate and send the HTTP request */
	error = gen_request(&request, url, host, service);
	if (error < GIT_SUCCESS) {
		error = git__throw(error, "Failed to generate request");
		goto cleanup;
	}

	error = gitno_send(s, git_buf_cstr(&request), strlen(git_buf_cstr(&request)), 0);
	if (error < GIT_SUCCESS)
		error = git__rethrow(error, "Failed to send the HTTP request");

cleanup:
	git_buf_free(&request);
	free(host);
	free(port);

	return error;
}
/*
* The HTTP parser is streaming, so we need to wait until we're in the
* field handler before we can be sure that we can store the previous
* value. Right now, we only care about the
* Content-Type. on_header_{field,value} should be kept generic enough
* to work for any request.
*/
/* Name of the only header this transport stores. on_header_value() compares
* the collected field name against it with strcmp(), i.e. case-sensitively. */
static const char *typestr = "Content-Type";
/*
* http_parser callback for a piece of a header field name.
*
* The parser may deliver one name in several pieces, so the name is
* accumulated in t->buf until a value callback arrives. Returns non-zero
* (which the parser treats as an error) when memory runs out.
*/
static int on_header_field(http_parser *parser, const char *str, size_t len)
{
transport_http *t = (transport_http *) parser->data;
git_buf *buf = &t->buf;
/* A new field after the Content-Type value means that value is complete:
* store a copy of it and stop collecting. */
if (t->last_cb == VALUE && t->ct_found) {
t->ct_finished = 1;
t->ct_found = 0;
t->content_type = git__strdup(git_buf_cstr(buf));
if (t->content_type == NULL)
return t->error = GIT_ENOMEM;
git_buf_clear(buf);
}
/* NOTE(review): ct_found is only set in on_header_value(), which always
* leaves last_cb == VALUE, and the branch above clears it again; this
* branch therefore looks unreachable -- confirm before relying on it. */
if (t->ct_found) {
t->last_cb = FIELD;
return 0;
}
/* First piece of a new field name: discard whatever the buffer held */
if (t->last_cb != FIELD)
git_buf_clear(buf);
git_buf_put(buf, str, len);
t->last_cb = FIELD;
return git_buf_oom(buf);
}
/*
* http_parser callback for a piece of a header value.
*
* Once the Content-Type value has been stored (ct_finished) every further
* value is ignored. Otherwise, when the field name collected in t->buf
* equals typestr, the buffer is restarted with this first piece of the
* value; later pieces of the same value are appended. Returns non-zero
* (which the parser treats as an error) when memory runs out.
*
* NOTE(review): the comparison uses strcmp(), but HTTP header field names
* are case-insensitive; a server sending "content-type" would not be
* recognised here -- confirm whether that needs handling.
*/
static int on_header_value(http_parser *parser, const char *str, size_t len)
{
transport_http *t = (transport_http *) parser->data;
git_buf *buf = &t->buf;
if (t->ct_finished) {
t->last_cb = VALUE;
return 0;
}
/* Continuation of the value started by the previous callback. This appends
* for any header, not only Content-Type; on_header_field() clears the
* buffer again when the next field name starts. */
if (t->last_cb == VALUE)
git_buf_put(buf, str, len);
/* First piece of a value: the buffer still holds the complete field name */
if (t->last_cb == FIELD && !strcmp(git_buf_cstr(buf), typestr)) {
t->ct_found = 1;
git_buf_clear(buf);
git_buf_put(buf, str, len);
}
t->last_cb = VALUE;
return git_buf_oom(buf);
}
/*
 * http_parser callback run once all headers have been seen.
 *
 * If no Content-Type has been stored yet, whatever the header callbacks
 * left in t->buf is taken as its value (this is the case when Content-Type
 * was the last header). The value is then compared against the type
 * expected for the requested service.
 *
 * Returns 0 to let parsing continue. Every failure is recorded in t->error
 * before the non-zero return, because the caller only looks at t->error to
 * find out what went wrong.
 */
static int on_headers_complete(http_parser *parser)
{
	transport_http *t = (transport_http *) parser->data;
	git_buf *buf = &t->buf;

	if (t->content_type == NULL) {
		t->content_type = git__strdup(git_buf_cstr(buf));
		if (t->content_type == NULL)
			return t->error = GIT_ENOMEM;
	}

	/* reuse the scratch buffer to build the expected Content-Type */
	git_buf_clear(buf);
	git_buf_printf(buf, "application/x-git-%s-advertisement", t->service);
	if (git_buf_oom(buf))
		return t->error = GIT_ENOMEM;

	if (strcmp(t->content_type, git_buf_cstr(buf)))
		return t->error = git__throw(GIT_EOBJCORRUPTED, "Content-Type '%s' is wrong", t->content_type);

	/* leave the buffer empty for the body callback */
	git_buf_clear(buf);
	return 0;
}
/*
 * http_parser callback for a piece of the response body.
 *
 * The data is appended to t->buf and as many complete pkt-lines as possible
 * are parsed out of it and stored in t->refs; an incomplete trailing line
 * stays in the buffer until more data arrives. The very first packet of a
 * response must be a comment packet, otherwise the response is rejected.
 *
 * "First packet" is derived from the refs vector still being empty rather
 * than from a function-static flag, so the check is applied to every
 * transport instance and not only to the first response seen by the
 * process.
 *
 * Returns 0 on success (including "need more data"); on failure the error
 * is stored in t->error and returned, and the packet being processed is
 * released.
 */
static int on_body_store_refs(http_parser *parser, const char *str, size_t len)
{
	transport_http *t = (transport_http *) parser->data;
	git_buf *buf = &t->buf;
	git_vector *refs = &t->refs;
	int error;
	const char *line_end;

	if (len == 0) { /* EOF */
		if (buf->size != 0)
			return t->error = git__throw(GIT_ERROR, "EOF and unprocessed data");
		else
			return 0;
	}

	git_buf_put(buf, str, len);
	if (git_buf_oom(buf))
		return t->error = GIT_ENOMEM;

	while (1) {
		git_pkt *pkt;

		if (buf->size == 0)
			return 0;

		error = git_pkt_parse_line(&pkt, buf->ptr, &line_end, buf->size);
		if (error == GIT_ESHORTBUFFER)
			return 0; /* Ask for more */
		if (error < GIT_SUCCESS)
			return t->error = git__rethrow(error, "Failed to parse pkt-line");

		git_buf_consume(buf, line_end);

		/* nothing stored yet, so this is the first packet of the response */
		if (refs->length == 0 && pkt->type != GIT_PKT_COMMENT) {
			git_pkt_free(pkt);
			return t->error = git__throw(GIT_EOBJCORRUPTED, "Not a valid smart HTTP response");
		}

		error = git_vector_insert(refs, pkt);
		if (error < GIT_SUCCESS) {
			/* the vector did not take ownership */
			git_pkt_free(pkt);
			return t->error = git__rethrow(error, "Failed to add pkt to list");
		}
	}
}
/* http_parser callback run at the end of the message: records that the
 * whole response has been received so the receive loop can stop. */
static int on_message_complete(http_parser *parser)
{
	((transport_http *) parser->data)->transfer_finished = 1;

	return 0;
}
/*
 * Read the HTTP response from t->socket and feed it through the HTTP
 * parser; the parser callbacks validate the headers and collect the
 * advertised packets in t->refs.
 *
 * Returns GIT_SUCCESS once the message is complete or the peer closed the
 * connection, and a negative error code if receiving or parsing failed.
 */
static int store_refs(transport_http *t)
{
	int error = GIT_SUCCESS;
	http_parser parser;
	http_parser_settings settings;
	char buffer[1024];
	gitno_buffer buf;

	http_parser_init(&parser, HTTP_RESPONSE);
	parser.data = t;
	memset(&settings, 0x0, sizeof(http_parser_settings));
	settings.on_header_field = on_header_field;
	settings.on_header_value = on_header_value;
	settings.on_headers_complete = on_headers_complete;
	settings.on_body = on_body_store_refs;
	settings.on_message_complete = on_message_complete;

	gitno_buffer_setup(&buf, buffer, sizeof(buffer), t->socket);

	while(1) {
		size_t parsed;

		error = gitno_recv(&buf);
		if (error < GIT_SUCCESS)
			return git__rethrow(error, "Error receiving data from network");

		parsed = http_parser_execute(&parser, &settings, buf.data, buf.offset);

		/* a callback failed and recorded why */
		if (t->error < GIT_SUCCESS)
			return git__rethrow(t->error, "Error parsing HTTP data");

		/*
		 * The parser stopped early without a callback having recorded an
		 * error. t->error is still GIT_SUCCESS here, so propagating it
		 * would presumably report success; throw a real error carrying the
		 * parser's own description instead.
		 */
		if (parsed != buf.offset)
			return git__throw(GIT_EOBJCORRUPTED, "Error parsing HTTP data: %s",
				http_errno_description(HTTP_PARSER_ERRNO(&parser)));

		gitno_consume_n(&buf, parsed);

		/* gitno_recv() returning 0 means the peer closed the connection */
		if (error == 0 || t->transfer_finished)
			return GIT_SUCCESS;
	}
}
/*
 * Transport `connect` callback: connect to the remote for fetching and
 * collect its advertised references. Pushing is rejected with
 * GIT_EINVALIDARGS. The scratch buffer is released before returning,
 * whether or not the connection succeeded.
 */
static int http_connect(git_transport *transport, int direction)
{
	transport_http *http = (transport_http *) transport;
	int error;

	if (direction == GIT_DIR_PUSH)
		return git__throw(GIT_EINVALIDARGS, "Pushing over HTTP is not supported");

	http->parent.direction = direction;

	error = git_vector_init(&http->refs, 16, NULL);
	if (error < GIT_SUCCESS)
		return git__rethrow(error, "Failed to init refs vector");

	error = do_connect(http, "upload-pack");
	if (error < GIT_SUCCESS)
		error = git__rethrow(error, "Failed to connect to host");
	else
		error = store_refs(http);

	git_buf_clear(&http->buf);
	git_buf_free(&http->buf);

	return error;
}
/*
 * Transport `ls` callback: fill `array` with pointers to the heads of all
 * reference packets collected while connecting. Packets of any other type
 * are skipped. The pointer array is also remembered in the transport, which
 * frees it together with the transport itself. Returns GIT_ENOMEM if the
 * array cannot be allocated.
 */
static int http_ls(git_transport *transport, git_headarray *array)
{
	transport_http *http = (transport_http *) transport;
	git_vector *packets = &http->refs;
	git_pkt_ref *pkt;
	unsigned int idx;
	int count = 0;

	array->heads = git__calloc(packets->length, sizeof(git_remote_head*));
	if (!array->heads)
		return GIT_ENOMEM;

	git_vector_foreach(packets, idx, pkt) {
		if (pkt->type == GIT_PKT_REF)
			array->heads[count++] = &pkt->head;
	}

	array->len = count;
	http->heads = array->heads;

	return GIT_SUCCESS;
}
/* Transport `close` callback: close the socket, reporting GIT_EOSERR with
 * the system error text if that fails. */
static int http_close(git_transport *transport)
{
	transport_http *http = (transport_http *) transport;

	if (close(http->socket) < 0)
		return git__throw(GIT_EOSERR, "Failed to close the socket: %s", strerror(errno));

	return GIT_SUCCESS;
}
/*
 * Transport `free` callback: release every collected packet, the vector
 * holding them, the heads array, the stored strings and finally the
 * transport structure itself.
 */
static void http_free(git_transport *transport)
{
	transport_http *http = (transport_http *) transport;
	git_vector *packets = &http->refs;
	git_pkt *pkt;
	unsigned int idx;

	git_vector_foreach(packets, idx, pkt) {
		git_pkt_free(pkt);
	}
	git_vector_free(packets);

	free(http->heads);
	free(http->content_type);
	free(http->service);
	free(http->parent.url);
	free(http);
}
/*
 * Create a zero-initialised HTTP transport with its callbacks installed and
 * return it through `out`. Returns GIT_ENOMEM if the allocation fails.
 */
int git_transport_http(git_transport **out)
{
	transport_http *http = git__malloc(sizeof(*http));

	if (!http)
		return GIT_ENOMEM;

	memset(http, 0x0, sizeof(*http));

	http->parent.connect = http_connect;
	http->parent.ls = http_ls;
	http->parent.close = http_close;
	http->parent.free = http_free;

	*out = (git_transport *) http;
	return GIT_SUCCESS;
}
......@@ -15,7 +15,7 @@ struct {
git_transport_cb fn;
} transports[] = {
{"git://", git_transport_git},
{"http://", git_transport_dummy},
{"http://", git_transport_http},
{"https://", git_transport_dummy},
{"file://", git_transport_local},
{"git+ssh://", git_transport_dummy},
......
......@@ -107,6 +107,7 @@ struct git_transport {
int git_transport_local(struct git_transport **transport);
int git_transport_git(struct git_transport **transport);
int git_transport_http(struct git_transport **transport);
int git_transport_dummy(struct git_transport **transport);
#endif
......@@ -84,37 +84,6 @@ cleanup:
return error;
}
/* The URL should already have been stripped of the protocol */
static int extract_host_and_port(char **host, char **port, const char *url)
{
char *colon, *slash, *delim;
int error = GIT_SUCCESS;
colon = strchr(url, ':');
slash = strchr(url, '/');
if (slash == NULL)
return git__throw(GIT_EOBJCORRUPTED, "Malformed URL: missing /");
if (colon == NULL) {
*port = git__strdup(GIT_DEFAULT_PORT);
} else {
*port = git__strndup(colon + 1, slash - colon - 1);
}
if (*port == NULL)
return GIT_ENOMEM;;
delim = colon == NULL ? slash : colon;
*host = git__strndup(url, delim - url);
if (*host == NULL) {
free(*port);
error = GIT_ENOMEM;
}
return error;
}
/*
* Parse the URL and connect to a server, storing the socket in
* out. For convenience this also takes care of asking for the remote
......@@ -130,9 +99,10 @@ static int do_connect(transport_git *t, const char *url)
if (!git__prefixcmp(url, prefix))
url += strlen(prefix);
error = extract_host_and_port(&host, &port, url);
error = gitno_extract_host_and_port(&host, &port, url, GIT_DEFAULT_PORT);
if (error < GIT_SUCCESS)
return error;
s = gitno_connect(host, port);
connected = 1;
error = send_request(s, NULL, url);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment