url-parser.c

URL parsing library
git clone git://git.finwo.net/lib/url-parser.c
Log | Files | Refs | README | LICENSE

commit 56b8135dfb2ae13dd1c53663b7c435c3edc1c910
parent a02e951fa36667732f027ae152833138c79289dd
Author: finwo <finwo@pm.me>
Date:   Sat,  7 Mar 2026 02:51:15 +0100

Add length limiters to prevent buffer overflows

Diffstat:
M src/url-parser.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M src/url-parser.h | 1 +
A test/length-limits | 0
A test/length-limits.test.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 177 insertions(+), 2 deletions(-)

diff --git a/src/url-parser.c b/src/url-parser.c @@ -41,6 +41,15 @@ #include <stdlib.h> #include <string.h> #include <ctype.h> +#include <limits.h> + +static const char *_last_error = NULL; + +const char *parse_url_last_error(void) { + return _last_error; +} + +#define SET_ERROR(msg) do { _last_error = (msg); } while(0) /* * Prototype declarations @@ -86,8 +95,8 @@ parse_url(const char *url) struct parsed_url *purl; const char *tmpstr; const char *curstr; - int len; - int i; + size_t len; + size_t i; int bracket_flag; int is_path; @@ -120,6 +129,11 @@ parse_url(const char *url) } /* Get the scheme length */ len = tmpstr - curstr; + if ( len > 64 ) { + SET_ERROR("ERR_SCHEME_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } /* Check restrictions */ for ( i = 0; i < len; i++ ) { if ( !_is_scheme_char(curstr[i]) ) { @@ -201,6 +215,11 @@ parse_url(const char *url) } else { len = tmpstr - curstr; } + if ( len > 255 ) { + SET_ERROR("ERR_USERNAME_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } if ( len > 0 ) { purl->username = malloc(sizeof(char) * (len + 1)); if ( NULL == purl->username ) { @@ -220,6 +239,11 @@ parse_url(const char *url) tmpstr++; } len = tmpstr - curstr; + if ( len > 255 ) { + SET_ERROR("ERR_PASSWORD_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } if ( len > 0 ) { purl->password = malloc(sizeof(char) * (len + 1)); if ( NULL == purl->password ) { @@ -262,6 +286,11 @@ parse_url(const char *url) tmpstr++; } len = tmpstr - curstr; + if ( len > 255 ) { + SET_ERROR("ERR_HOST_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } if ( len > 0 ) { purl->host = malloc(sizeof(char) * (len + 1)); @@ -287,7 +316,22 @@ parse_url(const char *url) tmpstr++; } len = tmpstr - curstr; + if ( len > 5 ) { + SET_ERROR("ERR_PORT_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } if ( len > 0 ) { + char port_str[6]; + size_t copy_len = len < 5 ? 
len : 5; + (void)strncpy(port_str, curstr, copy_len); + port_str[copy_len] = '\0'; + long port_val = strtol(port_str, NULL, 10); + if ( port_val < 1 || port_val > 65535 ) { + SET_ERROR("ERR_PORT_INVALID"); + parsed_url_free(purl); + return NULL; + } purl->port = malloc(sizeof(char) * (len + 1)); if ( NULL == purl->port ) { parsed_url_free(purl); @@ -311,6 +355,11 @@ parse_url(const char *url) tmpstr++; } len = tmpstr - curstr; + if ( len > 4096 ) { + SET_ERROR("ERR_PATH_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } if ( len > 0 ) { purl->path = malloc(sizeof(char) * (len + 1)); if ( NULL == purl->path ) { @@ -330,6 +379,11 @@ parse_url(const char *url) tmpstr++; } len = tmpstr - curstr; + if ( len > 4096 ) { + SET_ERROR("ERR_QUERY_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } if ( len > 0 ) { purl->query = malloc(sizeof(char) * (len + 1)); if ( NULL == purl->query ) { @@ -350,6 +404,11 @@ parse_url(const char *url) tmpstr++; } len = tmpstr - curstr; + if ( len > 4096 ) { + SET_ERROR("ERR_FRAGMENT_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } if ( len > 0 ) { purl->fragment = malloc(sizeof(char) * (len + 1)); if ( NULL == purl->fragment ) { diff --git a/src/url-parser.h b/src/url-parser.h @@ -61,6 +61,7 @@ extern "C" { */ struct parsed_url * parse_url(const char *); void parsed_url_free(struct parsed_url *); + const char *parse_url_last_error(void); #ifdef __cplusplus } diff --git a/test/length-limits b/test/length-limits Binary files differ. 
diff --git a/test/length-limits.test.c b/test/length-limits.test.c @@ -0,0 +1,115 @@ +#include "url-parser.h" +#include "test.h" + +void test_scheme_too_long() { + char scheme[72]; + memset(scheme, 'a', 65); + scheme[0] = 'h'; + scheme[1] = 't'; + scheme[2] = 't'; + scheme[3] = 'p'; + scheme[65] = '\0'; + char url[128]; + snprintf(url, sizeof(url), "%s://example.com", scheme); + struct parsed_url *purl = parse_url(url); + ASSERT("scheme too long returns NULL", purl == NULL); + ASSERT_STRING_EQUALS("ERR_SCHEME_TOO_LONG", parse_url_last_error()); +} + +void test_username_too_long() { + char user[262]; + memset(user, 'a', 256); + user[256] = '\0'; + char url[320]; + snprintf(url, sizeof(url), "http://%s@example.com/", user); + struct parsed_url *purl = parse_url(url); + ASSERT("username too long returns NULL", purl == NULL); + ASSERT_STRING_EQUALS("ERR_USERNAME_TOO_LONG", parse_url_last_error()); +} + +void test_password_too_long() { + char pass[262]; + memset(pass, 'a', 256); + pass[256] = '\0'; + char url[320]; + snprintf(url, sizeof(url), "http://user:%s@example.com/", pass); + struct parsed_url *purl = parse_url(url); + ASSERT("password too long returns NULL", purl == NULL); + ASSERT_STRING_EQUALS("ERR_PASSWORD_TOO_LONG", parse_url_last_error()); +} + +void test_host_too_long() { + char host[262]; + memset(host, 'a', 256); + host[256] = '\0'; + char url[320]; + snprintf(url, sizeof(url), "http://%s/", host); + struct parsed_url *purl = parse_url(url); + ASSERT("host too long returns NULL", purl == NULL); + ASSERT_STRING_EQUALS("ERR_HOST_TOO_LONG", parse_url_last_error()); +} + +void test_port_too_long() { + struct parsed_url *purl = parse_url("http://example.com:123456/"); + ASSERT("port too long returns NULL", purl == NULL); + ASSERT_STRING_EQUALS("ERR_PORT_TOO_LONG", parse_url_last_error()); +} + +void test_port_invalid_zero() { + struct parsed_url *purl = parse_url("http://example.com:0/"); + ASSERT("port zero returns NULL", purl == NULL); + 
ASSERT_STRING_EQUALS("ERR_PORT_INVALID", parse_url_last_error()); +} + +void test_port_invalid_overflow() { + struct parsed_url *purl = parse_url("http://example.com:65536/"); + ASSERT("port overflow returns NULL", purl == NULL); + ASSERT_STRING_EQUALS("ERR_PORT_INVALID", parse_url_last_error()); +} + +void test_path_too_long() { + char path[4100]; + memset(path, 'a', 4097); + path[4097] = '\0'; + char url[4200]; + snprintf(url, sizeof(url), "http://example.com/%s", path); + struct parsed_url *purl = parse_url(url); + ASSERT("path too long returns NULL", purl == NULL); + ASSERT_STRING_EQUALS("ERR_PATH_TOO_LONG", parse_url_last_error()); +} + +void test_query_too_long() { + char query[4100]; + memset(query, 'a', 4097); + query[4097] = '\0'; + char url[4200]; + snprintf(url, sizeof(url), "http://example.com/?%s", query); + struct parsed_url *purl = parse_url(url); + ASSERT("query too long returns NULL", purl == NULL); + ASSERT_STRING_EQUALS("ERR_QUERY_TOO_LONG", parse_url_last_error()); +} + +void test_fragment_too_long() { + char frag[4100]; + memset(frag, 'a', 4097); + frag[4097] = '\0'; + char url[4200]; + snprintf(url, sizeof(url), "http://example.com/#%s", frag); + struct parsed_url *purl = parse_url(url); + ASSERT("fragment too long returns NULL", purl == NULL); + ASSERT_STRING_EQUALS("ERR_FRAGMENT_TOO_LONG", parse_url_last_error()); +} + +int main() { + RUN(test_scheme_too_long); + RUN(test_username_too_long); + RUN(test_password_too_long); + RUN(test_host_too_long); + RUN(test_port_too_long); + RUN(test_port_invalid_zero); + RUN(test_port_invalid_overflow); + RUN(test_path_too_long); + RUN(test_query_too_long); + RUN(test_fragment_too_long); + return TEST_REPORT(); +}