url-parser.c

Git mirror of http://draft.scyphus.co.jp/lang/c/url_parser.html
git clone git://git.finwo.net/lib/url-parser.c
Log | Files | Refs

commit e31c1c5b0248278729771f9f8c091161afda8c56
Author: finwo <finwo@pm.me>
Date:   Thu, 14 May 2020 16:43:19 +0200

Copied from origin

Diffstat:
A url_parser.c | 328 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A url_parser.h |  48 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 376 insertions(+), 0 deletions(-)

/* ======================================================================
 * url_parser.h
 * ====================================================================== */

/*_
 * Copyright 2010 Scyphus Solutions Co. Ltd.  All rights reserved.
 *
 * Authors:
 *      Hirochika Asai
 */

#ifndef _URL_PARSER_H
#define _URL_PARSER_H

/*
 * URL storage
 *
 * Every member is either NULL (component absent) or a NUL-terminated
 * string allocated with malloc() and owned by the structure.  Release the
 * whole structure with parsed_url_free().
 */
struct parsed_url {
    char *scheme;               /* mandatory; stored in lower case */
    char *host;                 /* mandatory; IPv6 literals keep their [] */
    char *port;                 /* optional; kept as text, not validated */
    char *path;                 /* optional; WITHOUT the leading '/' */
    char *query;                /* optional; without the leading '?' */
    char *fragment;             /* optional; without the leading '#' */
    char *username;             /* optional */
    char *password;             /* optional */
};

#ifdef __cplusplus
extern "C" {
#endif

    /*
     * Declaration of function prototypes
     */
    struct parsed_url * parse_url(const char *);
    void parsed_url_free(struct parsed_url *);

#ifdef __cplusplus
}
#endif

#endif /* _URL_PARSER_H */

/*
 * Local variables:
 * tab-width: 4
 * c-basic-offset: 4
 * End:
 * vim600: sw=4 ts=4 fdm=marker
 * vim<600: sw=4 ts=4
 */

/* ======================================================================
 * url_parser.c
 * ====================================================================== */

/*_
 * Copyright 2010-2011 Scyphus Solutions Co. Ltd.  All rights reserved.
 *
 * Authors:
 *      Hirochika Asai
 */

/*
 * The declarations of url_parser.h are reproduced above; when the two
 * files are built separately this is where url_parser.h is included.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/*
 * Prototype declarations
 */
static inline int is_scheme_char(int);
static inline int is_authority_end(int);
static char * dup_range(const char *, const char *);

/*
 * Check whether the character is permitted in scheme string.
 * Letters, digits, '+', '-' and '.' are accepted.  The value is converted
 * to unsigned char before it reaches <ctype.h>, because passing a negative
 * char value to isalnum() is undefined behaviour.
 * Returns 1 if permitted, 0 otherwise.
 */
static inline int
is_scheme_char(int c)
{
    return isalnum((unsigned char)c) || '+' == c || '-' == c || '.' == c;
}

/*
 * Check whether the character terminates the authority part
 * (<user>:<password>@<host>:<port>): end of string, or the start of the
 * path, query or fragment.  Returns 1 if so, 0 otherwise.
 */
static inline int
is_authority_end(int c)
{
    return '\0' == c || '/' == c || '?' == c || '#' == c;
}

/*
 * Duplicate the bytes in [begin, end) into a newly allocated,
 * NUL-terminated string.  Returns NULL if the allocation fails.  The
 * caller owns the returned memory.
 */
static char *
dup_range(const char *begin, const char *end)
{
    size_t len;
    char *copy;

    len = (size_t)(end - begin);
    copy = malloc(len + 1);
    if ( NULL == copy ) {
        return NULL;
    }
    memcpy(copy, begin, len);
    copy[len] = '\0';

    return copy;
}

/*
 * Parse a URL of the form
 *
 *   <scheme>://[<user>[:<password>]@]<host>[:<port>][/<path>][?<query>][#<fragment>]
 *
 * into its components.  See RFC 1738, 3986.
 *
 * url: NUL-terminated URL string; NULL is rejected.
 *
 * Returns a newly allocated struct parsed_url that the caller must release
 * with parsed_url_free(), or NULL when the input is malformed (no scheme,
 * invalid scheme character, missing "//", empty host, unterminated "["
 * host, stray characters after the authority) or when memory runs out.
 *
 * Components that do not appear in the URL are left NULL.  A component
 * that appears but is empty (e.g. the path of "http://host/") is returned
 * as an empty string.
 */
struct parsed_url *
parse_url(const char *url)
{
    struct parsed_url *purl;
    const char *tmpstr;
    const char *curstr;
    const char *at;
    char *p;

    if ( NULL == url ) {
        return NULL;
    }

    /* Allocate the parsed url storage */
    purl = malloc(sizeof *purl);
    if ( NULL == purl ) {
        return NULL;
    }
    purl->scheme = NULL;
    purl->host = NULL;
    purl->port = NULL;
    purl->path = NULL;
    purl->query = NULL;
    purl->fragment = NULL;
    purl->username = NULL;
    purl->password = NULL;

    /*
     * <scheme>:<scheme-specific-part>
     * <scheme> := [a-z0-9\+\-\.]+
     * upper case = lower case for resiliency
     */
    /* Read scheme; it must be present and non-empty */
    tmpstr = strchr(url, ':');
    if ( NULL == tmpstr || tmpstr == url ) {
        goto fail;
    }
    /* Check restrictions */
    for ( curstr = url; curstr < tmpstr; curstr++ ) {
        if ( !is_scheme_char(*curstr) ) {
            /* Invalid format */
            goto fail;
        }
    }
    /* Copy the scheme to the storage */
    purl->scheme = dup_range(url, tmpstr);
    if ( NULL == purl->scheme ) {
        goto fail;
    }
    /* Make the character to lower if it is upper case. */
    for ( p = purl->scheme; '\0' != *p; p++ ) {
        *p = (char)tolower((unsigned char)*p);
    }
    /* Skip ':' */
    curstr = tmpstr + 1;

    /*
     * //<user>:<password>@<host>:<port>/<url-path>
     * Any ":", "@" and "/" must be encoded.
     */
    /* Eat "//"; the second byte is only read when the first one is '/' */
    if ( '/' != curstr[0] || '/' != curstr[1] ) {
        goto fail;
    }
    curstr += 2;

    /*
     * Check if the user (and password) are specified: look for '@', but
     * only inside the authority, so that an '@' in the path, query or
     * fragment is not mistaken for the userinfo delimiter.
     */
    at = NULL;
    for ( tmpstr = curstr; !is_authority_end(*tmpstr); tmpstr++ ) {
        if ( '@' == *tmpstr ) {
            at = tmpstr;
            break;
        }
    }

    /* User and password specification */
    if ( NULL != at ) {
        /* Read username */
        tmpstr = curstr;
        while ( tmpstr < at && ':' != *tmpstr ) {
            tmpstr++;
        }
        purl->username = dup_range(curstr, tmpstr);
        if ( NULL == purl->username ) {
            goto fail;
        }
        if ( ':' == *tmpstr ) {
            /* Read password: everything between ':' and '@' */
            purl->password = dup_range(tmpstr + 1, at);
            if ( NULL == purl->password ) {
                goto fail;
            }
        }
        /* Skip '@' */
        curstr = at + 1;
    }

    /* Proceed on by delimiters with reading host */
    tmpstr = curstr;
    if ( '[' == *tmpstr ) {
        /* IPv6 literal: must be closed by ']' inside the authority */
        while ( ']' != *tmpstr && !is_authority_end(*tmpstr) ) {
            tmpstr++;
        }
        if ( ']' != *tmpstr ) {
            goto fail;
        }
        /* Keep the closing bracket as part of the host */
        tmpstr++;
    } else {
        while ( ':' != *tmpstr && !is_authority_end(*tmpstr) ) {
            tmpstr++;
        }
    }
    if ( tmpstr == curstr ) {
        /* Host is mandatory */
        goto fail;
    }
    purl->host = dup_range(curstr, tmpstr);
    if ( NULL == purl->host ) {
        goto fail;
    }
    curstr = tmpstr;

    /* Is port number specified? */
    if ( ':' == *curstr ) {
        curstr++;
        /* Read port number */
        tmpstr = curstr;
        while ( !is_authority_end(*tmpstr) ) {
            tmpstr++;
        }
        purl->port = dup_range(curstr, tmpstr);
        if ( NULL == purl->port ) {
            goto fail;
        }
        curstr = tmpstr;
    }

    /* Anything other than a delimiter here (e.g. "[::1]x") is malformed */
    if ( !is_authority_end(*curstr) ) {
        goto fail;
    }

    /* Parse path */
    if ( '/' == *curstr ) {
        /* Skip '/' */
        curstr++;
        tmpstr = curstr;
        while ( '\0' != *tmpstr && '#' != *tmpstr && '?' != *tmpstr ) {
            tmpstr++;
        }
        purl->path = dup_range(curstr, tmpstr);
        if ( NULL == purl->path ) {
            goto fail;
        }
        curstr = tmpstr;
    }

    /* Is query specified? */
    if ( '?' == *curstr ) {
        /* Skip '?' */
        curstr++;
        /* Read query */
        tmpstr = curstr;
        while ( '\0' != *tmpstr && '#' != *tmpstr ) {
            tmpstr++;
        }
        purl->query = dup_range(curstr, tmpstr);
        if ( NULL == purl->query ) {
            goto fail;
        }
        curstr = tmpstr;
    }

    /* Is fragment specified? */
    if ( '#' == *curstr ) {
        /* Skip '#' */
        curstr++;
        /* Read fragment: everything up to the end of the string */
        purl->fragment = dup_range(curstr, curstr + strlen(curstr));
        if ( NULL == purl->fragment ) {
            goto fail;
        }
    }

    return purl;

fail:
    /* Release whatever has been collected so far */
    parsed_url_free(purl);
    return NULL;
}

/*
 * Free memory of parsed url.
 * Releases every component string and then the structure itself.
 * Passing NULL is allowed and does nothing.
 */
void
parsed_url_free(struct parsed_url *purl)
{
    if ( NULL == purl ) {
        return;
    }
    /* free(NULL) is a no-op, so absent components need no guard */
    free(purl->scheme);
    free(purl->host);
    free(purl->port);
    free(purl->path);
    free(purl->query);
    free(purl->fragment);
    free(purl->username);
    free(purl->password);
    free(purl);
}

/*
 * Local variables:
 * tab-width: 4
 * c-basic-offset: 4
 * End:
 * vim600: sw=4 ts=4 fdm=marker
 * vim<600: sw=4 ts=4
 */