url-parser.c

URL parsing library
git clone git://git.finwo.net/lib/url-parser.c
Log | Files | Refs | README | LICENSE

commit 4ce4748f96abc072396d75b58d305d538c9f057a
parent 56b8135dfb2ae13dd1c53663b7c435c3edc1c910
Author: finwo <finwo@pm.me>
Date:   Wed, 11 Mar 2026 23:50:25 +0100

Fix strncpy warning; auto-formatting

Diffstat:
A .clang-format | 334 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M .gitignore | 1 +
A Makefile | 9 +++++++++
M src/url-parser.c | 683 +++++++++++++++++++++++++++++++++++++++----------------------------------------
M src/url-parser.h | 28 ++++++++++++++--------------
D test/length-limits | 0
6 files changed, 698 insertions(+), 357 deletions(-)

diff --git a/.clang-format b/.clang-format @@ -0,0 +1,334 @@ +--- +Language: Cpp +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveAssignments: + Enabled: true + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionDeclarations: false + AlignFunctionPointers: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionDeclarations: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveDeclarations: + Enabled: true + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionDeclarations: true + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveMacros: + Enabled: true + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionDeclarations: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveShortCaseStatements: + Enabled: true + AcrossEmptyLines: false + AcrossComments: false + AlignCaseArrows: false + AlignCaseColons: false +AlignConsecutiveTableGenBreakingDAGArgColons: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionDeclarations: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveTableGenCondOperatorColons: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionDeclarations: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveTableGenDefinitionColons: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionDeclarations: false + AlignFunctionPointers: false + PadOperators: false +AlignEscapedNewlines: Left +AlignOperands: Align +AlignTrailingComments: + Kind: Always + OverEmptyLines: 0 +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: 
true +AllowBreakBeforeNoexceptSpecifier: Never +AllowShortBlocksOnASingleLine: Never +AllowShortCaseExpressionOnASingleLine: true +AllowShortCaseLabelsOnASingleLine: false +AllowShortCompoundRequirementOnASingleLine: true +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: true +AllowShortNamespacesOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AttributeMacros: + - __capability + - absl_nonnull + - absl_nullable + - absl_nullability_unknown +BinPackArguments: true +BinPackLongBracedList: true +BinPackParameters: BinPack +BitFieldColonSpacing: Both +BracedInitializerIndentWidth: -1 +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterExternBlock: false + AfterFunction: true + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakAdjacentStringLiterals: true +BreakAfterAttributes: Leave +BreakAfterJavaFieldAnnotations: false +BreakAfterReturnType: None +BreakArrays: true +BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: Always +BreakBeforeBraces: Attach +BreakBeforeInlineASMColon: OnlyMultiline +BreakBeforeTemplateCloser: false +BreakBeforeTernaryOperators: true +BreakBinaryOperations: Never +BreakConstructorInitializers: BeforeColon +BreakFunctionDefinitionParameters: false +BreakInheritanceList: BeforeColon +BreakStringLiterals: true +BreakTemplateDeclarations: Yes +ColumnLimit: 120 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true 
+DerivePointerAlignment: false +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +EnumTrailingComma: Leave +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^<ext/.*\.h>' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^<.*\.h>' + Priority: 1 + SortPriority: 0 + CaseSensitive: false + - Regex: '^<.*' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 3 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: '([-_](test|unittest))?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseBlocks: true +IndentCaseLabels: true +IndentExportBlock: true +IndentExternBlock: AfterExternBlock +IndentGotoLabels: true +IndentPPDirectives: None +IndentRequiresClause: true +IndentWidth: 2 +IndentWrappedFunctionNames: false +InsertBraces: false +InsertNewlineAtEOF: true +InsertTrailingCommas: None +IntegerLiteralSeparator: + Binary: 0 + BinaryMinDigits: 0 + Decimal: 0 + DecimalMinDigits: 0 + Hex: 0 + HexMinDigits: 0 +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLines: + AtEndOfFile: false + AtStartOfBlock: false + AtStartOfFile: false +KeepFormFeed: false +LambdaBodyIndentation: Signature +LineEnding: DeriveLF +MacroBlockBegin: '' +MacroBlockEnd: '' +MainIncludeChar: Quote +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +OneLineFormatOffRegex: '' +PackConstructorInitializers: NextLine +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakBeforeMemberAccess: 150 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakScopeResolution: 500 
+PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Right +PPIndentWidth: -1 +QualifierAlignment: Leave +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + BasedOnStyle: google + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - ParseTextOrDie + - ParseTextProtoOrDie + - ParseTestProto + - ParsePartialTestProto + CanonicalDelimiter: pb + BasedOnStyle: google +ReferenceAlignment: Pointer +ReflowComments: Always +RemoveBracesLLVM: false +RemoveEmptyLinesInUnwrappedLines: false +RemoveParentheses: Leave +RemoveSemicolon: false +RequiresClausePosition: OwnLine +RequiresExpressionIndentation: OuterScope +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SkipMacroDefinitionBody: false +SortIncludes: + Enabled: true + IgnoreCase: false +SortJavaStaticImport: Before +SortUsingDeclarations: LexicographicNumeric +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterOperatorKeyword: false +SpaceAfterTemplateKeyword: true +SpaceAroundPointerQualifiers: Default +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeJsonColon: false +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterNot: false + AfterOverloadedOperator: false + AfterPlacementOperator: true + AfterRequiresInClause: false + AfterRequiresInExpression: false + BeforeNonEmptyParentheses: false +SpaceBeforeRangeBasedForLoopColon: true 
+SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: Never +SpacesInContainerLiterals: true +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParens: Never +SpacesInParensOptions: + ExceptDoubleParentheses: false + InCStyleCasts: false + InConditionalStatements: false + InEmptyParentheses: false + Other: false +SpacesInSquareBrackets: false +Standard: Auto +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TableGenBreakInsideDAGArg: DontBreak +TabWidth: 8 +UseTab: Never +VerilogBreakBetweenInstancePorts: true +WhitespaceSensitiveMacros: + - BOOST_PP_STRINGIZE + - CF_SWIFT_NAME + - NS_SWIFT_NAME + - PP_STRINGIZE + - STRINGIZE +WrapNamespaceBodyWithEmptyLines: Leave +... + diff --git a/.gitignore b/.gitignore @@ -1,6 +1,7 @@ /test/unix-socket /test/empty-host /test/basic +/test/length-limits *.o .DS_Store ._.DS_Store diff --git a/Makefile b/Makefile @@ -0,0 +1,9 @@ +FIND=$(shell which gfind find | head -1) + +.PHONY: test +test: + @$(MAKE) -C test run + +.PHONY: format +format: + $(FIND) src/ -type f \( -name '*.c' -o -name '*.h' \) -exec clang-format -i {} + diff --git a/src/url-parser.c b/src/url-parser.c @@ -37,19 +37,22 @@ #include "url-parser.h" +#include <ctype.h> +#include <limits.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <ctype.h> -#include <limits.h> static const char *_last_error = NULL; const char *parse_url_last_error(void) { - return _last_error; + return _last_error; } -#define SET_ERROR(msg) do { _last_error = (msg); } while(0) +#define SET_ERROR(msg) \ + do { \ + _last_error = (msg); \ + } while (0) /* * Prototype declarations @@ -59,401 +62,395 @@ static __inline__ int _is_scheme_char(int); /* * Check if scheme is a path-based scheme (unix socket, file path, etc.) 
*/ -static __inline__ int -_is_path_scheme(const char *scheme) -{ - if ( NULL == scheme ) { - return 0; - } - if ( 0 == strncmp(scheme, "unix", 4) && scheme[4] == '\0' ) { - return 1; - } - if ( 0 == strncmp(scheme, "file", 4) && scheme[4] == '\0' ) { - return 1; - } - if ( 0 == strncmp(scheme, "cunix", 5) && scheme[5] == '\0' ) { - return 1; - } +static __inline__ int _is_path_scheme(const char *scheme) { + if (NULL == scheme) { return 0; + } + if (0 == strncmp(scheme, "unix", 4) && scheme[4] == '\0') { + return 1; + } + if (0 == strncmp(scheme, "file", 4) && scheme[4] == '\0') { + return 1; + } + if (0 == strncmp(scheme, "cunix", 5) && scheme[5] == '\0') { + return 1; + } + return 0; } /* * Check whether the character is permitted in scheme string */ -static __inline__ int -_is_scheme_char(int c) -{ - return (!isalpha(c) && '+' != c && '-' != c && '.' != c) ? 0 : 1; +static __inline__ int _is_scheme_char(int c) { + return (!isalpha(c) && '+' != c && '-' != c && '.' != c) ? 0 : 1; } /* * See RFC 1738, 3986 */ -struct parsed_url * -parse_url(const char *url) -{ - struct parsed_url *purl; - const char *tmpstr; - const char *curstr; - size_t len; - size_t i; - int bracket_flag; - int is_path; +struct parsed_url *parse_url(const char *url) { + struct parsed_url *purl; + const char *tmpstr; + const char *curstr; + size_t len; + size_t i; + int bracket_flag; + int is_path; - /* Allocate the parsed url storage */ - purl = malloc(sizeof(struct parsed_url)); - if ( NULL == purl ) { - return NULL; + /* Allocate the parsed url storage */ + purl = malloc(sizeof(struct parsed_url)); + if (NULL == purl) { + return NULL; + } + purl->scheme = NULL; + purl->host = NULL; + purl->port = NULL; + purl->path = NULL; + purl->query = NULL; + purl->fragment = NULL; + purl->username = NULL; + purl->password = NULL; + + curstr = url; + + /* + * <scheme>:<scheme-specific-part> + * <scheme> := [a-z\+\-\.]+ + * upper case = lower case for resiliency + */ + /* Read scheme */ + tmpstr = 
strchr(curstr, ':'); + if (NULL == tmpstr) { + parsed_url_free(purl); + return NULL; + } + /* Get the scheme length */ + len = tmpstr - curstr; + if (len > 64) { + SET_ERROR("ERR_SCHEME_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } + /* Check restrictions */ + for (i = 0; i < len; i++) { + if (!_is_scheme_char(curstr[i])) { + parsed_url_free(purl); + return NULL; } - purl->scheme = NULL; - purl->host = NULL; - purl->port = NULL; - purl->path = NULL; - purl->query = NULL; - purl->fragment = NULL; - purl->username = NULL; - purl->password = NULL; + } + /* Copy the scheme to the storage */ + purl->scheme = malloc(sizeof(char) * (len + 1)); + if (NULL == purl->scheme) { + parsed_url_free(purl); + return NULL; + } + if (len > 0) { + (void)strncpy(purl->scheme, curstr, len); + } + purl->scheme[len] = '\0'; + /* Make the character to lower if it is upper case. */ + for (i = 0; i < len; i++) { + purl->scheme[i] = tolower(purl->scheme[i]); + } - curstr = url; + /* Check if this is a path-based scheme */ + is_path = _is_path_scheme(purl->scheme); - /* - * <scheme>:<scheme-specific-part> - * <scheme> := [a-z\+\-\.]+ - * upper case = lower case for resiliency - */ - /* Read scheme */ - tmpstr = strchr(curstr, ':'); - if ( NULL == tmpstr ) { - parsed_url_free(purl); - return NULL; + /* Skip ':' */ + tmpstr++; + curstr = tmpstr; + + /* + * Normalize: ensure we have // after scheme + * If missing, treat everything as path + */ + if ('/' != curstr[0] || '/' != curstr[1]) { + /* No // - entire rest is path */ + tmpstr = curstr; + while ('\0' != *tmpstr) { + tmpstr++; } - /* Get the scheme length */ len = tmpstr - curstr; - if ( len > 64 ) { - SET_ERROR("ERR_SCHEME_TOO_LONG"); - parsed_url_free(purl); - return NULL; - } - /* Check restrictions */ - for ( i = 0; i < len; i++ ) { - if ( !_is_scheme_char(curstr[i]) ) { - parsed_url_free(purl); - return NULL; - } - } - /* Copy the scheme to the storage */ - purl->scheme = malloc(sizeof(char) * (len + 1)); - if ( NULL == 
purl->scheme ) { + if (len > 0) { + purl->path = malloc(sizeof(char) * (len + 1)); + if (NULL == purl->path) { parsed_url_free(purl); return NULL; + } + (void)strncpy(purl->path, curstr, len); + purl->path[len] = '\0'; } - (void)strncpy(purl->scheme, curstr, len); - purl->scheme[len] = '\0'; - /* Make the character to lower if it is upper case. */ - for ( i = 0; i < len; i++ ) { - purl->scheme[i] = tolower(purl->scheme[i]); - } + return purl; + } - /* Check if this is a path-based scheme */ - is_path = _is_path_scheme(purl->scheme); + /* Skip the "//" */ + curstr += 2; - /* Skip ':' */ + /* + * Detect and consume username:password, consume @ + */ + tmpstr = curstr; + while ('\0' != *tmpstr && '@' != *tmpstr) { tmpstr++; - curstr = tmpstr; + } - /* - * Normalize: ensure we have // after scheme - * If missing, treat everything as path - */ - if ( '/' != curstr[0] || '/' != curstr[1] ) { - /* No // - entire rest is path */ - tmpstr = curstr; - while ( '\0' != *tmpstr ) { - tmpstr++; - } - len = tmpstr - curstr; - if ( len > 0 ) { - purl->path = malloc(sizeof(char) * (len + 1)); - if ( NULL == purl->path ) { - parsed_url_free(purl); - return NULL; - } - (void)strncpy(purl->path, curstr, len); - purl->path[len] = '\0'; - } - return purl; + if ('@' == *tmpstr) { + /* Has userinfo */ + /* First check if there's a password (look for : before @) */ + const char *colon = curstr; + int has_password = 0; + while (colon < tmpstr) { + if (':' == *colon) { + has_password = 1; + break; + } + colon++; } - /* Skip the "//" */ - curstr += 2; - - /* - * Detect and consume username:password, consume @ - */ - tmpstr = curstr; - while ( '\0' != *tmpstr && '@' != *tmpstr ) { - tmpstr++; + /* Read username */ + const char *username_start = curstr; + if (has_password) { + len = colon - curstr; + } else { + len = tmpstr - curstr; + } + if (len > 255) { + SET_ERROR("ERR_USERNAME_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } + if (len > 0) { + purl->username = malloc(sizeof(char) * 
(len + 1)); + if (NULL == purl->username) { + parsed_url_free(purl); + return NULL; + } + (void)strncpy(purl->username, username_start, len); + purl->username[len] = '\0'; } - if ( '@' == *tmpstr ) { - /* Has userinfo */ - /* First check if there's a password (look for : before @) */ - const char *colon = curstr; - int has_password = 0; - while (colon < tmpstr) { - if (':' == *colon) { - has_password = 1; - break; - } - colon++; - } - - /* Read username */ - const char *username_start = curstr; - if (has_password) { - len = colon - curstr; - } else { - len = tmpstr - curstr; - } - if ( len > 255 ) { - SET_ERROR("ERR_USERNAME_TOO_LONG"); - parsed_url_free(purl); - return NULL; - } - if ( len > 0 ) { - purl->username = malloc(sizeof(char) * (len + 1)); - if ( NULL == purl->username ) { - parsed_url_free(purl); - return NULL; - } - (void)strncpy(purl->username, username_start, len); - purl->username[len] = '\0'; + /* Skip to password or @ */ + if (has_password) { + curstr = colon + 1; + /* Read password */ + tmpstr = curstr; + while ('\0' != *tmpstr && '@' != *tmpstr) { + tmpstr++; + } + len = tmpstr - curstr; + if (len > 255) { + SET_ERROR("ERR_PASSWORD_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } + if (len > 0) { + purl->password = malloc(sizeof(char) * (len + 1)); + if (NULL == purl->password) { + parsed_url_free(purl); + return NULL; } + (void)strncpy(purl->password, curstr, len); + purl->password[len] = '\0'; + } + curstr = tmpstr; + } else { + /* No password - advance past username to @ */ + curstr = username_start + len; + } + /* Skip @ */ + while ('@' == *curstr) { + curstr++; + } + } - /* Skip to password or @ */ - if (has_password) { - curstr = colon + 1; - /* Read password */ - tmpstr = curstr; - while ( '\0' != *tmpstr && '@' != *tmpstr ) { - tmpstr++; - } - len = tmpstr - curstr; - if ( len > 255 ) { - SET_ERROR("ERR_PASSWORD_TOO_LONG"); - parsed_url_free(purl); - return NULL; - } - if ( len > 0 ) { - purl->password = malloc(sizeof(char) * (len 
+ 1)); - if ( NULL == purl->password ) { - parsed_url_free(purl); - return NULL; - } - (void)strncpy(purl->password, curstr, len); - purl->password[len] = '\0'; - } - curstr = tmpstr; - } else { - /* No password - advance past username to @ */ - curstr = username_start + len; - } - /* Skip @ */ - while ( '@' == *curstr ) { - curstr++; - } + /* + * If NOT a path scheme, detect and consume host:port + */ + if (!is_path) { + if ('[' == *curstr) { + bracket_flag = 1; + curstr++; + } else { + bracket_flag = 0; } - /* - * If NOT a path scheme, detect and consume host:port - */ - if ( !is_path ) { - if ( '[' == *curstr ) { - bracket_flag = 1; - curstr++; - } else { - bracket_flag = 0; - } + /* Read host */ + tmpstr = curstr; + while ('\0' != *tmpstr) { + if (bracket_flag && ']' == *tmpstr) { + break; + } else if (!bracket_flag && (':' == *tmpstr || '/' == *tmpstr || '?' == *tmpstr || '#' == *tmpstr)) { + break; + } + tmpstr++; + } + len = tmpstr - curstr; + if (len > 255) { + SET_ERROR("ERR_HOST_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } - /* Read host */ - tmpstr = curstr; - while ( '\0' != *tmpstr ) { - if ( bracket_flag && ']' == *tmpstr ) { - break; - } else if ( !bracket_flag && (':' == *tmpstr || '/' == *tmpstr || '?' 
== *tmpstr || '#' == *tmpstr) ) { - break; - } - tmpstr++; - } - len = tmpstr - curstr; - if ( len > 255 ) { - SET_ERROR("ERR_HOST_TOO_LONG"); - parsed_url_free(purl); - return NULL; - } + if (len > 0) { + purl->host = malloc(sizeof(char) * (len + 1)); + if (NULL == purl->host) { + parsed_url_free(purl); + return NULL; + } + (void)strncpy(purl->host, curstr, len); + purl->host[len] = '\0'; + } + curstr = tmpstr; - if ( len > 0 ) { - purl->host = malloc(sizeof(char) * (len + 1)); - if ( NULL == purl->host ) { - parsed_url_free(purl); - return NULL; - } - (void)strncpy(purl->host, curstr, len); - purl->host[len] = '\0'; - } - curstr = tmpstr; + /* Skip ']' if IPv6 */ + if (']' == *curstr) { + curstr++; + } - /* Skip ']' if IPv6 */ - if ( ']' == *curstr ) { - curstr++; + /* Read port */ + if (':' == *curstr) { + curstr++; + tmpstr = curstr; + while ('\0' != *tmpstr && '/' != *tmpstr && '?' != *tmpstr && '#' != *tmpstr) { + tmpstr++; + } + len = tmpstr - curstr; + if (len > 5) { + SET_ERROR("ERR_PORT_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } + if (len > 0) { + char port_str[6]; + size_t copy_len = len < 5 ? len : 5; + (void)strncpy(port_str, curstr, copy_len); + port_str[copy_len] = '\0'; + long port_val = strtol(port_str, NULL, 10); + if (port_val < 1 || port_val > 65535) { + SET_ERROR("ERR_PORT_INVALID"); + parsed_url_free(purl); + return NULL; } - - /* Read port */ - if ( ':' == *curstr ) { - curstr++; - tmpstr = curstr; - while ( '\0' != *tmpstr && '/' != *tmpstr && '?' != *tmpstr && '#' != *tmpstr ) { - tmpstr++; - } - len = tmpstr - curstr; - if ( len > 5 ) { - SET_ERROR("ERR_PORT_TOO_LONG"); - parsed_url_free(purl); - return NULL; - } - if ( len > 0 ) { - char port_str[6]; - size_t copy_len = len < 5 ? 
len : 5; - (void)strncpy(port_str, curstr, copy_len); - port_str[copy_len] = '\0'; - long port_val = strtol(port_str, NULL, 10); - if ( port_val < 1 || port_val > 65535 ) { - SET_ERROR("ERR_PORT_INVALID"); - parsed_url_free(purl); - return NULL; - } - purl->port = malloc(sizeof(char) * (len + 1)); - if ( NULL == purl->port ) { - parsed_url_free(purl); - return NULL; - } - (void)strncpy(purl->port, curstr, len); - purl->port[len] = '\0'; - } - curstr = tmpstr; + purl->port = malloc(sizeof(char) * (len + 1)); + if (NULL == purl->port) { + parsed_url_free(purl); + return NULL; } + (void)strncpy(purl->port, curstr, len); + purl->port[len] = '\0'; + } + curstr = tmpstr; } + } + + /* End of string? */ + if ('\0' == *curstr) { + return purl; + } - /* End of string? */ - if ( '\0' == *curstr ) { - return purl; + /* Parse path */ + tmpstr = curstr; + while ('\0' != *tmpstr && '?' != *tmpstr && '#' != *tmpstr) { + tmpstr++; + } + len = tmpstr - curstr; + if (len > 4096) { + SET_ERROR("ERR_PATH_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } + if (len > 0) { + purl->path = malloc(sizeof(char) * (len + 1)); + if (NULL == purl->path) { + parsed_url_free(purl); + return NULL; } + (void)strncpy(purl->path, curstr, len); + purl->path[len] = '\0'; + } + curstr = tmpstr; - /* Parse path */ + /* Parse query */ + if ('?' == *curstr) { + curstr++; tmpstr = curstr; - while ( '\0' != *tmpstr && '?' 
!= *tmpstr && '#' != *tmpstr ) { - tmpstr++; + while ('\0' != *tmpstr && '#' != *tmpstr) { + tmpstr++; } len = tmpstr - curstr; - if ( len > 4096 ) { - SET_ERROR("ERR_PATH_TOO_LONG"); + if (len > 4096) { + SET_ERROR("ERR_QUERY_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } + if (len > 0) { + purl->query = malloc(sizeof(char) * (len + 1)); + if (NULL == purl->query) { parsed_url_free(purl); return NULL; - } - if ( len > 0 ) { - purl->path = malloc(sizeof(char) * (len + 1)); - if ( NULL == purl->path ) { - parsed_url_free(purl); - return NULL; - } - (void)strncpy(purl->path, curstr, len); - purl->path[len] = '\0'; + } + (void)strncpy(purl->query, curstr, len); + purl->query[len] = '\0'; } curstr = tmpstr; + } - /* Parse query */ - if ( '?' == *curstr ) { - curstr++; - tmpstr = curstr; - while ( '\0' != *tmpstr && '#' != *tmpstr ) { - tmpstr++; - } - len = tmpstr - curstr; - if ( len > 4096 ) { - SET_ERROR("ERR_QUERY_TOO_LONG"); - parsed_url_free(purl); - return NULL; - } - if ( len > 0 ) { - purl->query = malloc(sizeof(char) * (len + 1)); - if ( NULL == purl->query ) { - parsed_url_free(purl); - return NULL; - } - (void)strncpy(purl->query, curstr, len); - purl->query[len] = '\0'; - } - curstr = tmpstr; + /* Parse fragment */ + if ('#' == *curstr) { + curstr++; + tmpstr = curstr; + while ('\0' != *tmpstr) { + tmpstr++; } - - /* Parse fragment */ - if ( '#' == *curstr ) { - curstr++; - tmpstr = curstr; - while ( '\0' != *tmpstr ) { - tmpstr++; - } - len = tmpstr - curstr; - if ( len > 4096 ) { - SET_ERROR("ERR_FRAGMENT_TOO_LONG"); - parsed_url_free(purl); - return NULL; - } - if ( len > 0 ) { - purl->fragment = malloc(sizeof(char) * (len + 1)); - if ( NULL == purl->fragment ) { - parsed_url_free(purl); - return NULL; - } - (void)strncpy(purl->fragment, curstr, len); - purl->fragment[len] = '\0'; - } + len = tmpstr - curstr; + if (len > 4096) { + SET_ERROR("ERR_FRAGMENT_TOO_LONG"); + parsed_url_free(purl); + return NULL; + } + if (len > 0) { + purl->fragment = 
malloc(sizeof(char) * (len + 1)); + if (NULL == purl->fragment) { + parsed_url_free(purl); + return NULL; + } + (void)strncpy(purl->fragment, curstr, len); + purl->fragment[len] = '\0'; } + } - return purl; + return purl; } /* * Free memory of parsed url */ -void -parsed_url_free(struct parsed_url *purl) -{ - if ( NULL != purl ) { - if ( NULL != purl->scheme ) { - free(purl->scheme); - } - if ( NULL != purl->host ) { - free(purl->host); - } - if ( NULL != purl->port ) { - free(purl->port); - } - if ( NULL != purl->path ) { - free(purl->path); - } - if ( NULL != purl->query ) { - free(purl->query); - } - if ( NULL != purl->fragment ) { - free(purl->fragment); - } - if ( NULL != purl->username ) { - free(purl->username); - } - if ( NULL != purl->password ) { - free(purl->password); - } - free(purl); +void parsed_url_free(struct parsed_url *purl) { + if (NULL != purl) { + if (NULL != purl->scheme) { + free(purl->scheme); + } + if (NULL != purl->host) { + free(purl->host); + } + if (NULL != purl->port) { + free(purl->port); + } + if (NULL != purl->path) { + free(purl->path); + } + if (NULL != purl->query) { + free(purl->query); + } + if (NULL != purl->fragment) { + free(purl->fragment); + } + if (NULL != purl->username) { + free(purl->username); + } + if (NULL != purl->password) { + free(purl->password); } + free(purl); + } } diff --git a/src/url-parser.h b/src/url-parser.h @@ -42,26 +42,26 @@ * URL storage */ struct parsed_url { - char *scheme; /* mandatory */ - char *host; /* mandatory */ - char *port; /* optional */ - char *path; /* optional */ - char *query; /* optional */ - char *fragment; /* optional */ - char *username; /* optional */ - char *password; /* optional */ + char *scheme; /* mandatory */ + char *host; /* mandatory */ + char *port; /* optional */ + char *path; /* optional */ + char *query; /* optional */ + char *fragment; /* optional */ + char *username; /* optional */ + char *password; /* optional */ }; #ifdef __cplusplus extern "C" { #endif - /* - * 
Declaration of function prototypes - */ - struct parsed_url * parse_url(const char *); - void parsed_url_free(struct parsed_url *); - const char *parse_url_last_error(void); +/* + * Declaration of function prototypes + */ +struct parsed_url *parse_url(const char *); +void parsed_url_free(struct parsed_url *); +const char *parse_url_last_error(void); #ifdef __cplusplus } diff --git a/test/length-limits b/test/length-limits Binary files differ.