url-parser.c (11474B)
1 /*_ 2 * Copyright (c) 2026 finwo 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy of 5 * this software and associated documentation files (the "Software"), to use, copy, 6 * modify, and distribute the Software, subject to the following conditions: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, this 9 * list of conditions, and the following disclaimer. 10 * 11 * 2. Redistributions in binary form, or any public offering of the Software 12 * (including hosted or managed services), must reproduce the above copyright 13 * notice, this list of conditions, and the following disclaimer in the 14 * documentation and/or other materials provided. 15 * 16 * 3. Any redistribution or public offering of the Software must clearly attribute 17 * the Software to the original copyright holder, reference this License, and 18 * include a link to the official project repository or website. 19 * 20 * 4. The Software may not be renamed, rebranded, or marketed in a manner that 21 * implies it is an independent or proprietary product. Derivative works must 22 * clearly state that they are based on the Software. 23 * 24 * 5. Modifications to copies of the Software must carry prominent notices stating 25 * that changes were made, the nature of the modifications, and the date of the 26 * modifications. 27 * 28 * Any violation of these conditions terminates the permissions granted herein. 29 * 30 * THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 31 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 32 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT 33 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 34 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 36 */ 37 38 #include "url-parser.h" 39 40 #include <ctype.h> 41 #include <limits.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <string.h> 45 46 static const char *_last_error = NULL; 47 48 const char *parse_url_last_error(void) { 49 return _last_error; 50 } 51 52 #define SET_ERROR(msg) \ 53 do { \ 54 _last_error = (msg); \ 55 } while (0) 56 57 /* 58 * Prototype declarations 59 */ 60 static __inline__ int _is_scheme_char(int); 61 62 /* 63 * Check if scheme is a path-based scheme (unix socket, file path, etc.) 64 */ 65 static __inline__ int _is_path_scheme(const char *scheme) { 66 if (NULL == scheme) { 67 return 0; 68 } 69 if (0 == strncmp(scheme, "unix", 4) && scheme[4] == '\0') { 70 return 1; 71 } 72 if (0 == strncmp(scheme, "file", 4) && scheme[4] == '\0') { 73 return 1; 74 } 75 if (0 == strncmp(scheme, "cunix", 5) && scheme[5] == '\0') { 76 return 1; 77 } 78 return 0; 79 } 80 81 /* 82 * Check whether the character is permitted in scheme string 83 */ 84 static __inline__ int _is_scheme_char(int c) { 85 return (!isalpha(c) && '+' != c && '-' != c && '.' != c) ? 0 : 1; 86 } 87 88 /* 89 * See RFC 1738, 3986 90 */ 91 struct parsed_url *parse_url(const char *url) { 92 struct parsed_url *purl; 93 const char *tmpstr; 94 const char *curstr; 95 size_t len; 96 size_t i; 97 int bracket_flag; 98 int is_path; 99 100 /* Allocate the parsed url storage */ 101 purl = malloc(sizeof(struct parsed_url)); 102 if (NULL == purl) { 103 return NULL; 104 } 105 purl->scheme = NULL; 106 purl->host = NULL; 107 purl->port = NULL; 108 purl->path = NULL; 109 purl->query = NULL; 110 purl->fragment = NULL; 111 purl->username = NULL; 112 purl->password = NULL; 113 114 curstr = url; 115 116 /* 117 * <scheme>:<scheme-specific-part> 118 * <scheme> := [a-z\+\-\.]+ 119 * upper case = lower case for resiliency 120 */ 121 /* Read scheme */ 122 tmpstr = strchr(curstr, ':'); 123 if (NULL == tmpstr) { 124 parsed_url_free(purl); 125 return NULL; 126 } 127 /* Get the scheme length */ 128 len = tmpstr - curstr; 129 if (len > 64) { 130 SET_ERROR("ERR_SCHEME_TOO_LONG"); 131 parsed_url_free(purl); 132 return NULL; 133 } 134 /* Check restrictions */ 135 for (i = 0; i < len; i++) { 136 if (!_is_scheme_char(curstr[i])) { 137 parsed_url_free(purl); 138 return NULL; 139 } 140 } 141 /* Copy the scheme to the storage */ 142 purl->scheme = malloc(sizeof(char) * (len + 1)); 143 if (NULL == purl->scheme) { 144 parsed_url_free(purl); 145 return NULL; 146 } 147 if (len > 0) { 148 (void)strncpy(purl->scheme, curstr, len); 149 } 150 purl->scheme[len] = '\0'; 151 /* Make the character to lower if it is upper case. */ 152 for (i = 0; i < len; i++) { 153 purl->scheme[i] = tolower(purl->scheme[i]); 154 } 155 156 /* Check if this is a path-based scheme */ 157 is_path = _is_path_scheme(purl->scheme); 158 159 /* Skip ':' */ 160 tmpstr++; 161 curstr = tmpstr; 162 163 /* 164 * Normalize: ensure we have // after scheme 165 * If missing, treat everything as path 166 */ 167 if ('/' != curstr[0] || '/' != curstr[1]) { 168 /* No // - entire rest is path */ 169 tmpstr = curstr; 170 while ('\0' != *tmpstr) { 171 tmpstr++; 172 } 173 len = tmpstr - curstr; 174 if (len > 0) { 175 purl->path = malloc(sizeof(char) * (len + 1)); 176 if (NULL == purl->path) { 177 parsed_url_free(purl); 178 return NULL; 179 } 180 (void)strncpy(purl->path, curstr, len); 181 purl->path[len] = '\0'; 182 } 183 return purl; 184 } 185 186 /* Skip the "//" */ 187 curstr += 2; 188 189 /* 190 * Detect and consume username:password, consume @ 191 */ 192 tmpstr = curstr; 193 while ('\0' != *tmpstr && '@' != *tmpstr) { 194 tmpstr++; 195 } 196 197 if ('@' == *tmpstr) { 198 /* Has userinfo */ 199 /* First check if there's a password (look for : before @) */ 200 const char *colon = curstr; 201 int has_password = 0; 202 while (colon < tmpstr) { 203 if (':' == *colon) { 204 has_password = 1; 205 break; 206 } 207 colon++; 208 } 209 210 /* Read username */ 211 const char *username_start = curstr; 212 if (has_password) { 213 len = colon - curstr; 214 } else { 215 len = tmpstr - curstr; 216 } 217 if (len > 255) { 218 SET_ERROR("ERR_USERNAME_TOO_LONG"); 219 parsed_url_free(purl); 220 return NULL; 221 } 222 if (len > 0) { 223 purl->username = malloc(sizeof(char) * (len + 1)); 224 if (NULL == purl->username) { 225 parsed_url_free(purl); 226 return NULL; 227 } 228 (void)strncpy(purl->username, username_start, len); 229 purl->username[len] = '\0'; 230 } 231 232 /* Skip to password or @ */ 233 if (has_password) { 234 curstr = colon + 1; 235 /* Read password */ 236 tmpstr = curstr; 237 while ('\0' != *tmpstr && '@' != *tmpstr) { 238 tmpstr++; 239 } 240 len = tmpstr - curstr; 241 if (len > 255) { 242 SET_ERROR("ERR_PASSWORD_TOO_LONG"); 243 parsed_url_free(purl); 244 return NULL; 245 } 246 if (len > 0) { 247 purl->password = malloc(sizeof(char) * (len + 1)); 248 if (NULL == purl->password) { 249 parsed_url_free(purl); 250 return NULL; 251 } 252 (void)strncpy(purl->password, curstr, len); 253 purl->password[len] = '\0'; 254 } 255 curstr = tmpstr; 256 } else { 257 /* No password - advance past username to @ */ 258 curstr = username_start + len; 259 } 260 /* Skip @ */ 261 while ('@' == *curstr) { 262 curstr++; 263 } 264 } 265 266 /* 267 * If NOT a path scheme, detect and consume host:port 268 */ 269 if (!is_path) { 270 if ('[' == *curstr) { 271 bracket_flag = 1; 272 curstr++; 273 } else { 274 bracket_flag = 0; 275 } 276 277 /* Read host */ 278 tmpstr = curstr; 279 while ('\0' != *tmpstr) { 280 if (bracket_flag && ']' == *tmpstr) { 281 break; 282 } else if (!bracket_flag && (':' == *tmpstr || '/' == *tmpstr || '?' == *tmpstr || '#' == *tmpstr)) { 283 break; 284 } 285 tmpstr++; 286 } 287 len = tmpstr - curstr; 288 if (len > 255) { 289 SET_ERROR("ERR_HOST_TOO_LONG"); 290 parsed_url_free(purl); 291 return NULL; 292 } 293 294 if (len > 0) { 295 purl->host = malloc(sizeof(char) * (len + 1)); 296 if (NULL == purl->host) { 297 parsed_url_free(purl); 298 return NULL; 299 } 300 (void)strncpy(purl->host, curstr, len); 301 purl->host[len] = '\0'; 302 } 303 curstr = tmpstr; 304 305 /* Skip ']' if IPv6 */ 306 if (']' == *curstr) { 307 curstr++; 308 } 309 310 /* Read port */ 311 if (':' == *curstr) { 312 curstr++; 313 tmpstr = curstr; 314 while ('\0' != *tmpstr && '/' != *tmpstr && '?' != *tmpstr && '#' != *tmpstr) { 315 tmpstr++; 316 } 317 len = tmpstr - curstr; 318 if (len > 5) { 319 SET_ERROR("ERR_PORT_TOO_LONG"); 320 parsed_url_free(purl); 321 return NULL; 322 } 323 if (len > 0) { 324 char port_str[6]; 325 size_t copy_len = len < 5 ? len : 5; 326 (void)strncpy(port_str, curstr, copy_len); 327 port_str[copy_len] = '\0'; 328 long port_val = strtol(port_str, NULL, 10); 329 if (port_val < 1 || port_val > 65535) { 330 SET_ERROR("ERR_PORT_INVALID"); 331 parsed_url_free(purl); 332 return NULL; 333 } 334 purl->port = malloc(sizeof(char) * (len + 1)); 335 if (NULL == purl->port) { 336 parsed_url_free(purl); 337 return NULL; 338 } 339 (void)strncpy(purl->port, curstr, len); 340 purl->port[len] = '\0'; 341 } 342 curstr = tmpstr; 343 } 344 } 345 346 /* End of string? */ 347 if ('\0' == *curstr) { 348 return purl; 349 } 350 351 /* Parse path */ 352 tmpstr = curstr; 353 while ('\0' != *tmpstr && '?' != *tmpstr && '#' != *tmpstr) { 354 tmpstr++; 355 } 356 len = tmpstr - curstr; 357 if (len > 4096) { 358 SET_ERROR("ERR_PATH_TOO_LONG"); 359 parsed_url_free(purl); 360 return NULL; 361 } 362 if (len > 0) { 363 purl->path = malloc(sizeof(char) * (len + 1)); 364 if (NULL == purl->path) { 365 parsed_url_free(purl); 366 return NULL; 367 } 368 (void)strncpy(purl->path, curstr, len); 369 purl->path[len] = '\0'; 370 } 371 curstr = tmpstr; 372 373 /* Parse query */ 374 if ('?' == *curstr) { 375 curstr++; 376 tmpstr = curstr; 377 while ('\0' != *tmpstr && '#' != *tmpstr) { 378 tmpstr++; 379 } 380 len = tmpstr - curstr; 381 if (len > 4096) { 382 SET_ERROR("ERR_QUERY_TOO_LONG"); 383 parsed_url_free(purl); 384 return NULL; 385 } 386 if (len > 0) { 387 purl->query = malloc(sizeof(char) * (len + 1)); 388 if (NULL == purl->query) { 389 parsed_url_free(purl); 390 return NULL; 391 } 392 (void)strncpy(purl->query, curstr, len); 393 purl->query[len] = '\0'; 394 } 395 curstr = tmpstr; 396 } 397 398 /* Parse fragment */ 399 if ('#' == *curstr) { 400 curstr++; 401 tmpstr = curstr; 402 while ('\0' != *tmpstr) { 403 tmpstr++; 404 } 405 len = tmpstr - curstr; 406 if (len > 4096) { 407 SET_ERROR("ERR_FRAGMENT_TOO_LONG"); 408 parsed_url_free(purl); 409 return NULL; 410 } 411 if (len > 0) { 412 purl->fragment = malloc(sizeof(char) * (len + 1)); 413 if (NULL == purl->fragment) { 414 parsed_url_free(purl); 415 return NULL; 416 } 417 (void)strncpy(purl->fragment, curstr, len); 418 purl->fragment[len] = '\0'; 419 } 420 } 421 422 return purl; 423 } 424 425 /* 426 * Free memory of parsed url 427 */ 428 void parsed_url_free(struct parsed_url *purl) { 429 if (NULL != purl) { 430 if (NULL != purl->scheme) { 431 free(purl->scheme); 432 } 433 if (NULL != purl->host) { 434 free(purl->host); 435 } 436 if (NULL != purl->port) { 437 free(purl->port); 438 } 439 if (NULL != purl->path) { 440 free(purl->path); 441 } 442 if (NULL != purl->query) { 443 free(purl->query); 444 } 445 if (NULL != purl->fragment) { 446 free(purl->fragment); 447 } 448 if (NULL != purl->username) { 449 free(purl->username); 450 } 451 if (NULL != purl->password) { 452 free(purl->password); 453 } 454 free(purl); 455 } 456 }