url-parser.c (7993B)
1 /*_ 2 * Copyright 2010-2011 Scyphus Solutions Co. Ltd. All rights reserved. 3 * 4 * Authors: 5 * Hirochika Asai 6 */ 7 8 #include "url-parser.h" 9 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <ctype.h> 14 15 /* 16 * Prototype declarations 17 */ 18 static __inline__ int _is_scheme_char(int); 19 20 /* 21 * Check whether the character is permitted in scheme string 22 */ 23 static __inline__ int 24 _is_scheme_char(int c) 25 { 26 return (!isalpha(c) && '+' != c && '-' != c && '.' != c) ? 0 : 1; 27 } 28 29 /* 30 * See RFC 1738, 3986 31 */ 32 struct parsed_url * 33 parse_url(const char *url) 34 { 35 struct parsed_url *purl; 36 const char *tmpstr; 37 const char *curstr; 38 int len; 39 int i; 40 int userpass_flag; 41 int bracket_flag; 42 43 /* Allocate the parsed url storage */ 44 purl = malloc(sizeof(struct parsed_url)); 45 if ( NULL == purl ) { 46 return NULL; 47 } 48 purl->scheme = NULL; 49 purl->host = NULL; 50 purl->port = NULL; 51 purl->path = NULL; 52 purl->query = NULL; 53 purl->fragment = NULL; 54 purl->username = NULL; 55 purl->password = NULL; 56 57 curstr = url; 58 59 /* 60 * <scheme>:<scheme-specific-part> 61 * <scheme> := [a-z\+\-\.]+ 62 * upper case = lower case for resiliency 63 */ 64 /* Read scheme */ 65 tmpstr = strchr(curstr, ':'); 66 if ( NULL == tmpstr ) { 67 /* Not found the character */ 68 parsed_url_free(purl); 69 return NULL; 70 } 71 /* Get the scheme length */ 72 len = tmpstr - curstr; 73 /* Check restrictions */ 74 for ( i = 0; i < len; i++ ) { 75 if ( !_is_scheme_char(curstr[i]) ) { 76 /* Invalid format */ 77 parsed_url_free(purl); 78 return NULL; 79 } 80 } 81 /* Copy the scheme to the storage */ 82 purl->scheme = malloc(sizeof(char) * (len + 1)); 83 if ( NULL == purl->scheme ) { 84 parsed_url_free(purl); 85 return NULL; 86 } 87 (void)strncpy(purl->scheme, curstr, len); 88 purl->scheme[len] = '\0'; 89 /* Make the character to lower if it is upper case. */ 90 for ( i = 0; i < len; i++ ) { 91 purl->scheme[i] = tolower(purl->scheme[i]); 92 } 93 /* Skip ':' */ 94 tmpstr++; 95 curstr = tmpstr; 96 97 /* 98 * //<user>:<password>@<host>:<port>/<url-path> 99 * Any ":", "@" and "/" must be encoded. 100 */ 101 /* Eat "//" */ 102 for ( i = 0; i < 2; i++ ) { 103 if ( '/' != *curstr ) { 104 parsed_url_free(purl); 105 return NULL; 106 } 107 curstr++; 108 } 109 110 /* Check if the user (and password) are specified. */ 111 userpass_flag = 0; 112 tmpstr = curstr; 113 while ( '\0' != *tmpstr ) { 114 if ( '@' == *tmpstr ) { 115 /* Username and password are specified */ 116 userpass_flag = 1; 117 break; 118 } else if ( '/' == *tmpstr ) { 119 /* End of <host>:<port> specification */ 120 userpass_flag = 0; 121 break; 122 } 123 tmpstr++; 124 } 125 126 /* User and password specification */ 127 tmpstr = curstr; 128 if ( userpass_flag ) { 129 /* Read username */ 130 while ( '\0' != *tmpstr && ':' != *tmpstr && '@' != *tmpstr ) { 131 tmpstr++; 132 } 133 len = tmpstr - curstr; 134 purl->username = malloc(sizeof(char) * (len + 1)); 135 if ( NULL == purl->username ) { 136 parsed_url_free(purl); 137 return NULL; 138 } 139 (void)strncpy(purl->username, curstr, len); 140 purl->username[len] = '\0'; 141 /* Proceed current pointer */ 142 curstr = tmpstr; 143 if ( ':' == *curstr ) { 144 /* Skip ':' */ 145 curstr++; 146 /* Read password */ 147 tmpstr = curstr; 148 while ( '\0' != *tmpstr && '@' != *tmpstr ) { 149 tmpstr++; 150 } 151 len = tmpstr - curstr; 152 purl->password = malloc(sizeof(char) * (len + 1)); 153 if ( NULL == purl->password ) { 154 parsed_url_free(purl); 155 return NULL; 156 } 157 (void)strncpy(purl->password, curstr, len); 158 purl->password[len] = '\0'; 159 curstr = tmpstr; 160 } 161 /* Skip '@' */ 162 if ( '@' != *curstr ) { 163 parsed_url_free(purl); 164 return NULL; 165 } 166 curstr++; 167 } 168 169 if ( '[' == *curstr ) { 170 bracket_flag = 1; 171 } else { 172 bracket_flag = 0; 173 } 174 /* Proceed on by delimiters with reading host */ 175 tmpstr = curstr; 176 while ( '\0' != *tmpstr ) { 177 if ( bracket_flag && ']' == *tmpstr ) { 178 /* End of IPv6 address. */ 179 tmpstr++; 180 break; 181 } else if ( !bracket_flag && (':' == *tmpstr || '/' == *tmpstr) ) { 182 /* Port number is specified. */ 183 break; 184 } 185 tmpstr++; 186 } 187 len = tmpstr - curstr; 188 purl->host = malloc(sizeof(char) * (len + 1)); 189 if ( NULL == purl->host || len <= 0 ) { 190 parsed_url_free(purl); 191 return NULL; 192 } 193 (void)strncpy(purl->host, curstr, len); 194 purl->host[len] = '\0'; 195 curstr = tmpstr; 196 197 /* Is port number specified? */ 198 if ( ':' == *curstr ) { 199 curstr++; 200 /* Read port number */ 201 tmpstr = curstr; 202 while ( '\0' != *tmpstr && '/' != *tmpstr ) { 203 tmpstr++; 204 } 205 len = tmpstr - curstr; 206 purl->port = malloc(sizeof(char) * (len + 1)); 207 if ( NULL == purl->port ) { 208 parsed_url_free(purl); 209 return NULL; 210 } 211 (void)strncpy(purl->port, curstr, len); 212 purl->port[len] = '\0'; 213 curstr = tmpstr; 214 } 215 216 /* End of the string */ 217 if ( '\0' == *curstr ) { 218 return purl; 219 } 220 221 /* Skip '/' */ 222 if ( '/' != *curstr ) { 223 parsed_url_free(purl); 224 return NULL; 225 } 226 curstr++; 227 228 /* Parse path */ 229 tmpstr = curstr; 230 while ( '\0' != *tmpstr && '#' != *tmpstr && '?' != *tmpstr ) { 231 tmpstr++; 232 } 233 len = tmpstr - curstr; 234 purl->path = malloc(sizeof(char) * (len + 1)); 235 if ( NULL == purl->path ) { 236 parsed_url_free(purl); 237 return NULL; 238 } 239 (void)strncpy(purl->path, curstr, len); 240 purl->path[len] = '\0'; 241 curstr = tmpstr; 242 243 /* Is query specified? */ 244 if ( '?' == *curstr ) { 245 /* Skip '?' */ 246 curstr++; 247 /* Read query */ 248 tmpstr = curstr; 249 while ( '\0' != *tmpstr && '#' != *tmpstr ) { 250 tmpstr++; 251 } 252 len = tmpstr - curstr; 253 purl->query = malloc(sizeof(char) * (len + 1)); 254 if ( NULL == purl->query ) { 255 parsed_url_free(purl); 256 return NULL; 257 } 258 (void)strncpy(purl->query, curstr, len); 259 purl->query[len] = '\0'; 260 curstr = tmpstr; 261 } 262 263 /* Is fragment specified? */ 264 if ( '#' == *curstr ) { 265 /* Skip '#' */ 266 curstr++; 267 /* Read fragment */ 268 tmpstr = curstr; 269 while ( '\0' != *tmpstr ) { 270 tmpstr++; 271 } 272 len = tmpstr - curstr; 273 purl->fragment = malloc(sizeof(char) * (len + 1)); 274 if ( NULL == purl->fragment ) { 275 parsed_url_free(purl); 276 return NULL; 277 } 278 (void)strncpy(purl->fragment, curstr, len); 279 purl->fragment[len] = '\0'; 280 curstr = tmpstr; 281 } 282 283 return purl; 284 } 285 286 /* 287 * Free memory of parsed url 288 */ 289 void parsed_url_free(struct parsed_url *purl) { 290 if ( NULL != purl ) { 291 if ( NULL != purl->scheme ) { 292 free(purl->scheme); 293 } 294 if ( NULL != purl->host ) { 295 free(purl->host); 296 } 297 if ( NULL != purl->port ) { 298 free(purl->port); 299 } 300 if ( NULL != purl->path ) { 301 free(purl->path); 302 } 303 if ( NULL != purl->query ) { 304 free(purl->query); 305 } 306 if ( NULL != purl->fragment ) { 307 free(purl->fragment); 308 } 309 if ( NULL != purl->username ) { 310 free(purl->username); 311 } 312 if ( NULL != purl->password ) { 313 free(purl->password); 314 } 315 free(purl); 316 } 317 }