url-parser.c

Git mirror of http://draft.scyphus.co.jp/lang/c/url_parser.html
git clone git://git.finwo.net/lib/url-parser.c
Log | Files | Refs

url-parser.c (7993B)


      1 /*_
      2  * Copyright 2010-2011 Scyphus Solutions Co. Ltd.  All rights reserved.
      3  *
      4  * Authors:
      5  *      Hirochika Asai
      6  */
      7 
      8 #include "url-parser.h"
      9 
     10 #include <stdio.h>
     11 #include <stdlib.h>
     12 #include <string.h>
     13 #include <ctype.h>
     14 
     15 /*
     16  * Prototype declarations
     17  */
     18 static __inline__ int _is_scheme_char(int);
     19 
     20 /*
     21  * Check whether the character is permitted in scheme string
     22  */
     23 static __inline__ int
     24 _is_scheme_char(int c)
     25 {
     26     return (!isalpha(c) && '+' != c && '-' != c && '.' != c) ? 0 : 1;
     27 }
     28 
     29 /*
     30  * See RFC 1738, 3986
     31  */
     32 struct parsed_url *
     33 parse_url(const char *url)
     34 {
     35     struct parsed_url *purl;
     36     const char *tmpstr;
     37     const char *curstr;
     38     int len;
     39     int i;
     40     int userpass_flag;
     41     int bracket_flag;
     42 
     43     /* Allocate the parsed url storage */
     44     purl = malloc(sizeof(struct parsed_url));
     45     if ( NULL == purl ) {
     46         return NULL;
     47     }
     48     purl->scheme = NULL;
     49     purl->host = NULL;
     50     purl->port = NULL;
     51     purl->path = NULL;
     52     purl->query = NULL;
     53     purl->fragment = NULL;
     54     purl->username = NULL;
     55     purl->password = NULL;
     56 
     57     curstr = url;
     58 
     59     /*
     60      * <scheme>:<scheme-specific-part>
     61      * <scheme> := [a-z\+\-\.]+
     62      *             upper case = lower case for resiliency
     63      */
     64     /* Read scheme */
     65     tmpstr = strchr(curstr, ':');
     66     if ( NULL == tmpstr ) {
     67         /* Not found the character */
     68         parsed_url_free(purl);
     69         return NULL;
     70     }
     71     /* Get the scheme length */
     72     len = tmpstr - curstr;
     73     /* Check restrictions */
     74     for ( i = 0; i < len; i++ ) {
     75         if ( !_is_scheme_char(curstr[i]) ) {
     76             /* Invalid format */
     77             parsed_url_free(purl);
     78             return NULL;
     79         }
     80     }
     81     /* Copy the scheme to the storage */
     82     purl->scheme = malloc(sizeof(char) * (len + 1));
     83     if ( NULL == purl->scheme ) {
     84         parsed_url_free(purl);
     85         return NULL;
     86     }
     87     (void)strncpy(purl->scheme, curstr, len);
     88     purl->scheme[len] = '\0';
     89     /* Make the character to lower if it is upper case. */
     90     for ( i = 0; i < len; i++ ) {
     91         purl->scheme[i] = tolower(purl->scheme[i]);
     92     }
     93     /* Skip ':' */
     94     tmpstr++;
     95     curstr = tmpstr;
     96 
     97     /*
     98      * //<user>:<password>@<host>:<port>/<url-path>
     99      * Any ":", "@" and "/" must be encoded.
    100      */
    101     /* Eat "//" */
    102     for ( i = 0; i < 2; i++ ) {
    103         if ( '/' != *curstr ) {
    104             parsed_url_free(purl);
    105             return NULL;
    106         }
    107         curstr++;
    108     }
    109 
    110     /* Check if the user (and password) are specified. */
    111     userpass_flag = 0;
    112     tmpstr = curstr;
    113     while ( '\0' != *tmpstr ) {
    114         if ( '@' == *tmpstr ) {
    115             /* Username and password are specified */
    116             userpass_flag = 1;
    117             break;
    118         } else if ( '/' == *tmpstr ) {
    119             /* End of <host>:<port> specification */
    120             userpass_flag = 0;
    121             break;
    122         }
    123         tmpstr++;
    124     }
    125 
    126     /* User and password specification */
    127     tmpstr = curstr;
    128     if ( userpass_flag ) {
    129         /* Read username */
    130         while ( '\0' != *tmpstr && ':' != *tmpstr && '@' != *tmpstr ) {
    131             tmpstr++;
    132         }
    133         len = tmpstr - curstr;
    134         purl->username = malloc(sizeof(char) * (len + 1));
    135         if ( NULL == purl->username ) {
    136             parsed_url_free(purl);
    137             return NULL;
    138         }
    139         (void)strncpy(purl->username, curstr, len);
    140         purl->username[len] = '\0';
    141         /* Proceed current pointer */
    142         curstr = tmpstr;
    143         if ( ':' == *curstr ) {
    144             /* Skip ':' */
    145             curstr++;
    146             /* Read password */
    147             tmpstr = curstr;
    148             while ( '\0' != *tmpstr && '@' != *tmpstr ) {
    149                 tmpstr++;
    150             }
    151             len = tmpstr - curstr;
    152             purl->password = malloc(sizeof(char) * (len + 1));
    153             if ( NULL == purl->password ) {
    154                 parsed_url_free(purl);
    155                 return NULL;
    156             }
    157             (void)strncpy(purl->password, curstr, len);
    158             purl->password[len] = '\0';
    159             curstr = tmpstr;
    160         }
    161         /* Skip '@' */
    162         if ( '@' != *curstr ) {
    163             parsed_url_free(purl);
    164             return NULL;
    165         }
    166         curstr++;
    167     }
    168 
    169     if ( '[' == *curstr ) {
    170         bracket_flag = 1;
    171     } else {
    172         bracket_flag = 0;
    173     }
    174     /* Proceed on by delimiters with reading host */
    175     tmpstr = curstr;
    176     while ( '\0' != *tmpstr ) {
    177         if ( bracket_flag && ']' == *tmpstr ) {
    178             /* End of IPv6 address. */
    179             tmpstr++;
    180             break;
    181         } else if ( !bracket_flag && (':' == *tmpstr || '/' == *tmpstr) ) {
    182             /* Port number is specified. */
    183             break;
    184         }
    185         tmpstr++;
    186     }
    187     len = tmpstr - curstr;
    188     purl->host = malloc(sizeof(char) * (len + 1));
    189     if ( NULL == purl->host || len <= 0 ) {
    190         parsed_url_free(purl);
    191         return NULL;
    192     }
    193     (void)strncpy(purl->host, curstr, len);
    194     purl->host[len] = '\0';
    195     curstr = tmpstr;
    196 
    197     /* Is port number specified? */
    198     if ( ':' == *curstr ) {
    199         curstr++;
    200         /* Read port number */
    201         tmpstr = curstr;
    202         while ( '\0' != *tmpstr && '/' != *tmpstr ) {
    203             tmpstr++;
    204         }
    205         len = tmpstr - curstr;
    206         purl->port = malloc(sizeof(char) * (len + 1));
    207         if ( NULL == purl->port ) {
    208             parsed_url_free(purl);
    209             return NULL;
    210         }
    211         (void)strncpy(purl->port, curstr, len);
    212         purl->port[len] = '\0';
    213         curstr = tmpstr;
    214     }
    215 
    216     /* End of the string */
    217     if ( '\0' == *curstr ) {
    218         return purl;
    219     }
    220 
    221     /* Skip '/' */
    222     if ( '/' != *curstr ) {
    223         parsed_url_free(purl);
    224         return NULL;
    225     }
    226     curstr++;
    227 
    228     /* Parse path */
    229     tmpstr = curstr;
    230     while ( '\0' != *tmpstr && '#' != *tmpstr  && '?' != *tmpstr ) {
    231         tmpstr++;
    232     }
    233     len = tmpstr - curstr;
    234     purl->path = malloc(sizeof(char) * (len + 1));
    235     if ( NULL == purl->path ) {
    236         parsed_url_free(purl);
    237         return NULL;
    238     }
    239     (void)strncpy(purl->path, curstr, len);
    240     purl->path[len] = '\0';
    241     curstr = tmpstr;
    242 
    243     /* Is query specified? */
    244     if ( '?' == *curstr ) {
    245         /* Skip '?' */
    246         curstr++;
    247         /* Read query */
    248         tmpstr = curstr;
    249         while ( '\0' != *tmpstr && '#' != *tmpstr ) {
    250             tmpstr++;
    251         }
    252         len = tmpstr - curstr;
    253         purl->query = malloc(sizeof(char) * (len + 1));
    254         if ( NULL == purl->query ) {
    255             parsed_url_free(purl);
    256             return NULL;
    257         }
    258         (void)strncpy(purl->query, curstr, len);
    259         purl->query[len] = '\0';
    260         curstr = tmpstr;
    261     }
    262 
    263     /* Is fragment specified? */
    264     if ( '#' == *curstr ) {
    265         /* Skip '#' */
    266         curstr++;
    267         /* Read fragment */
    268         tmpstr = curstr;
    269         while ( '\0' != *tmpstr ) {
    270             tmpstr++;
    271         }
    272         len = tmpstr - curstr;
    273         purl->fragment = malloc(sizeof(char) * (len + 1));
    274         if ( NULL == purl->fragment ) {
    275             parsed_url_free(purl);
    276             return NULL;
    277         }
    278         (void)strncpy(purl->fragment, curstr, len);
    279         purl->fragment[len] = '\0';
    280         curstr = tmpstr;
    281     }
    282 
    283     return purl;
    284 }
    285 
    286 /*
    287  * Free memory of parsed url
    288  */
    289 void parsed_url_free(struct parsed_url *purl) {
    290     if ( NULL != purl ) {
    291         if ( NULL != purl->scheme ) {
    292             free(purl->scheme);
    293         }
    294         if ( NULL != purl->host ) {
    295             free(purl->host);
    296         }
    297         if ( NULL != purl->port ) {
    298             free(purl->port);
    299         }
    300         if ( NULL != purl->path ) {
    301             free(purl->path);
    302         }
    303         if ( NULL != purl->query ) {
    304             free(purl->query);
    305         }
    306         if ( NULL != purl->fragment ) {
    307             free(purl->fragment);
    308         }
    309         if ( NULL != purl->username ) {
    310             free(purl->username);
    311         }
    312         if ( NULL != purl->password ) {
    313             free(purl->password);
    314         }
    315         free(purl);
    316     }
    317 }