2 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 * Uniform Resource Identifiers
33 * @v uri_string URI as a string
36 * Splits a URI into its component parts. The returned URI structure is
37 * dynamically allocated and must eventually be freed by calling
40 struct uri * parse_uri ( const char *uri_string ) {
45 char *authority = NULL;
48 /* Allocate space for URI struct and a copy of the string */
49 raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
50 uri = malloc ( sizeof ( *uri ) + raw_len );
53 raw = ( ( ( char * ) uri ) + sizeof ( *uri ) );
55 /* Zero URI struct and copy in the raw string */
56 memset ( uri, 0, sizeof ( *uri ) );
57 memcpy ( raw, uri_string, raw_len );
59 /* Start by chopping off the fragment, if it exists */
60 if ( ( tmp = strchr ( raw, '#' ) ) ) {
65 /* Identify absolute/relative URI */
66 if ( ( tmp = strchr ( raw, ':' ) ) ) {
67 /* Absolute URI: identify hierarchical/opaque */
71 /* Absolute URI with hierarchical part */
74 /* Absolute URI with opaque part */
82 /* If we don't have a path (i.e. we have an absolute URI with
83 * an opaque portion), we're already finished processing
88 /* Chop off the query, if it exists */
89 if ( ( tmp = strchr ( path, '?' ) ) ) {
94 /* Identify net/absolute/relative path */
95 if ( strncmp ( path, "//", 2 ) == 0 ) {
96 /* Net path. If this is terminated by the first '/'
97 * of an absolute path, then we have no space for a
98 * terminator after the authority field, so shuffle
99 * the authority down by one byte, overwriting one of
102 authority = ( path + 2 );
103 if ( ( tmp = strchr ( authority, '/' ) ) ) {
106 memmove ( ( authority - 1 ), authority,
107 ( tmp - authority ) );
112 /* Absolute/relative path */
116 /* Split authority into user[:password] and host[:port] portions */
117 if ( ( tmp = strchr ( authority, '@' ) ) ) {
118 /* Has user[:password] */
121 uri->user = authority;
122 if ( ( tmp = strchr ( authority, ':' ) ) ) {
128 /* No user:password */
129 uri->host = authority;
132 /* Split host into host[:port] */
133 if ( ( tmp = strchr ( uri->host, ':' ) ) ) {
139 DBG ( "URI \"%s\" split into", raw );
141 DBG ( " scheme \"%s\"", uri->scheme );
143 DBG ( " opaque \"%s\"", uri->opaque );
145 DBG ( " user \"%s\"", uri->user );
147 DBG ( " password \"%s\"", uri->password );
149 DBG ( " host \"%s\"", uri->host );
151 DBG ( " port \"%s\"", uri->port );
153 DBG ( " path \"%s\"", uri->path );
155 DBG ( " query \"%s\"", uri->query );
157 DBG ( " fragment \"%s\"", uri->fragment );