From: Prashanth Mundkur Date: Thu, 23 Jul 2009 22:03:54 +0000 (-0700) Subject: [http] add uri parsing support X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=186366103a0bac2794edd30e0b7c1e5f92a4c1bd;p=xenclient%2Ftoolstack.git [http] add uri parsing support --- diff --git a/libs/http/Makefile b/libs/http/Makefile index 78543cf..cf3fd09 100644 --- a/libs/http/Makefile +++ b/libs/http/Makefile @@ -1,7 +1,7 @@ TOPLEVEL=../.. include $(TOPLEVEL)/common.make -OBJS = http +OBJS = httputils uri http INTF = $(foreach obj, $(OBJS),$(obj).cmi) LIBS = http.cma http.cmxa diff --git a/libs/http/OMakefile b/libs/http/OMakefile index ee93055..b63ad52 100644 --- a/libs/http/OMakefile +++ b/libs/http/OMakefile @@ -1,4 +1,6 @@ HTTP_FILES[] = + httputils + uri http LIB = http diff --git a/libs/http/http.ml b/libs/http/http.ml index ea514ee..dc31fbb 100644 --- a/libs/http/http.ml +++ b/libs/http/http.ml @@ -20,8 +20,6 @@ let dbg fmt = let logger s = if !verbose then Printf.printf "%s" s in Printf.ksprintf logger fmt -(* TODO: Put these functions in some libs *) - let optval = function | Some v -> v | None -> assert false let is_space c = c = ' ' || c = '\t' || c = '\r' || c = '\n' @@ -32,20 +30,6 @@ let rev_string_of_chars cl = ignore (List.fold_left (fun idx c -> s.[idx] <- c; idx - 1) (len - 1) cl); s -let is_digit c = - match c with '0' .. '9' -> true | _ -> false -let is_hex c = - match c with '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true | _ -> false - -let digit_value c = - (Char.code c) - (Char.code '0') -let hex_value c = - match c with - | '0' .. '9' -> digit_value c - | 'a' .. 'f' -> 10 + (Char.code c) - (Char.code 'a') - | 'A' .. 'F' -> 10 + (Char.code c) - (Char.code 'A') - | _ -> 0 (* should never happen if guarded with is_hex *) - let is_ctl_char c = (* Note that this considers '\t', '\n' and '\r' as control chars. *) let code = Char.code c in @@ -269,6 +253,14 @@ module Request_header = struct | Options -> "OPTIONS" | Extension m -> m + type url = + | Star + | Uri of Uri.t + + let string_of_url = function + | Star -> "*" + | Uri u -> Uri.to_string (Uri.normalize u) + type cursor = | Start | In_method of char list @@ -303,7 +295,7 @@ module Request_header = struct { mutable cursor: cursor; mutable s_meth: meth option; - mutable s_uri: string option; + mutable s_url: url option; mutable s_version: version option; mutable s_headers: header_fields; mutable num_bytes_parsed: int @@ -314,6 +306,7 @@ module Request_header = struct | Incomplete_request_line | Parse_error of cursor * char | Internal_error of string + | Invalid_request_uri of Uri.error let string_of_error = function | Unsupported_version s -> @@ -325,6 +318,8 @@ module Request_header = struct (string_of_cursor cursor) c | Internal_error s -> Printf.sprintf "Internal request-parsing error: %s" s + | Invalid_request_uri e -> + Printf.sprintf "Invalid request uri: %s" (Uri.string_of_error e) exception Http_error of error @@ -334,7 +329,7 @@ module Request_header = struct { cursor = Start; s_meth = None; - s_uri = None; + s_url = None; s_version = Some HTTP09; s_headers = []; num_bytes_parsed = 0 @@ -383,7 +378,13 @@ module Request_header = struct ) | In_uri cl -> if is_space c then begin - s.s_uri <- Some (rev_string_of_chars cl); + let u = rev_string_of_chars cl in + let url = (match u with + | "*" -> Star + | _ -> (try Uri (Uri.of_string u) + with Uri.Uri_error e -> raise_error (Invalid_request_uri e)) + ) in + s.s_url <- Some url; (match c with | ' ' | '\t' -> s.cursor <- Uri_SP | '\r' -> s.cursor <- Uri_CR @@ -439,7 +440,7 @@ module Request_header = struct { version: version; meth: meth; - uri: string; + url: url; headers: header_fields; } @@ -451,7 +452,7 @@ module Request_header = struct match state.cursor with | Done -> Some { version = optval state.s_version; meth = optval state.s_meth; - uri = optval state.s_uri; + url = optval state.s_url; headers = state.s_headers } | _ -> None @@ -474,7 +475,7 @@ module Request_header = struct let serialize buf req = Buffer.add_string buf (string_of_meth req.meth); Buffer.add_string buf " "; - Buffer.add_string buf req.uri; + Buffer.add_string buf (string_of_url req.url); if req.version <> HTTP09 then begin Buffer.add_string buf " "; Buffer.add_string buf (string_of_version req.version); @@ -541,57 +542,57 @@ module Response_header = struct let status_info = function (* Informational 1xx *) - | Status_continue -> 100, "Continue" + | Status_continue -> 100, "Continue" | Status_switching_protocols -> 101, "Switching Protocols" (* Successful 2xx *) - | Status_ok -> 200, "Ok" - | Status_created -> 201, "Created" - | Status_accepted -> 202, "Accepted" - | Status_non_authoritative -> 203, "Non-Authoritative Information" - | Status_no_content -> 204, "No Content" - | Status_reset_content -> 205, "Reset Content" - | Status_partial_content -> 206, "Partial Content" + | Status_ok -> 200, "Ok" + | Status_created -> 201, "Created" + | Status_accepted -> 202, "Accepted" + | Status_non_authoritative -> 203, "Non-Authoritative Information" + | Status_no_content -> 204, "No Content" + | Status_reset_content -> 205, "Reset Content" + | Status_partial_content -> 206, "Partial Content" (* Redirection 3xx *) - | Status_multiple_choices -> 300, "Multiple Choices" - | Status_moved_permanently -> 301, "Moved Permanently" - | Status_found -> 302, "Found" - | Status_see_other -> 303, "See Other" - | Status_not_modified -> 304, "Not Modified" - | Status_use_proxy -> 305, "Use Proxy" - | Status_temporary_redirect -> 307, "Temporary Redirect" + | Status_multiple_choices -> 300, "Multiple Choices" + | Status_moved_permanently -> 301, "Moved Permanently" + | Status_found -> 302, "Found" + | Status_see_other -> 303, "See Other" + | Status_not_modified -> 304, "Not Modified" + | Status_use_proxy -> 305, "Use Proxy" + | Status_temporary_redirect -> 307, "Temporary Redirect" (* Client Error 4xx *) - | Status_bad_request -> 400, "Bad Request" - | Status_unauthorized -> 401, "Unauthorized" - | Status_payment_required -> 402, "Payment Required" - | Status_forbidden -> 403, "Forbidden" - | Status_not_found -> 404, "Not Found" - | Status_method_not_allowed -> 405, "Method Not Allowed" - | Status_not_acceptable -> 406, "Not Acceptable" - | Status_proxy_authentication_required -> 407, "Proxy Authentication Required" - | Status_request_timeout -> 408, "Request Timeout" - | Status_conflict -> 409, "Conflict" - | Status_gone -> 410, "Gone" - | Status_length_required -> 411, "Length Required" - | Status_precondition_failed -> 412, "Precondition Failed" - | Status_request_entity_too_large -> 413, "Request Entity Too Large" - | Status_request_uri_too_large -> 414, "Request-URI Too Large" - | Status_unsupported_media_type -> 415, "Unsupported Media Type" + | Status_bad_request -> 400, "Bad Request" + | Status_unauthorized -> 401, "Unauthorized" + | Status_payment_required -> 402, "Payment Required" + | Status_forbidden -> 403, "Forbidden" + | Status_not_found -> 404, "Not Found" + | Status_method_not_allowed -> 405, "Method Not Allowed" + | Status_not_acceptable -> 406, "Not Acceptable" + | Status_proxy_authentication_required -> 407, "Proxy Authentication Required" + | Status_request_timeout -> 408, "Request Timeout" + | Status_conflict -> 409, "Conflict" + | Status_gone -> 410, "Gone" + | Status_length_required -> 411, "Length Required" + | Status_precondition_failed -> 412, "Precondition Failed" + | Status_request_entity_too_large -> 413, "Request Entity Too Large" + | Status_request_uri_too_large -> 414, "Request-URI Too Large" + | Status_unsupported_media_type -> 415, "Unsupported Media Type" | Status_requested_range_not_satisfiable -> 416, "Range Not Satisfiable" - | Status_expectation_failed -> 417, "Expectation Failed" + | Status_expectation_failed -> 417, "Expectation Failed" (* Server Error 5xx *) - | Status_internal_server_error -> 500, "Internal Server Error" - | Status_not_implemented -> 501, "Not Implemented" - | Status_bad_gateway -> 502, "Bad Gateway" - | Status_service_unavailable -> 503, "Service Unavailable" - | Status_gateway_timeout -> 504, "Gateway Timeout" - | Status_version_not_supported -> 505, "HTTP Version Not Supported" + | Status_internal_server_error -> 500, "Internal Server Error" + | Status_not_implemented -> 501, "Not Implemented" + | Status_bad_gateway -> 502, "Bad Gateway" + | Status_service_unavailable -> 503, "Service Unavailable" + | Status_gateway_timeout -> 504, "Gateway Timeout" + | Status_version_not_supported -> 505, "HTTP Version Not Supported" (* Other *) - | Status_other (code, reason) -> code, reason + | Status_other (code, reason) -> code, reason type cursor = | Start @@ -605,19 +606,24 @@ module Response_header = struct | Done let string_of_cursor = function - | Start -> "Start" + | Start -> + "Start" | In_version cl -> Printf.sprintf "In-version \"%s\"" (rev_string_of_chars cl) - | Version_SP -> "Version-SP" + | Version_SP -> + "Version-SP" | In_status_code (sc, nd) -> Printf.sprintf "In-status-code %d (after %d digits)" sc nd - | Status_code_SP -> "Status-code-SP" + | Status_code_SP -> + "Status-code-SP" | In_reason_phrase cl -> Printf.sprintf "In-reason-phrase \"%s\"" (rev_string_of_chars cl) - | Resp_line_CR -> "Resp-line-CR" + | Resp_line_CR -> + "Resp-line-CR" | In_headers hs -> Printf.sprintf "In-headers (%s)" (Headers.string_of_cursor hs.Headers.cursor) - | Done -> "Done" + | Done -> + "Done" type state = { @@ -697,7 +703,7 @@ module Response_header = struct | Version_SP -> (match c with | ' ' | '\t' -> () - | _ when is_digit c -> s.cursor <- In_status_code ((digit_value c), 1) + | _ when Httputils.is_digit c -> s.cursor <- In_status_code ((Httputils.digit_value c), 1) | _ -> raise_bad_char () ) | In_status_code (sc, nd) -> @@ -711,8 +717,8 @@ module Response_header = struct | '\n' -> s.s_status_code <- Some sc; s.cursor <- In_headers (Headers.init_state ()) - | _ when is_digit c -> - let nsc = 10 * sc + (digit_value c) in + | _ when Httputils.is_digit c -> + let nsc = 10 * sc + (Httputils.digit_value c) in if nd >= 3 then raise_error (Unsupported_status_code nsc) else s.cursor <- In_status_code (nsc, nd + 1) | _ -> @@ -818,15 +824,15 @@ module Payload = struct let string_of_cursor = function | Start_chunk_length -> "Start-chunk-length" - | In_chunk_length -> "In-chunk-length" - | Chunk_length_CR -> "Chunk-length-CR" + | In_chunk_length -> "In-chunk-length" + | Chunk_length_CR -> "Chunk-length-CR" | In_chunk_extension -> "In-chunk-extension" - | In_chunk -> "In-chunk" - | Chunk_CR -> "Chunk-CR" - | In_body -> "In-body" + | In_chunk -> "In-chunk" + | Chunk_CR -> "Chunk-CR" + | In_body -> "In-body" | In_trailer hs -> Printf.sprintf "In-trailer (%s)" (Headers.string_of_cursor hs.Headers.cursor) - | Done -> "Done" + | Done -> "Done" type content_length = | Chunked @@ -1010,8 +1016,8 @@ module Payload = struct end end | Start_chunk_length -> - if is_hex c then begin - s.remaining_length <- Int64.of_int (hex_value c); + if Httputils.is_hex c then begin + s.remaining_length <- Int64.of_int (Httputils.hex_value c); s.cursor <- In_chunk_length end else raise_bad_char () | In_chunk_length -> @@ -1024,10 +1030,10 @@ module Payload = struct else s.cursor <- In_chunk | ' ' | '\t' | ';' -> s.cursor <- In_chunk_extension - | _ when is_hex c -> + | _ when Httputils.is_hex c -> (* TODO: check for overflow!! *) s.remaining_length <- (Int64.add (Int64.shift_left s.remaining_length 4) - (Int64.of_int (hex_value c))) + (Int64.of_int (Httputils.hex_value c))) | _ -> raise_bad_char () ) | In_chunk_extension -> diff --git a/libs/http/http.mli b/libs/http/http.mli index 2c28269..6abf038 100644 --- a/libs/http/http.mli +++ b/libs/http/http.mli @@ -44,6 +44,11 @@ module Request_header : sig | Extension of string val string_of_meth : meth -> string + type url = + | Star + | Uri of Uri.t + val string_of_url : url -> string + type state val init_state : unit -> state val num_bytes_parsed : state -> int @@ -52,7 +57,7 @@ module Request_header : sig { version : version; meth : meth; - uri : string; + url : url; headers : header_fields; } type parse_result = diff --git a/libs/http/httputils.ml b/libs/http/httputils.ml new file mode 100644 index 0000000..52ce6b0 --- /dev/null +++ b/libs/http/httputils.ml @@ -0,0 +1,28 @@ +(* + * Copyright (C) 2009 Citrix Ltd. + * Author Prashanth Mundkur + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +let is_digit c = + match c with '0' .. '9' -> true | _ -> false +let is_hex c = + match c with '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true | _ -> false + +let digit_value c = + (Char.code c) - (Char.code '0') +let hex_value c = + match c with + | '0' .. '9' -> digit_value c + | 'a' .. 'f' -> 10 + (Char.code c) - (Char.code 'a') + | 'A' .. 'F' -> 10 + (Char.code c) - (Char.code 'A') + | _ -> 0 (* should never happen if guarded with is_hex *) diff --git a/libs/http/httputils.mli b/libs/http/httputils.mli new file mode 100644 index 0000000..143f82a --- /dev/null +++ b/libs/http/httputils.mli @@ -0,0 +1,19 @@ +(* + * Copyright (C) 2009 Citrix Ltd. + * Author Prashanth Mundkur + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val is_digit : char -> bool +val is_hex : char -> bool +val digit_value : char -> int +val hex_value : char -> int diff --git a/libs/http/uri.ml b/libs/http/uri.ml new file mode 100644 index 0000000..7092899 --- /dev/null +++ b/libs/http/uri.ml @@ -0,0 +1,290 @@ +(* + * Copyright (C) 2009 Citrix Ltd. + * Author Prashanth Mundkur + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(* This implements URI parsing and handling as specified in RFC 3986. *) + +type authority = { + userinfo : string option; + host : string; + port : int option; +} + +type scheme = string + +type t = { + scheme : scheme option; + authority : authority option; + path : string option; + query : string option; + fragment : string option; +} + +type error = + | Invalid_uri of string + | Invalid_authority of string + | Invalid_port of string + | Missing_required_components of string + +let string_of_error = function + | Invalid_uri s -> + Printf.sprintf "\"%s\" is an invalid URI" s + | Invalid_authority a -> + Printf.sprintf "\"%s\" is an invalid authority" a + | Invalid_port p -> + Printf.sprintf "\"%s\" is an invalid port" p + | Missing_required_components s -> + Printf.sprintf "\"%s\" is an incomplete URI" s + +exception Uri_error of error + +let raise_error e = + raise (Uri_error e) + +let explode s = + let rec unfold indx acc = + if indx < 0 then acc + else unfold (indx - 1) (s.[indx] :: acc) + in unfold ((String.length s) - 1) [] + +let implode cl = + let len = List.length cl in + let s = String.create len in + ignore (List.fold_left (fun idx c -> s.[idx] <- c; idx + 1) 0 cl); + s + +let lowercase s = + implode (List.map Char.lowercase (explode s)) + +let maybe f x = match x with None -> None | Some x -> Some (f x) + +let is_sub_delim = function + | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' -> true + | _ -> false +let is_gen_delim = function + | ':' | '/' | '?' | '#' | '[' | ']' | '@' -> true + | _ -> false +let is_reserved c = is_gen_delim c || is_sub_delim c +let is_unreserved = function + | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' -> true + | '-' | '.' | '_' | '~' -> true + | _ -> false +let is_pchar c = + is_unreserved c || is_sub_delim c || c = ':' || c = '@' + +let pct_decode s = + let cl = explode s in + let rec decode cl acc = + match cl with + | '%':: h :: l :: rest when Httputils.is_hex h && Httputils.is_hex l -> + let hn, ln = Httputils.hex_value h, Httputils.hex_value l in + let chr = Char.chr ((hn lsl 4) + ln) in + (* TODO: the is_unreserved check is the most + conservative, but may be overly so. We + need to pass in the context in which the + decoding is being done, represented as the + character classes of allowed decoded + characters. + *) + if is_unreserved chr then decode rest (chr :: acc) + else decode rest (Char.uppercase l :: Char.uppercase h :: '%' :: acc) + | c :: rest -> + decode rest (c :: acc) + | [] -> + implode (List.rev acc) + in decode cl [] + +(* This is based on the "remove_dot_segments" algorithm specified in + Section 5.2.4 of RFC 3986. Note that this algorithm passes empty + path segments (e.g. the one between a and b in "a//b") through to + the output. +*) +let remove_dot_segments path = + let slash = Str.regexp "/" in + let has_leading_slash p = + String.length p > 0 && p.[0] = '/' in + let rec remove in_segs out_segs = + match in_segs, out_segs with + | "." :: in_rest, _ -> + remove in_rest out_segs + | ".." :: in_rest, _ :: out_rest -> + remove in_rest out_rest + | ".." :: in_rest, [] -> + remove in_rest [] + | s :: in_rest, out_rest -> + remove in_rest (s :: out_rest) + | [], _ -> + List.rev out_segs in + let out_segs = remove (Str.split slash path) [] in + let out = String.concat "/" out_segs in + if has_leading_slash path then "/" ^ out else out + +(* Regular expression for authority = [userinfo "@"] host [":" port] *) +let auth_re = "\\(\\([^@]*\\)@\\)?\\([^:]*\\)\\(:\\([0-9]*\\)\\)?" + +let parse_authority s = + let get_opt_group n = + try Some (Str.matched_group n s) + with Not_found -> None in + let get_opt_port () = + match get_opt_group 5 with + | None -> None + | Some s -> + if String.length s = 0 then None + else (try Some (int_of_string s) + with Failure _ -> raise_error (Invalid_port s) + ) + in + let auth_regexp = Str.regexp auth_re in + if Str.string_match auth_regexp s 0 then + Some { userinfo = get_opt_group 2; + host = Str.matched_group 3 s; + port = get_opt_port (); + } + else None + +(* The following regular expression and explanation is taken from + RFC3986, Appendix B: + + The following line is the regular expression for breaking-down a + well-formed URI reference into its components. + + ^(([^:/?#]+):)?(//([^/?#]* ))?([^?#]* )(\?([^#]* ))?(#(.* ))? + 12 2 1 3 4 43 5 56 7 76 8 9 98 + + For example, matching the above expression to + + http://www.ics.uci.edu/pub/ietf/uri/#Related + + results in the following subexpression matches: + + $1 = http: + $2 = http + $3 = //www.ics.uci.edu + $4 = www.ics.uci.edu + $5 = /pub/ietf/uri/ + $6 = + $7 = + $8 = #Related + $9 = Related + + where indicates that the component is not present, as + is the case for the query component in the above example. + Therefore, we can determine the value of the five components as + + scheme = $2 + authority = $4 + path = $5 + query = $7 + fragment = $9 +*) + +let scheme_re = "\\([^:/\\?#]+\\)" (* 2-2 *) +let authority_re = "\\([^/\\?#]*\\)" (* 4-4 *) +let path_re = "\\([^\\?#]*\\)" (* 5-5 *) +let query_re = "\\([^#]*\\)" (* 7-7 *) +let frag_re = "\\(.*\\)" (* 9-9 *) + +let uri_re = (Printf.sprintf "^\\(%s:\\)?\\(//%s\\)?%s\\(\\?%s\\)?\\(#%s\\)?$" + scheme_re authority_re path_re query_re frag_re) + +(* Get the five components of the uri using the regular expression + above. Note that an empty string matches the regular expression, is + not a valid URI. + + Also, the Str library uses static state, so we need to be careful + with interleaving calls to it with two or more regular expressions + at the same time (e.g. authority and uri) below. +*) +let parse_uri s = + let uri_regexp = Str.regexp uri_re in + let get_authority a = + match parse_authority a with + | None when String.length a > 0 + -> raise_error (Invalid_authority a) + | auth -> auth + in + if Str.string_match uri_regexp s 0 then + let get_opt_group n = + try Some (Str.matched_group n s) + with Not_found -> None in + let scheme = get_opt_group 2 in + let auth = get_opt_group 4 in + let path = get_opt_group 5 in + let query = get_opt_group 7 in + let fragment = get_opt_group 9 in + (* Parse authority regexp _after_ finishing with the uri regexp. *) + let authority = (match auth with + | None -> None + | Some a -> get_authority a + ) in + if scheme = None && path = None then + raise_error (Missing_required_components s); + { + scheme = scheme; + authority = authority; + path = path; + query = query; + fragment = fragment; + } + else raise_error (Invalid_uri s) + +let of_string s = parse_uri s + +(* See Section 5.3 of RFC 3986. *) +let authority_to_string a = + let mod_host = match a.userinfo with None -> a.host | Some u -> u ^ "@" ^ a.host + in match a.port with None -> mod_host | Some p -> mod_host ^ ":" ^ string_of_int p + +let to_string u = + let scheme = + match u.scheme with None -> "" | Some s -> s ^ ":" in + let append_authority acc = + match u.authority with None -> acc | Some a -> acc ^ "//" ^ (authority_to_string a) in + let append_path acc = + match u.path with None -> acc | Some p -> acc ^ p in + let append_query acc = + match u.query with None -> acc | Some q -> acc ^ "?" ^ q in + let append_fragment acc = + match u.fragment with None -> acc | Some f -> acc ^ "#" ^ f + in append_fragment (append_query (append_path (append_authority scheme))) + +(* See Section 6 of RFC 3986. This function implements case + normalization, percent-encoding normalization, and path segment + normalization, as described in Sections 6.2.2.1 through 6.2.2.3. +*) +let normalize_auth a = + { a with + userinfo = maybe pct_decode a.userinfo; + host = lowercase a.host; + } + +let normalize u = + let scheme = maybe lowercase u.scheme in + let authority = maybe normalize_auth u.authority in + (* Note that pct_decode needs to be done _before_ + remove_dot_segments, so that we can process any encoded "." + characters (which are in the unreserved set). Otherwise, + the embedded "." characters will escape their processing by + remove_dot_segments. + *) + let path = maybe (fun s -> remove_dot_segments (pct_decode s)) u.path in + let query = maybe pct_decode u.query in + let fragment = maybe pct_decode u.fragment in + { scheme = scheme; + authority = authority; + path = path; + query = query; + fragment = fragment; + } diff --git a/libs/http/uri.mli b/libs/http/uri.mli new file mode 100644 index 0000000..985e1ca --- /dev/null +++ b/libs/http/uri.mli @@ -0,0 +1,45 @@ +(* + * Copyright (C) 2009 Citrix Ltd. + * Author Prashanth Mundkur + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type authority = +{ + userinfo : string option; + host : string; + port : int option; +} + +type scheme = string + +type t = +{ + scheme : scheme option; + authority : authority option; + path : string option; + query : string option; + fragment : string option; +} + +type error = + | Invalid_uri of string + | Invalid_authority of string + | Invalid_port of string + | Missing_required_components of string +exception Uri_error of error +val string_of_error : error -> string + +val of_string : string -> t +val to_string : t -> string + +val normalize : t -> t