diff --git a/CHANGES.md b/CHANGES.md index 4cc0a0ef1..0d428a323 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,6 +15,7 @@ `~chunked:true` and `~body_length`. - cohttp-lwt-unix: Don't blow up when certificates are not available and no-network requests are made. (akuhlens #1027) + Makes `cohttp-lwt.S.default_ctx` lazy. +- cohttp-lwt-unix: Add http/https proxy support for client requests (MisterDA #1080) ## v6.0.0~beta2 (2024-01-05) diff --git a/cohttp-lwt-unix.opam b/cohttp-lwt-unix.opam index 26b3a42dc..fbb2a67c1 100644 --- a/cohttp-lwt-unix.opam +++ b/cohttp-lwt-unix.opam @@ -59,3 +59,11 @@ build: [ "@doc" {with-doc} ] ] +pin-depends: [ + [ "conduit.dev" "git+https://github.com/mirage/ocaml-conduit.git#055956ace8748bd6c190ce71e04e12d9a8a1acb2" ] + [ "conduit-lwt.dev" "git+https://github.com/mirage/ocaml-conduit.git#055956ace8748bd6c190ce71e04e12d9a8a1acb2" ] + [ "conduit-lwt-unix.dev" "git+https://github.com/mirage/ocaml-conduit.git#055956ace8748bd6c190ce71e04e12d9a8a1acb2" ] + [ "conduit-async.dev" "git+https://github.com/mirage/ocaml-conduit.git#055956ace8748bd6c190ce71e04e12d9a8a1acb2" ] + [ "conduit-mirage.dev" "git+https://github.com/mirage/ocaml-conduit.git#055956ace8748bd6c190ce71e04e12d9a8a1acb2" ] + [ "ca-certs.dev" "git+https://github.com/art-w/ca-certs.git#95d1250bdce4225cb92c345ba5d8f180e237eb2b" ] +] diff --git a/cohttp-lwt-unix.opam.template b/cohttp-lwt-unix.opam.template index 75ab83a7c..c20834fbd 100644 --- a/cohttp-lwt-unix.opam.template +++ b/cohttp-lwt-unix.opam.template @@ -12,3 +12,11 @@ build: [ "@doc" {with-doc} ] ] +pin-depends: [ + [ "conduit.dev" "git+https://github.com/mirage/ocaml-conduit.git#055956ace8748bd6c190ce71e04e12d9a8a1acb2" ] + [ "conduit-lwt.dev" "git+https://github.com/mirage/ocaml-conduit.git#055956ace8748bd6c190ce71e04e12d9a8a1acb2" ] + [ "conduit-lwt-unix.dev" "git+https://github.com/mirage/ocaml-conduit.git#055956ace8748bd6c190ce71e04e12d9a8a1acb2" ] + [ "conduit-async.dev" "git+https://github.com/mirage/ocaml-conduit.git#055956ace8748bd6c190ce71e04e12d9a8a1acb2" ] + [ "conduit-mirage.dev" "git+https://github.com/mirage/ocaml-conduit.git#055956ace8748bd6c190ce71e04e12d9a8a1acb2" ] + [ "ca-certs.dev" "git+https://github.com/art-w/ca-certs.git#95d1250bdce4225cb92c345ba5d8f180e237eb2b" ] +] diff --git a/cohttp-lwt-unix/examples/client_lwt_proxy.ml b/cohttp-lwt-unix/examples/client_lwt_proxy.ml new file mode 100644 index 000000000..397f66b91 --- /dev/null +++ b/cohttp-lwt-unix/examples/client_lwt_proxy.ml @@ -0,0 +1,146 @@ +open Lwt +open Cohttp +open Cohttp_lwt_unix + +let () = + if not @@ Debug.debug_active () then ( + Fmt_tty.setup_std_outputs ~style_renderer:`Ansi_tty ~utf_8:true (); + Logs.set_level ~all:true (Some Logs.Debug); + Logs.set_reporter Debug.default_reporter) + +let proxy_uri = ref None +let uri = ref [] +let proxy_authorization = ref None +let set_proxy_uri uri = proxy_uri := Some (Uri.of_string uri) + +let set_proxy_authorization auth = + proxy_authorization := + Some (Cohttp.Auth.credential_of_string ("Basic " ^ Base64.encode_exn auth)) + +let usage_msg = + {|Usage: test_client_proxy -proxy +Examples: +$ test_client_proxy -proxy http://localhost:8080 http://example.com +$ test_client_proxy -proxy https://localhost:8080 https://example.com +Options:|} + +let anon_fun args = uri := !uri @ [ args ] + +let speclist = + [ + ("-proxy", Arg.String set_proxy_uri, " Proxy uri"); + ("-proxyauth", Arg.String set_proxy_authorization, " Proxy authorization"); + ] + +(* Boilerplate code to handle redirects *) + +let rec http_get_and_follow ~max_redirects ?headers uri = + let open Lwt.Syntax in + let* ans = Cohttp_lwt_unix.Client.get ?headers uri in + follow_redirect ~max_redirects ?headers uri ans + +and follow_redirect ~max_redirects ?headers request_uri (response, body) = + let open Lwt.Syntax in + let status = Http.Response.status response in + (* The unconsumed body would otherwise leak memory *) + let* () = + if status <> `OK then Cohttp_lwt.Body.drain_body body else Lwt.return_unit + in + match status with + | `OK -> Lwt.return (response, body) + | `Permanent_redirect | `Moved_permanently -> + handle_redirect ~permanent:true ~max_redirects ?headers request_uri + response + | `Found | `Temporary_redirect -> + handle_redirect ~permanent:false ~max_redirects ?headers request_uri + response + | `Not_found | `Gone -> failwith "Not found" + | status -> + Lwt.fail_with + (Printf.sprintf "Unhandled status: %s" + (Cohttp.Code.string_of_status status)) + +and handle_redirect ~permanent ~max_redirects ?headers request_uri response = + if max_redirects <= 0 then failwith "Too many redirects" + else + let headers' = Http.Response.headers response in + let location = Http.Header.get headers' "location" in + match location with + | None -> failwith "Redirection without Location header" + | Some url -> + let open Lwt.Syntax in + let uri = Uri.of_string url in + let* () = + if permanent then + Logs_lwt.warn (fun m -> + m "Permanent redirection from %s to %s" + (Uri.to_string request_uri) + url) + else Lwt.return_unit + in + http_get_and_follow ?headers uri ~max_redirects:(max_redirects - 1) + +(* Interesting stuff *) + +let getenv_opt k = + match Sys.getenv_opt k with + | Some v -> Some (k, Uri.of_string v) + | None -> None + +let getenv_opt_case k = + match getenv_opt (String.lowercase_ascii k) with + | None -> getenv_opt (String.uppercase_ascii k) + | v -> v + +let main ~proxy ~uri ~credential () = + let all_proxy, no_proxy, scheme_proxy = + match proxy with + | None -> + ( Option.map Uri.of_string (Sys.getenv_opt "ALL_PROXY"), + Sys.getenv_opt "NO_PROXY", + [ + getenv_opt_case "httpunix_proxy"; + getenv_opt_case "https_proxy"; + getenv_opt "http_proxy"; + ] + |> List.filter_map (function + | Some (k, v) -> Some (String.(sub k 0 (rindex k '_')), v) + | n -> n) ) + | v -> (v, None, []) + in + + let proxy_headers = + Option.map + (fun credential -> + Http.Header.init_with "Proxy-Authorization" + (Cohttp.Auth.string_of_credential credential)) + credential + in + + let module Cache = Cohttp_lwt_unix.Connection_proxy in + let cache = + Cache.create ?all_proxy ~scheme_proxy ?no_proxy ?proxy_headers () + in + Client.set_cache (Cache.call cache); + + http_get_and_follow ~max_redirects:2 (Uri.of_string uri) + >>= fun (resp, body) -> + let code = resp |> Response.status |> Code.code_of_status in + Printf.printf "Response code: %d\n" code; + Printf.printf "Headers: %s\n" (resp |> Response.headers |> Header.to_string); + body |> Cohttp_lwt.Body.to_string >|= fun body -> + Printf.printf "Body of length: %d\n" (String.length body); + print_endline ("Received body\n" ^ body) + +(* Argument parsing *) + +let () = + Arg.parse speclist anon_fun usage_msg; + if List.length !uri <> 1 then ( + prerr_endline "Expected a single resource uri."; + prerr_endline usage_msg; + exit 1); + let proxy = !proxy_uri + and uri = List.hd !uri + and credential = !proxy_authorization in + Lwt_main.run (main ~proxy ~uri ~credential ()) diff --git a/cohttp-lwt-unix/examples/dune b/cohttp-lwt-unix/examples/dune index c323e76d5..b596cd6b3 100644 --- a/cohttp-lwt-unix/examples/dune +++ b/cohttp-lwt-unix/examples/dune @@ -1,8 +1,13 @@ (executables - (names client_lwt client_lwt_timeout docker_lwt server_lwt) - (libraries cohttp-lwt-unix)) + (names client_lwt client_lwt_timeout docker_lwt server_lwt client_lwt_proxy) + (libraries cohttp-lwt-unix fmt.tty)) (alias (name runtest) (package cohttp-lwt-unix) - (deps client_lwt.exe client_lwt_timeout.exe docker_lwt.exe server_lwt.exe)) + (deps + client_lwt.exe + client_lwt_timeout.exe + docker_lwt.exe + server_lwt.exe + client_lwt_proxy.exe)) diff --git a/cohttp-lwt-unix/src/cohttp_lwt_unix.ml b/cohttp-lwt-unix/src/cohttp_lwt_unix.ml index 120202039..db8239124 100644 --- a/cohttp-lwt-unix/src/cohttp_lwt_unix.ml +++ b/cohttp-lwt-unix/src/cohttp_lwt_unix.ml @@ -38,6 +38,14 @@ module Connection_cache = let sleep_ns ns = Lwt_unix.sleep (Int64.to_float ns /. 1_000_000_000.) end) +module Connection_proxy = + Cohttp_lwt.Connection_cache.Make_proxy + (Connection) + (struct + (* : Mirage_time.S *) + let sleep_ns ns = Lwt_unix.sleep (Int64.to_float ns /. 1_000_000_000.) + end) + module Client : sig (** The [Client] module implements the full UNIX HTTP client interface, including the UNIX-specific functions defined in {!C}. *) diff --git a/cohttp-lwt-unix/src/net.ml b/cohttp-lwt-unix/src/net.ml index cd0edefab..6a49c8ed3 100644 --- a/cohttp-lwt-unix/src/net.ml +++ b/cohttp-lwt-unix/src/net.ml @@ -35,15 +35,21 @@ let default_ctx = } type endp = Conduit.endp +type client = Conduit_lwt_unix.client let resolve ~ctx uri = Resolver_lwt.resolve_uri ~uri ctx.resolver -let connect_endp ~ctx:{ ctx; _ } endp = - Conduit_lwt_unix.endp_to_client ~ctx endp >>= fun client -> +let tunnel hostname (channels : IO.ic * IO.oc) : client = + `TLS_tunnel (`Hostname hostname, (fst channels).chan, snd channels) + +let connect_client ~ctx:{ ctx; _ } client = Conduit_lwt_unix.connect ~ctx client >|= fun (flow, ic, oc) -> let ic = Input_channel.create ic in (flow, ic, oc) +let connect_endp ~ctx endp = + Conduit_lwt_unix.endp_to_client ~ctx:ctx.ctx endp >>= connect_client ~ctx + let connect_uri ~ctx uri = resolve ~ctx uri >>= connect_endp ~ctx let close c = diff --git a/cohttp-lwt-unix/src/net.mli b/cohttp-lwt-unix/src/net.mli index c3f216b7e..42c09fd7e 100644 --- a/cohttp-lwt-unix/src/net.mli +++ b/cohttp-lwt-unix/src/net.mli @@ -24,6 +24,7 @@ include with module IO = Io and type ctx := ctx and type endp = Conduit.endp + and type client = Conduit_lwt_unix.client val init : ?ctx:Conduit_lwt_unix.ctx -> ?resolver:Resolver_lwt.t -> unit -> ctx (** [init ?ctx ?resolver ()] is a network context that is the same as the diff --git a/cohttp-lwt-unix/test/test_client.ml b/cohttp-lwt-unix/test/test_client.ml index 8ab5c34bd..47d7f6e24 100644 --- a/cohttp-lwt-unix/test/test_client.ml +++ b/cohttp-lwt-unix/test/test_client.ml @@ -114,7 +114,10 @@ let test_client uri = assert_equal ~printer:Fun.id "Spring" body; (* simple request function accepting custom requests. *) - let handler ?headers ?body meth uri = Client.call ?headers ?body meth uri in + let handler ?headers ?body ?absolute_form meth uri = + ignore absolute_form; + Client.call ?headers ?body meth uri + in tests handler uri (* The Client.{call, get, put, ...} functions by default use a new @@ -143,7 +146,8 @@ let test_non_persistent uri = (* the resolved endpoint may be buffered to avoid stressing the resolver: *) Connection.Net.resolve ~ctx:(Lazy.force Connection.Net.default_ctx) uri >>= fun endp -> - let handler ?headers ?body meth uri = + let handler ?headers ?body ?absolute_form meth uri = + ignore absolute_form; Connection.connect ~persistent:false endp >>= fun connection -> Connection.call connection ?headers ?body meth uri in @@ -159,7 +163,8 @@ let test_unknown uri = Connection.connect ~persistent:false endp >>= fun c -> let connection = ref c in (* reference to open connection *) - let rec handler ?headers ?body meth uri = + let rec handler ?headers ?body ?absolute_form meth uri = + ignore absolute_form; Lwt.catch (fun () -> Connection.call !connection ?headers ?body meth uri) (function diff --git a/cohttp-lwt.opam b/cohttp-lwt.opam index 46d388995..65370af89 100644 --- a/cohttp-lwt.opam +++ b/cohttp-lwt.opam @@ -26,7 +26,7 @@ doc: "https://mirage.github.io/ocaml-cohttp/" bug-reports: "https://github.com/mirage/ocaml-cohttp/issues" depends: [ "dune" {>= "3.0"} - "ocaml" {>= "4.08"} + "ocaml" {>= "4.13"} "http" {= version} "cohttp" {= version} "lwt" {>= "5.4.0"} diff --git a/cohttp-lwt/src/connection.ml b/cohttp-lwt/src/connection.ml index 879ed0810..0c454a724 100644 --- a/cohttp-lwt/src/connection.ml +++ b/cohttp-lwt/src/connection.ml @@ -33,6 +33,7 @@ module Make (Net : S.Net) : S.Connection with module Net = Net = struct (* enable warning when https://github.com/mirage/ocaml-conduit/pull/319 is released *) type req_resr = { + absolute_form : bool; uri : Uri.t; meth : Cohttp.Code.meth; headers : Header.t; @@ -164,12 +165,15 @@ module Make (Net : S.Net) : S.Connection with module Net = Net = struct queue_fail connection connection.in_flight e; Lwt.return_unit) - let call connection ?headers ?(body = `Empty) meth uri = + let call connection ?headers ?(body = `Empty) ?(absolute_form = false) meth + uri = let headers = match headers with Some h -> h | None -> Header.init () in match connection.state with | Connecting _ | Full _ -> let res, res_r = Lwt.wait () in - Queue.push { uri; meth; headers; body; res_r } connection.waiting; + Queue.push + { absolute_form; uri; meth; headers; body; res_r } + connection.waiting; Lwt_condition.broadcast connection.condition (); res | Closing _ | Half _ | Closed | Failed _ -> raise Retry @@ -193,7 +197,7 @@ module Make (Net : S.Net) : S.Connection with module Net = Net = struct *) Lwt.return_unit | Full (ic, oc) | Closing (ic, oc) -> - let ({ uri; meth; headers; body; res_r } as work) = + let ({ absolute_form; uri; meth; headers; body; res_r } as work) = Queue.take connection.waiting in @@ -222,7 +226,7 @@ module Make (Net : S.Net) : S.Connection with module Net = Net = struct else headers in - let req = Request.make ~encoding ~meth ~headers uri in + let req = Request.make ~encoding ~meth ~headers ~absolute_form uri in Queue.push work connection.in_flight; @@ -293,6 +297,34 @@ module Make (Net : S.Net) : S.Connection with module Net = Net = struct on_failure; connection + let create_tunnel ?(finalise = fun _ -> Lwt.return_unit) + ?(ctx = Lazy.force Net.default_ctx) proxy remote_host = + match proxy.state with + | Full (ic, oc) -> + let client = Net.tunnel remote_host (ic, oc) in + let channels = + Net.connect_client ~ctx client >>= fun (_, ic, oc) -> return (ic, oc) + in + let connection = + { + finalise; + in_flight = Queue.create (); + waiting = Queue.create (); + state = Connecting channels; + condition = Lwt_condition.create (); + persistent = `True; + } + in + let on_failure e = connection.state <- Failed e in + Lwt.on_any channels + (fun channels -> + connection.state <- Full channels; + Lwt.dont_wait (fun () -> reader connection) on_failure; + Lwt.dont_wait (fun () -> writer connection) on_failure) + on_failure; + connection + | _ -> failwith "Proxy connection wasn't in right state." + let connect ?finalise ?persistent ?ctx uri = let connection = create ?finalise ?persistent ?ctx uri in match connection.state with diff --git a/cohttp-lwt/src/connection_cache.ml b/cohttp-lwt/src/connection_cache.ml index 0dd46432d..695c52033 100644 --- a/cohttp-lwt/src/connection_cache.ml +++ b/cohttp-lwt/src/connection_cache.ml @@ -1,15 +1,6 @@ exception Retry = Connection.Retry -(** This functor establishes a new connection for each request. *) -module Make_no_cache (Connection : S.Connection) : sig - include S.Connection_cache - - val create : ?ctx:Connection.Net.ctx -> unit -> t - (** [create ?ctx ()] creates a connection for handling a single request. The - connection accepts only a single request and will automatically be closed - as soon as possible. - @param ctx See {!Connection.Net.ctx} *) -end = struct +module Make_no_cache (Connection : S.Connection) = struct module Net = Connection.Net module IO = Net.IO open IO @@ -18,12 +9,15 @@ end = struct let call = Fun.id - let create ?(ctx = Lazy.force Net.default_ctx) () ?headers ?body meth uri = + let create ?(ctx = Lazy.force Net.default_ctx) () ?headers ?body + ?absolute_form meth uri = Net.resolve ~ctx uri (* TODO: Support chunked encoding without ~persistent:true ? *) >>= Connection.connect ~ctx ~persistent:true >>= fun connection -> - let res = Connection.call connection ?headers ?body meth uri in + let res = + Connection.call connection ?headers ?body ?absolute_form meth uri + in (* this can be simplified when https://github.com/mirage/ocaml-conduit/pull/319 is released. *) Lwt.dont_wait (fun () -> @@ -38,38 +32,7 @@ end = struct res end -(** This functor keeps a cache of connections for reuse. Connections are reused - based on their remote {!type:Conduit.endp} (effectively IP / port). *) -module Make (Connection : S.Connection) (Sleep : S.Sleep) : sig - include S.Connection_cache - - val create : - ?ctx:Connection.Net.ctx -> - ?keep:int64 -> - ?retry:int -> - ?parallel:int -> - ?depth:int -> - unit -> - t - (** Create a new connection cache - - @param ctx Conduit context to use. See {!type:Connection.Net.ctx}. - @param keep Number of nanoseconds to keep an idle connection around. - @param retry - Number of times a {e gracefully} failed request is automatically - retried. {e graceful} means failed with {!exception:Connection.Retry}. - Requests with a [`Stream] {!module:Body} cannot be retried - automatically. Such requests will fail with - {!exception:Connection.Retry} and a new {!module:Body} will need to be - provided to retry. - @param parallel - maximum number of connections to establish to a single endpoint. Beware: - A single hostname may resolve to multiple endpoints. In such a case - connections may be created in excess to what was intended. - @param depth - maximum number of requests to queue and / or send on a single - connection. *) -end = struct +module Make (Connection : S.Connection) (Sleep : S.Sleep) = struct module Net = Connection.Net module IO = Net.IO open IO @@ -83,10 +46,11 @@ end = struct retry : int; parallel : int; depth : int; + proxy : Uri.t option; } let create ?(ctx = Lazy.force Net.default_ctx) ?(keep = 60_000_000_000L) - ?(retry = 2) ?(parallel = 4) ?(depth = 100) () = + ?(retry = 2) ?(parallel = 4) ?(depth = 100) ?proxy () = { cache = Hashtbl.create ~random:true 10; ctx; @@ -94,6 +58,7 @@ end = struct retry; parallel; depth; + proxy; } let rec get_connection self endp = @@ -109,7 +74,8 @@ end = struct Lwt.return_unit in let create () = - let connection = Connection.create ~finalise ~ctx:self.ctx endp + let connection = + Connection.create ~persistent:true ~finalise ~ctx:self.ctx endp and timeout = ref Lwt.return_unit in let rec busy () = Lwt.cancel !timeout; @@ -152,12 +118,31 @@ end = struct (fun _ -> get_connection self endp) (fun _ -> get_connection self endp)) - let call self ?headers ?body meth uri = - Net.resolve ~ctx:self.ctx uri >>= fun endp -> + let prepare self ?headers ?absolute_form meth uri = + match self.proxy with + | None -> + let absolute_form = Option.value ~default:false absolute_form in + Net.resolve ~ctx:self.ctx uri >>= fun endp -> + Lwt.return (endp, absolute_form, headers) + | Some proxy_uri -> + let absolute_form = + Option.value + ~default: + (not + (meth = `CONNECT + || (meth = `OPTIONS && Uri.path_and_query uri = "*"))) + absolute_form + in + Net.resolve ~ctx:self.ctx proxy_uri >>= fun endp -> + Lwt.return (endp, absolute_form, headers) + + let call self ?headers ?body ?absolute_form meth uri = + prepare self ?headers ?absolute_form meth uri + >>= fun (endp, absolute_form, headers) -> let rec request retry = get_connection self endp >>= fun conn -> Lwt.catch - (fun () -> Connection.call conn ?headers ?body meth uri) + (fun () -> Connection.call conn ?headers ?body ~absolute_form meth uri) (function | Retry -> ( match body with @@ -168,3 +153,310 @@ end = struct in request self.retry end + +module Make_tunnel (Connection : S.Connection) (Sleep : S.Sleep) : sig + include S.Connection_cache + + val create : + ?ctx:Connection.Net.ctx -> + ?keep:int64 -> + ?retry:int -> + ?parallel:int -> + ?depth:int -> + ?proxy_headers:Http.Header.t -> + Uri.t -> + unit -> + t +end = struct + module Net = Connection.Net + module IO = Net.IO + open IO + + type ctx = Net.ctx + type tunnel = { proxy : Connection.t; remote : Connection.t } + + type t = { + cache : (string, tunnel) Hashtbl.t; (* remote host * tunnel *) + proxy_uri : Uri.t; + mutable proxy_endp : Net.endp option; + proxy_headers : Http.Header.t; + ctx : ctx; + keep : int64; + retry : int; + parallel : int; + depth : int; + } + + let proxy_default_scheme uri = + match Uri.scheme uri with + | None -> Uri.with_scheme uri (Some "http") + | _ -> uri + + let create ?(ctx = Lazy.force Net.default_ctx) ?(keep = 60_000_000_000L) + ?(retry = 2) ?(parallel = 4) ?(depth = 100) + ?(proxy_headers = Http.Header.init ()) proxy_uri () = + if Uri.host proxy_uri = None then + Printf.ksprintf invalid_arg "No host was provided in URI %s." + (Uri.to_string proxy_uri); + { + cache = Hashtbl.create ~random:true 10; + proxy_uri = proxy_default_scheme proxy_uri; + proxy_endp = None; + proxy_headers; + ctx; + keep; + retry; + parallel; + depth; + } + + let rec request conn ?headers ?body ?absolute_form meth uri retry = + Lwt.catch + (fun () -> Connection.call conn ?headers ?body ?absolute_form meth uri) + (function + | Retry -> ( + match body with + | Some (`Stream _) -> Lwt.fail Retry + | None | Some `Empty | Some (`String _) | Some (`Strings _) -> + if retry <= 0 then Lwt.fail Retry + else + request conn ?headers ?body ?absolute_form meth uri (retry - 1) + ) + | e -> Lwt.fail e) + + let rec get_connection self ~proxy_endp ~remote ~remote_host ~remote_uri = + let finalise connection = + let rec remove keep = + let current = Hashtbl.find self.cache remote in + Hashtbl.remove self.cache remote; + if current.proxy == connection || current.remote == connection then + List.iter (Hashtbl.add self.cache remote) keep + else remove (current :: keep) + in + remove []; + Lwt.return_unit + in + let create () = + let proxy = + Connection.create ~persistent:true ~finalise ~ctx:self.ctx proxy_endp + in + request proxy ~headers:self.proxy_headers `CONNECT remote_uri self.retry + >>= fun (resp, _body) -> + let code = resp |> Http.Response.status |> Cohttp.Code.code_of_status in + if not (Cohttp.Code.is_success code) then + Printf.ksprintf failwith "Could not setup tunnel. Response code: %d\n" + code; + let remote = + Connection.create_tunnel ~finalise ~ctx:self.ctx proxy remote_host + and timeout = ref Lwt.return_unit in + let rec busy () = + Lwt.cancel !timeout; + if Connection.length remote = 0 then ( + timeout := + Sleep.sleep_ns self.keep >>= fun () -> + Connection.close remote; + Connection.close proxy; + (* failure is ignored *) + Lwt.return_unit); + Lwt.on_termination (Connection.notify remote) busy + in + busy (); + Lwt.return { proxy; remote } + in + match Hashtbl.find_all self.cache remote with + | [] -> + create () >>= fun tunnel -> + Hashtbl.add self.cache remote tunnel; + Lwt.return tunnel + | tunnels -> ( + let rec search length = function + | [ a ] -> (a, length + 1) + | a :: b :: tl + when Connection.length a.remote < Connection.length b.remote -> + search (length + 1) (a :: tl) + | _ :: tl -> search (length + 1) tl + | [] -> assert false + in + match search 0 tunnels with + | shallowest, _ when Connection.length shallowest.remote = 0 -> + Lwt.return shallowest + | _, length when length < self.parallel -> + create () >>= fun tunnel -> + Hashtbl.add self.cache remote tunnel; + Lwt.return tunnel + | shallowest, _ when Connection.length shallowest.remote < self.depth -> + Lwt.return shallowest + | _ -> + Lwt.try_bind + (fun () -> + Lwt.choose + (List.map + (fun { remote; _ } -> Connection.notify remote) + tunnels)) + (fun _ -> + get_connection self ~proxy_endp ~remote ~remote_host ~remote_uri) + (fun _ -> + get_connection self ~proxy_endp ~remote ~remote_host ~remote_uri) + ) + + let call self ?headers ?body ?absolute_form meth uri = + (match self.proxy_endp with + | None -> + Net.resolve ~ctx:self.ctx self.proxy_uri >>= fun proxy_endp -> + self.proxy_endp <- Some proxy_endp; + Lwt.return proxy_endp + | Some proxy_endp -> Lwt.return proxy_endp) + >>= fun proxy_endp -> + let remote_port = + match Uri_services.tcp_port_of_uri uri with + | Some p -> p + | None -> failwith "A port is required for the CONNECT method." + in + let remote_host = Option.get (Uri.host uri) in + let remote = remote_host ^ ":" ^ string_of_int remote_port + and remote_uri = Uri.with_port uri (Some remote_port) in + get_connection self ~proxy_endp ~remote ~remote_host ~remote_uri + >>= fun tunnel -> + request tunnel.remote ?headers ?body ?absolute_form meth uri self.retry +end + +type no_proxy_pattern = Name of string | Ipaddr_prefix of Ipaddr.Prefix.t +type no_proxy = Wildcard | Patterns of no_proxy_pattern list + +let trim_dots ~first_leading s = + let len = String.length s in + let i = ref 0 in + if first_leading && !i < len && String.unsafe_get s !i = '.' then incr i; + let j = ref (len - 1) in + while !j >= !i && String.unsafe_get s !j = '.' do + decr j + done; + if !j >= !i then String.sub s !i (!j - !i + 1) else "" + +let strncasecompare a b n = + let a = String.(sub a 0 (min (length a) n) |> lowercase_ascii) + and b = String.(sub b 0 (min (length b) n) |> lowercase_ascii) in + String.compare a b = 0 + +let no_proxy_from_env no_proxy = + if no_proxy = "*" then Wildcard + else + let patterns = + no_proxy + |> String.split_on_char ',' + |> List.filter_map (fun pattern -> + if pattern = "" then None else Some (String.trim pattern)) + |> List.map (fun pattern -> + match Ipaddr.of_string pattern with + | Ok addr -> Ipaddr_prefix (Ipaddr.Prefix.of_addr addr) + | Error _ -> ( + match Ipaddr.Prefix.of_string pattern with + | Ok prefix -> Ipaddr_prefix prefix + | Error _ -> Name (trim_dots ~first_leading:true pattern))) + in + Patterns patterns + +let check_no_proxy_patterns host = function + | Wildcard -> true + | _ when String.length host = 0 -> true + | Patterns patterns -> ( + match Ipaddr.of_string host with + | Ok hostip -> + List.exists + (function + | Name _ -> false + | Ipaddr_prefix network -> Ipaddr.Prefix.mem hostip network) + patterns + | Error _ -> + let name = trim_dots ~first_leading:false host in + List.exists + (function + | Ipaddr_prefix _ -> false + | Name pattern -> + let patternlen = String.length pattern + and namelen = String.length name in + if patternlen = namelen then + strncasecompare pattern name namelen + else if patternlen < namelen then + name.[namelen - patternlen - 1] = '.' + && strncasecompare pattern + (String.sub name (namelen - patternlen) + (patternlen - namelen - patternlen)) + patternlen + else false) + patterns) + +let tunnel_schemes = [ "https" ] + +module Make_proxy (Connection : S.Connection) (Sleep : S.Sleep) = struct + module Connection_cache = Make (Connection) (Sleep) + module Connection_tunnel = Make_tunnel (Connection) (Sleep) + + type proxy = Direct of Connection_cache.t | Tunnel of Connection_tunnel.t + + type t = { + proxies : (string * proxy) list; + direct : proxy option; + tunnel : proxy option; + no_proxy : Connection_cache.t; + no_proxy_patterns : no_proxy; + } + + let create ?ctx ?keep ?retry ?parallel ?depth ?(scheme_proxy = []) ?all_proxy + ?no_proxy ?proxy_headers () = + let create_default () = + Connection_cache.create ?ctx ?keep ?retry ?parallel ?depth () + and create_direct proxy = + Connection_cache.create ?ctx ?keep ?retry ?parallel ?depth ~proxy () + and create_tunnel proxy_uri = + Connection_tunnel.create ?ctx ?keep ?retry ?parallel ?depth ?proxy_headers + proxy_uri () + in + let no_proxy_patterns = + match no_proxy with + | None -> Patterns [] + | Some no_proxy -> no_proxy_from_env no_proxy + in + let no_proxy = create_default () in + let proxies = + List.map + (fun (scheme, uri) -> + let proxy = + if List.mem scheme tunnel_schemes then Tunnel (create_tunnel uri) + else Direct (create_direct uri) + in + (scheme, proxy)) + scheme_proxy + in + let direct, tunnel = + match all_proxy with + | Some uri -> + (Some (Direct (create_direct uri)), Some (Tunnel (create_tunnel uri))) + | None -> (None, None) + in + { no_proxy; direct; tunnel; proxies; no_proxy_patterns } + + let call self ?headers ?body ?absolute_form meth uri = + let proxy = + if + check_no_proxy_patterns + (Uri.host_with_default ~default:"" uri) + self.no_proxy_patterns + then None + (* Connection_cache.call self.no_proxy ?headers ?body ?absolute_form meth uri *) + else + let scheme = Option.value ~default:"" (Uri.scheme uri) in + match List.assoc scheme self.proxies with + | proxy -> Some proxy + | exception Not_found -> + if List.mem scheme tunnel_schemes then self.tunnel else self.direct + in + match proxy with + | None -> + Connection_cache.call self.no_proxy ?headers ?body ?absolute_form meth + uri + | Some (Tunnel proxy) -> + Connection_tunnel.call proxy ?headers ?body ?absolute_form meth uri + | Some (Direct proxy) -> + Connection_cache.call proxy ?headers ?body ?absolute_form meth uri +end diff --git a/cohttp-lwt/src/connection_cache.mli b/cohttp-lwt/src/connection_cache.mli new file mode 100644 index 000000000..1fc271d27 --- /dev/null +++ b/cohttp-lwt/src/connection_cache.mli @@ -0,0 +1,94 @@ +(** This functor establishes a new connection for each request. *) +module Make_no_cache (Connection : S.Connection) : sig + include S.Connection_cache + + val create : ?ctx:Connection.Net.ctx -> unit -> t + (** [create ?ctx ()] creates a connection for handling a single request. The + connection accepts only a single request and will automatically be closed + as soon as possible. + @param ctx See {!Connection.Net.ctx} *) +end + +(** This functor keeps a cache of connections for reuse. Connections are reused + based on their remote {!type:Conduit.endp} (effectively IP / port). *) +module Make (Connection : S.Connection) (Sleep : S.Sleep) : sig + include S.Connection_cache + + val create : + ?ctx:Connection.Net.ctx -> + ?keep:int64 -> + ?retry:int -> + ?parallel:int -> + ?depth:int -> + ?proxy:Uri.t -> + unit -> + t + (** Create a new connection cache + + @param ctx Conduit context to use. See {!type:Connection.Net.ctx}. + @param keep Number of nanoseconds to keep an idle connection around. + @param retry + Number of times a {e gracefully} failed request is automatically + retried. {e graceful} means failed with {!exception:Connection.Retry}. + Requests with a [`Stream] {!module:Body} cannot be retried + automatically. Such requests will fail with + {!exception:Connection.Retry} and a new {!module:Body} will need to be + provided to retry. + @param parallel + maximum number of connections to establish to a single endpoint. Beware: + A single hostname may resolve to multiple endpoints. In such a case + connections may be created in excess to what was intended. + @param depth + maximum number of requests to queue and / or send on a single + connection. + @param proxy A direct (non-tunneling) proxy to use. *) +end + +(** This functor keeps a cache of connections for reuse. Connections are reused + based on their remote {!type:Conduit.endp} (effectively IP / port). It also + supports automatically connecting and reconnecting to direct and tunneling + proxies, based on the remote URI scheme (HTTP will select direct proxies, + HTTPS tunneling proxies). *) +module Make_proxy (Connection : S.Connection) (Sleep : S.Sleep) : sig + include S.Connection_cache + + val create : + ?ctx:Connection.Net.ctx -> + ?keep:int64 -> + ?retry:int -> + ?parallel:int -> + ?depth:int -> + ?scheme_proxy:(string * Uri.t) list -> + ?all_proxy:Uri.t -> + ?no_proxy:string -> + ?proxy_headers:Http.Header.t -> + unit -> + t + (** Create a new connection cache. The outer connections to the proxy and the + inner connections share the same parameters. + + @param ctx Conduit context to use. See {!type:Connection.Net.ctx}. + @param keep Number of nanoseconds to keep an idle connection around. + @param retry + Number of times a {e gracefully} failed request is automatically + retried. {e graceful} means failed with {!exception:Connection.Retry}. + Requests with a [`Stream] {!module:Body} cannot be retried + automatically. Such requests will fail with + {!exception:Connection.Retry} and a new {!module:Body} will need to be + provided to retry. + @param parallel + maximum number of connections to establish to a single endpoint. Beware: + A single hostname may resolve to multiple endpoints. In such a case + connections may be created in excess to what was intended. + @param depth + maximum number of requests to queue and / or send on a single + connection. + @param scheme_proxy The proxy URI associated to each (remote) scheme. + @param all_proxy + The default proxy to use. Proxy for specific schemes have precedence + over this. + @param no_proxy + Disable proxies for specific hosts, specified as curl's [NO_PROXY]. + @see + @param proxy_headers Headers to pass to the proxy. *) +end diff --git a/cohttp-lwt/src/dune b/cohttp-lwt/src/dune index 12c5df290..d2d062103 100644 --- a/cohttp-lwt/src/dune +++ b/cohttp-lwt/src/dune @@ -4,4 +4,4 @@ (synopsis "Lwt backend") (preprocess (pps ppx_sexp_conv)) - (libraries lwt uri http_bytebuffer cohttp logs logs.lwt)) + (libraries lwt uri uri.services http_bytebuffer cohttp logs logs.lwt ipaddr)) diff --git a/cohttp-lwt/src/s.ml b/cohttp-lwt/src/s.ml index 18d8f7cab..c9af878cd 100644 --- a/cohttp-lwt/src/s.ml +++ b/cohttp-lwt/src/s.ml @@ -20,6 +20,7 @@ end module type Net = sig module IO : IO + type client type endp type ctx [@@deriving sexp_of] @@ -40,6 +41,8 @@ module type Net = sig (** [resolve ~ctx uri] resolves [uri] into an endpoint description. This is [Resolver_lwt.resolve_uri ~uri ctx.resolver]. *) + val tunnel : string -> IO.ic * IO.oc -> client + val connect_uri : ctx:ctx -> Uri.t -> (IO.conn * IO.ic * IO.oc) IO.t (** [connect_uri ~ctx uri] starts a {i flow} on the given [uri]. The choice of the protocol (with or without encryption) is done by the {i scheme} of the @@ -62,6 +65,7 @@ module type Net = sig (** [connect_endp ~ctx endp] starts a {i flow} to the given [endp]. [endp] describes address and protocol of the endpoint to connect to. *) + val connect_client : ctx:ctx -> client -> (IO.conn * IO.ic * IO.oc) IO.t val close_in : IO.ic -> unit val close_out : IO.oc -> unit val close : IO.ic -> IO.oc -> unit @@ -76,6 +80,7 @@ end type call = ?headers:Http.Header.t -> ?body:Body.t -> + ?absolute_form:bool -> Http.Method.t -> Uri.t -> (Cohttp.Response.t * Body.t) Lwt.t @@ -138,6 +143,9 @@ module type Connection = sig @param ctx See [Net.ctx] @param endp The remote address, port and protocol to connect to. *) + val create_tunnel : + ?finalise:(t -> unit Net.IO.t) -> ?ctx:Net.ctx -> t -> string -> t + val connect : ?finalise:(t -> unit Net.IO.t) -> ?persistent:bool -> diff --git a/cohttp-mirage/src/net.ml b/cohttp-mirage/src/net.ml index 72964fccb..00d2636f3 100644 --- a/cohttp-mirage/src/net.ml +++ b/cohttp-mirage/src/net.ml @@ -21,7 +21,10 @@ struct lazy { resolver = R.localhost; conduit = None; authenticator = None } type endp = Conduit.endp + type client + let tunnel = failwith "Unimplemented" + let connect_client = failwith "Unimplemented" let resolve ~ctx uri = R.resolve_uri ~uri ctx.resolver let connect_endp ~ctx endp = diff --git a/cohttp/src/dune b/cohttp/src/dune index bdc14c034..34636e5a1 100644 --- a/cohttp/src/dune +++ b/cohttp/src/dune @@ -17,7 +17,8 @@ sexplib0 stringext uri - uri-sexp)) + uri-sexp + uri.services)) (ocamllex accept_lexer) diff --git a/cohttp/src/request.ml b/cohttp/src/request.ml index 05ea64541..78746771b 100644 --- a/cohttp/src/request.ml +++ b/cohttp/src/request.ml @@ -42,16 +42,26 @@ let version t = t.version let encoding t = Header.get_transfer_encoding t.headers let make ?(meth = `GET) ?(version = `HTTP_1_1) ?encoding - ?(headers = Header.init ()) uri = - let headers = - Header.add_unless_exists headers "host" - (match Uri.scheme uri with - | Some "httpunix" -> "" - | _ -> ( - Uri.host_with_default ~default:"localhost" uri - ^ - match Uri.port uri with Some p -> ":" ^ string_of_int p | None -> "")) + ?(headers = Header.init ()) ?(absolute_form = false) uri = + let port () = + match Uri.port uri with + | Some p -> ":" ^ string_of_int p + | None when meth = `CONNECT -> ( + match Uri_services.tcp_port_of_uri uri with + | None -> failwith "A port is required for the CONNECT method." + | Some p -> ":" ^ string_of_int p) + | None -> "" + in + let host = + match Header.get headers "host" with + | None -> ( + match Uri.scheme uri with + | Some "httpunix" -> "" + | _ -> Uri.host_with_default ~default:"localhost" uri ^ port ()) + | Some host -> if String.contains host ':' then host else host ^ port () in + + let headers = Header.replace headers "host" host in let headers = Header.add_unless_exists headers "user-agent" Header.user_agent in @@ -66,7 +76,9 @@ let make ?(meth = `GET) ?(version = `HTTP_1_1) ?encoding Header.add_authorization headers auth | _, _, _ -> headers in - let resource = Uri.path_and_query uri in + let resource = + if absolute_form then Uri.to_string uri else Uri.path_and_query uri + in let headers = match encoding with | None -> headers @@ -80,7 +92,7 @@ let is_keep_alive t = Http.Request.is_keep_alive t adding content headers if appropriate. @param chunked Forces chunked encoding *) -let make_for_client ?headers ?chunked ?body_length meth uri = +let make_for_client ?headers ?chunked ?body_length ?absolute_form meth uri = let encoding = match (chunked, body_length) with | Some true, None -> Transfer.Chunked @@ -89,7 +101,7 @@ let make_for_client ?headers ?chunked ?body_length meth uri = | Some true, Some _ -> invalid_arg "cannot set both ?chunked and ?body_length:" in - make ~meth ~encoding ?headers uri + make ~meth ~encoding ?headers ?absolute_form uri let pp_hum ppf r = Format.fprintf ppf "%s" (r |> sexp_of_t |> Sexplib0.Sexp.to_string_hum) @@ -181,7 +193,9 @@ module Make (IO : S.IO) = struct let fst_line = Printf.sprintf "%s %s %s\r\n" (Http.Method.to_string req.meth) - (if req.resource = "" then "/" else req.resource) + (if req.meth = `CONNECT then Option.get (Header.get req.headers "host") + else if req.resource = "" then "/" + else req.resource) (Http.Version.to_string req.version) in IO.write oc fst_line >>= fun _ -> Header_IO.write req.headers oc diff --git a/cohttp/src/s.ml b/cohttp/src/s.ml index 915433ff9..8daf31664 100644 --- a/cohttp/src/s.ml +++ b/cohttp/src/s.ml @@ -102,6 +102,7 @@ module type Request = sig ?version:Code.version -> ?encoding:Transfer.encoding -> ?headers:Header.t -> + ?absolute_form:bool -> Uri.t -> t (** [make ()] is a value of {!type:t}. The default values for the request, if @@ -119,6 +120,7 @@ module type Request = sig ?headers:Header.t -> ?chunked:bool -> ?body_length:int64 -> + ?absolute_form:bool -> Code.meth -> Uri.t -> t diff --git a/dune-project b/dune-project index c3657fe7c..97d44921d 100644 --- a/dune-project +++ b/dune-project @@ -70,7 +70,7 @@ "This is a portable implementation of HTTP that uses the Lwt concurrency library\nto multiplex IO. It implements as much of the logic in an OS-independent way\nas possible, so that more specialised modules can be tailored for different\ntargets. For example, you can install `cohttp-lwt-unix` or `cohttp-lwt-jsoo`\nfor a Unix or JavaScript backend, or `cohttp-mirage` for the MirageOS unikernel\nversion of the library. All of these implementations share the same IO logic\nfrom this module.") (depends (ocaml - (>= 4.08)) + (>= 4.13)) (http (= :version)) (cohttp diff --git a/http/test/test_parser.ml b/http/test/test_parser.ml index d2c33f7dc..19707583e 100644 --- a/http/test/test_parser.ml +++ b/http/test/test_parser.ml @@ -162,6 +162,26 @@ let parse_result_notifies_start_of_body () = [%test_result: string] ~expect:"foobar" (String.sub buf ~pos:count ~len:(String.length buf - count)) +let parse_proxy_get () = + let buf = + "GET http://example.com/foo.html HTTP/1.1\r\n\ + Host: example.com\r\n\ + Proxy-Authorization: Basic dXNlcjpwYXNz\r\n\ + \r\n\ + foobar" + in + let expected_req = + make_req + ~headers: + (Http.Header.of_list + [ + ("Host", "example.com"); + ("Proxy-Authorization", "Basic dXNlcjpwYXNz"); + ]) + `GET "http://example.com/foo.html" + in + assert_req_success ~here:[ [%here] ] ~expected_req ~expected_consumed:104 buf + open Base_quickcheck let parse_chunk_length () = @@ -250,6 +270,7 @@ let () = test_case "validate http version" `Quick validate_http_version; test_case "parse result notified offset of start of optional body" `Quick parse_result_notifies_start_of_body; + test_case "parse a proxy GET request" `Quick parse_proxy_get; ] ); ( "chunked encoding", [