Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Lwt exception handling #250

Merged
merged 1 commit into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion obuilder
7 changes: 3 additions & 4 deletions ocurrent-plugin/connection.ml
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,8 @@ let submit ~job ~pool ~action ~cache_hint ?src ?secrets ~urgent t ~priority ~swi
| Lwt.Canceled as ex ->
if !stage = `Rate_limit then Prometheus.Gauge.dec_one Metrics.queue_rate_limit
else Log.warn (fun f -> f "Cancelled at unexpected point!");
Lwt.fail ex
| ex ->
Lwt.fail ex
Lwt.reraise ex
| ex -> Lwt.reraise ex
)
in
limiter_thread := Some use_thread;
Expand All @@ -139,7 +138,7 @@ let submit ~job ~pool ~action ~cache_hint ?src ?secrets ~urgent t ~priority ~swi
Lwt.pause () >>= fun () ->
if Capability.problem sched = None then (
(* The job failed but we're still connected to the scheduler. Report the error. *)
Lwt.fail_with (Fmt.str "%a" Capnp_rpc.Exception.pp err)
Fmt.failwith "%a" Capnp_rpc.Exception.pp err
) else (
limiter_thread := None;
begin match !stage with
Expand Down
4 changes: 2 additions & 2 deletions test/test_plugin.ml
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ let cancel_rate_limit () =
(fun _ -> failwith "Should have failed!")
(function
| Lwt.Canceled -> Lwt.return_unit
| ex -> Lwt.fail ex)
| ex -> Lwt.reraise ex)
>>= fun () ->
(* Finish connecting to the scheduler. *)
let sched =
Expand All @@ -221,7 +221,7 @@ let cancel_rate_limit () =
(fun _ -> failwith "Should have failed!")
(function
| Lwt.Canceled -> Lwt.return_unit
| ex -> Lwt.fail ex)
| ex -> Lwt.reraise ex)
>>= fun () ->
Lwt.return_unit

Expand Down
4 changes: 2 additions & 2 deletions worker/cluster_worker.ml
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ let run ?switch ?build ?(allow_push=[]) ?(healthcheck_period = 600.0) ?prune_thr
| _, Some switch when not (Lwt_switch.is_on switch) -> Lwt.return `Cancelled
| Some problem, _ ->
Log.info (fun f -> f "Worker loop failed (probably because queue connection failed): %a" Fmt.exn ex);
Lwt.fail (Failure (Fmt.to_to_string Capnp_rpc.Exception.pp problem)) (* Will retry *)
Fmt.failwith "%a" Capnp_rpc.Exception.pp problem (* Will retry *)
| None, _ ->
Lwt.return (`Crash ex)
)
Expand All @@ -566,6 +566,6 @@ let run ?switch ?build ?(allow_push=[]) ?(healthcheck_period = 600.0) ?prune_thr
in
reconnect () >>= function
| `Cancelled -> Lwt.return_unit
| `Crash ex -> Lwt.fail ex
| `Crash ex -> Lwt.reraise ex

module Obuilder_config = Obuilder_build.Config
8 changes: 4 additions & 4 deletions worker/context.ml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ let win32_unlink fn =
Lwt.catch
(fun () -> Lwt_unix.unlink fn)
(function
| Unix.Unix_error (Unix.EACCES, _, _) as exn ->
| Unix.Unix_error (Unix.EACCES, _, _) as ex ->
(* Try removing the read-only attribute before retrying unlink. We catch
any exception here and ignore it in favour of the original exception. *)
Lwt.catch
Expand All @@ -110,10 +110,10 @@ let win32_unlink fn =
(* If everything succeeded but the final removal still failed,
restore original permissions *)
Lwt_unix.chmod fn st_perm >>= fun () ->
Lwt.fail exn)
Lwt.reraise ex)
)
(fun _ -> Lwt.fail exn)
| exn -> Lwt.fail exn)
(fun _ -> Lwt.reraise ex)
| ex -> Lwt.reraise ex)

let unlink =
if Sys.win32 then
Expand Down