From 3f2172e7fc93311aaa928755027719a6c02a132f Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Tue, 13 Feb 2024 20:32:04 -0800 Subject: [PATCH] refactor: top down source tree construction Signed-off-by: Rudi Grinberg --- src/dune_rules/source_tree.ml | 509 ++++++++++++++-------------------- 1 file changed, 201 insertions(+), 308 deletions(-) diff --git a/src/dune_rules/source_tree.ml b/src/dune_rules/source_tree.ml index bdeea685453..428f29aa690 100644 --- a/src/dune_rules/source_tree.ml +++ b/src/dune_rules/source_tree.ml @@ -6,59 +6,29 @@ module Dirs_visited : sig type t val singleton : Path.Source.t -> Readdir.File.t -> t - - module Per_fn : sig - (** Stores the directories visited per node (basename) *) - - type dirs_visited := t - type t - - val init : t - val find : t -> Path.Source.t -> dirs_visited - val add : t -> dirs_visited -> path:Path.Source.t -> Filename.t * Readdir.File.t -> t - end + val empty : t + val add : t -> Path.Source.t -> Readdir.File.t -> t end = struct type t = Path.Source.t Readdir.File.Map.t + let empty = Readdir.File.Map.empty let singleton path file = Readdir.File.Map.singleton file path - module Per_fn = struct - type nonrec t = t Filename.Map.t - - let init = Filename.Map.empty - - let find t path = - Path.Source.basename path - |> Filename.Map.find t - |> Option.value ~default:Readdir.File.Map.empty - ;; - - let add (acc : t) dirs_visited ~path (fn, file) = - if Sys.win32 - then acc - else ( - let new_dirs_visited = - Readdir.File.Map.update dirs_visited file ~f:(function - | None -> Some path - | Some first_path -> - User_error.raise - [ Pp.textf - "Path %s has already been scanned. Cannot scan it again through \ - symlink %s" - (Path.Source.to_string_maybe_quoted first_path) - (Path.Source.to_string_maybe_quoted path) - ]) - in - Filename.Map.add_exn acc fn new_dirs_visited) - ;; - end -end - -module Output = struct - type 'a t = - { dir : 'a - ; visited : Dirs_visited.Per_fn.t - } + let add (t : t) (path : Path.Source.t) file = + if Sys.win32 + then t + else + Readdir.File.Map.update t file ~f:(function + | None -> Some path + | Some first_path -> + User_error.raise + [ Pp.textf + "Path %s has already been scanned. Cannot scan it again through symlink \ + %s" + (Path.Source.to_string_maybe_quoted first_path) + (Path.Source.to_string_maybe_quoted path) + ]) + ;; end module Dir0 = struct @@ -73,13 +43,11 @@ module Dir0 = struct and sub_dir = { sub_dir_status : Source_dir_status.t - ; virtual_ : bool - ; sub_dir_as_t : (Path.Source.t, t Output.t option) Memo.Cell.t + ; sub_dir_as_t : t Memo.t } let rec to_dyn { path; status; files; dune_file; sub_dirs; project = _ } = - let open Dyn in - Record + Dyn.record [ "path", Path.Source.to_dyn path ; "status", Source_dir_status.to_dyn status ; "files", Filename.Set.to_dyn files @@ -87,14 +55,8 @@ module Dir0 = struct ; ("dune_file", Dyn.(option opaque dune_file)) ] - and dyn_of_sub_dir { sub_dir_status; sub_dir_as_t; virtual_ } = - let open Dyn in - let path = Memo.Cell.input sub_dir_as_t in - record - [ "status", Source_dir_status.to_dyn sub_dir_status - ; "sub_dir_as_t", Path.Source.to_dyn path - ; "virtual_", bool virtual_ - ] + and dyn_of_sub_dir { sub_dir_status; sub_dir_as_t = _ } = + Dyn.record [ "status", Source_dir_status.to_dyn sub_dir_status ] ;; let path t = t.path @@ -109,272 +71,203 @@ module Dir0 = struct Filename.Set.add acc s) ;; - let sub_dir_as_t (s : sub_dir) = - let+ t = Memo.Cell.read s.sub_dir_as_t in - (Option.value_exn t).dir - ;; + let sub_dir_as_t (s : sub_dir) = s.sub_dir_as_t end -module rec Memoized : sig - val root : unit -> Dir0.t Memo.t - - (* Not part of the interface. Only necessary to call recursively *) - val find_dir_raw : Path.Source.t -> (Path.Source.t, Dir0.t Output.t option) Memo.Cell.t - val find_dir : Path.Source.t -> Dir0.t option Memo.t -end = struct - open Memoized - - module Get_subdir = struct - let status ~status_map ~(parent_status : Source_dir_status.t) dir - : Source_dir_status.t option - = - match Source_dir_status.Per_dir.status status_map ~dir with - | Ignored -> None - | Status status -> - Some - (match parent_status, status with - | Data_only, _ -> Data_only - | Vendored, Normal -> Vendored - | _, _ -> status) - ;; - - let make_subdir ~dir_status ~virtual_ path = - let sub_dir_as_t = find_dir_raw path in - { Dir0.sub_dir_status = dir_status; sub_dir_as_t; virtual_ } - ;; +let eval_status ~status_map ~(parent_status : Source_dir_status.t) dir + : Source_dir_status.t option + = + match Source_dir_status.Per_dir.status status_map ~dir with + | Ignored -> None + | Status status -> + Some + (match parent_status, status with + | Data_only, _ -> Data_only + | Vendored, Normal -> Vendored + | _, _ -> status) +;; - let physical ~dir ~dirs_visited ~dirs ~sub_dirs ~parent_status = - let status_map = - Source_dir_status.Spec.eval sub_dirs ~dirs:(List.map ~f:(fun (a, _) -> a) dirs) - in - List.fold_left - dirs - ~init:(Dirs_visited.Per_fn.init, Filename.Map.empty) - ~f:(fun (dirs_visited_acc, subdirs) ((fn, _) as info) -> - match status ~status_map ~parent_status fn with - | None -> dirs_visited_acc, subdirs - | Some dir_status -> - let dirs_visited_acc = - let path = Path.Source.relative dir fn in - Dirs_visited.Per_fn.add dirs_visited_acc dirs_visited ~path info - in - let subdirs = - Path.Source.relative dir fn - |> make_subdir ~dir_status ~virtual_:false - |> Filename.Map.add_exn subdirs fn - in - dirs_visited_acc, subdirs) - ;; +let error_unable_to_load ~path unix_error = + User_error.raise + [ Pp.textf "Unable to load source %s." (Path.Source.to_string_maybe_quoted path) + ; Unix_error.Detailed.pp ~prefix:"Reason: " unix_error + ] +;; - let virtual_ ~sub_dirs ~parent_status ~dune_file ~init ~path = +let rec physical ~project ~dir ~dirs_visited ~dirs ~sub_dirs ~dune_file ~parent_status = + let status_map = + Source_dir_status.Spec.eval sub_dirs ~dirs:(List.map ~f:(fun (a, _) -> a) dirs) + in + List.fold_left dirs ~init:Filename.Map.empty ~f:(fun subdirs (fn, file) -> + match eval_status ~status_map ~parent_status fn with + | None -> subdirs + | Some dir_status -> + let path = Path.Source.relative dir fn in + let dirs_visited = Dirs_visited.add dirs_visited path file in + { Dir0.sub_dir_status = dir_status + ; sub_dir_as_t = + Memo.lazy_cell (fun () -> + find_dir_raw + ~path + ~basename:fn + ~virtual_:false + ~dirs_visited + ~dune_file + ~status:dir_status + ~project) + |> Memo.Cell.read + } + |> Filename.Map.add_exn subdirs fn) + +and virtual_ ~project ~sub_dirs ~parent_status ~dune_file ~init ~path = + match dune_file with + | None -> init + | Some df -> + (* There's no files to read for virtual directories, but we still record + their entries *) + let dirs = Dune_file0.sub_dirnames df in + let status_map = Source_dir_status.Spec.eval sub_dirs ~dirs in + List.fold_left dirs ~init ~f:(fun acc fn -> + match eval_status ~status_map ~parent_status fn with + | None -> acc + | Some status -> + Filename.Map.update acc fn ~f:(function + (* Physical directories have already been added so they are + skipped here. + + CR-rgrinberg: we should still update the status for these + directories if it hasn't been set *) + | Some _ as r -> r + | None -> + Some + { Dir0.sub_dir_status = status + ; sub_dir_as_t = + Memo.lazy_cell (fun () -> + find_dir_raw + ~path:(Path.Source.relative path fn) + ~basename:fn + ~virtual_:true + ~dune_file + ~status + ~dirs_visited:Dirs_visited.empty + ~project) + |> Memo.Cell.read + })) + +and contents + readdir + ~path + ~dune_file + ~dirs_visited + ~project + ~(dir_status : Source_dir_status.t) + = + let files = Readdir.files readdir in + let+ dune_file = + Dune_file0.load ~dir:path dir_status project ~files ~parent:dune_file + in + let sub_dirs = + let sub_dirs = match dune_file with - | None -> init - | Some df -> - (* There's no files to read for virtual directories, but we still record - their entries *) - let dirs = Dune_file0.sub_dirnames df in - let status_map = Source_dir_status.Spec.eval sub_dirs ~dirs in - List.fold_left dirs ~init ~f:(fun acc fn -> - match status ~status_map ~parent_status fn with - | None -> acc - | Some dir_status -> - Filename.Map.update acc fn ~f:(function - (* Physical directories have already been added so they are - skipped here. - - CR-rgrinberg: we should still update the status for these - directories if it hasn't been set *) - | Some _ as r -> r - | None -> - let path = Path.Source.relative path fn in - (* CR-rgrinberg: we could introduce a simplified - call paths for virtual directories since we know the - children of a virtual directory are also virtual. *) - Some (make_subdir ~dir_status ~virtual_:true path))) - ;; - - let all ~dirs_visited ~dirs ~sub_dirs ~parent_status ~dune_file ~path = - let visited, init = - physical ~dir:path ~dirs_visited ~dirs ~sub_dirs ~parent_status - in - let init = virtual_ ~sub_dirs ~parent_status ~dune_file ~init ~path in - visited, init - ;; - end - - let contents - readdir - ~path - ~parent_dune_file - ~dirs_visited - ~project - ~(dir_status : Source_dir_status.t) - = - let files = Readdir.files readdir in - let+ dune_file = - Dune_file0.load ~dir:path dir_status project ~files ~parent:parent_dune_file + | None -> Source_dir_status.Spec.default + | Some dune_file -> Dune_file0.sub_dir_status dune_file in - let dirs_visited, sub_dirs = - let sub_dirs = - match dune_file with - | None -> Source_dir_status.Spec.default - | Some dune_file -> Dune_file0.sub_dir_status dune_file - in - Get_subdir.all + let dirs = + physical + ~project + ~dir:path ~dirs_visited ~dirs:(Readdir.dirs readdir) ~sub_dirs - ~parent_status:dir_status ~dune_file - ~path - in - { Dir0.project; status = dir_status; path; files; sub_dirs; dune_file }, dirs_visited - ;; - - let error_unable_to_load ~path unix_error = - User_error.raise - [ Pp.textf "Unable to load source %s." (Path.Source.to_string_maybe_quoted path) - ; Unix_error.Detailed.pp ~prefix:"Reason: " unix_error - ] - ;; - - let root () = - let path = Path.Source.root in - let dir_status : Source_dir_status.t = Normal in - let+ dir, visited = - let* readdir = - Readdir.of_source_path path - >>| function - | Ok dir -> dir - | Error unix_error -> error_unable_to_load ~path unix_error - in - let* project = - Dune_project.load - ~dir:path - ~files:(Readdir.files readdir) - ~infer_from_opam_files:true - >>| (function - | Some p -> p - | None -> - Dune_project.anonymous - ~dir:path - Package_info.empty - Package.Name.Map.empty) - >>| Only_packages.filter_packages_in_project ~vendored:(dir_status = Vendored) - in - let* dirs_visited = - Readdir.File.of_source_path (In_source_dir path) - >>| function - | Ok file -> Dirs_visited.singleton path file - | Error unix_error -> error_unable_to_load ~path unix_error - in - contents readdir ~path ~parent_dune_file:None ~dirs_visited ~project ~dir_status - in - { Output.dir; visited } - ;; - - let find_dir_raw_impl path : Dir0.t Output.t option Memo.t = - match Path.Source.parent path with - | None -> - let+ root = root () in - Some root - | Some parent_dir -> - let* parent = Memo.Cell.read (find_dir_raw parent_dir) in - (match - let open Option.O in - let* { Output.dir = parent_dir; visited = dirs_visited } = parent in - let+ dir_status, virtual_ = - let basename = Path.Source.basename path in - let+ sub_dir = Filename.Map.find parent_dir.sub_dirs basename in - let status = - let status = sub_dir.sub_dir_status in - if Dune_project.cram parent_dir.project && Cram_test.is_cram_suffix basename - then Source_dir_status.Data_only - else status - in - status, sub_dir.virtual_ - in - parent_dir, dirs_visited, dir_status, virtual_ - with - | None -> Memo.return None - | Some (parent_dir, dirs_visited, dir_status, virtual_) -> - let* readdir = - if virtual_ - then Memo.return Readdir.empty - else - Readdir.of_source_path path - >>| function - | Ok dir -> dir - | Error _ -> Readdir.empty - in - let* project = - if dir_status = Data_only - then Memo.return parent_dir.project - else - Dune_project.load - ~dir:path - ~files:(Readdir.files readdir) - ~infer_from_opam_files:false - >>| Option.map - ~f: - (Only_packages.filter_packages_in_project - ~vendored:(dir_status = Vendored)) - >>| Option.value ~default:parent_dir.project - in - let+ dir, visited = - let dirs_visited = Dirs_visited.Per_fn.find dirs_visited path in - contents - readdir - ~path - ~parent_dune_file:parent_dir.dune_file - ~dirs_visited - ~project - ~dir_status - in - Some { Output.dir; visited }) - ;; - - let find_dir_raw = - let memo = - (* amokhov: After running some experiments, I convinced myself that it's - not worth adding a [cutoff] here because we don't recompute this - function very often (the [find_dir] calls are probably guarded by other - cutoffs). Note also that adding a [cutoff] here is non-trivial because - [Dir0.t] stores memoization cells in [sub_dir_as_t]. *) - Memo.create "find-dir-raw" ~input:(module Path.Source) find_dir_raw_impl + ~parent_status:dir_status in - Memo.cell memo - ;; + virtual_ ~project ~sub_dirs ~parent_status:dir_status ~dune_file ~path ~init:dirs + in + { Dir0.project; status = dir_status; path; files; sub_dirs; dune_file } + +and find_dir_raw ~virtual_ ~dune_file ~status ~dirs_visited ~project ~path ~basename + : Dir0.t Memo.t + = + let status = + if Dune_project.cram project && Cram_test.is_cram_suffix basename + then Source_dir_status.Data_only + else status + in + let* readdir = + if virtual_ + then Memo.return Readdir.empty + else + Readdir.of_source_path path + >>| function + | Ok dir -> dir + | Error _ -> Readdir.empty + in + let* project = + if status = Data_only + then Memo.return project + else + Dune_project.load + ~dir:path + ~files:(Readdir.files readdir) + ~infer_from_opam_files:false + >>| Option.map + ~f:(Only_packages.filter_packages_in_project ~vendored:(status = Vendored)) + >>| Option.value ~default:project + in + contents readdir ~path ~dune_file ~dirs_visited ~project ~dir_status:status +;; - let find_dir p = - Memo.Cell.read (find_dir_raw p) +let root = + Memo.lazy_cell + @@ fun () -> + let path = Path.Source.root in + let dir_status : Source_dir_status.t = Normal in + let* readdir = + Readdir.of_source_path path >>| function - | Some { Output.dir; visited = _ } -> Some dir - | None -> None - ;; - - let root () = find_dir Path.Source.root >>| Option.value_exn -end - -let root () = Memoized.root () -let find_dir path = Memoized.find_dir path + | Ok dir -> dir + | Error unix_error -> error_unable_to_load ~path unix_error + in + let* project = + Dune_project.load ~dir:path ~files:(Readdir.files readdir) ~infer_from_opam_files:true + >>| (function + | Some p -> p + | None -> + Dune_project.anonymous ~dir:path Package_info.empty Package.Name.Map.empty) + >>| Only_packages.filter_packages_in_project ~vendored:(dir_status = Vendored) + in + let* dirs_visited = + Readdir.File.of_source_path (In_source_dir path) + >>| function + | Ok file -> Dirs_visited.singleton path file + | Error unix_error -> error_unable_to_load ~path unix_error + in + contents readdir ~path ~dune_file:None ~dirs_visited ~project ~dir_status +;; -let rec nearest_dir t = function - | [] -> Memo.return t - | comp :: components -> - (match Filename.Map.find (Dir0.sub_dirs t) comp with - | None -> Memo.return t - | Some sub_dir -> - let* sub_dir = Dir0.sub_dir_as_t sub_dir in - nearest_dir sub_dir components) +let gen_find_dir = + let rec loop on_success on_last_found components (dir : Dir0.t) = + match components with + | [] -> on_success dir + | x :: xs -> + (match Filename.Map.find dir.sub_dirs x with + | None -> on_last_found dir + | Some dir -> dir.sub_dir_as_t >>= loop on_success on_last_found xs) + in + fun ~on_success ~on_last_found p -> + Memo.Cell.read root >>= loop on_success on_last_found (Path.Source.explode p) ;; -let nearest_dir path = - let components = Path.Source.explode path in - let* root = root () in - nearest_dir root components +let find_dir = + gen_find_dir + ~on_success:(fun dir -> Memo.return (Some dir)) + ~on_last_found:(fun _ -> Memo.return None) ;; +let nearest_dir = gen_find_dir ~on_success:Memo.return ~on_last_found:Memo.return +let root () = Memo.Cell.read root + let files_of path = find_dir path >>| function