From d56ab322fc06689658db9d226795b4f223cf853d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulysse=20G=C3=A9rard?= Date: Thu, 22 Feb 2024 17:05:58 +0100 Subject: [PATCH] occurrences: add support for project-wide occurrences --- src/analysis/dune | 3 +- src/analysis/index_format.ml | 94 +++++++++++++++++++++++++- src/analysis/occurrences.ml | 116 ++++++++++++++++++++++++++------- src/analysis/occurrences.mli | 5 +- src/frontend/query_commands.ml | 11 ++-- 5 files changed, 195 insertions(+), 34 deletions(-) diff --git a/src/analysis/dune b/src/analysis/dune index 6b4d2f6d09..09227105d8 100644 --- a/src/analysis/dune +++ b/src/analysis/dune @@ -22,4 +22,5 @@ query_protocol ocaml_typing ocaml_utils - str)) + str + unix)) diff --git a/src/analysis/index_format.ml b/src/analysis/index_format.ml index 6ad601e7a9..e31baf369a 100644 --- a/src/analysis/index_format.ml +++ b/src/analysis/index_format.ml @@ -1,8 +1,11 @@ +exception Not_an_index of string + module Lid : Set.OrderedType with type t = Longident.t Location.loc = struct type t = Longident.t Location.loc let compare_pos (p1 : Lexing.position) (p2 : Lexing.position) = - match String.compare p1.pos_fname p2.pos_fname with + let p1f, p2f = Filename.(basename p1.pos_fname, basename p2.pos_fname) in + match String.compare p1f p2f with | 0 -> Int.compare p1.pos_cnum p2.pos_cnum | n -> n @@ -13,3 +16,92 @@ module Lid : Set.OrderedType with type t = Longident.t Location.loc = struct end module LidSet = Set.Make (Lid) +module Stats = Map.Make (String) + +(** [add tbl uid locs] adds a binding of [uid] to the locations [locs]. If this key is + already present the locations are merged. *) +let add tbl uid locs = + try + let locations = Hashtbl.find tbl uid in + Hashtbl.replace tbl uid (LidSet.union locs locations) + with Not_found -> Hashtbl.add tbl uid locs + +type stat = { mtime : float; size : int; source_digest: string option } +type index = { + defs : (Shape.Uid.t, LidSet.t) Hashtbl.t; + approximated : (Shape.Uid.t, LidSet.t) Hashtbl.t; + load_path : Load_path.paths; + cu_shape : (string, Shape.t) Hashtbl.t; + stats : stat Stats.t; +} + +let pp_partials (fmt : Format.formatter) + (partials : (Shape.Uid.t, LidSet.t) Hashtbl.t) = + Format.fprintf fmt "{@["; + Hashtbl.iter + (fun uid locs -> + Format.fprintf fmt "@[uid: %a; locs:@ @[%a@]@]@;" + Shape.Uid.print uid + (Format.pp_print_list + ~pp_sep:(fun fmt () -> Format.fprintf fmt ";@;") + (fun fmt { Location.txt; loc } -> + Format.fprintf fmt "%S: %a" + (try Longident.flatten txt |> String.concat "." with _ -> "") + Location.print_loc loc)) + (LidSet.elements locs)) + partials; + Format.fprintf fmt "@]}" + +let pp (fmt : Format.formatter) pl = + Format.fprintf fmt "%i uids:@ {@[" (Hashtbl.length pl.defs); + Hashtbl.iter + (fun uid locs -> + Format.fprintf fmt "@[uid: %a; locs:@ @[%a@]@]@;" + Shape.Uid.print uid + (Format.pp_print_list + ~pp_sep:(fun fmt () -> Format.fprintf fmt ";@;") + (fun fmt { Location.txt; loc } -> + Format.fprintf fmt "%S: %a" + (try Longident.flatten txt |> String.concat "." with _ -> "") + Location.print_loc loc)) + (LidSet.elements locs)) + pl.defs; + Format.fprintf fmt "@]},@ "; + Format.fprintf fmt "%i approx shapes:@ @[%a@],@ " + (Hashtbl.length pl.approximated) + pp_partials pl.approximated; + Format.fprintf fmt "and shapes for CUS %s.@ " + (String.concat ";@," (Hashtbl.to_seq_keys pl.cu_shape |> List.of_seq)) + +let ext = "ocaml-index" + +(* [magic_number] Must be the same lenght as cmt's magic numbers *) +let magic_number = "Merl2023I001" + +let write ~file index = + Merlin_utils.Misc.output_to_file_via_temporary ~mode:[ Open_binary ] file + (fun _temp_file_name oc -> + output_string oc magic_number; + output_value oc (index : index)) + +type file_content = Cmt of Cmt_format.cmt_infos | Index of index | Unknown + +let read ~file = + let ic = open_in_bin file in + Merlin_utils.Misc.try_finally + ~always:(fun () -> close_in ic) + (fun () -> + let file_magic_number = ref (Cmt_format.read_magic_number ic) in + let cmi_magic_number = Ocaml_utils.Config.cmi_magic_number in + let cmt_magic_number = Ocaml_utils.Config.cmt_magic_number in + (if String.equal !file_magic_number cmi_magic_number then + let _ = Cmi_format.input_cmi ic in + file_magic_number := Cmt_format.read_magic_number ic); + if String.equal !file_magic_number cmt_magic_number then + Cmt (input_value ic : Cmt_format.cmt_infos) + else if String.equal !file_magic_number magic_number then + Index (input_value ic : index) + else Unknown) + +let read_exn ~file = + match read ~file with Index index -> index | _ -> raise (Not_an_index file) diff --git a/src/analysis/occurrences.ml b/src/analysis/occurrences.ml index be4da7cf37..1800ccd24b 100644 --- a/src/analysis/occurrences.ml +++ b/src/analysis/occurrences.ml @@ -3,6 +3,8 @@ module LidSet = Index_format.LidSet let {Logger. log} = Logger.for_section "occurrences" +type res = { locs: Warnings.loc list; synced: bool } + let set_fname ~file (loc : Location.t) = let pos_fname = file in { loc with @@ -25,7 +27,7 @@ let decl_of_path_or_lid env namespace path lid = end | _ -> Env_lookup.loc path namespace env -let index_buffer_ ~current_buffer_path ~local_defs () = +let index_buffer_ ~scope ~current_buffer_path ~local_defs () = let {Logger. log} = Logger.for_section "index" in let defs = Hashtbl.create 64 in let module Shape_reduce = @@ -92,6 +94,7 @@ let index_buffer_ ~current_buffer_path ~local_defs () = occurrences in the current buffer only. *) let rec iter_on_path ~namespace path ({Location.txt; loc} as lid) = let () = f ~namespace env path lid in + if scope = `Buffer then match path, txt with | Pdot (path, _), Ldot (lid, s) -> let length_with_dot = String.length s + 1 in @@ -112,19 +115,20 @@ let index_buffer = (* Right now, we only cache the last used index. We could do better by caching the index for every known buffer. *) let cache = ref None in - fun ~current_buffer_path ~stamp ~local_defs () -> + fun ~scope ~current_buffer_path ~stamp ~local_defs () -> let {Logger. log} = Logger.for_section "index" in match !cache with - | Some (path, stamp', value) when + | Some (path, stamp', scope', value) when String.equal path current_buffer_path - && Int.equal stamp' stamp -> + && Int.equal stamp' stamp + && scope' = scope -> log ~title:"index_cache" "Reusing cached value for path %s and stamp %i." path stamp'; value | _ -> log ~title:"index_cache" "No valid cache found, reindexing."; - let result = index_buffer_ ~current_buffer_path ~local_defs () in - cache := Some (current_buffer_path, stamp, result); + let result = index_buffer_ ~current_buffer_path ~scope ~local_defs () in + cache := Some (current_buffer_path, stamp, scope, result); result (* A longident can have the form: A.B.x Right now we are only interested in @@ -169,7 +173,24 @@ let comp_unit_of_uid = function | Item { comp_unit; _ } -> Some comp_unit | Internal | Predef _ -> None -let locs_of ~config ~env ~typer_result ~pos path = +let check Index_format.{ stats; _ } file = + let open Index_format in + match Stats.find_opt file stats with + | None -> log ~title:"stat_check" "No mtime found for file %S." file; true + | Some { size; _ } -> + try + let stats = Unix.stat file in + let equal = + (* This is fast but approximative. A better option would be to check + [mtime] and then [source_digest] if the times differ. *) + Int.equal stats.st_size size + in + log ~title:"stat_check" + "File %s has been modified since the index was built." file; + equal + with Unix.Unix_error _ -> false + +let locs_of ~config ~env ~typer_result ~pos ~scope path = log ~title:"occurrences" "Looking for occurences of %s (pos: %s)" path (Lexing.print_position () pos); @@ -179,28 +200,29 @@ let locs_of ~config ~env ~typer_result ~pos path = ~config:{ mconfig = config; traverse_aliases=false; ml_or_mli = `ML} ~env ~local_defs ~pos path in - let def = + (* When we fail to find an exact definition we restrict scope to `Buffer *) + let def, scope = match locate_result with | `At_origin -> log ~title:"locs_of" "Cursor is on definition / declaration"; (* We are on a definition / declaration so we look for the node's uid *) let browse = Mbrowse.of_typedtree local_defs in let env, node = Mbrowse.leaf_node (Mbrowse.enclosing pos [browse]) in - uid_and_loc_of_node env node + uid_and_loc_of_node env node, scope | `Found { uid; location; approximated = false; _ } -> log ~title:"locs_of" "Found definition uid using locate: %a " Logger.fmt (fun fmt -> Shape.Uid.print fmt uid); - Some (uid, location) + Some (uid, location), scope | `Found { decl_uid; location; approximated = true; _ } -> - log ~title:"locs_of" "Approx: %a " + log ~title:"locs_of" "Approx. definition: %a " Logger.fmt (fun fmt -> Shape.Uid.print fmt decl_uid); - Some (decl_uid, location) + Some (decl_uid, location), `Buffer | `Builtin (uid, s) -> log ~title:"locs_of" "Locate found a builtin: %s" s; - Some (uid, Location.none) + Some (uid, Location.none), scope | _ -> log ~title:"locs_of" "Locate failed to find a definition."; - None + None, `Buffer in let current_buffer_path = Filename.concat config.query.directory config.query.filename @@ -213,23 +235,67 @@ let locs_of ~config ~env ~typer_result ~pos path = log ~title:"locs_of" "Indexing current buffer"; let buffer_index = let stamp = Mtyper.get_stamp typer_result in - index_buffer ~current_buffer_path ~stamp ~local_defs () + index_buffer ~scope ~current_buffer_path ~stamp ~local_defs () in let buffer_locs = Hashtbl.find_opt buffer_index def_uid in - let locs = Option.value ~default:LidSet.empty buffer_locs in + let external_locs, desync = + if scope = `Buffer then [], false else begin + let exception File_changed in + try + let locs = List.filter_map config.merlin.index_files ~f:(fun file -> + let external_index = Index_format.read_exn ~file in + Hashtbl.find_opt external_index.defs def_uid + |> Option.map ~f:(fun locs -> LidSet.filter (fun {loc; _} -> + (* We ignore external results that concern the current buffer *) + let fname = loc.Location.loc_start.Lexing.pos_fname in + if String.equal fname current_buffer_path then false + else begin + (* We ignore external results if the index is not up-to-date *) + (* We could return partial results from up-to-date file *) + if not (check external_index fname) then begin + log ~title:"locs_of" "File %s might be out-of-sync." fname; + raise File_changed + end; + true + end) locs)) + in + locs, false + with File_changed -> [], true + end + in + if desync then log ~title:"locs_of" "External index might be out-of-sync."; + let locs = + let all_locs = + match buffer_locs with + | Some buffer_locs -> buffer_locs :: external_locs + | None -> external_locs + in + List.fold_left ~init:LidSet.empty ~f:LidSet.union all_locs + in let locs = - log ~title:"occurrences" "Found %i locs" (LidSet.cardinal locs); - LidSet.elements locs - |> List.map ~f:(fun {Location.txt; loc} -> - log ~title:"occurrences" "Found occ: %s %a" - (Longident.head txt) Logger.fmt (Fun.flip Location.print_loc loc); - last_loc loc txt) + log ~title:"occurrences" "Found %i locs" (LidSet.cardinal locs); + LidSet.elements locs + |> List.filter_map ~f:(fun {Location.txt; loc} -> + log ~title:"occurrences" "Found occ: %s %a" + (Longident.head txt) Logger.fmt (Fun.flip Location.print_loc loc); + let loc = last_loc loc txt in + let fname = loc.Location.loc_start.Lexing.pos_fname in + if Filename.is_relative fname then begin + match Locate.find_source ~config loc fname with + | `Found (file, _) -> Some (set_fname ~file loc) + | `File_not_found msg -> + log ~title:"occurrences" "%s" msg; + None + end else Some loc) in let def_uid_is_in_current_unit = let uid_comp_unit = comp_unit_of_uid def_uid in Option.value_map ~default:false uid_comp_unit ~f:(String.equal @@ Env.get_unit_name ()) in - if not def_uid_is_in_current_unit then Ok locs - else Ok (set_fname ~file:current_buffer_path def_loc :: locs) - | None -> Error "Could not find the uid of the definition." + let synced = not desync in + if not def_uid_is_in_current_unit then Ok { locs; synced } + else + let locs = set_fname ~file:current_buffer_path def_loc :: locs in + Ok { locs; synced } + | None -> Error "Could not find the definition [uid]" diff --git a/src/analysis/occurrences.mli b/src/analysis/occurrences.mli index eea7b6b3e7..4a60ac3908 100644 --- a/src/analysis/occurrences.mli +++ b/src/analysis/occurrences.mli @@ -1,7 +1,10 @@ +type res = { locs: Warnings.loc list; synced: bool } + val locs_of : config:Mconfig.t -> env:Env.t -> typer_result:Mtyper.result -> pos:Lexing.position + -> scope:[`Project | `Buffer] -> string - -> (Warnings.loc list, string) result + -> (res, string) result diff --git a/src/frontend/query_commands.ml b/src/frontend/query_commands.ml index 150a53e622..379ba19ed8 100644 --- a/src/frontend/query_commands.ml +++ b/src/frontend/query_commands.ml @@ -795,7 +795,7 @@ let dispatch pipeline (type a) : a Query_protocol.t -> a = let config = Mpipeline.final_config pipeline in Mconfig.(config.merlin.source_path) - | Occurrences (`Ident_at pos, _) -> + | Occurrences (`Ident_at pos, scope) -> let config = Mpipeline.final_config pipeline in let typer_result = Mpipeline.typer_result pipeline in let pos = Mpipeline.get_lexing_pos pipeline pos in @@ -809,12 +809,11 @@ let dispatch pipeline (type a) : a Query_protocol.t -> a = path in let locs = - Occurrences.locs_of ~config ~env ~typer_result ~pos path - |> Result.value ~default:[] + match Occurrences.locs_of ~config ~env ~typer_result ~pos ~scope path with + | Ok { locs; _ } -> locs + | Error _ -> [] in - let loc_start l = l.Location.loc_start in - let cmp l1 l2 = Lexing.compare_pos (loc_start l1) (loc_start l2) in - (List.sort ~cmp locs) + locs | Version -> Printf.sprintf "The Merlin toolkit version %s, for Ocaml %s\n"