Skip to content

Commit

Permalink
index_format: using maps is more efficient when merging indexes
Browse files Browse the repository at this point in the history
  • Loading branch information
voodoos committed Apr 24, 2024
1 parent 1792af8 commit a8db612
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 33 deletions.
22 changes: 14 additions & 8 deletions src/analysis/occurrences.ml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
open Std
module LidSet = Index_format.LidSet
module Lid_set = Index_format.Lid_set

let {Logger. log} = Logger.for_section "occurrences"

Expand Down Expand Up @@ -30,6 +30,12 @@ let decl_of_path_or_lid env namespace path lid =
let index_buffer_ ~scope ~current_buffer_path ~local_defs () =
let {Logger. log} = Logger.for_section "index" in
let defs = Hashtbl.create 64 in
let add tbl uid locs =
try
let locations = Hashtbl.find tbl uid in
Hashtbl.replace tbl uid (Lid_set.union locs locations)
with Not_found -> Hashtbl.add tbl uid locs
in
let module Shape_reduce =
Shape_reduce.Make (struct
let fuel = 10
Expand All @@ -56,7 +62,7 @@ let index_buffer_ ~scope ~current_buffer_path ~local_defs () =
| Some decl ->
log ~title:"index_buffer" "Found declaration: %a"
Logger.fmt (Fun.flip Location.print_loc decl.loc);
Index_format.(add defs decl.uid (LidSet.singleton lid))
add defs decl.uid (Lid_set.singleton lid)
end
in
if not_ghost lid then
Expand All @@ -72,7 +78,7 @@ let index_buffer_ ~scope ~current_buffer_path ~local_defs () =
(Longident.head lid.txt)
Logger.fmt (Fun.flip Location.print_loc lid.loc)
Logger.fmt (Fun.flip Shape.Uid.print uid);
Index_format.(add defs uid (LidSet.singleton lid))
add defs uid (Lid_set.singleton lid)
| Some uid, true ->
log ~title:"index_buffer" "Shape is approximative, found uid: %a"
Logger.fmt (Fun.flip Shape.Uid.print uid);
Expand Down Expand Up @@ -238,8 +244,8 @@ let locs_of ~config ~env ~typer_result ~pos ~scope path =
try
let locs = List.filter_map config.merlin.index_files ~f:(fun file ->
let external_index = Index_cache.read file in
Hashtbl.find_opt external_index.defs def_uid
|> Option.map ~f:(fun locs -> LidSet.filter (fun {loc; _} ->
Index_format.Uid_map.find_opt def_uid external_index.defs
|> Option.map ~f:(fun locs -> Lid_set.filter (fun {loc; _} ->
(* We ignore external results that concern the current buffer *)
let fname = loc.Location.loc_start.Lexing.pos_fname in
if String.equal fname current_buffer_path then false
Expand All @@ -264,11 +270,11 @@ let locs_of ~config ~env ~typer_result ~pos ~scope path =
| Some buffer_locs -> buffer_locs :: external_locs
| None -> external_locs
in
List.fold_left ~init:LidSet.empty ~f:LidSet.union all_locs
List.fold_left ~init:Lid_set.empty ~f:Lid_set.union all_locs
in
let locs =
log ~title:"occurrences" "Found %i locs" (LidSet.cardinal locs);
LidSet.elements locs
log ~title:"occurrences" "Found %i locs" (Lid_set.cardinal locs);
Lid_set.elements locs
|> List.filter_map ~f:(fun {Location.txt; loc} ->
log ~title:"occurrences" "Found occ: %s %a"
(Longident.head txt) Logger.fmt (Fun.flip Location.print_loc loc);
Expand Down
40 changes: 19 additions & 21 deletions src/index-format/index_format.ml
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,29 @@ module Lid : Set.OrderedType with type t = Longident.t Location.loc = struct
| n -> n
end

module LidSet = Set.Make (Lid)
module Lid_set = Set.Make (Lid)
module Uid_map = Shape.Uid.Map
module Stats = Map.Make (String)

(** [add tbl uid locs] adds a binding of [uid] to the locations [locs]. If this key is
(** [add map uid locs] adds a binding of [uid] to the locations [locs]. If this key is
already present the locations are merged. *)
let add tbl uid locs =
try
let locations = Hashtbl.find tbl uid in
Hashtbl.replace tbl uid (LidSet.union locs locations)
with Not_found -> Hashtbl.add tbl uid locs
let add map uid locs = Uid_map.update uid (function
| None -> Some locs
| Some locs' -> Some (Lid_set.union locs' locs))
map

type stat = { mtime : float; size : int; source_digest : string option }

type stat = { mtime : float; size : int; source_digest: string option }
type index = {
defs : (Shape.Uid.t, LidSet.t) Hashtbl.t;
approximated : (Shape.Uid.t, LidSet.t) Hashtbl.t;
load_path : Load_path.paths;
defs : Lid_set.t Uid_map.t;
approximated : Lid_set.t Uid_map.t;
cu_shape : (string, Shape.t) Hashtbl.t;
stats : stat Stats.t;
}

let pp_partials (fmt : Format.formatter)
(partials : (Shape.Uid.t, LidSet.t) Hashtbl.t) =
let pp_partials (fmt : Format.formatter) (partials : Lid_set.t Uid_map.t) =
Format.fprintf fmt "{@[";
Hashtbl.iter
Uid_map.iter
(fun uid locs ->
Format.fprintf fmt "@[<hov 2>uid: %a; locs:@ @[<v>%a@]@]@;"
Shape.Uid.print uid
Expand All @@ -48,13 +47,13 @@ let pp_partials (fmt : Format.formatter)
Format.fprintf fmt "%S: %a"
(try Longident.flatten txt |> String.concat "." with _ -> "<?>")
Location.print_loc loc))
(LidSet.elements locs))
(Lid_set.elements locs))
partials;
Format.fprintf fmt "@]}"

let pp (fmt : Format.formatter) pl =
Format.fprintf fmt "%i uids:@ {@[" (Hashtbl.length pl.defs);
Hashtbl.iter
Format.fprintf fmt "%i uids:@ {@[" (Uid_map.cardinal pl.defs);
Uid_map.iter
(fun uid locs ->
Format.fprintf fmt "@[<hov 2>uid: %a; locs:@ @[<v>%a@]@]@;"
Shape.Uid.print uid
Expand All @@ -64,19 +63,18 @@ let pp (fmt : Format.formatter) pl =
Format.fprintf fmt "%S: %a"
(try Longident.flatten txt |> String.concat "." with _ -> "<?>")
Location.print_loc loc))
(LidSet.elements locs))
(Lid_set.elements locs))
pl.defs;
Format.fprintf fmt "@]},@ ";
Format.fprintf fmt "%i approx shapes:@ @[%a@],@ "
(Hashtbl.length pl.approximated)
(Uid_map.cardinal pl.approximated)
pp_partials pl.approximated;
Format.fprintf fmt "and shapes for CUS %s.@ "
(String.concat ";@," (Hashtbl.to_seq_keys pl.cu_shape |> List.of_seq))

let ext = "ocaml-index"

(* [magic_number] Must be the same lenght as cmt's magic numbers *)
let magic_number = "Merl2023I001"
let magic_number = Config.index_magic_number

let write ~file index =
Misc.output_to_file_via_temporary ~mode:[ Open_binary ] file
Expand Down
37 changes: 37 additions & 0 deletions src/index-format/index_format.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
exception Not_an_index of string

val ext : string
val magic_number : string

module Lid : Set.OrderedType with type t = Longident.t Location.loc
module Lid_set : Set.S with type elt = Lid.t
module Stats : Map.S with type key = String.t
module Uid_map = Shape.Uid.Map

type stat = {
mtime : float;
size : int;
source_digest : string option
}

type index = {
defs : Lid_set.t Uid_map.t;
approximated : Lid_set.t Uid_map.t;
cu_shape : (string, Shape.t) Hashtbl.t;
stats : stat Stats.t;
}

val pp : Format.formatter -> index -> unit

(** [add tbl uid locs] adds a binding of [uid] to the locations [locs]. If this
key is already present the locations are merged. *)
val add : Lid_set.t Uid_map.t -> Shape.Uid.t -> Lid_set.t -> Lid_set.t Uid_map.t

type file_content =
| Cmt of Cmt_format.cmt_infos
| Index of index
| Unknown

val write : file:string -> index -> unit
val read : file:string -> file_content
val read_exn : file:string -> index
1 change: 1 addition & 0 deletions src/ocaml/utils/config.ml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ and ast_impl_magic_number = "Caml1999M034"
and ast_intf_magic_number = "Caml1999N034"
and cmxs_magic_number = "Caml1999D034"
and cmt_magic_number = "Caml1999T034"
and index_magic_number = "Merl2023I001"

let interface_suffix = ref ".mli"

Expand Down
2 changes: 2 additions & 0 deletions src/ocaml/utils/config.mli
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ val cmxs_magic_number: string
(* Magic number for dynamically-loadable plugins *)
val cmt_magic_number: string
(* Magic number for compiled interface files *)
val index_magic_number: string
(* Magic number for index files *)

val max_tag: int
(* Biggest tag that can be stored in the header of a regular block. *)
Expand Down
8 changes: 4 additions & 4 deletions tests/test-dirs/occurrences/project-wide/pwo-basic.t
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@
$ ocaml-index aggregate main.cmt lib.cmt
$ ocaml-index dump project.ocaml-index
2 uids:
{uid: Stdlib.312; locs:
"print_string": File "lib.ml", line 2, characters 9-21;
"print_string": File "main.ml", line 1, characters 9-21
uid: Lib.0; locs:
{uid: Lib.0; locs:
"foo": File "lib.ml", line 1, characters 4-7;
"foo": File "lib.ml", line 2, characters 22-25;
"Lib.foo": File "main.ml", line 1, characters 22-29
uid: Stdlib.312; locs:
"print_string": File "lib.ml", line 2, characters 9-21;
"print_string": File "main.ml", line 1, characters 9-21
}, 0 approx shapes: {}, and shapes for CUS .

$ $MERLIN single occurrences -scope project -identifier-at 1:28 \
Expand Down

0 comments on commit a8db612

Please sign in to comment.