From fe7ab832fe85247c5d7af02813e65b4975198fd8 Mon Sep 17 00:00:00 2001 From: Olivier Nicole Date: Wed, 4 Sep 2024 12:27:51 +0200 Subject: [PATCH] Implement efficient line transforms on sourcemaps --- compiler/lib/source_map.ml | 370 ++++++++++++++++++++++++++++++++++++ compiler/lib/source_map.mli | 39 +++- 2 files changed, 406 insertions(+), 3 deletions(-) diff --git a/compiler/lib/source_map.ml b/compiler/lib/source_map.ml index a2d30d9f85..17a8090236 100644 --- a/compiler/lib/source_map.ml +++ b/compiler/lib/source_map.ml @@ -57,6 +57,26 @@ let gen_line = function let gen_col = function | Gen { gen_col; _ } | Gen_Ori { gen_col; _ } | Gen_Ori_Name { gen_col; _ } -> gen_col + +module Line_edits = struct + type action = + | Keep + | Drop + | Add of { count : int } + + let pp_action fmt = + let open Format in + function + | Keep -> pp_print_string fmt "Keep" + | Drop -> pp_print_string fmt "Drop" + | Add { count } -> fprintf fmt "@[Add@ {@ count =@ %d@ }@]" count + + type t = action list + + let pp fmt = Format.(pp_print_list pp_action fmt) +end + + module Mappings = struct type t = Uninterpreted of string [@@unboxed] @@ -66,6 +86,335 @@ module Mappings = struct let to_string : t -> string = fun (Uninterpreted s) -> s + type carries = + { carry_source : int + ; carry_line : int + ; carry_col : int + ; carry_name : int + } + + let update_carries_from_segment + ~carry_source + ~carry_line + ~carry_col + ~carry_name + ~pos + ~len + str = + (* Note: we don't care about the first field since we do linewise editing, + and it is reset for every line. *) + match Vlq64.decode_l str ~pos ~len with + | [ _gen_col ] -> { carry_source; carry_line; carry_col; carry_name } + | [ _gen_col; source; line; col ] -> + { carry_source = carry_source + source + ; carry_line = carry_line + line + ; carry_col = carry_col + col + ; carry_name + } + | [ _gen_col; source; line; col; name ] -> + { carry_source = carry_source + source + ; carry_line = carry_line + line + ; carry_col = carry_col + col + ; carry_name = carry_name + name + } + | _ -> invalid_arg "Mapping.update_carries_from_segment: invalid segment" + + let update_carries_and_write_segment + ~carry_source + ~carry_line + ~carry_col + ~carry_name + ~pos + ~len + str + ~buf = + match Vlq64.decode_l str ~pos ~len with + | [ gen_col ] -> + Vlq64.encode_l buf [ gen_col ]; + carry_source, carry_line, carry_col, carry_name + | [ gen_col; source; line; col ] -> + Vlq64.encode_l + buf + [ gen_col; source + carry_source; line + carry_line; col + carry_col ]; + 0, 0, 0, carry_name + | [ gen_col; source; line; col; name ] -> + Vlq64.encode_l + buf + [ gen_col + ; source + carry_source + ; line + carry_line + ; col + carry_col + ; name + carry_name + ]; + 0, 0, 0, 0 + | _ -> + invalid_arg + (Format.sprintf + "Mapping.update_carries_from_segment %s" + (String.sub ~pos ~len str)) + + (* Fold [f] over the segments in string [str.(pos..len - 1)]. *) + let fold_on_segments ~str ~pos ~len f ~init = + let rec loop acc pos end_ = + if pos >= end_ + then acc + else + let next_delim = + try min (String.index_from str pos ',') end_ with Not_found -> end_ + in + let len = next_delim - pos in + if len <= 0 then acc else loop (f acc str ~pos ~len) (next_delim + 1) end_ + in + loop init pos (pos + len) + + let update_carries_from_line + ~carry_source + ~carry_line + ~carry_col + ~carry_name + ~pos + ~len + str = + fold_on_segments + ~str + ~pos + ~len + ~init:{ carry_source; carry_line; carry_col; carry_name } + (fun acc str ~pos ~len -> + let { carry_source; carry_line; carry_col; carry_name } = acc in + update_carries_from_segment + ~carry_source + ~carry_line + ~carry_col + ~carry_name + ~pos + ~len + str) + + let update_carries_and_write_line + ~carry_source + ~carry_line + ~carry_col + ~carry_name + ~pos + ~len + str + ~buf = + let _, carries = + fold_on_segments + ~str + ~pos + ~len + ~init:(true, { carry_source; carry_line; carry_col; carry_name }) + (fun (is_first, acc) str ~pos ~len -> + let { carry_source; carry_line; carry_col; carry_name } = acc in + if not is_first then Buffer.add_char buf ','; + let carry_source, carry_line, carry_col, carry_name = + update_carries_and_write_segment + ~carry_source + ~carry_line + ~carry_col + ~carry_name + ~pos + ~len + str + ~buf + in + false, { carry_source; carry_line; carry_col; carry_name }) + in + Buffer.add_char buf ';'; + carries + + (* If [strict], then the number of [Keep] and [Drop] elements in [edits] + should be the same as the number of generated lines covered by the + mappings [orig]. Otherwise, there may be more edit actions, in which case + [Keep] just adds a line without mappings and [Drop] does nothing. *) + let rec edit_loop + ~orig + ~carry_source + ~carry_line + ~carry_col + ~carry_name + ~strict + buf + offset_in_orig + edits = + let open Line_edits in + if offset_in_orig >= String.length orig + then ( + (* No more lines in the mappings string *) + match edits with + | [] -> { carry_source; carry_line; carry_col; carry_name } + | _ :: _ -> + List.iter edits ~f:(function + | Add { count } -> Buffer.add_string buf (String.make count ';') + | Keep -> + if strict + then + invalid_arg + "Mapping.edit: more Keep or Drop edits than lines in mappings"; + Buffer.add_char buf ';' + | Drop -> + if strict + then + invalid_arg + "Mapping.edit: more Keep or Drop edits than lines in mappings"); + { carry_source; carry_line; carry_col; carry_name }) + else + match edits with + | [] -> { carry_source; carry_line; carry_col; carry_name } + | Keep :: rem -> + let next_group_delim = + try String.index_from orig offset_in_orig ';' + with Not_found -> String.length orig + in + let { carry_source; carry_line; carry_col; carry_name } = + update_carries_and_write_line + ~carry_source + ~carry_line + ~carry_col + ~carry_name + ~pos:offset_in_orig + ~len:(next_group_delim - offset_in_orig) + orig + ~buf + in + edit_loop + ~orig + ~carry_source + ~carry_line + ~carry_col + ~carry_name + ~strict + buf + (* Skip the ';' *) + (next_group_delim + 1) + rem + | Drop :: rem -> + let next_group_delim = + try String.index_from orig offset_in_orig ';' + with Not_found -> String.length orig + in + let { carry_source; carry_line; carry_col; carry_name } = + update_carries_from_line + ~carry_source + ~carry_line + ~carry_col + ~carry_name + ~pos:offset_in_orig + ~len:(next_group_delim - offset_in_orig) + orig + in + edit_loop + ~orig + ~carry_source + ~carry_line + ~carry_col + ~carry_name + ~strict + buf + (next_group_delim + 1) + rem + | Add { count } :: rem -> + Buffer.add_string buf (String.make count ';'); + edit_loop + ~orig + ~carry_source + ~carry_line + ~carry_col + ~carry_name + ~strict + buf + offset_in_orig + rem + + let edit ~strict (Uninterpreted orig) edits = + let buf = Buffer.create 8_192 in + let _ = + edit_loop + ~strict + ~orig + ~carry_source:0 + ~carry_line:0 + ~carry_col:0 + ~carry_name:0 + buf + 0 + edits + in + Uninterpreted (Buffer.contents buf) + + let num_gen_lines m = + let rec loop count pos = + if pos >= String.length m + then count + else + let next_delim = + try String.index_from m pos ';' with Not_found -> String.length m + in + if next_delim >= String.length m - 1 + then (* This was the last line *) + count + 1 + else loop (count + 1) (next_delim + 1) + in + loop 0 0 + + (* Fold [f] over the ';'-separated groups in string [str.(pos..len - 1)]. *) + let fold_on_lines ~str f ~init = + let rec loop acc pos = + if pos >= String.length str + then acc + else + let next_delim = + try min (String.index_from str pos ';') (String.length str) + with Not_found -> String.length str + in + let len = next_delim - pos in + loop (f acc str ~pos ~len) (next_delim + 1) + in + loop init 0 + + let sum_offsets mapping = + fold_on_lines + ~str:mapping + ~init:{ carry_source = 0; carry_line = 0; carry_col = 0; carry_name = 0 } + (fun { carry_source; carry_line; carry_col; carry_name } str ~pos ~len -> + update_carries_from_line + ~carry_source + ~carry_line + ~carry_col + ~carry_name + ~pos + ~len + str) + + let concat ~source_count1 ~name_count1 (Uninterpreted m1) (Uninterpreted m2) = + match m1, m2 with + | "", m2 -> Uninterpreted m2 + | m1, "" -> Uninterpreted m1 + | _, _ -> + let buf = Buffer.create 8_192 in + (* First do a pass on [m1] to accumulate its carries. *) + let { carry_source; carry_line; carry_col; carry_name } = sum_offsets m1 in + Buffer.add_string buf m1; + if not (Char.equal m1.[String.length m1 - 1] ';') then Buffer.add_char buf ';'; + let _ = + edit_loop + ~orig:m2 + (* Make the initial absolute offsets in [m2] relative. Also account + for the fact that fields [sources] and [names] of [m2] will be + concatenated to those of [m1]. *) + ~carry_source:(source_count1 - carry_source) + ~carry_line:~-carry_line + ~carry_col:~-carry_col + ~carry_name:(name_count1 - carry_name) + ~strict:true + buf + 0 + (List.init ~len:(num_gen_lines m2) ~f:(Fun.const Line_edits.Keep)) + in + Uninterpreted (Buffer.contents buf) + let encode mapping = let a = Array.of_list mapping in let len = Array.length a in @@ -230,6 +579,27 @@ let empty ~filename = ; mappings = Mappings.empty } + +let concat ~file ~sourceroot s1 s2 = + if not (Int.equal s1.version s2.version) + then invalid_arg "Source_map.concat: different versions"; + { version = s1.version + ; file + ; sourceroot + ; sources = s1.sources @ s2.sources + ; sources_content = + (match s1.sources_content, s2.sources_content with + | None, contents | contents, None -> contents + | Some c1, Some c2 -> Some (c1 @ c2)) + ; names = s1.names @ s2.names + ; mappings = + Mappings.concat + ~source_count1:(List.length s1.sources) + ~name_count1:(List.length s1.names) + s1.mappings + s2.mappings + } + let maps ~sources_offset ~names_offset x = match x with | Gen _ -> x diff --git a/compiler/lib/source_map.mli b/compiler/lib/source_map.mli index a5d6278329..82483ec034 100644 --- a/compiler/lib/source_map.mli +++ b/compiler/lib/source_map.mli @@ -44,6 +44,19 @@ type map = ; ori_name : int } +module Line_edits : sig + type action = + | Keep + | Drop + | Add of { count : int } + + val pp_action : Format.formatter -> action -> unit + + type t = action list + + val pp : Format.formatter -> t -> unit +end + module Mappings : sig type t @@ -54,17 +67,31 @@ module Mappings : sig (** By default, mappings are left uninterpreted, since many operations can be performed efficiently directly on the encoded form. Therefore this function is mostly a no-op and very cheap. It does not perform any - validation of its argument, unlike {!val:decode}. It is guaranteed that - {!val:of_string} and {!val:to_string} are inverse functions. *) + validation of its argument, unlike {!val:edit} or {!val:decode}. It is + guaranteed that {!val:of_string} and {!val:to_string} are inverse + functions. *) val decode : t -> map list - (** Parse the mappings. *) + (** Parse the mappings. Prefer using the more efficient {!val:edit} on the + uninterpreted form when applicable. *) val encode : map list -> t val to_string : t -> string (** Returns the mappings as a string in the Source map v3 format. This function is mostly a no-op and is very cheap. *) + + val edit : strict:bool -> t -> Line_edits.t -> t + (** [edit ~strict mappings edits] applies line edits [edits] to [mappings] in + order and returns the result. If [strict], then the number of {!const:Keep} and + {!const:Drop} elements in [edits] should be the same or lesser than the + number of generated lines covered by [mappings]. If the number of + {!const:Line_edits.Keep} and {!const:Line_edits.Drop} actions is lesser + than the number of lines in the domain of [mappings], only the lines + affected by an edit are included in the result. If [strict] is false, + there may be more edit actions than generated lines. In which case, a + [Keep] that does not correspond to a line just adds a line without + mappings and a [Drop] without a corresponding line does nothing. *) end type t = @@ -86,6 +113,12 @@ val merge : t list -> t option val empty : filename:string -> t +val concat : file:string -> sourceroot:string option -> t -> t -> t +(** If [s1] encodes a mapping for a generated file [f1], and [s2] for a + generated file [f2], then [concat ~file ~sourceroot s1 s2] encodes the + union of these mappings for the concatenation of [f1] and [f2], with name + [file] and source root [sourceroot). *) + val to_string : t -> string val to_file : t -> string -> unit