Skip to content

Commit

Permalink
Support Core v0.16.0
Browse files Browse the repository at this point in the history
  • Loading branch information
SGrondin committed Jul 7, 2023
1 parent 29c6f80 commit 5590891
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 97 deletions.
52 changes: 24 additions & 28 deletions .ocamlformat
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# "Asemio Style" v1.2
# Last updated for ocamlformat 0.21.0
# "Asemio Style" v1.3
# Last updated for ocamlformat 0.25.1
ocaml-version=5.0.0

# All available options are listed.
# An option is commented out when the value matches the default
Expand All @@ -9,66 +10,61 @@
# - if a default changed, see if we should follow it or not

# assignment-operator = end-line
# break-before-in = fit-or-vertical
break-cases = all
# break-collection-expressions = fit-or-vertical
# break-colon = after
# break-fun-decl = wrap
# break-fun-sig = wrap
break-infix = fit-or-vertical
# break-infix-before-func = false
# break-separators = after
# break-sequences = true
# break-string-literals = auto
# break-struct = force
cases-exp-indent = 2
# cases-matching-exp-indent = normal
disambiguate-non-breaking-match = true
doc-comments = before
# doc-comments-padding = 2
# doc-comments-tag-only = default
# dock-collection-brackets = true
exp-grouping = preserve
# extension-indent = 2
field-space = tight-decl
function-indent = 0
# function-indent-nested = never
if-then-else = keyword-first
# indicate-multiline-delimiters = no
# indent-after-in = 0
indicate-multiline-delimiters = space
indicate-nested-or-patterns = space
# infix-precedence = indent
# leading-nested-match-parens = false
let-and = sparse
# let-binding-indent = 2
# let-binding-spacing = compact
# let-module = compact
line-endings = lf
# line-endings = lf
margin = 106
max-indent = 3
# match-indent = 0
# match-indent-nested = never
max-indent = 2
module-item-spacing = sparse
# nested-match = wrap
parens-ite = true
parens-tuple = multi-line-only
# parens-tuple-patterns = multi-line-only
# parse-docstrings = false
# parse-toplevel-phrases = false
# sequence-blank-line = preserve-one
# sequence-style = terminator
single-case = sparse
# space-around-arrays = true
# space-around-lists = true
# space-around-records = true
# space-around-variants = true
# stritem-extension-indent = 0
type-decl = sparse
# type-decl-indent = 2
# wrap-comments = false
# wrap-fun-args = true


# DEPRECATED

# align-cases = false
align-constructors-decl = true
align-variants-decl = true
# break-before-in = fit-or-vertical
# break-collection-expressions = fit-or-vertical
# break-string-literals = auto
# break-struct = force
disambiguate-non-breaking-match = true
# extension-indent = 2
function-indent = 0
# function-indent-nested = never
# indent-after-in = 0
# let-binding-indent = 2
# match-indent = 0
# match-indent-nested = never
# nested-match = wrap
# parens-tuple-patterns = multi-line-only
# stritem-extension-indent = 0
# type-decl-indent = 2
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@ let td =
Tdigest.create ()
|> Tdigest.add_list [ 10.0; 11.0; 12.0; 13.0 ]
in
Tdigest.p_ranks td [ 9.; 10.; 11.; 12.; 13.; 14. ] (* [ Some 0; Some 0.125; Some 0.375; Some 0.625; Some 0.875; Some 1 ] *)
Tdigest.percentiles td [ 0.; 0.25; 0.5; 0.75; 1. ]
(* [ Some 10; Some 10.5; Some 11.5; Some 12.5; Some 13 ] *)
Tdigest.p_ranks td [ 9.; 10.; 11.; 12.; 13.; 14. ]
(* [ Some 0; Some 0.125; Some 0.375; Some 0.625; Some 0.875; Some 1 ] *)
```

The T-Digest is a data structure and algorithm for constructing an approximate distribution for a collection of real numbers presented as a stream.
Expand Down
73 changes: 37 additions & 36 deletions src/tdigest.ml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
open! Core
open Float
module Map = Core.Map
open Option.Monad_infix

type delta =
| Merging of float
| Merging of float
| Discrete
[@@deriving sexp]

Expand Down Expand Up @@ -50,7 +51,7 @@ let empty_stats = { cumulates_count = 0; compress_count = 0; auto_compress_count

type t = {
settings: settings;
centroids: centroid Map.t;
centroids: centroid Float.Map.t;
mutable min: centroid option;
mutable max: centroid option;
n: float;
Expand Down Expand Up @@ -110,7 +111,7 @@ let create ?(delta = default_delta) ?(k = default_k) ?(cx = default_cx) () =
in
{
settings = { delta; k; cx; k_delta = get_k_delta (k, delta) };
centroids = Map.empty;
centroids = Float.Map.empty;
min = None;
max = None;
n = 0.0;
Expand All @@ -133,17 +134,17 @@ let find_nearest td mean =
let gt = ref None in
let lte =
Map.binary_search td.centroids `Last_less_than_or_equal_to mean ~compare:(fun ~key ~data against ->
let x = compare key against in
if Int.is_positive x then gt := Some (key, data);
x)
let x = compare key against in
if Int.is_positive x then gt := Some (key, data);
x )
in
match lte with
| Some (k, v) when mean = k -> Some v
| Some (k1, v1) -> (
match !gt with
| None -> None
| Some (k2, _v2) when mean - k1 < k2 - mean -> Some v1
| Some (_k2, v2) -> Some v2)
| Some (_k2, v2) -> Some v2 )
| None -> None

let use_cache = function
Expand All @@ -157,9 +158,9 @@ let cumulate td ~exact =
let cumn = ref 0.0 in
let centroids =
Map.map td.centroids ~f:(fun data ->
let updated = { data with mean_cumn = !cumn + (data.n / 2.); cumn = !cumn + data.n } in
cumn := updated.cumn;
updated)
let updated = { data with mean_cumn = !cumn + (data.n / 2.); cumn = !cumn + data.n } in
cumn := updated.cumn;
updated )
in
{
td with
Expand All @@ -169,7 +170,7 @@ let cumulate td ~exact =
n = !cumn;
last_cumulate = !cumn;
stats = { td.stats with cumulates_count = succ td.stats.cumulates_count };
})
} )

let new_bounds ({ mean; _ } as added) = function
| { n = 0.0; min = None; max = None; _ } -> Some added, Some added
Expand All @@ -187,9 +188,9 @@ let add_weight td nearest ~mean ~n =
let updated =
{
mean =
(if nearest.mean = mean
then nearest.mean
else nearest.mean + (n * (mean - nearest.mean) / (nearest.n + n)));
( if nearest.mean = mean
then nearest.mean
else nearest.mean + (n * (mean - nearest.mean) / (nearest.n + n)) );
cumn = nearest.cumn + n;
mean_cumn = nearest.mean_cumn + (n / 2.);
n = nearest.n + n;
Expand Down Expand Up @@ -225,17 +226,17 @@ let weights_of_td = function
let arr = Array.create ~len:(Map.length centroids) empty_centroid in
let _i =
Map.fold centroids ~init:0 ~f:(fun ~key:_ ~data i ->
arr.(i) <- data;
succ i)
arr.(i) <- data;
succ i )
in
arr

let weights_of_table table =
let arr = Array.create ~len:(Table.length table) empty_centroid in
let arr = Array.create ~len:(Hashtbl.length table) empty_centroid in
let _i =
Table.fold table ~init:0 ~f:(fun ~key:mean ~data:n i ->
arr.(i) <- { empty_centroid with mean; n };
succ i)
Hashtbl.fold table ~init:0 ~f:(fun ~key:mean ~data:n i ->
arr.(i) <- { empty_centroid with mean; n };
succ i )
in
arr

Expand All @@ -244,7 +245,7 @@ let rebuild ~auto settings (stats : stats) arr =
let blank =
{
settings;
centroids = Map.empty;
centroids = Float.Map.empty;
min = None;
max = None;
n = 0.0;
Expand Down Expand Up @@ -297,7 +298,7 @@ let to_string td =
in
let _pos =
Map.fold td.centroids ~init:0 ~f:(fun ~key:_ ~data:{ mean; n; _ } pos ->
add_float pos ~data:mean |> add_float ~data:n)
add_float pos ~data:mean |> add_float ~data:n )
in
td, Bytes.unsafe_to_string ~no_mutation_while_string_reachable:buf

Expand All @@ -323,7 +324,7 @@ let of_string ?(delta = default_delta) ?(k = default_k) ?(cx = default_cx) str =
| pos ->
let mean = parse_float str pos in
let n = parse_float str Int.(pos + 8) in
Table.update table mean ~f:(Option.value_map ~default:n ~f:(( + ) n));
Hashtbl.update table mean ~f:(Option.value_map ~default:n ~f:(( + ) n));
(loop [@tailcall]) Int.(pos + 16)
in
loop 0;
Expand Down Expand Up @@ -360,35 +361,35 @@ let merge ?(delta = default_delta) ?(k = default_k) ?(cx = default_cx) tds =
let settings = { delta; k; cx; k_delta = get_k_delta (k, delta) } in
let table = Table.create () in
List.iter tds ~f:(fun { centroids; _ } ->
Map.iter centroids ~f:(fun { mean; n; _ } ->
Table.update table mean ~f:(Option.value_map ~default:n ~f:(( + ) n))));
Map.iter centroids ~f:(fun { mean; n; _ } ->
Hashtbl.update table mean ~f:(Option.value_map ~default:n ~f:(( + ) n)) ) );
weights_of_table table |> rebuild ~auto:true settings empty_stats

type bounds =
| Neither
| Both of centroid * centroid
| Equal of centroid
| Lower of centroid
| Upper of centroid
| Both of centroid * centroid
| Equal of centroid
| Lower of centroid
| Upper of centroid

let bounds td needle lens =
let gt = ref None in
let lte =
Map.binary_search td.centroids `Last_less_than_or_equal_to needle ~compare:(fun ~key ~data against ->
let x = compare (lens data) against in
if Int.is_positive x then gt := Some (key, data);
x)
let x = compare (lens data) against in
if Int.is_positive x then gt := Some (key, data);
x )
in
match lte with
| Some (_k, v) when lens v = needle -> Equal v
| Some (_k1, v1) -> (
match !gt with
| Some (_k2, v2) -> Both (v1, v2)
| None -> Lower v1)
| None -> Lower v1 )
| None -> (
match get_min td with
| Some v -> Upper v
| None -> Neither)
| None -> Neither )

let percentile td p =
match td with
Expand All @@ -409,7 +410,7 @@ let percentile td p =
td, Some num
| Both (lower, _upper), Discrete when h <= lower.cumn -> td, Some lower.mean
| Both (_lower, upper), Discrete -> td, Some upper.mean
| Neither, _ -> td, None)
| Neither, _ -> td, None )

let percentiles td ps = List.fold_map ps ~init:td ~f:percentile

Expand Down Expand Up @@ -438,7 +439,7 @@ let p_rank td p =
+ ((p - lower.mean) * (upper.mean_cumn - lower.mean_cumn) / (upper.mean - lower.mean))
in
td, Some (num / td.n)
| _, Merging _ -> td, None))
| _, Merging _ -> td, None ) )

let p_ranks td ps = List.fold_map ps ~init:td ~f:p_rank

Expand Down
2 changes: 1 addition & 1 deletion src/tdigest.mli
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ open! Core
[~delta:Discrete] switches off TDigest behavior and treats the distribution as discrete, with no merging and exact values reported.
*)
type delta =
| Merging of float
| Merging of float
| Discrete
[@@deriving sexp]

Expand Down
6 changes: 3 additions & 3 deletions tdigest.opam
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ depends: [
"ocaml" { >= "4.10.0" }
"dune" { >= "1.9.0" }

"core" { >= "v0.15.0" }
# "ocamlformat" { = "0.21.0" } # Development
# "ocaml-lsp-server" # Development
"core" { >= "v0.15.0" & < "v0.17.0" }
"ocamlformat" { = "0.25.1" } # Development
"ocaml-lsp-server" # Development
]
build: ["dune" "build" "-p" name "-j" jobs]
2 changes: 1 addition & 1 deletion test/test_discrete.ml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ let%expect_test "discrete digests" =
(* handles multiples duplicates *)
Tdigest.create ~delta:Tdigest.Discrete ()
|> Fn.apply_n_times ~n:10 (fun td ->
td |> Tdigest.add ~data:0. |> Tdigest.add ~data:1. |> Tdigest.add ~data:0.5)
td |> Tdigest.add ~data:0. |> Tdigest.add ~data:1. |> Tdigest.add ~data:0.5 )
|> check;
[%expect {| (((mean 0) (n 10)) ((mean 0.5) (n 10)) ((mean 1) (n 10))) |}]

Expand Down
Loading

0 comments on commit 5590891

Please sign in to comment.