diff --git a/src/Bedrock/Tests/Proofs/X1305_32.v b/src/Bedrock/Tests/Proofs/X1305_32.v index ea9e90e7d6..36deb2d9dc 100644 --- a/src/Bedrock/Tests/Proofs/X1305_32.v +++ b/src/Bedrock/Tests/Proofs/X1305_32.v @@ -78,8 +78,8 @@ Section Proofs. Local Notation eval := (eval (weight (Qnum (inject_Z (Z.log2_up M) / inject_Z (Z.of_nat n))) (QDen (inject_Z (Z.log2_up M) / inject_Z (Z.of_nat n)))) n). - Local Notation loose_bounds := (UnsaturatedSolinas.loose_bounds n s c). - Local Notation tight_bounds := (UnsaturatedSolinas.tight_bounds n s c). + Local Notation loose_bounds := (UnsaturatedSolinasHeuristics.loose_bounds n s c). + Local Notation tight_bounds := (UnsaturatedSolinasHeuristics.tight_bounds n s c). Definition Bignum bounds @@ -190,8 +190,8 @@ Section Proofs. Solinas.carry_mul_correct (weight (Qnum (Z.log2_up M / n)) (Qden (Z.log2_up M / n))) n M - (UnsaturatedSolinas.tight_bounds n s c) - (UnsaturatedSolinas.loose_bounds n s c) + tight_bounds + loose_bounds (API.Interp mulmod). Proof. apply carry_mul_correct with (machine_wordsize0:=machine_wordsize). diff --git a/src/Bedrock/Tests/Proofs/X25519_32.v b/src/Bedrock/Tests/Proofs/X25519_32.v index 2be869defe..52914524a9 100644 --- a/src/Bedrock/Tests/Proofs/X25519_32.v +++ b/src/Bedrock/Tests/Proofs/X25519_32.v @@ -79,8 +79,8 @@ Section Proofs. Local Notation eval := (eval (weight (Qnum (inject_Z (Z.log2_up M) / inject_Z (Z.of_nat n))) (QDen (inject_Z (Z.log2_up M) / inject_Z (Z.of_nat n)))) n). - Local Notation loose_bounds := (UnsaturatedSolinas.loose_bounds n s c). - Local Notation tight_bounds := (UnsaturatedSolinas.tight_bounds n s c). + Local Notation loose_bounds := (UnsaturatedSolinasHeuristics.loose_bounds n s c). + Local Notation tight_bounds := (UnsaturatedSolinasHeuristics.tight_bounds n s c). Definition Bignum bounds @@ -191,8 +191,8 @@ Section Proofs. Solinas.carry_mul_correct (weight (Qnum (Z.log2_up M / n)) (Qden (Z.log2_up M / n))) n M - (UnsaturatedSolinas.tight_bounds n s c) - (UnsaturatedSolinas.loose_bounds n s c) + tight_bounds + loose_bounds (API.Interp mulmod). Proof. apply carry_mul_correct with (machine_wordsize0:=machine_wordsize). diff --git a/src/Bedrock/Tests/Proofs/X25519_64.v b/src/Bedrock/Tests/Proofs/X25519_64.v index e3079682f8..765f34571e 100644 --- a/src/Bedrock/Tests/Proofs/X25519_64.v +++ b/src/Bedrock/Tests/Proofs/X25519_64.v @@ -31,6 +31,9 @@ Require Import Crypto.PushButtonSynthesis.UnsaturatedSolinas. Require Import Crypto.Util.Tactics.BreakMatch. Require Import Crypto.Util.ZUtil.Tactics.LtbToLt. Require Import Crypto.Util.ZUtil.Tactics.RewriteModSmall. +Require Import Crypto.Util.Option. +Import Coq.Lists.List. +Require Import Crypto.Util.Tactics.DestructHead. Require Import Rewriter.Language.Wf. Require bedrock2.Map.SeparationLogic. (* if imported, list firstn/skipn get overwritten and it's annoying *) Local Open Scope Z_scope. @@ -74,8 +77,8 @@ Section Proofs. Local Notation eval := (eval (weight (Qnum (inject_Z (Z.log2_up M) / inject_Z (Z.of_nat n))) (QDen (inject_Z (Z.log2_up M) / inject_Z (Z.of_nat n)))) n). - Local Notation loose_bounds := (UnsaturatedSolinas.loose_bounds n s c). - Local Notation tight_bounds := (UnsaturatedSolinas.tight_bounds n s c). + Local Notation loose_bounds := (UnsaturatedSolinasHeuristics.loose_bounds n s c). + Local Notation tight_bounds := (UnsaturatedSolinasHeuristics.tight_bounds n s c). Definition Bignum bounds @@ -158,8 +161,8 @@ Section Proofs. Proof. cbn [LoadStoreList.within_base_access_sizes LoadStoreList.within_access_sizes_args]. - let r := eval vm_compute in (hd None loose_bounds) in - change loose_bounds with (repeat r n). + let r := (eval vm_compute in (List.fold_right (fun x y => (x <- x; y <- y; Some (Operations.ZRange.union x y))%option) (hd None loose_bounds) loose_bounds)) in + intro H; cut (list_Z_bounded_by (repeat r n) xs); [ clear H | revert H; apply relax_list_Z_bounded_by; vm_compute; reflexivity ]. let H := fresh in intro H; pose proof length_list_Z_bounded_by _ _ H. rewrite repeat_length in *. @@ -187,8 +190,8 @@ Section Proofs. Solinas.carry_mul_correct (weight (Qnum (Z.log2_up M / n)) (Qden (Z.log2_up M / n))) n M - (UnsaturatedSolinas.tight_bounds n s c) - (UnsaturatedSolinas.loose_bounds n s c) + tight_bounds + loose_bounds (API.Interp mulmod). Proof. apply carry_mul_correct with (machine_wordsize0:=machine_wordsize). diff --git a/src/PushButtonSynthesis/UnsaturatedSolinas.v b/src/PushButtonSynthesis/UnsaturatedSolinas.v index 5513445f0e..3d719249f3 100644 --- a/src/PushButtonSynthesis/UnsaturatedSolinas.v +++ b/src/PushButtonSynthesis/UnsaturatedSolinas.v @@ -17,6 +17,7 @@ Require Import Crypto.Util.Strings.Show. Require Import Crypto.Util.ZUtil.Definitions. Require Import Crypto.Util.ZUtil.Zselect. Require Import Crypto.Util.ZUtil.Tactics.LtbToLt. +Require Import Crypto.Util.Prod. Require Import Crypto.Util.Tactics.HasBody. Require Import Crypto.Util.Tactics.Head. Require Import Crypto.Util.Tactics.SpecializeBy. @@ -97,16 +98,14 @@ Section __. Let limbwidth := (Z.log2_up (s - Associational.eval c) / Z.of_nat n)%Q. Let idxs : list nat := carry_chains n s c. - Let coef := 2. Let n_bytes := bytes_n (Qnum limbwidth) (Qden limbwidth) n. - Let prime_upperbound_list : list Z - := encode_no_reduce (weight (Qnum limbwidth) (Qden limbwidth)) n (s-1). + Local Notation prime_upperbound_list := (prime_upperbound_list n s c) (only parsing). Let prime_bytes_upperbound_list : list Z := encode_no_reduce (weight 8 1) n_bytes (s-1). - Let tight_upperbounds : list Z - := List.map - (fun v : Z => Qceiling (tight_upperbound_fraction * v)) - prime_upperbound_list. + Local Notation tight_upperbounds := (tight_upperbounds n s c) (only parsing). + Local Notation loose_upperbounds := (loose_upperbounds n s c) (only parsing). + Local Notation tight_bounds := (tight_bounds n s c) (only parsing). + Local Notation loose_bounds := (loose_bounds n s c) (only parsing). Definition prime_bound : ZRange.type.option.interp (base.type.Z) := Some r[0~>(s - Associational.eval c - 1)]%zrange. Definition prime_bounds : ZRange.type.option.interp (base.type.list (base.type.Z)) @@ -130,12 +129,6 @@ Section __. Let possible_values := possible_values_of_machine_wordsize. Let possible_values_with_bytes := possible_values_of_machine_wordsize_with_bytes. - Let loose_upperbounds : list Z - := List.map (fun u => loose_upperbound_extra_multiplicand * u) tight_upperbounds. - Definition tight_bounds : list (ZRange.type.option.interp base.type.Z) - := List.map (fun u => Some r[0~>u]%zrange) tight_upperbounds. - Definition loose_bounds : list (ZRange.type.option.interp base.type.Z) - := List.map (fun u => Some r[0~>u]%zrange) loose_upperbounds. Local Instance no_select_size : no_select_size_opt := no_select_size_of_no_select machine_wordsize. Local Instance split_mul_to : split_mul_to_opt := split_mul_to_of_should_split_mul machine_wordsize possible_values. @@ -151,7 +144,7 @@ Section __. Proof using Type. cbv [tight_bounds]; now autorewrite with distr_length. Qed. Hint Rewrite length_tight_bounds : distr_length. Lemma length_loose_bounds : List.length loose_bounds = n. - Proof using Type. cbv [loose_bounds]; now autorewrite with distr_length. Qed. + Proof using Type. cbv [loose_bounds]; now autorewrite with distr_length natsimplify. Qed. Hint Rewrite length_loose_bounds : distr_length. Lemma length_prime_bytes_upperbound_list : List.length prime_bytes_upperbound_list = bytes_n (Qnum limbwidth) (Qden limbwidth) n. Proof using Type. cbv [prime_bytes_upperbound_list]; now autorewrite with distr_length. Qed. @@ -605,19 +598,7 @@ Section __. Lemma relax_correct : forall x, list_Z_bounded_by tight_bounds x -> list_Z_bounded_by loose_bounds x. - Proof using Type. - cbv [tight_bounds loose_bounds list_Z_bounded_by]. - intro. - rewrite !fold_andb_map_map1, !fold_andb_map_iff; cbn [upper lower]. - setoid_rewrite Bool.andb_true_iff. - intro H. - repeat first [ lazymatch type of H with - | and _ _ => let H' := fresh in destruct H as [H' H]; split; [ assumption | ] - end - | let x := fresh in intro x; specialize (H x) ]. - cbv [loose_upperbound_extra_multiplicand]. - Z.ltb_to_lt; lia. - Qed. + Proof using Type. apply relax_list_Z_bounded_by, tight_bounds_tighter. Qed. Lemma to_bytes_correct res (Hres : to_bytes = Success res) diff --git a/src/UnsaturatedSolinasHeuristics.v b/src/UnsaturatedSolinasHeuristics.v index 81444881ac..24826ed12c 100644 --- a/src/UnsaturatedSolinasHeuristics.v +++ b/src/UnsaturatedSolinasHeuristics.v @@ -1,10 +1,16 @@ Require Import Coq.Lists.List. +Require Import Coq.micromega.Lia. Require Import Coq.ZArith.ZArith. Require Import Coq.QArith.QArith_base Coq.QArith.Qround. +Require Import Coq.QArith.Qabs. Require Import Crypto.Arithmetic.Core. Require Import Crypto.Arithmetic.ModOps. Require Import Crypto.Util.ListUtil. Require Import Crypto.Util.ZRange. +Require Import Crypto.Util.Option. +Require Import Crypto.Util.ZUtil.Tactics.LtbToLt. +Require Import Crypto.Util.ZUtil.Tactics.DivModToQuotRem. +Require Import Crypto.Util.ListUtil.FoldBool. Require Crypto.TAPSort. Import ListNotations. Local Open Scope Z_scope. Local Open Scope list_scope. Local Open Scope bool_scope. @@ -21,6 +27,13 @@ Local Coercion Z.pos : positive >-> Z. Class tight_upperbound_fraction_opt := tight_upperbound_fraction : Q. Typeclasses Opaque tight_upperbound_fraction_opt. +Local Notation is_tighter_than0 x y + := ((lower y <=? lower x) && (upper x <=? upper y)). +Local Notation is_tighter_than0oo r1 r2 + := (match r1, r2 with _, None => true | None, Some _ => false | Some r1', Some r2' => is_tighter_than0 r1' r2' end). +Local Notation is_tighter_than ls1 ls2 + := (fold_andb_map (fun x y => is_tighter_than0oo x y) ls1 ls2). + Section __. Context {tight_upperbound_fraction : tight_upperbound_fraction_opt} (n : nat) @@ -60,24 +73,54 @@ else: else (List.seq 0 n ++ [0; 1])%list%nat. Definition default_tight_upperbound_fraction : Q := (11/10)%Q. - Definition loose_upperbound_extra_multiplicand : Z := 3. + Definition coef := 2. (* for balance in sub *) Definition prime_upperbound_list : list Z := encode_no_reduce (weight (Qnum limbwidth) (Qden limbwidth)) n (s-1). + (** We take the absolute value mostly to make proofs easy *) Definition tight_upperbounds : list Z := List.map - (fun v : Z => Qceiling (tight_upperbound_fraction * v)) + (fun v : Z => Qceiling (Qabs (tight_upperbound_fraction * v))) prime_upperbound_list. + (** We compute loose bounds based on how much headspace we have in + each limb, and treat separately the maximum number of additions + and subtractions that we guarantee *) + (** Allow enough space to do two additions in a row w/o carrying *) + Definition headspace_add_count : nat := 2. + (** Allow enough space to do one subtraction w/o carrying *) + Definition headspace_sub_count : nat := 1. + Definition loose_upperbounds : list Z := List.map - (fun v : Z => loose_upperbound_extra_multiplicand * v) - tight_upperbounds. + (fun '(v, bal) => v + Z.max (headspace_add_count * v) + (headspace_sub_count * bal)) + (List.combine tight_upperbounds (balance (weight (Qnum limbwidth) (Qden limbwidth)) n s c coef)). Definition tight_bounds : list (option zrange) := List.map (fun u => Some r[0~>u]%zrange) tight_upperbounds. Definition loose_bounds : list (option zrange) := List.map (fun u => Some r[0~>u]%zrange) loose_upperbounds. + Lemma tight_bounds_tighter : is_tighter_than tight_bounds loose_bounds = true. + Proof using Type. + cbv [tight_bounds loose_bounds tight_upperbounds loose_upperbounds balance scmul]. + rewrite !combine_map_l, !fold_andb_map_map, !fold_andb_map_map1, fold_andb_map_iff. + cbn [lower upper]. + autorewrite with distr_length. + split. + { cbv [prime_upperbound_list]. + now autorewrite with distr_length natsimplify. } + { intro; rewrite In_nth_error_iff; intros [n' H]. + rewrite !nth_error_combine in H. + break_innermost_match_hyps; inversion_option; subst; cbn [fst snd]. + rewrite !Bool.andb_true_iff; split; [ reflexivity | Z.ltb_to_lt ]. + let x := lazymatch goal with |- ?x <= _ => x end in + rewrite <- (Z.add_0_r x) at 1; apply Zplus_le_compat_l. + etransitivity; [ | apply Z.le_max_l ]. + cbv [Qceiling Qmult Qfloor Qnum Qden Qopp inject_Z Qabs]; case tight_upperbound_fraction; intros; clear. + Z.div_mod_to_quot_rem; nia. } + Qed. + (** check if the suggested number of limbs will overflow double-width registers when adding partial products after a multiplication and then doing solinas reduction *)