From b4976c2e6d86693c9290157bee3a42cdb9834a16 Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Thu, 26 Sep 2024 11:02:47 +0200 Subject: [PATCH 1/2] mul_strassen: avoid computing some entries twice --- src/fmpz_mat/mul_strassen.c | 16 ++++++++++++++-- src/gr_mat/mul_strassen.c | 16 ++++++++++++++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/fmpz_mat/mul_strassen.c b/src/fmpz_mat/mul_strassen.c index 6c5c57fd91..030448fe0c 100644 --- a/src/fmpz_mat/mul_strassen.c +++ b/src/fmpz_mat/mul_strassen.c @@ -122,10 +122,22 @@ void fmpz_mat_mul_strassen(fmpz_mat_t C, const fmpz_mat_t A, const fmpz_mat_t B) if (a > 2*anr) { - fmpz_mat_t Ar, Cr; + fmpz_mat_t Ar, Br, Cr; fmpz_mat_window_init(Ar, A, 2*anr, 0, a, b); fmpz_mat_window_init(Cr, C, 2*anr, 0, a, c); - fmpz_mat_mul(Cr, Ar, B); + + /* don't compute the overlapping entries twice */ + if (c > 2 * bnc) + { + fmpz_mat_window_init(Br, B, 0, 0, b, 2*bnc); + fmpz_mat_mul(Cr, Ar, Br); + fmpz_mat_window_clear(Br); + } + else + { + fmpz_mat_mul(Cr, Ar, B); + } + fmpz_mat_window_clear(Ar); fmpz_mat_window_clear(Cr); } diff --git a/src/gr_mat/mul_strassen.c b/src/gr_mat/mul_strassen.c index fa4994233c..9b0d0f59a1 100644 --- a/src/gr_mat/mul_strassen.c +++ b/src/gr_mat/mul_strassen.c @@ -147,10 +147,22 @@ int gr_mat_mul_strassen(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t if (ar > 2 * anr) { - gr_mat_t Ar, Cr; + gr_mat_t Ar, Br, Cr; gr_mat_window_init(Ar, A, 2 * anr, 0, ar, ac, ctx); gr_mat_window_init(Cr, C, 2 * anr, 0, ar, bc, ctx); - status |= gr_mat_mul(Cr, Ar, B, ctx); + + /* don't compute the overlapping entries twice */ + if (bc > 2 * bnc) + { + gr_mat_window_init(Br, B, 0, 0, ac, 2 * bnc, ctx); + status |= gr_mat_mul(Cr, Ar, Br, ctx); + gr_mat_window_clear(Br, ctx); + } + else + { + status |= gr_mat_mul(Cr, Ar, B, ctx); + } + gr_mat_window_clear(Ar, ctx); gr_mat_window_clear(Cr, ctx); } From 6d998edadfa5b32331e7240d97e21ecf57d3167d Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Thu, 26 Sep 2024 11:36:15 +0200 Subject: [PATCH 2/2] oops --- src/fmpz_mat/mul_strassen.c | 2 +- src/gr_mat/mul_strassen.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fmpz_mat/mul_strassen.c b/src/fmpz_mat/mul_strassen.c index 030448fe0c..3a273df4d9 100644 --- a/src/fmpz_mat/mul_strassen.c +++ b/src/fmpz_mat/mul_strassen.c @@ -124,7 +124,7 @@ void fmpz_mat_mul_strassen(fmpz_mat_t C, const fmpz_mat_t A, const fmpz_mat_t B) { fmpz_mat_t Ar, Br, Cr; fmpz_mat_window_init(Ar, A, 2*anr, 0, a, b); - fmpz_mat_window_init(Cr, C, 2*anr, 0, a, c); + fmpz_mat_window_init(Cr, C, 2*anr, 0, a, 2*bnc); /* don't compute the overlapping entries twice */ if (c > 2 * bnc) diff --git a/src/gr_mat/mul_strassen.c b/src/gr_mat/mul_strassen.c index 9b0d0f59a1..6aaaef06c2 100644 --- a/src/gr_mat/mul_strassen.c +++ b/src/gr_mat/mul_strassen.c @@ -149,7 +149,7 @@ int gr_mat_mul_strassen(gr_mat_t C, const gr_mat_t A, const gr_mat_t B, gr_ctx_t { gr_mat_t Ar, Br, Cr; gr_mat_window_init(Ar, A, 2 * anr, 0, ar, ac, ctx); - gr_mat_window_init(Cr, C, 2 * anr, 0, ar, bc, ctx); + gr_mat_window_init(Cr, C, 2 * anr, 0, ar, 2 * bnc, ctx); /* don't compute the overlapping entries twice */ if (bc > 2 * bnc)