From 11c2d7361115f0b0e6e53b82665ba2bc2dd5f18e Mon Sep 17 00:00:00 2001
From: rokicki
Date: Sat, 12 Aug 2023 09:22:14 -0700
Subject: [PATCH] Split the prefetch from the decision, so we can pipeline.

---
Reviewer notes (this region is ignored when the patch is applied):

 * prunetable gains gethashforlookup(), prefetchindexed() and
   lookuphindexed(); lookup() is now gethashforlookup() followed by
   lookuphindexed(). lookuph() and prefetch() stay but are marked
   "deprecate this".
 * solveiter() becomes a loop over innersetup() (counts the lookup,
   computes the table index, prefetches it) and innerfetch() (reads the
   table entry and runs one step of the explicit-stack search).
   innerfetch() returns 3 after pushing a new position, which makes
   solveiter() call innersetup() again; any other value is returned to
   solveiter()'s caller unchanged.
 * NOTE(review): 3 is an unnamed sentinel; this assumes possibsolution()
   can never return 3 -- confirm, and consider a named constant.
 * NOTE(review): lookuph() repeats the decode in lookuphindexed(); it
   could be reduced to lookuphindexed(indexhash(h)).
 * NOTE(review): indentation inside the hunks was rebuilt as 3 spaces per
   level with labels at column 0; if context fails to match, apply with
   --ignore-whitespace.

 src/cpp/prunetable.h |  33 +++++++----
 src/cpp/solve.cpp    | 127 +++++++++++++++++++++++--------------------
 src/cpp/solve.h      |   2 +
 3 files changed, 92 insertions(+), 70 deletions(-)

diff --git a/src/cpp/prunetable.h b/src/cpp/prunetable.h
index efc7ec5f..f68ea97e 100644
--- a/src/cpp/prunetable.h
+++ b/src/cpp/prunetable.h
@@ -87,7 +87,7 @@ struct prunetable {
    prunetable& operator=(prunetable &&) noexcept = delete ;
    void filltable(const puzdef &pd, int d) ;
    void checkextend(const puzdef &pd, int ignorelookups=0) ;
-   int lookuph(ull h) const {
+   int lookuph(ull h) const { // deprecate this
       h = indexhash(h) ;
       int v = 3 & (mem[h >> 5] >> ((h & 31) * 2)) ;
       if (v == 3)
@@ -95,8 +95,19 @@ struct prunetable {
       else
          return 2 - v + baseval ;
    }
-   void prefetch(ull h) const {
-      __builtin_prefetch(mem+((indexhash(h)) >> 5)) ;
+   int lookuphindexed(ull h) const {
+      int v = 3 & (mem[h >> 5] >> ((h & 31) * 2)) ;
+      if (v == 3)
+         return (mem[(h >> 5) & ~7] & 15) - 1 ;
+      else
+         return 2 - v + baseval ;
+   }
+   void prefetch(ull h) const { // deprecate this
+      __builtin_prefetch(mem+(indexhash(h) >> 5)) ;
+   }
+   ull prefetchindexed(ull h) const {
+      __builtin_prefetch(mem+(h >> 5)) ;
+      return h ;
    }
    ull indexhash(ull lowb) const {
       ull h = lowb ;
@@ -108,19 +119,17 @@ struct prunetable {
    ull indexhash(int n, const setval sv) const {
       return indexhash(fasthash(n, sv)) ;
    }
-   int lookup(const setval sv, setval *looktmp) const {
-      ull h ;
+   ull gethashforlookup(const setval sv, setval *looktmp) const {
       if ((int)pdp->rotgroup.size() > 1) {
          slowmodm2(*pdp, sv, *looktmp) ;
-         h = indexhash(totsize, *looktmp) ;
+         return indexhash(totsize, *looktmp) ;
       } else {
-         h = indexhash(totsize, sv) ;
+         return indexhash(totsize, sv) ;
       }
-      int v = 3 & (mem[h >> 5] >> ((h & 31) * 2)) ;
-      if (v == 3)
-         return (mem[(h >> 5) & ~7] & 15) - 1 ;
-      else
-         return 2 - v + baseval ;
+   }
+   int lookup(const setval sv, setval *looktmp) const {
+      ull h = gethashforlookup(sv, looktmp) ;
+      return lookuphindexed(h) ;
    }
    void addlookups(ull lookups) {
       lookupcnt += lookups ;
diff --git a/src/cpp/solve.cpp b/src/cpp/solve.cpp
index 084664a5..96182404 100644
--- a/src/cpp/solve.cpp
+++ b/src/cpp/solve.cpp
@@ -67,68 +67,79 @@ int solveworker::possibsolution(const puzdef &pd, int sp) {
    return 0 ;
 }
 int solveworker::solveiter(const puzdef &pd, prunetable &pt, int togo, int sp, int st) {
-   int v, mi, m ;
-   ull mask, skipbase ;
+   ull h = innersetup(pt, sp) ;
    while (1) {
-      lookups++ ;
-      v = pt.lookup(posns[sp], looktmp) ;
-      if (v > togo + 1) {
-         v = -1 ;
-      } else if (v > togo) {
-         v = 0 ;
-      } else if (v == 0 && togo == 1 && didprepass && pd.comparepos(posns[sp], pd.solved) == 0) {
-         v = 0 ;
-      } else if (v == 0 && togo > 0 && noearlysolutions && pd.comparepos(posns[sp], pd.solved) == 0) {
-         v = 0 ;
-      } else if (togo == 0) {
-         v = possibsolution(pd, sp) ;
-      } else {
-         mask = canonmask[st] ;
-         skipbase = 0 ;
-         mi = -1 ;
-         goto downstack ;
-      }
-upstack:
-      if (solvestates.size() == 0)
+      int v = innerfetch(pd, pt, togo, sp, st, h) ;
+      if (v != 3)
          return v ;
-      {
-         auto &ss = solvestates[solvestates.size()-1] ;
-         togo++ ;
-         sp-- ;
-         st = ss.st ;
-         mi = ss.mi ;
-         mask = ss.mask ;
-         skipbase = ss.skipbase ;
-      }
-      solvestates.pop_back() ;
-      if (v == 1)
-         goto upstack ;
-      if (!quarter && v == -1) {
-         m = randomstart ? randomized[togo][mi] : mi ;
-         if (pd.moves[m].base < 64)
-            skipbase |= 1LL << pd.moves[m].base ;
-      }
-downstack:
-      mi++ ;
-      if (mi >= (int)pd.moves.size()) {
-         v = 0 ;
-         goto upstack ;
-      }
+      h = innersetup(pt, sp) ;
+   }
+}
+ull solveworker::innersetup(prunetable &pt, int sp) {
+   lookups++ ;
+   return pt.prefetchindexed(pt.gethashforlookup(posns[sp], looktmp)) ;
+}
+int solveworker::innerfetch(const puzdef &pd, prunetable &pt, int &togo, int &sp, int &st, ull h) {
+   int v = pt.lookuphindexed(h) ;
+   int m, mi ;
+   ull mask, skipbase ;
+   if (v > togo + 1) {
+      v = -1 ;
+   } else if (v > togo) {
+      v = 0 ;
+   } else if (v == 0 && togo == 1 && didprepass && pd.comparepos(posns[sp], pd.solved) == 0) {
+      v = 0 ;
+   } else if (v == 0 && togo > 0 && noearlysolutions && pd.comparepos(posns[sp], pd.solved) == 0) {
+      v = 0 ;
+   } else if (togo == 0) {
+      v = possibsolution(pd, sp) ;
+   } else {
+      mask = canonmask[st] ;
+      skipbase = 0 ;
+      mi = -1 ;
+      goto downstack ;
+   }
+upstack:
+   if (solvestates.size() == 0)
+      return v ;
+   {
+      auto &ss = solvestates[solvestates.size()-1] ;
+      togo++ ;
+      sp-- ;
+      st = ss.st ;
+      mi = ss.mi ;
+      mask = ss.mask ;
+      skipbase = ss.skipbase ;
+   }
+   solvestates.pop_back() ;
+   if (v == 1)
+      goto upstack ;
+   if (!quarter && v == -1) {
       m = randomstart ? randomized[togo][mi] : mi ;
-      const moove &mv = pd.moves[m] ;
-      if (!quarter && mv.base < 64 && ((skipbase >> mv.base) & 1))
-         goto downstack ;
-      if ((mask >> mv.cs) & 1)
-         goto downstack ;
-      pd.mul(posns[sp], mv.pos, posns[sp+1]) ;
-      if (!pd.legalstate(posns[sp+1]))
-         goto downstack ;
-      movehist[sp] = m ;
-      solvestates.push_back({st, mi, mask, skipbase}) ;
-      togo-- ;
-      sp++ ;
-      st = canonnext[st][mv.cs] ;
+      if (pd.moves[m].base < 64)
+         skipbase |= 1LL << pd.moves[m].base ;
+   }
+downstack:
+   mi++ ;
+   if (mi >= (int)pd.moves.size()) {
+      v = 0 ;
+      goto upstack ;
    }
+   m = randomstart ? randomized[togo][mi] : mi ;
+   const moove &mv = pd.moves[m] ;
+   if (!quarter && mv.base < 64 && ((skipbase >> mv.base) & 1))
+      goto downstack ;
+   if ((mask >> mv.cs) & 1)
+      goto downstack ;
+   pd.mul(posns[sp], mv.pos, posns[sp+1]) ;
+   if (!pd.legalstate(posns[sp+1]))
+      goto downstack ;
+   movehist[sp] = m ;
+   solvestates.push_back({st, mi, mask, skipbase}) ;
+   togo-- ;
+   sp++ ;
+   st = canonnext[st][mv.cs] ;
+   return 3 ;
 }
 int solveworker::solvestart(const puzdef &pd, prunetable &pt, int w) {
    ull initmoves = workchunks[w] ;
diff --git a/src/cpp/solve.h b/src/cpp/solve.h
index f064b37e..5e522f94 100644
--- a/src/cpp/solve.h
+++ b/src/cpp/solve.h
@@ -30,6 +30,8 @@ struct solveworker {
    char padding[256] ; // kill false sharing
    void init(const puzdef &pd, int d_, int id_, const setval &p) ;
    int solveiter(const puzdef &pd, prunetable &pt, int togo, int sp, int st) ;
+   ull innersetup(prunetable &pt, int sp) ;
+   int innerfetch(const puzdef &pd, prunetable &pt, int &togo, int &sp, int &st, ull h) ;
    int possibsolution(const puzdef &pd, int sp) ;
    int solvestart(const puzdef &pd, prunetable &pt, int w) ;
    void dowork(const puzdef &pd, prunetable &pt) ;