Skip to content

Commit

Permalink
Add AtRoot versions of FpuStrategy and FpuValue (and convert FpuReduc…
Browse files Browse the repository at this point in the history
…tion to FpuValue).
  • Loading branch information
Mardak committed Mar 8, 2019
1 parent 9e6b028 commit 2f9f00f
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 28 deletions.
47 changes: 28 additions & 19 deletions src/mcts/params.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,24 +109,27 @@ const OptionId SearchParams::kSmartPruningFactorId{
"pruning is deactivated."};
// NOTE(review): this span comes from a diff view rendered without +/- markers,
// so it interleaves what appear to be pre-commit lines (the old FpuStrategy
// help text, the whole kFpuReductionId definition, and the old FpuValue help
// text) with their post-commit replacements. Confirm against the repository
// before treating it as compilable code.
//
// Option id and help text for --fpu-strategy, which selects how the eval of an
// unvisited node is derived ("reduction" or "absolute").
const OptionId SearchParams::kFpuStrategyId{
"fpu-strategy", "FpuStrategy",
"How is an eval of unvisited node determined. \"reduction\" subtracts "
"--fpu-reduction value from the parent eval. \"absolute\" sets eval of "
"unvisited nodes to the value specified in --fpu-value."};
// TODO(crem) Make FPU in "reduction" mode use fpu-value too. For now it's kept
// for backwards compatibility.
// Option id and help text for --fpu-reduction. Presumably removed by this
// commit in favour of --fpu-value; verify against the repository.
const OptionId SearchParams::kFpuReductionId{
"fpu-reduction", "FpuReduction",
"\"First Play Urgency\" reduction (used when FPU strategy is "
"\"reduction\"). Normally when a move has no visits, "
"it's eval is assumed to be equal to parent's eval. With non-zero FPU "
"reduction, eval of unvisited move is decreased by that value, "
"discouraging visits of unvisited moves, and saving those visits for "
"(hopefully) more promising moves."};
"How is an eval of unvisited node determined. \"First Play Urgency\" "
"changes search behavior to visit unvisited nodes earlier or later by "
"using a placeholder eval before checking the network. The value specified "
"with --fpu-value results in \"reduction\" subtracting that value from the "
"parent eval while \"absolute\" directly uses that value."};
// Option id and help text for --fpu-value, the number consumed by whichever
// --fpu-strategy is selected.
const OptionId SearchParams::kFpuValueId{
"fpu-value", "FpuValue",
"\"First Play Urgency\" value. When FPU strategy is \"absolute\", value of "
"unvisited node is assumed to be equal to this value, and does not depend "
"on parent eval."};
"\"First Play Urgency\" value used to adjust unvisited node eval based on "
"--fpu-strategy."};
// Option id and help text for --fpu-strategy-at-root: the FPU strategy applied
// only to the root's unvisited children. Besides the regular strategies it
// accepts "same", which turns the root-specific handling off.
const OptionId SearchParams::kFpuStrategyAtRootId{
    "fpu-strategy-at-root",
    "FpuStrategyAtRoot",
    "How is an eval of unvisited root children determined. "
    "Just like --fpu-strategy except only at the root level and adjusts "
    "unvisited root children eval with --fpu-value-at-root. "
    "In addition to matching the strategies from --fpu-strategy, this can be "
    "\"same\" to disable the special root behavior."};
// Option id and help text for --fpu-value-at-root: the number used by the
// root-only FPU strategy. Ignored when that strategy is "same".
const OptionId SearchParams::kFpuValueAtRootId{
    "fpu-value-at-root",
    "FpuValueAtRoot",
    "\"First Play Urgency\" value used to adjust unvisited root children "
    "eval based on --fpu-strategy-at-root. "
    "Has no effect if --fpu-strategy-at-root is \"same\"."};
const OptionId SearchParams::kCacheHistoryLengthId{
"cache-history-length", "CacheHistoryLength",
"Length of history, in half-moves, to include into the cache key. When "
Expand Down Expand Up @@ -197,8 +200,10 @@ void SearchParams::Populate(OptionsParser* options) {
options->Add<FloatOption>(kSmartPruningFactorId, 0.0f, 10.0f) = 1.33f;
std::vector<std::string> fpu_strategy = {"reduction", "absolute"};
options->Add<ChoiceOption>(kFpuStrategyId, fpu_strategy) = "reduction";
options->Add<FloatOption>(kFpuReductionId, -100.0f, 100.0f) = 1.2f;
options->Add<FloatOption>(kFpuValueId, -1.0f, 1.0f) = -1.0f;
options->Add<FloatOption>(kFpuValueId, -100.0f, 100.0f) = 1.2f;
fpu_strategy.push_back("same");
options->Add<ChoiceOption>(kFpuStrategyAtRootId, fpu_strategy) = "absolute";
options->Add<FloatOption>(kFpuValueAtRootId, -100.0f, 100.0f) = 1.0f;
options->Add<IntOption>(kCacheHistoryLengthId, 0, 7) = 0;
options->Add<FloatOption>(kPolicySoftmaxTempId, 0.1f, 10.0f) = 2.2f;
options->Add<IntOption>(kMaxCollisionEventsId, 1, 1024) = 32;
Expand All @@ -225,8 +230,12 @@ SearchParams::SearchParams(const OptionsDict& options)
kSmartPruningFactor(options.Get<float>(kSmartPruningFactorId.GetId())),
kFpuAbsolute(options.Get<std::string>(kFpuStrategyId.GetId()) ==
"absolute"),
kFpuReduction(options.Get<float>(kFpuReductionId.GetId())),
kFpuValue(options.Get<float>(kFpuValueId.GetId())),
kFpuAbsoluteAtRoot(
options.Get<std::string>(kFpuStrategyAtRootId.GetId()) == "absolute"),
kFpuReductionAtRoot(options.Get<std::string>(
kFpuStrategyAtRootId.GetId()) == "reduction"),
kFpuValueAtRoot(options.Get<float>(kFpuValueAtRootId.GetId())),
kCacheHistoryLength(options.Get<int>(kCacheHistoryLengthId.GetId())),
kPolicySoftmaxTemp(options.Get<float>(kPolicySoftmaxTempId.GetId())),
kMaxCollisionEvents(options.Get<int>(kMaxCollisionEventsId.GetId())),
Expand Down
11 changes: 8 additions & 3 deletions src/mcts/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,10 @@ class SearchParams {
}
// Smart pruning factor captured from the options when this object was built.
float GetSmartPruningFactor() const {
  return kSmartPruningFactor;
}
// True when the FpuStrategy option was "absolute" at construction time.
bool GetFpuAbsolute() const {
  return kFpuAbsolute;
}
// Value of the FpuReduction option as read at construction time.
float GetFpuReduction() const {
  return kFpuReduction;
}
// Value of the FpuValue option as read at construction time.
float GetFpuValue() const {
  return kFpuValue;
}
// True when the FpuStrategyAtRoot option was "absolute" at construction time.
bool GetFpuAbsoluteAtRoot() const {
  return kFpuAbsoluteAtRoot;
}
// True when the FpuStrategyAtRoot option was "reduction" at construction time.
bool GetFpuReductionAtRoot() const {
  return kFpuReductionAtRoot;
}
// Value of the FpuValueAtRoot option as read at construction time.
float GetFpuValueAtRoot() const {
  return kFpuValueAtRoot;
}
// Value of the CacheHistoryLength option as read at construction time.
int GetCacheHistoryLength() const {
  return kCacheHistoryLength;
}
// Value of the PolicySoftmaxTemp option as read at construction time.
float GetPolicySoftmaxTemp() const {
  return kPolicySoftmaxTemp;
}
// Value of the MaxCollisionEvents option as read at construction time.
int GetMaxCollisionEvents() const {
  return kMaxCollisionEvents;
}
Expand Down Expand Up @@ -116,8 +118,9 @@ class SearchParams {
static const OptionId kLogLiveStatsId;
static const OptionId kSmartPruningFactorId;
static const OptionId kFpuStrategyId;
static const OptionId kFpuReductionId;
static const OptionId kFpuValueId;
static const OptionId kFpuStrategyAtRootId;
static const OptionId kFpuValueAtRootId;
static const OptionId kCacheHistoryLengthId;
static const OptionId kPolicySoftmaxTempId;
static const OptionId kMaxCollisionEventsId;
Expand All @@ -144,8 +147,10 @@ class SearchParams {
const bool kNoise;
const float kSmartPruningFactor;
const bool kFpuAbsolute;
const float kFpuReduction;
const float kFpuValue;
const bool kFpuAbsoluteAtRoot;
const bool kFpuReductionAtRoot;
const float kFpuValueAtRoot;
const int kCacheHistoryLength;
const float kPolicySoftmaxTemp;
const int kMaxCollisionEvents;
Expand Down
14 changes: 9 additions & 5 deletions src/mcts/search.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,13 +189,17 @@ int64_t Search::GetTimeToDeadline() const {

namespace {
// Returns the "first play urgency" placeholder eval used for children of
// `node` that have not been visited yet.
//   params       - search parameters supplying the FPU strategy/value getters.
//   node         - parent node; its Q and visited policy feed "reduction" mode.
//   is_root_node - when true, the AtRoot strategy is consulted first.
// NOTE(review): this hunk is rendered without +/- markers, so the final return
// statement interleaves what appear to be removed lines (the GetNoise() /
// GetFpuReduction() branches) with their replacement (the last
// `: -node->GetQ() - params.GetFpuValue() * ...` branch). Confirm against the
// repository before treating it as compilable code.
inline float GetFpu(const SearchParams& params, Node* node, bool is_root_node) {
// Use root FPU behavior unless it's "same"
// "absolute" at root returns the root value directly; "reduction" at root
// subtracts the root value, scaled by sqrt(visited policy), from the negated
// parent Q. If neither flag is set, control falls through to the general rule.
if (is_root_node) {
if (params.GetFpuAbsoluteAtRoot()) return params.GetFpuValueAtRoot();
if (params.GetFpuReductionAtRoot())
return -node->GetQ() -
params.GetFpuValueAtRoot() * std::sqrt(node->GetVisitedPolicy());
}
// General rule: "absolute" uses the configured value as-is; otherwise the
// value is subtracted, scaled by sqrt(visited policy), from the negated
// parent Q.
return params.GetFpuAbsolute()
? params.GetFpuValue()
: ((is_root_node && params.GetNoise()) ||
!params.GetFpuReduction())
? -node->GetQ()
: -node->GetQ() - params.GetFpuReduction() *
std::sqrt(node->GetVisitedPolicy());
: -node->GetQ() -
params.GetFpuValue() * std::sqrt(node->GetVisitedPolicy());
}

inline float ComputeCpuct(const SearchParams& params, uint32_t N) {
Expand Down
4 changes: 3 additions & 1 deletion src/selfplay/tournament.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,9 @@ void SelfPlayTournament::PopulateOptions(OptionsParser* options) {
defaults->Set<float>(SearchParams::kSmartPruningFactorId.GetId(), 0.0f);
defaults->Set<float>(SearchParams::kTemperatureId.GetId(), 1.0f);
defaults->Set<bool>(SearchParams::kNoiseId.GetId(), true);
defaults->Set<float>(SearchParams::kFpuReductionId.GetId(), 0.0f);
defaults->Set<float>(SearchParams::kFpuValueId.GetId(), 0.0f);
defaults->Set<std::string>(SearchParams::kFpuStrategyAtRootId.GetId(),
"same");
defaults->Set<std::string>(SearchParams::kHistoryFillId.GetId(), "no");
defaults->Set<std::string>(NetworkFactory::kBackendId.GetId(),
"multiplexing");
Expand Down

0 comments on commit 2f9f00f

Please sign in to comment.