Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AtRoot versions of FpuStrategy and FpuValue (and convert FpuReduction to FpuValue). #750

Merged
merged 2 commits into from
Mar 9, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 28 additions & 19 deletions src/mcts/params.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,24 +109,27 @@ const OptionId SearchParams::kSmartPruningFactorId{
"pruning is deactivated."};
const OptionId SearchParams::kFpuStrategyId{
"fpu-strategy", "FpuStrategy",
"How is an eval of unvisited node determined. \"reduction\" subtracts "
"--fpu-reduction value from the parent eval. \"absolute\" sets eval of "
"unvisited nodes to the value specified in --fpu-value."};
// TODO(crem) Make FPU in "reduction" mode use fpu-value too. For now it's kept
// for backwards compatibility.
const OptionId SearchParams::kFpuReductionId{
"fpu-reduction", "FpuReduction",
"\"First Play Urgency\" reduction (used when FPU strategy is "
"\"reduction\"). Normally when a move has no visits, "
"it's eval is assumed to be equal to parent's eval. With non-zero FPU "
"reduction, eval of unvisited move is decreased by that value, "
"discouraging visits of unvisited moves, and saving those visits for "
"(hopefully) more promising moves."};
"How is an eval of unvisited node determined. \"First Play Urgency\" "
"changes search behavior to visit unvisited nodes earlier or later by "
"using a placeholder eval before checking the network. The value specified "
"with --fpu-value results in \"reduction\" subtracting that value from the "
"parent eval while \"absolute\" directly uses that value."};
const OptionId SearchParams::kFpuValueId{
"fpu-value", "FpuValue",
"\"First Play Urgency\" value. When FPU strategy is \"absolute\", value of "
"unvisited node is assumed to be equal to this value, and does not depend "
"on parent eval."};
"\"First Play Urgency\" value used to adjust unvisited node eval based on "
"--fpu-strategy."};
// Option id for the root-only counterpart of --fpu-strategy. Per its help
// text, it takes the same strategy names as --fpu-strategy plus "same", which
// disables the special treatment of unvisited root children. The value it
// pairs with is --fpu-value-at-root.
const OptionId SearchParams::kFpuStrategyAtRootId{
"fpu-strategy-at-root", "FpuStrategyAtRoot",
"How is an eval of unvisited root children determined. Just like "
"--fpu-strategy except only at the root level and adjusts unvisited root "
"children eval with --fpu-value-at-root. In addition to matching the "
"strategies from --fpu-strategy, this can be \"same\" to disable the "
"special root behavior."};
// Option id for the FPU value applied to unvisited root children. Per its help
// text, the value is interpreted according to --fpu-strategy-at-root and is
// ignored when that strategy is "same".
const OptionId SearchParams::kFpuValueAtRootId{
"fpu-value-at-root", "FpuValueAtRoot",
"\"First Play Urgency\" value used to adjust unvisited root children eval "
"based on --fpu-strategy-at-root. Has no effect if --fpu-strategy-at-root "
"is \"same\"."};
const OptionId SearchParams::kCacheHistoryLengthId{
"cache-history-length", "CacheHistoryLength",
"Length of history, in half-moves, to include into the cache key. When "
Expand Down Expand Up @@ -197,8 +200,10 @@ void SearchParams::Populate(OptionsParser* options) {
options->Add<FloatOption>(kSmartPruningFactorId, 0.0f, 10.0f) = 1.33f;
std::vector<std::string> fpu_strategy = {"reduction", "absolute"};
options->Add<ChoiceOption>(kFpuStrategyId, fpu_strategy) = "reduction";
options->Add<FloatOption>(kFpuReductionId, -100.0f, 100.0f) = 1.2f;
options->Add<FloatOption>(kFpuValueId, -1.0f, 1.0f) = -1.0f;
options->Add<FloatOption>(kFpuValueId, -100.0f, 100.0f) = 1.2f;
fpu_strategy.push_back("same");
options->Add<ChoiceOption>(kFpuStrategyAtRootId, fpu_strategy) = "absolute";
Mardak marked this conversation as resolved.
Show resolved Hide resolved
options->Add<FloatOption>(kFpuValueAtRootId, -100.0f, 100.0f) = 1.0f;
options->Add<IntOption>(kCacheHistoryLengthId, 0, 7) = 0;
options->Add<FloatOption>(kPolicySoftmaxTempId, 0.1f, 10.0f) = 2.2f;
options->Add<IntOption>(kMaxCollisionEventsId, 1, 1024) = 32;
Expand All @@ -225,8 +230,12 @@ SearchParams::SearchParams(const OptionsDict& options)
kSmartPruningFactor(options.Get<float>(kSmartPruningFactorId.GetId())),
kFpuAbsolute(options.Get<std::string>(kFpuStrategyId.GetId()) ==
"absolute"),
kFpuReduction(options.Get<float>(kFpuReductionId.GetId())),
kFpuValue(options.Get<float>(kFpuValueId.GetId())),
kFpuAbsoluteAtRoot(
options.Get<std::string>(kFpuStrategyAtRootId.GetId()) == "absolute"),
kFpuReductionAtRoot(options.Get<std::string>(
kFpuStrategyAtRootId.GetId()) == "reduction"),
kFpuValueAtRoot(options.Get<float>(kFpuValueAtRootId.GetId())),
kCacheHistoryLength(options.Get<int>(kCacheHistoryLengthId.GetId())),
kPolicySoftmaxTemp(options.Get<float>(kPolicySoftmaxTempId.GetId())),
kMaxCollisionEvents(options.Get<int>(kMaxCollisionEventsId.GetId())),
Expand Down
11 changes: 8 additions & 3 deletions src/mcts/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,10 @@ class SearchParams {
}
float GetSmartPruningFactor() const { return kSmartPruningFactor; }
bool GetFpuAbsolute() const { return kFpuAbsolute; }
float GetFpuReduction() const { return kFpuReduction; }
float GetFpuValue() const { return kFpuValue; }
bool GetFpuAbsoluteAtRoot() const { return kFpuAbsoluteAtRoot; }
bool GetFpuReductionAtRoot() const { return kFpuReductionAtRoot; }
float GetFpuValueAtRoot() const { return kFpuValueAtRoot; }
int GetCacheHistoryLength() const { return kCacheHistoryLength; }
float GetPolicySoftmaxTemp() const { return kPolicySoftmaxTemp; }
int GetMaxCollisionEvents() const { return kMaxCollisionEvents; }
Expand Down Expand Up @@ -116,8 +118,9 @@ class SearchParams {
static const OptionId kLogLiveStatsId;
static const OptionId kSmartPruningFactorId;
static const OptionId kFpuStrategyId;
static const OptionId kFpuReductionId;
static const OptionId kFpuValueId;
static const OptionId kFpuStrategyAtRootId;
static const OptionId kFpuValueAtRootId;
static const OptionId kCacheHistoryLengthId;
static const OptionId kPolicySoftmaxTempId;
static const OptionId kMaxCollisionEventsId;
Expand All @@ -144,8 +147,10 @@ class SearchParams {
const bool kNoise;
const float kSmartPruningFactor;
const bool kFpuAbsolute;
const float kFpuReduction;
const float kFpuValue;
const bool kFpuAbsoluteAtRoot;
const bool kFpuReductionAtRoot;
const float kFpuValueAtRoot;
const int kCacheHistoryLength;
const float kPolicySoftmaxTemp;
const int kMaxCollisionEvents;
Expand Down
14 changes: 9 additions & 5 deletions src/mcts/search.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,13 +189,17 @@ int64_t Search::GetTimeToDeadline() const {

namespace {
inline float GetFpu(const SearchParams& params, Node* node, bool is_root_node) {
// Use root FPU behavior unless it's "same"
if (is_root_node) {
if (params.GetFpuAbsoluteAtRoot()) return params.GetFpuValueAtRoot();
if (params.GetFpuReductionAtRoot())
return -node->GetQ() -
params.GetFpuValueAtRoot() * std::sqrt(node->GetVisitedPolicy());
}
return params.GetFpuAbsolute()
Mardak marked this conversation as resolved.
Show resolved Hide resolved
? params.GetFpuValue()
: ((is_root_node && params.GetNoise()) ||
!params.GetFpuReduction())
? -node->GetQ()
: -node->GetQ() - params.GetFpuReduction() *
std::sqrt(node->GetVisitedPolicy());
: -node->GetQ() -
params.GetFpuValue() * std::sqrt(node->GetVisitedPolicy());
}

inline float ComputeCpuct(const SearchParams& params, uint32_t N) {
Expand Down
4 changes: 3 additions & 1 deletion src/selfplay/tournament.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,9 @@ void SelfPlayTournament::PopulateOptions(OptionsParser* options) {
defaults->Set<float>(SearchParams::kSmartPruningFactorId.GetId(), 0.0f);
defaults->Set<float>(SearchParams::kTemperatureId.GetId(), 1.0f);
defaults->Set<bool>(SearchParams::kNoiseId.GetId(), true);
defaults->Set<float>(SearchParams::kFpuReductionId.GetId(), 0.0f);
defaults->Set<float>(SearchParams::kFpuValueId.GetId(), 0.0f);
defaults->Set<std::string>(SearchParams::kFpuStrategyAtRootId.GetId(),
"same");
defaults->Set<std::string>(SearchParams::kHistoryFillId.GetId(), "no");
defaults->Set<std::string>(NetworkFactory::kBackendId.GetId(),
"multiplexing");
Expand Down