From b48a030ed60e31710a74ce652b5a04efead47b71 Mon Sep 17 00:00:00 2001
From: Dustin Morrill
Date: Sun, 18 Aug 2024 01:38:52 -0600
Subject: [PATCH 1/3] fix #1207

---
 open_spiel/python/algorithms/rcfr_test.py  | 36 ++++++++++++++++------
 open_spiel/python/examples/rcfr_example.py | 11 ++++---
 2 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/open_spiel/python/algorithms/rcfr_test.py b/open_spiel/python/algorithms/rcfr_test.py
index 1681747b29..106efdb384 100644
--- a/open_spiel/python/algorithms/rcfr_test.py
+++ b/open_spiel/python/algorithms/rcfr_test.py
@@ -37,7 +37,7 @@ def _new_model():
   return rcfr.DeepRcfrModel(
       _GAME,
       num_hidden_layers=1,
-      num_hidden_units=13,
+      num_hidden_units=26,
       num_hidden_factors=1,
       use_skip_connections=True)
 
@@ -476,12 +476,16 @@ def test_rcfr_functions(self):
       data = data.batch(12)
       data = data.repeat(num_epochs)
 
-      optimizer = tf.keras.optimizers.Adam(lr=0.005, amsgrad=True)
+      optimizer = tf.keras.optimizers.Adam(learning_rate=0.005, amsgrad=True)
+      model = models[regret_player]
 
       for x, y in data:
-        optimizer.minimize(
-            lambda: tf.losses.huber_loss(y, models[regret_player](x)),  # pylint: disable=cell-var-from-loop
-            models[regret_player].trainable_variables)
+        with tf.GradientTape() as tape:
+          loss = tf.losses.huber_loss(y, model(x))
+        optimizer.apply_gradients(
+            zip(
+                tape.gradient(loss, model.trainable_variables),
+                model.trainable_variables))
 
       regret_player = reach_weights_player
 
@@ -504,12 +508,15 @@ def _train(model, data):
       data = data.batch(12)
       data = data.repeat(num_epochs)
 
-      optimizer = tf.keras.optimizers.Adam(lr=0.005, amsgrad=True)
+      optimizer = tf.keras.optimizers.Adam(learning_rate=0.005, amsgrad=True)
 
       for x, y in data:
-        optimizer.minimize(
-            lambda: tf.losses.huber_loss(y, model(x)),  # pylint: disable=cell-var-from-loop
-            model.trainable_variables)
+        with tf.GradientTape() as tape:
+          loss = tf.losses.huber_loss(y, model(x))
+        optimizer.apply_gradients(
+            zip(
+                tape.gradient(loss, model.trainable_variables),
+                model.trainable_variables))
 
     average_policy = patient.average_policy()
     self.assertGreater(pyspiel.nash_conv(_GAME, average_policy), 0.91)
@@ -565,12 +572,15 @@ def _train(model, data):
       data = data.batch(12)
       data = data.repeat(num_epochs)
 
-      optimizer = tf.keras.optimizers.Adam(lr=0.005, amsgrad=True)
+      optimizer = tf.keras.optimizers.Adam(learning_rate=0.005, amsgrad=True)
 
       for x, y in data:
-        optimizer.minimize(
-            lambda: tf.losses.huber_loss(y, model(x)),  # pylint: disable=cell-var-from-loop
-            model.trainable_variables)
+        with tf.GradientTape() as tape:
+          loss = tf.losses.huber_loss(y, model(x))
+        optimizer.apply_gradients(
+            zip(
+                tape.gradient(loss, model.trainable_variables),
+                model.trainable_variables))
 
     average_policy = patient.average_policy()
     self.assertGreater(pyspiel.nash_conv(_GAME, average_policy), 0.91)
diff --git a/open_spiel/python/examples/rcfr_example.py b/open_spiel/python/examples/rcfr_example.py
index 41f385a6a8..20b3f267c4 100644
--- a/open_spiel/python/examples/rcfr_example.py
+++ b/open_spiel/python/examples/rcfr_example.py
@@ -87,14 +87,17 @@ def _train_fn(model, data):
   data = data.batch(FLAGS.batch_size)
   data = data.repeat(FLAGS.num_epochs)
 
-  optimizer = tf.keras.optimizers.Adam(lr=FLAGS.step_size, amsgrad=True)
+  optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.step_size, amsgrad=True)
 
   @tf.function
   def _train():
     for x, y in data:
-      optimizer.minimize(
-          lambda: tf.losses.huber_loss(y, model(x), delta=0.01),  # pylint: disable=cell-var-from-loop
-          model.trainable_variables)
+      with tf.GradientTape() as tape:
+        loss = tf.losses.huber_loss(y, model(x), delta=0.01)
+      optimizer.apply_gradients(
+          zip(
+              tape.gradient(loss, model.trainable_variables),
+              model.trainable_variables))
 
   _train()
 

From b47255eca2b916ce2198a64b450a2230e52e33ea Mon Sep 17 00:00:00 2001
From: Dustin Morrill
Date: Mon, 19 Aug 2024 13:18:51 -0600
Subject: [PATCH 2/3] re-enable CI test

---
 open_spiel/python/CMakeLists.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/open_spiel/python/CMakeLists.txt b/open_spiel/python/CMakeLists.txt
index f0f5b18d73..a36587fd9b 100644
--- a/open_spiel/python/CMakeLists.txt
+++ b/open_spiel/python/CMakeLists.txt
@@ -311,8 +311,7 @@ if (OPEN_SPIEL_ENABLE_TENSORFLOW)
     algorithms/nfsp_test.py
     algorithms/policy_gradient_test.py
     algorithms/psro_v2/strategy_selectors_test.py
-    # Broken in Python 3.12. Must port to Keras 3. https://github.com/google-deepmind/open_spiel/issues/1207.
-    # algorithms/rcfr_test.py
+    algorithms/rcfr_test.py
   )
   if (OPEN_SPIEL_ENABLE_PYTHON_MISC)
     set(PYTHON_TESTS ${PYTHON_TESTS}

From 9164f7595cacfeb6c132d38b45322e97197a277e Mon Sep 17 00:00:00 2001
From: Dustin Morrill
Date: Mon, 19 Aug 2024 16:43:06 -0600
Subject: [PATCH 3/3] make flaky test more robust by using a full rather than
 a factored layer

---
 open_spiel/python/algorithms/rcfr_test.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/open_spiel/python/algorithms/rcfr_test.py b/open_spiel/python/algorithms/rcfr_test.py
index 106efdb384..ea7b48881d 100644
--- a/open_spiel/python/algorithms/rcfr_test.py
+++ b/open_spiel/python/algorithms/rcfr_test.py
@@ -37,8 +37,7 @@ def _new_model():
   return rcfr.DeepRcfrModel(
       _GAME,
       num_hidden_layers=1,
-      num_hidden_units=26,
-      num_hidden_factors=1,
+      num_hidden_units=13,
       use_skip_connections=True)
 
 
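Note (not part of the patches above): every training loop in this series changes the same way, replacing the removed TF1-style optimizer.minimize(loss_fn, var_list) call and its deprecated lr keyword with an explicit tf.GradientTape update and the learning_rate keyword. Below is a minimal, self-contained sketch of that tape-based pattern; the toy model, batch shapes, and the tf.keras.losses.Huber loss object are illustrative stand-ins, not code from the patch, which trains rcfr.DeepRcfrModel on batched regret datasets.

import tensorflow as tf

# Toy stand-ins for the model and data the patch actually uses.
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
optimizer = tf.keras.optimizers.Adam(learning_rate=0.005, amsgrad=True)
huber = tf.keras.losses.Huber()
x = tf.random.normal([12, 4])
y = tf.random.normal([12, 1])

# Record the forward pass on the tape, then compute and apply the
# gradients explicitly -- the TF2 equivalent of optimizer.minimize.
with tf.GradientTape() as tape:
  loss = huber(y, model(x))
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))

The gradient can be taken inside or after the with block; since the tape here is non-persistent, tape.gradient may be called exactly once, which is all a single update step needs.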