From ebefd45097c4e4a2821e4cbc48f3f02665df5e1b Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Fri, 30 Apr 2021 19:40:20 +0100 Subject: [PATCH 001/141] [datasets] Update AnghaBench to v1. anghabench-v1 uses an amended manifest, but is otherwise the same. --- compiler_gym/envs/llvm/datasets/__init__.py | 19 +++++++++++++ compiler_gym/envs/llvm/datasets/anghabench.py | 28 +++++++++++++------ docs/source/llvm/index.rst | 2 +- tests/llvm/datasets/anghabench_test.py | 14 +++++----- tests/llvm/datasets/llvm_datasets_test.py | 2 +- 5 files changed, 47 insertions(+), 18 deletions(-) diff --git a/compiler_gym/envs/llvm/datasets/__init__.py b/compiler_gym/envs/llvm/datasets/__init__.py index e83cf0d6a..74254497c 100644 --- a/compiler_gym/envs/llvm/datasets/__init__.py +++ b/compiler_gym/envs/llvm/datasets/__init__.py @@ -212,6 +212,25 @@ def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset site_data_base = site_data_base or site_data_path("llvm-v0") yield AnghaBenchDataset(site_data_base=site_data_base, sort_order=0) + # Add legacy version of Anghabench using an old manifest. + anghabench_v0_manifest_url, anghabench_v0_manifest_sha256 = { + "darwin": ( + "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-macos-manifest.bz2", + "39464256405aacefdb7550a7f990c9c578264c132804eec3daac091fa3c21bd1", + ), + "linux": ( + "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-linux-manifest.bz2", + "a038d25d39ee9472662a9704dfff19c9e3512ff6a70f1067af85c5cb3784b477", + ), + }[sys.platform] + yield AnghaBenchDataset( + name="benchmark://anghabench-v0", + site_data_base=site_data_base, + sort_order=0, + manifest_url=anghabench_v0_manifest_url, + manifest_sha256=anghabench_v0_manifest_sha256, + deprecated="Please use anghabench-v1", + ) yield BlasDataset(site_data_base=site_data_base, sort_order=0) yield CLgenDataset(site_data_base=site_data_base, sort_order=0) yield CBenchDataset(site_data_base=site_data_base, sort_order=-1) diff --git a/compiler_gym/envs/llvm/datasets/anghabench.py b/compiler_gym/envs/llvm/datasets/anghabench.py index bfcb46a65..ecee29f6a 100644 --- a/compiler_gym/envs/llvm/datasets/anghabench.py +++ b/compiler_gym/envs/llvm/datasets/anghabench.py @@ -6,6 +6,7 @@ import sys from concurrent.futures import as_completed from pathlib import Path +from typing import Optional from compiler_gym.datasets import Benchmark, TarDatasetWithManifest from compiler_gym.datasets.benchmark import BenchmarkWithSource @@ -38,19 +39,27 @@ class AnghaBenchDataset(TarDatasetWithManifest): overhead of compiling it from C to bitcode. This is a one-off cost. 
""" - def __init__(self, site_data_base: Path, sort_order: int = 0): - manifest_url, manifest_sha256 = { + def __init__( + self, + site_data_base: Path, + sort_order: int = 0, + manifest_url: Optional[str] = None, + manifest_sha256: Optional[str] = None, + deprecated: Optional[str] = None, + name: Optional[str] = None, + ): + manifest_url_, manifest_sha256_ = { "darwin": ( - "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-macos-manifest.bz2", - "39464256405aacefdb7550a7f990c9c578264c132804eec3daac091fa3c21bd1", + "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v1-macos-manifest.bz2", + "96ead63da5f8efa07fd0370f0c6e452b59bed840828b8b19402102b1ce3ee109", ), "linux": ( - "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-linux-manifest.bz2", - "a038d25d39ee9472662a9704dfff19c9e3512ff6a70f1067af85c5cb3784b477", + "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v1-linux-manifest.bz2", + "14df85f650199498cf769715e9f0d7841d09f9fa62a95b8ecc242bdaf227f33a", ), }[sys.platform] super().__init__( - name="benchmark://anghabench-v0", + name=name or "benchmark://anghabench-v1", description="Compile-only C/C++ functions extracted from GitHub", references={ "Paper": "https://homepages.dcc.ufmg.br/~fernando/publications/papers/FaustinoCGO21.pdf", @@ -58,8 +67,8 @@ def __init__(self, site_data_base: Path, sort_order: int = 0): }, license="Unknown. See: https://github.com/brenocfg/AnghaBench/issues/1", site_data_base=site_data_base, - manifest_urls=[manifest_url], - manifest_sha256=manifest_sha256, + manifest_urls=[manifest_url or manifest_url_], + manifest_sha256=manifest_sha256 or manifest_sha256_, tar_urls=[ "https://github.com/brenocfg/AnghaBench/archive/d8034ac8562b8c978376008f4b33df01b8887b19.tar.gz" ], @@ -68,6 +77,7 @@ def __init__(self, site_data_base: Path, sort_order: int = 0): tar_compression="gz", benchmark_file_suffix=".bc", sort_order=sort_order, + deprecated=deprecated, ) def benchmark(self, uri: str) -> Benchmark: diff --git a/docs/source/llvm/index.rst b/docs/source/llvm/index.rst index 17115cd6b..eb832d5b0 100644 --- a/docs/source/llvm/index.rst +++ b/docs/source/llvm/index.rst @@ -21,7 +21,7 @@ We provide several datasets of open-source LLVM-IR benchmarks for use: +----------------------------+--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+ | Dataset | Num. 
Benchmarks [#f1]_ | Description | Validatable [#f2]_ |
+============================+==========================+==================================================================================================================================================================================================================+======================+
-| benchmark://anghabench-v0 | 1,042,976 | Compile-only C/C++ functions extracted from GitHub [`Homepage `__, `Paper `__] | No |
+| benchmark://anghabench-v1 | 1,041,333 | Compile-only C/C++ functions extracted from GitHub [`Homepage `__, `Paper `__] | No |
+----------------------------+--------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+
| benchmark://blas-v0 | 300 | Basic linear algebra kernels [`Homepage `__, `Paper `__] | No |
+----------------------------+--------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+
diff --git a/tests/llvm/datasets/anghabench_test.py b/tests/llvm/datasets/anghabench_test.py
index 0d1026e82..bb4149dca 100644
--- a/tests/llvm/datasets/anghabench_test.py
+++ b/tests/llvm/datasets/anghabench_test.py
@@ -23,7 +23,7 @@ def anghabench_dataset() -> AnghaBenchDataset:
     env = gym.make("llvm-v0")
     try:
-        ds = env.datasets["anghabench-v0"]
+        ds = env.datasets["anghabench-v1"]
     finally:
         env.close()
     yield ds
@@ -31,9 +31,9 @@ def anghabench_dataset() -> AnghaBenchDataset:

 def test_anghabench_size(anghabench_dataset: AnghaBenchDataset):
     if sys.platform == "darwin":
-        assert anghabench_dataset.size == 1042908
+        assert anghabench_dataset.size == 1041265
     else:
-        assert anghabench_dataset.size == 1042976
+        assert anghabench_dataset.size == 1041333


 def test_missing_benchmark_name(anghabench_dataset: AnghaBenchDataset, mocker):
     mocker.patch.object(anghabench_dataset, "install")

     with pytest.raises(
-        LookupError, match=r"^No benchmark specified: benchmark://anghabench-v0$"
+        LookupError, match=r"^No benchmark specified: benchmark://anghabench-v1$"
     ):
-        anghabench_dataset.benchmark("benchmark://anghabench-v0")
+        anghabench_dataset.benchmark("benchmark://anghabench-v1")
     anghabench_dataset.install.assert_called_once()

     with pytest.raises(
-        LookupError, match=r"^No benchmark specified: benchmark://anghabench-v0/$"
+        LookupError, match=r"^No benchmark specified: benchmark://anghabench-v1/$"
     ):
-        anghabench_dataset.benchmark("benchmark://anghabench-v0/")
+        anghabench_dataset.benchmark("benchmark://anghabench-v1/")
     assert anghabench_dataset.install.call_count == 2

diff --git a/tests/llvm/datasets/llvm_datasets_test.py b/tests/llvm/datasets/llvm_datasets_test.py
index 495f17a6a..892d1b524 100644
--- a/tests/llvm/datasets/llvm_datasets_test.py
+++ b/tests/llvm/datasets/llvm_datasets_test.py
@@ -14,7 +14,7 @@ def test_default_dataset_list():
     try:
         assert list(d.name for d in env.datasets) == [
             "benchmark://cbench-v1",
-            "benchmark://anghabench-v0",
+            "benchmark://anghabench-v1",
             "benchmark://blas-v0",
             "benchmark://clgen-v0",
             "benchmark://github-v0",
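The net effect of the patch above: `benchmark://anghabench-v1` becomes the default AnghaBench dataset, while `benchmark://anghabench-v0` remains loadable from the old manifest and is marked deprecated. A minimal sketch of both paths, using the same `env.datasets` API that the tests above exercise (the deprecation message comes from the `deprecated=` argument; exactly how it surfaces to the user is an assumption here, not confirmed by this patch):

```py
import gym

import compiler_gym  # noqa: F401 (importing registers the llvm-v0 environment)

env = gym.make("llvm-v0")
try:
    # The default dataset list now resolves AnghaBench to the v1 manifest.
    anghabench = env.datasets["benchmark://anghabench-v1"]
    print(anghabench.size)  # 1,041,333 benchmarks on Linux, per the tests above

    # The old manifest is still reachable under the v0 name, but the dataset
    # is flagged as deprecated: "Please use anghabench-v1".
    legacy = env.datasets["benchmark://anghabench-v0"]
finally:
    env.close()
```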
From e135a48e707ca9d7b0e199ec6d6d3807b6045009 Mon Sep 17 00:00:00 2001
From: Chris Cummins
Date: Sun, 2 May 2021 18:23:14 +0100
Subject: [PATCH 002/141] [leaderboard] Flush results to CSV file immediately.

---
 compiler_gym/leaderboard/llvm_instcount.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler_gym/leaderboard/llvm_instcount.py b/compiler_gym/leaderboard/llvm_instcount.py
index 9d913bb4a..bbda9baa2 100644
--- a/compiler_gym/leaderboard/llvm_instcount.py
+++ b/compiler_gym/leaderboard/llvm_instcount.py
@@ -131,7 +131,7 @@ def run(self):
                 state = self.env.state.copy()
                 state.walltime = timer.time
-                writer.write_state(state)
+                writer.write_state(state, flush=True)
                 self.states.append(state)

                 if not self.alive:

From c5bd82a4936890de15b3eac6006c82f6bfed0624 Mon Sep 17 00:00:00 2001
From: Chris Cummins
Date: Sun, 2 May 2021 18:26:20 +0100
Subject: [PATCH 003/141] Add missing copyright header.

---
 tests/llvm/invalid_ir.ll | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/llvm/invalid_ir.ll b/tests/llvm/invalid_ir.ll
index ceea57466..fc76054e5 100644
--- a/tests/llvm/invalid_ir.ll
+++ b/tests/llvm/invalid_ir.ll
@@ -2,6 +2,11 @@
 ; This IR file can be assembled: $ llvm-as tests/llvm/invalid_ir.ll
 ; But it cannot be compiled: $ clang tests/llvm/invalid_ir.ll
 ; The error is: "error in backend: Cannot emit physreg copy instruction"
+;
+; Copyright (c) Facebook, Inc. and its affiliates.
+;
+; This source code is licensed under the MIT license found in the
+; LICENSE file in the root directory of this source tree.

 ; ModuleID = ''
 source_filename = "/tmp/autogen.bc"

From 9a5cb5aa5dba52b39e892a46796747fec7f9f877 Mon Sep 17 00:00:00 2001
From: Chris Cummins
Date: Mon, 3 May 2021 13:19:01 +0100
Subject: [PATCH 004/141] [README] Re-org the installation/usage structure.

Reduce the depth of the TOC and re-organize it so that installation
instructions come first, followed by usage.
---
 README.md | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index f6ee38e96..e1557e99f 100644
--- a/README.md
+++ b/README.md
@@ -37,10 +37,9 @@ developers to expose new optimization problems for AI.
 **Table of Contents**

 - [Features](#features)
-- [Getting Started](#getting-started)
-  - [Installation](#installation)
-  - [Building from Source](#building-from-source)
-  - [Trying it out](#trying-it-out)
+- [Installation](#installation)
+  - [Building from Source](#building-from-source)
+- [Usage](#usage)
 - [Leaderboards](#leaderboards)
   - [LLVM Instruction Count](#llvm-instruction-count)
 - [Contributing](#contributing)
@@ -81,15 +80,7 @@ features:
 For a glimpse of what's to come, check out [our roadmap](https://github.com/facebookresearch/CompilerGym/projects/1).

-# Getting Started
-
-Starting with CompilerGym is simple. If you not already familiar with the gym
-interface, refer to the
-[getting started guide](http://facebookresearch.github.io/CompilerGym/getting_started.html)
-for an overview of the key concepts.
-
-
-## Installation
+# Installation

 Install the latest CompilerGym release using:

@@ -98,12 +89,12 @@ Install the latest CompilerGym release using:

 The binary works on macOS and Linux (on Ubuntu 18.04, Fedora 28, Debian 10 or
 newer equivalents).

-### Building from Source
+## Building from Source

 If you prefer, you may build from source. This requires a modern C++ toolchain
 and bazel.
-#### macOS
+### macOS

 On macOS the required dependencies can be installed using
 [homebrew](https://docs.brew.sh/Installation):

@@ -117,7 +108,7 @@ export PKG_CONFIG_PATH="/usr/local/opt/zlib/lib/pkgconfig"

 Now proceed to [All platforms](#all-platforms) below.

-#### Linux
+### Linux

 On debian-based linux systems, install the required toolchain using:

@@ -130,7 +121,7 @@ export CC=clang
 export CXX=clang++
 ```

-#### All platforms
+### All platforms

 We recommend using
 [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/)
@@ -173,8 +164,12 @@ environment using:

 conda deactivate
 conda env remove -n compiler_gym

+# Usage

-## Trying it out
+Starting with CompilerGym is simple. If you are not already familiar with the
+gym interface, refer to the [getting started
+guide](http://facebookresearch.github.io/CompilerGym/getting_started.html) for
+an overview of the key concepts.

 In Python, import `compiler_gym` to use the environments:

From 168813780a155acf425202fb0d47dbe1a5e9e254 Mon Sep 17 00:00:00 2001
From: Chris Cummins
Date: Tue, 4 May 2021 11:02:46 +0100
Subject: [PATCH 005/141] [datasets] Remove fast-path for TarDataset.installed check.

---
 compiler_gym/datasets/tar_dataset.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/compiler_gym/datasets/tar_dataset.py b/compiler_gym/datasets/tar_dataset.py
index 55b15c73b..632ce935e 100644
--- a/compiler_gym/datasets/tar_dataset.py
+++ b/compiler_gym/datasets/tar_dataset.py
@@ -60,17 +60,13 @@ def __init__(
         self.tar_compression = tar_compression
         self.strip_prefix = strip_prefix

-        self._installed = False
         self._tar_extracted_marker = self.site_data_path / ".extracted"
         self._tar_lock = Lock()
         self._tar_lockfile = self.site_data_path / ".install_lock"

     @property
     def installed(self) -> bool:
-        # Fast path for repeated checks to 'installed' without a disk op.
-        if not self._installed:
-            self._installed = self._tar_extracted_marker.is_file()
-        return self._installed
+        return self._tar_extracted_marker.is_file()

     def install(self) -> None:
         super().install()

From e9435c49fbb4d7014606cf045d12fcd95befb8a3 Mon Sep 17 00:00:00 2001
From: Chris Cummins
Date: Tue, 4 May 2021 11:32:10 +0100
Subject: [PATCH 006/141] [README] Tweak example usage instructions

Don't use the `env.benchmark` attribute, and use an active writing style.
---
 README.md | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index e1557e99f..4b0660316 100644
--- a/README.md
+++ b/README.md
@@ -175,12 +175,14 @@ In Python, import `compiler_gym` to use the environments:

 ```py
 >>> import gym
->>> import compiler_gym # imports the CompilerGym environments
->>> env = gym.make("llvm-autophase-ic-v0") # starts a new environment
->>> env.benchmark = "benchmark://cbench-v1/qsort" # select a program to compile
->>> env.reset() # starts a new compilation session
->>> env.render() # prints the IR of the program
->>> env.step(env.action_space.sample()) # applies a random optimization, updates state/reward/actions
+>>> import compiler_gym # import the CompilerGym environments
+>>> env = gym.make( # create a new environment
+... "llvm-autophase-ic-v0", # select the compiler optimization task
+... benchmark="cbench-v1/qsort" # select the program to compile
+...
) +>>> env.reset() # start a new compilation session +>>> env.render() # print the IR of the program +>>> env.step(env.action_space.sample()) # apply a random optimization, update state/reward/actions ``` See the [documentation website](http://facebookresearch.github.io/CompilerGym/) From 34fab8c934f03a9f0e03695b3a3c35079ba2491a Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Tue, 4 May 2021 11:59:04 +0100 Subject: [PATCH 007/141] [third party] Prune dead code from inst2vec. Much of the imported inst2vec code is unused. This removes it. --- compiler_gym/third_party/inst2vec/BUILD | 6 - .../inst2vec/inst2vec_preprocess.py | 3003 +---------------- .../third_party/inst2vec/inst2vec_utils.py | 79 - .../third_party/inst2vec/rgx_utils.py | 74 - 4 files changed, 1 insertion(+), 3161 deletions(-) delete mode 100644 compiler_gym/third_party/inst2vec/inst2vec_utils.py diff --git a/compiler_gym/third_party/inst2vec/BUILD b/compiler_gym/third_party/inst2vec/BUILD index 0586c2a2e..d0b260fa9 100644 --- a/compiler_gym/third_party/inst2vec/BUILD +++ b/compiler_gym/third_party/inst2vec/BUILD @@ -33,16 +33,10 @@ py_library( name = "inst2vec_preprocess", srcs = ["inst2vec_preprocess.py"], deps = [ - ":inst2vec_utils", ":rgx_utils", ], ) -py_library( - name = "inst2vec_utils", - srcs = ["inst2vec_utils.py"], -) - py_library( name = "rgx_utils", srcs = ["rgx_utils.py"], diff --git a/compiler_gym/third_party/inst2vec/inst2vec_preprocess.py b/compiler_gym/third_party/inst2vec/inst2vec_preprocess.py index f377c6be6..7610aa95f 100644 --- a/compiler_gym/third_party/inst2vec/inst2vec_preprocess.py +++ b/compiler_gym/third_party/inst2vec/inst2vec_preprocess.py @@ -30,485 +30,9 @@ import networkx as nx -from compiler_gym.third_party.inst2vec import inst2vec_utils as i2v_utils from compiler_gym.third_party.inst2vec import rgx_utils as rgx -######################################################################################################################## -# Helper functions: list and stmt handling -######################################################################################################################## -def string_of_items(dic): - """ - Return a string containing all keys of a dictionary, separated by a comma - (Helper function for structure inlining) - :param dic: dictionary [key=string: value=string] - :return: string constructed of the dictionaries' keys - """ - s = "" - for k, v in dic.items(): - s += k + ": " + v + "\n" - return s - - -def collapse_into_one_list(data): - """ - Collapse list of list of strings into one list of strings - :param data: list of list of strings - :return: list of strings - """ - data_ = list() - for i in range(len(data)): - for j in range(len(data[i])): - data_.append(data[i][j]) - - return data_ - - -def string_from_list(l): - """ - Construct a string from a list of strings - :param l: list of strings - :return: string containing elements of list l separated by a comma - """ - s = l[0] - if len(l) > 1: - for i in range(len(l) - 1): - # only add this string to the list if it is different from the previous strings - e = l[i + 1] - if e not in l[0 : i + 1]: - s += ",\t\t" + e - return s - - -def create_list_stmts(list_graphs): - """ - Create a unique list of statements (strings) from a list of graphs in which statements are attributes of edges - :param list_graphs: list of context-graphs (nodes = ids, edges = statements) - :return: list_stmts: a unique list of statements (strings) - """ - list_stmts = list() - for G in list_graphs: - edges_list = [e[2]["stmt"] for e 
in G.edges(data=True)] - list_stmts += edges_list - - return list_stmts - - -######################################################################################################################## -# Counting and statistics -######################################################################################################################## -def get_stmt_counts(data_set, data_list): - """ - Get statement counts - :param data_set: set containing the elements from data_list but without repetitions and ordered - :param data_list: list of string statements with repetitions and no ordering - :return: data_count: dictionary with pairs [stmt, number of occurrences in data_list] - the order of the statements is the same as the one in data_set - data_operations_count: list of tuples - [string "tag level 1", "tag level 2", "tag level 3", int "number of occurrences"] - """ - # Setup variables - data_count = {x: 0 for x in data_set} - data_operations_count = list() - - # Compute stmt counts (overall) - print("Counting statement occurrences (overall)...") - for stmt in data_list: - data_count[stmt] += 1 - - # Check that all stmts have been counted (for debugging purposes) - total_stmt_count = sum(data_count.values()) - assert total_stmt_count == len(data_list), "Not all statements have been counted" - - # Compute stmt counts (by family) - print("Counting statement occurrences (by family) ...") - total_stmt_count = 0 - stmts_categorized = list() - - # Loop over stmt families - for fam in rgx.llvm_IR_stmt_families: - op_count = 0 - - # loop on all stmts in data - for i in range(len(data_set)): - # if the regular expression for the family matches - if re.match(fam[3], data_set[i], re.MULTILINE): - # add the corresponding number of occurrences to the counter - op_count += data_count[data_set[i]] - stmts_categorized.append(i) - - # append the count to the list of number of occurrences - data_operations_count.append([fam[0], fam[1], fam[2], op_count]) - - # increase the total stmt count - total_stmt_count += op_count - - # Check that all stmts have been categorized once and only once (debugging purposes) - print("Starting categorization check ...") - stmts_categorized = sorted(stmts_categorized) - if stmts_categorized != list(range(len(data_set))): - print("Tracking down the errors in categorization ... 
: ") - for i in range(len(data_set)): - num = stmts_categorized.count(i) - if num == 0: - print(data_set[i], "\n\tappears 0 times") - if num > 1: - print(data_set[i], "\n\tappears ", num, " times") - - assert stmts_categorized <= list( - range(len(data_set)) - ), "Not all statements have been categorized" - assert stmts_categorized >= list( - range(len(data_set)) - ), "Some statements have been categorized multiple times" - assert total_stmt_count == len(data_list), "Not all statements have been counted" - - return data_count, data_operations_count - - -def data_statistics(data, descr): - """ - Compute and print some statistics on the data - :param data: list of lists of statements (strings) - :param descr: string description of the current step of the pipeline to add to output - :return: source_data_list: list of statements - source_data sorted set of statements - """ - # Create a list of statements (strings) collecting the statements from all files - source_data_list = collapse_into_one_list(data) - - # Create a sorted set of statements appearing in our data set - source_data = sorted(set(source_data_list)) - - # Get number of lines and the vocabulary size - number_lines = len(source_data_list) - vocabulary_size = len(source_data) - - # Construct output - out = ( - "After " - + descr - + ":\n" - + "--- {:<26}: {:>12,d}\n".format("Number of lines", number_lines) - + "--- {:<26}: {:>12,d}\n".format("Vocabulary size", vocabulary_size) - ) - print(out) - - # Return - return source_data_list, source_data - - -######################################################################################################################## -# Reading, writing and dumping files -######################################################################################################################## - - -def read_data_files_from_folder(foldername): - """ - Read all source files in folder - Return a list of file contents, whereby each file content is a list of strings, each string representing a line - :param foldername: name of the folder in which the data files to be read are located - :return: a list of files where each file is a list of strings - """ - # Helper variables - data = list() - file_names = list() - file_count = 0 - - print("Reading data from all files in folder ", foldername) - listing = os.listdir(foldername + "/") - to_subtract = file_count - - # Loop over files in folder - for file in listing: - if file[0] != "." 
and file[-3:] == ".ll": - # If this isn't a hidden file and it is an LLVM IR file ('.ll' extension), - # open file and import content - f = open(os.path.join(foldername, file), "r") - data.append( - f.read().splitlines() - ) # add this file as an element to the list "data" - f.close() - - # Add file name to dictionary - file_names.append(file) - - # Increment counters - file_count += 1 - - print("Number of files read from", foldername, ": ", file_count - to_subtract) - print("Total number of files read for dataset", foldername, ": ", file_count) - return data, file_names - - -def print_preprocessed_data(raw_data, foldername, filenames): - """ - Write pre-processed code to file for future reference - :param raw_data: a list of files where each file is a list of strings - :param foldername: folder in which to print - :param filenames: list of base file names - :return: - """ - # Make sure the directory exists - if not, create it - foldername = os.path.join(foldername, "preprocessed") - if not os.path.exists(foldername): - os.makedirs(foldername) - - # Write pre-processed code to files - i = 0 - for file in raw_data: - filename = os.path.join(foldername, filenames[i][:-3] + "_preprocessed.txt") - print("Writing pre-processed data to file ", filename) - with open(filename, "w") as f: - for l in file: - f.write(l + "\n") - i += 1 - - -def print_data(data, filename): - """ - Write pre-processed code to file for future reference - :param data: a list of strings - :param filename: name of file to print this to (string) - :return: - """ - print("Write data to file ", filename) - with open(filename, "w") as f: - for l in data: - f.write(l + "\n") - - -def sort_key(x): - """ - Helper function to sort nodes - :param x: node - :return: node name, node id type - """ - id_part = x[0][1:] - - if id_part.isdigit(): - return x[0][0], int(x[0][1:]) - else: - return x[0][0], 1 - - -def print_node_family_to_file(G, f, nodetype): - """ - Helper function for function "print_graph_to_file" - :param G: graph - :param f: file handle - :param nodetype: string corresponding to the "id" of the node family to be printed - """ - - # Construct node family - if nodetype == "root": - node_family = [ - n for n in G.nodes() if G.out_degree(n) > 0 and G.in_degree(n) == 0 - ] - node_family = sorted(node_family, key=sort_key) - elif nodetype == "leaf": - node_family = [ - n for n in G.nodes() if G.out_degree(n) == 0 and G.in_degree(n) >= 1 - ] - node_family = sorted(node_family, key=sort_key) - elif nodetype == "isolated": - node_family = [n for n in G.nodes() if G.degree(n) == 0] - node_family = sorted(node_family, key=sort_key) - else: - node_family = [ - n[0] - for n in sorted(list(G.nodes(data=True)), key=sort_key) - if n[1]["id"] == nodetype - ] - - # Write to file - f.write("#nodes: " + str(len(node_family)) + "\n") - f.write("-" * 80 + "\n") - for n in node_family: - f.write("{n:<60}\n".format(n=n)) - - -def print_graph_to_file(G, multi_edge_dic, folder, filename): - """ - Print information about a graph to a file - :param G: graph - :param multi_edge_dic: dictionary of multi-edges - = edges for which a parallel edge connecting the same two end-nodes exists - :param folder: folder in which to write - :param filename: base name of the graph - """ - # Print to file - graph_filename = os.path.join(folder, filename[:-3] + ".txt") - print("Printing graph to file : ", graph_filename) - - with open(graph_filename, "w") as f: - - # GENERAL - f.write("#nodes: " + str(G.number_of_nodes()) + "\n") - f.write("#edges: " + 
str(G.number_of_edges()) + "\n\n") - - # INFORMATION ON NODES - # all - f.write("Nodes (" + str(G.number_of_nodes()) + "):\n") - f.write("-" * 80 + "\n") - for n, data in sorted(G.nodes(data=True), key=sort_key): - f.write("{n:<60}, {w}\n".format(n=n[:60], w=data["id"])) - - # local - f.write("\nLocal identifier nodes: \n") - print_node_family_to_file(G, f, "local") - - # block references - f.write("\nBlock reference nodes: \n") - print_node_family_to_file(G, f, "label") - - # global - f.write("\nGlobal nodes: \n") - print_node_family_to_file(G, f, "global") - - # immediate value - f.write("\nImmediate value nodes: \n") - print_node_family_to_file(G, f, "imm_val") - - # ad_hoc - f.write("\nAd hoc value nodes: \n") - print_node_family_to_file(G, f, "ad_hoc") - - # leaf - f.write("\nLeaf nodes: \n") - print_node_family_to_file(G, f, "leaf") - - # root - f.write("\nRoot nodes: \n") - print_node_family_to_file(G, f, "root") - - # isolated - f.write("\nIsolated nodes: \n") - print_node_family_to_file(G, f, "isolated") - f.write("\n\n") - - # INFORMATION ON EDGES - # all - f.write("Edges (" + str(G.number_of_edges()) + ")\n") - f.write("-" * 80 + "\n") - for a, b, data in sorted(G.edges(data=True), key=sort_key): - f.write( - "({a:<30}, {b:<30}) {w}\n".format(a=a[:30], b=b[:30], w=data["stmt"]) - ) - - # data flow edges - dataedges = [ - (str(n[0]), str(n[1]), str(n[2])) - for n in sorted(list(G.edges(data=True)), key=sort_key) - if n[2]["flow"] == "data" - ] - f.write("\nData flow edges: \n") - f.write( - "#edges: " - + str(len(dataedges)) - + " (" - + str(int(len(dataedges)) / G.number_of_edges() * 100)[:5] - + "%)\n" - ) - f.write("-" * 80 + "\n") - for e in dataedges: - f.write("({a:<30}, {b:<30}) {c}\n".format(a=e[0][:30], b=e[1][:30], c=e[2])) - - # control flow edges - ctrledges = [ - (str(n[0]), str(n[1]), str(n[2])) - for n in sorted(list(G.edges(data=True)), key=sort_key) - if n[2]["flow"] == "ctrl" - ] - f.write("\nCtrl flow edges: \n") - f.write( - "#edges: " - + str(len(ctrledges)) - + " (" - + str(int(len(dataedges)) / G.number_of_edges() * 100)[:5] - + "%)\n" - ) - f.write("-" * 80 + "\n") - for e in ctrledges: - f.write("({a:<30}, {b:<30}) {c}\n".format(a=e[0][:30], b=e[1][:30], c=e[2])) - - # multi-edges - f.write("\nMulti-edges: \n") - multi_edge_list = list() - for k, v in multi_edge_dic.items(): # Compile the multi-edges - multi_edge_list += v - f.write( - "#multi-edges: " - + str(len(multi_edge_list)) - + " (" - + str(int(len(multi_edge_list)) / G.number_of_edges() * 100)[:5] - + "%)\n" - ) - f.write( - "#node pairs connected by multi-edges: " - + str(len(multi_edge_dic.keys())) - + " (" - + str(int(len(multi_edge_dic)) / G.number_of_edges() * 100)[:5] - + "%)\n" - ) - f.write("-" * 80 + "\n") - for k, v_ in multi_edge_dic.items(): - n = re.match(r"(.*) \|\|\| (.*)", k) - assert n is not None, "Could not identify nodes in " + k - f.write("{m:<60} {p:<60}\n".format(m=n.group(1)[:60], p=n.group(2)[:60])) - for v in v_: - f.write("\t{}\n".format(v)) - f.write("\n") - - -def print_structure_dictionary(dic, folder, filename): - """ - Print the dictionary of structures to a file - :param dic: dictionary ["structure name", [list of possible values]] - :param folder: name of folder in which to print dictionary - :param filename: name of file in which to print dictionary - :return: - """ - # Print dictionary in alphabetical order - dic_filename = os.path.join(folder, filename[:-3] + ".txt") - print('Printing dictionary to file "', dic_filename) - with open(dic_filename, "w") as f: - 
f.write("{:<70} {}\n\n".format("structure name", "literal value")) - for key, value in sorted(dic.items()): - f.write("{:<70} {}\n".format(key, string_from_list(value))) - - -def PrintDualXfgToFile(D, folder, filename): - """Print dual-XFG graph to file. - - :param D: dual-XFG graphs - :param folder: name of folder in which to print dictionary - :param filename: name of file in which to print dictionary - """ - # Print to file - graph_filename = os.path.join(folder, filename[:-3] + ".txt") - print("Printing graph to file : ", graph_filename) - - with open(graph_filename, "w") as f: - # GENERAL - f.write("#nodes: " + str(D.number_of_nodes()) + "\n") - f.write("#edges: " + str(D.number_of_edges()) + "\n\n") - - # INFORMATION ON NODES - f.write("Nodes (" + str(D.number_of_nodes()) + ")\n") - f.write("-" * 80 + "\n") - for n, _ in sorted(D.nodes(data=True), key=sort_key): - f.write(f"{n:<60}\n") - f.write("\n") - # INFORMATION ON EDGES - f.write("Edges (" + str(D.number_of_edges()) + ")\n") - f.write("-" * 80 + "\n") - for a, b, data in sorted(D.edges(data=True), key=sort_key): - f.write( - "({a:<37}, {b:<37}) {w}\n".format(a=a[:37], b=b[:37], w=data["weight"]) - ) - - ######################################################################################################################## # LLVM IR preprocessing ######################################################################################################################## @@ -810,2085 +334,6 @@ def preprocess(data): return preprocessed_data, functions_declared_in_files -######################################################################################################################## -# XFG-building -######################################################################################################################## -def get_identifiers_from_line(line): - """ - Extract identifiers (local, global and label) from a statement - :param line: string: (part of) statement - :return: lists of strings: m_loc, m_glob, m_label, m_label2 - """ - # Find label nodes - m_label = m_label2 = list() - if line.find("label") != -1 or re.match(rgx.local_id_no_perc + r":", line): - m_label1 = re.findall("label (" + rgx.local_id + ")", line) - if re.match(r";