Skip to content

Commit

Permalink
generate_input receives a random seed argument
Browse files Browse the repository at this point in the history
  • Loading branch information
ngc92 committed Jan 14, 2025
1 parent d26faaf commit 35785a9
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 25 deletions.
8 changes: 6 additions & 2 deletions examples/identity_cuda/reference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <cstdlib>
#include <cmath>
#include <array>
#include <random>
#include <iostream>

#define N_SIZES 10
Expand All @@ -15,13 +16,16 @@ const int Ns[N_SIZES] = {128, 256, 512, 1024, 2048,
using input_t = std::array<std::vector<float>, N_SIZES>;
using output_t = input_t;

input_t generate_input() {
input_t generate_input(int seed) {
std::mt19937 rng(seed);
input_t data;

std::uniform_real_distribution<float> dist(0, 1);

for (int i = 0; i < N_SIZES; ++i) {
data[i].resize(Ns[i]);
for (int j = 0; j < Ns[i]; ++j) {
data[i][j] = static_cast<float>(rand()) / RAND_MAX;
data[i][j] = dist(rng);
}
}

Expand Down
6 changes: 4 additions & 2 deletions examples/identity_py/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def ref_kernel(xs: List[torch.Tensor]) -> List[torch.Tensor]:
return xs


def generate_input() -> List[torch.Tensor]:
def generate_input(seed: int) -> List[torch.Tensor]:
"""
Generates random input tensor of the specified shape.
Returns:
Expand All @@ -34,8 +34,10 @@ def generate_input() -> List[torch.Tensor]:
device = torch.device("cpu")

tensors = []
rng = torch.Generator(device=device)
rng.manual_seed(seed)
for shape in shapes:
tensors.append(torch.randn(shape, device=device))
tensors.append(torch.randn(shape, device=device, generator=rng))

return tensors

Expand Down
29 changes: 20 additions & 9 deletions src/discord-cluster-manager/eval.cu
Original file line number Diff line number Diff line change
Expand Up @@ -52,21 +52,24 @@ static void cuda_check(cudaError_t status, const char* expr, const char* file, i

#define cuda_check(expr) cuda_check(expr, #expr, __FILE__, __LINE__, __FUNCTION__)

void measure_runtime(PopcornOutput& logger) {
void measure_runtime(PopcornOutput& logger, std::mt19937& rng) {
std::cout << "warming up..." << std::endl;

for (int i = 0; i < WARMUP_RUNS; i++) {
auto data = generate_input();
// discard result; this is just warmup, we don't care what it returns
(void)custom_kernel(data);
{
auto warmup_data = generate_input(rng());
for (int i = 0; i < WARMUP_RUNS; i++) {
// discard result; this is just warmup, we don't care what it returns
(void)custom_kernel(warmup_data);
cuda_check(cudaDeviceSynchronize());
}
}
cuda_check(cudaDeviceSynchronize());

std::vector<std::int64_t> durations;
durations.reserve(TIMED_RUNS);

for (int i = 0; i < TIMED_RUNS; i++) {
auto data = generate_input();
auto data = generate_input(rng());

// make a copy of the input data to be used by the reference implementation
auto copy = data;

Expand Down Expand Up @@ -124,7 +127,15 @@ int main() {
return 111;
}

auto data = generate_input();
// Get the RNG seed from the POPCORN_SEED environment variable, falling back
// to 42 when the variable is not set. The value has to be parsed from
// seed_str itself: parsing any other string (such as the output file
// descriptor) would silently ignore the requested seed.
// Note: std::stoi throws if the variable does not hold a valid integer.
const char *seed_str = std::getenv("POPCORN_SEED");
int seed = seed_str ? std::stoi(seed_str) : 42;

std::mt19937 rng(seed);
auto data = generate_input(rng());
auto reference_output = ref_kernel(data);
auto submission_output = custom_kernel(data);

Expand All @@ -133,6 +144,6 @@ int main() {
return 112;
}

measure_runtime(logger);
measure_runtime(logger, rng);
return 0;
}
19 changes: 11 additions & 8 deletions src/discord-cluster-manager/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@ def log(self, key: str, value):
print(f"{key}: {value}\n", file=self.channel)


def correctness() -> bool:
def correctness(rng: torch.Generator) -> bool:
for _ in range(10): # check multiple times
inputs = generate_input()

inputs = generate_input(torch.randint(0, int(2**31), (), generator=rng).item())
custom_output = custom_kernel(inputs)
ref_output = ref_kernel(inputs)

Expand All @@ -30,22 +29,22 @@ def correctness() -> bool:
return True


def metric(logger: PopcornLogger):
def metric(logger: PopcornLogger, rng: torch.Generator):
warmup_runs = 10
timed_runs = 100

# Warmup Code
print("warming up...")
for _ in range(warmup_runs):
inputs = generate_input()
inputs = generate_input(torch.randint(0, int(2**31), (), generator=rng).item())
_ = custom_kernel(inputs)
torch.cuda.synchronize()

# Timing Code
times = []

for _ in range(timed_runs):
inputs = generate_input()
inputs = generate_input(torch.randint(0, int(2**31), (), generator=rng).item())

start_time = time.time()
custom_output = custom_kernel(inputs)
Expand Down Expand Up @@ -82,10 +81,14 @@ def main():
print(e, file=sys.stderr)
exit(111)

if not correctness():
# Read the seed from POPCORN_SEED (default 42 when unset). POPCORN_FD is the
# file descriptor of the result pipe, not a seed, so it must not be used here.
seed = int(os.environ.get("POPCORN_SEED", 42))
# Dedicated generator so that all derived per-test seeds are reproducible.
rng = torch.Generator()
rng.manual_seed(seed)

if not correctness(rng):
logger.log("check", "fail")
exit(112)
metric(logger)
metric(logger, rng)


if __name__ == "__main__":
Expand Down
13 changes: 9 additions & 4 deletions src/discord-cluster-manager/run_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def compile_cuda_script( # # noqa: C901
arch: Architecture to compile for. If None, uses `native`
include_dirs: additional include directories to supply to nvcc
verbose: whether to print progress or be silent
seed: Seed value to use for generating test cases
Returns:
A `CompileResult` that summarizes the compilation process.
Expand Down Expand Up @@ -114,11 +114,12 @@ def compile_cuda_script( # # noqa: C901
)


def run_program(args: list[str]) -> RunResult:
def run_program(args: list[str], seed: int) -> RunResult:
# set up a pipe so the tester can communicate its verdict with us
env = os.environ.copy()
pipe_read, pipe_write = os.pipe()
env["POPCORN_FD"] = str(pipe_write)
env["POPCORN_SEED"] = str(seed)

execution_start_time = time.perf_counter()
run_process = subprocess.run(
Expand Down Expand Up @@ -158,6 +159,7 @@ def run_cuda_script( # # noqa: C901
submission_content: str = None,
arch: int = None,
include_dirs: list[str] = None,
seed: int = 42,
) -> tuple[CompileResult, RunResult]:
"""
Executes the provided CUDA kernel in an isolated environment with a timeout
Expand All @@ -168,6 +170,7 @@ def run_cuda_script( # # noqa: C901
submission_content: The (optional) submission code, used for leaderboards.
arch: The arch code for the compute/sm versions. If None, native arch is used.
include_dirs: Additional include directories, e.g., for thunderkittens/cutlass etc
seed: Random seed to initialize the RNG for testing
Returns:
tuple[CompileResult, RunResult]: CUDA compile/eval result information
Expand Down Expand Up @@ -206,7 +209,7 @@ def run_cuda_script( # # noqa: C901
result={},
)

run_result = run_program(["./eval.out"])
run_result = run_program(["./eval.out"], seed=seed)
return compile_result, run_result

finally:
Expand All @@ -221,6 +224,7 @@ def run_pytorch_script( # noqa: C901
reference_content: Optional[str] = None,
submission_content: Optional[str] = None,
arch: int = None,
seed: int = 42,
) -> RunResult:
"""
Executes the provided PyTorch GPU kernel in an isolated environment
Expand All @@ -230,6 +234,7 @@ def run_pytorch_script( # noqa: C901
reference_content: The (optional) reference code, used for leaderboards.
submission_content: The (optional) submission code, used for leaderboards.
arch: The arch code for the compute/sm versions.
seed: Random seed to initialize the RNG for testing
Returns:
RunResult: Result of running the evaluation script, as returned by `run_program`
Expand All @@ -247,7 +252,7 @@ def run_pytorch_script( # noqa: C901
with open("eval.py", "w") as f:
f.write(script_content)

return run_program(["python", "eval.py"])
return run_program(["python", "eval.py"], seed=seed)

finally:
tmp_files = ["eval.py", "reference.py", "train.py"]
Expand Down

0 comments on commit 35785a9

Please sign in to comment.