diff --git a/_includes/ising_model_speed.ipynb b/_includes/ising_model_speed.ipynb index 124ce0596e8..5d4742544b5 100644 --- a/_includes/ising_model_speed.ipynb +++ b/_includes/ising_model_speed.ipynb @@ -282,7 +282,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Although faster than Python, it is still much slower than Cython." + "Although faster than Python, it is still ~2x slower than Cython." ] }, { @@ -1839,7 +1839,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -1849,7 +1849,7 @@ "[NbConvertApp] Converting notebook ising_model_speed.ipynb to markdown\n", "/Users/jakeetaylor/opt/anaconda3/lib/python3.9/site-packages/nbconvert/filters/datatypefilter.py:39: UserWarning: Your element with mimetype(s) dict_keys(['application/vnd.plotly.v1+json']) is not able to be represented.\n", " warn(\"Your element with mimetype(s) {mimetypes}\"\n", - "[NbConvertApp] Writing 23203 bytes to ising_model_speed.md\n" + "[NbConvertApp] Writing 23202 bytes to ising_model_speed.md\n" ] }, { @@ -1858,7 +1858,7 @@ "CompletedProcess(args=['jupyter', 'nbconvert', '--to', 'markdown', 'ising_model_speed.ipynb'], returncode=0)" ] }, - "execution_count": 30, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } diff --git a/_includes/ising_model_speed.md b/_includes/ising_model_speed.md index e8983348322..df94d84b9d7 100644 --- a/_includes/ising_model_speed.md +++ b/_includes/ising_model_speed.md @@ -187,7 +187,7 @@ numba_ising_step(field) 1.09 ms ± 7.63 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each) -Although faster than Python, it is still much slower than Cython. +Although faster than Python, it is still ~2x slower than Cython. ### @njit(parallel=True) ~ 650µs Before trying to speed this up, lets first understand a common exploit of the Ising Model. First, lets extract our outer for-loops and simply keep record of the `n`'s, `m`s, and offset. diff --git a/_includes/ising_model_speed_2.ipynb b/_includes/ising_model_speed_2.ipynb index 13710ee200c..08cfcd146fd 100644 --- a/_includes/ising_model_speed_2.ipynb +++ b/_includes/ising_model_speed_2.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -46,7 +46,7 @@ " var t = rand[data_type](N, M)\n", " for i in range(N):\n", " for j in range(M):\n", - " if t[Index(i, j)] < 0.5:\n", + " if t[i, j] < 0.5:\n", " t[Index(i, j)] = -1\n", " else:\n", " t[Index(i, j)] = 1\n", @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -75,11 +75,11 @@ " for j in range(m - 1, m + 2):\n", " if i == n and j == m:\n", " continue\n", - " total += field[Index(i % N, j % M)]\n", - " var dE = 2 * field[Index(n, m)] * total\n", + " total += field[i % N, j % M]\n", + " var dE = 2 * field[n, m] * total\n", " if dE <= 0:\n", " field[Index(n, m)] *= -1\n", - " elif exp(-dE * beta) > rand[data_type](1)[Index(0)]:\n", + " elif exp(-dE * beta) > rand[data_type](1)[0]:\n", " field[Index(n, m)] *= -1" ] }, @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -116,29 +116,7 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "@always_inline\n", - "fn bench() -> Report:\n", - " var N = 200\n", - " var M = 200\n", - " var field = random_spin_field(N, M)\n", - "\n", - " @always_inline\n", - " @parameter\n", - " fn ising_step_fn():\n", - " ising_step(field=field)\n", - "\n", - " return benchmark.run[ising_step_fn](max_runtime_secs=10)\n", - "\n", - "var report = bench()" - ] - }, - { - "cell_type": "code", - "execution_count": 18, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -148,19 +126,33 @@ "---------------------\n", "Benchmark Report (ms)\n", "---------------------\n", - "Mean: 2.915081212121212\n", - "Total: 2404.942\n", + "Mean: 2.9266933333333331\n", + "Total: 2414.5219999999999\n", "Iters: 825\n", - "Warmup Mean: 2.508\n", - "Warmup Total: 5.016\n", + "Warmup Mean: 2.5630000000000002\n", + "Warmup Total: 5.1260000000000003\n", "Warmup Iters: 2\n", - "Fastest Mean: 2.915081212121212\n", - "Slowest Mean: 2.915081212121212\n", + "Fastest Mean: 2.9266933333333336\n", + "Slowest Mean: 2.9266933333333336\n", "\n" ] } ], "source": [ + "@always_inline\n", + "fn bench() -> Report:\n", + " var N = 200\n", + " var M = 200\n", + " var field = random_spin_field(N, M)\n", + "\n", + " @always_inline\n", + " @parameter\n", + " fn ising_step_fn():\n", + " ising_step(field=field)\n", + "\n", + " return benchmark.run[ising_step_fn](max_runtime_secs=10)\n", + "\n", + "var report = bench()\n", "# Print a report in Milliseconds\n", "report.print(\"ms\")" ] @@ -174,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 47, "metadata": {}, "outputs": [ { @@ -182,7 +174,7 @@ "output_type": "stream", "text": [ "[NbConvertApp] Converting notebook ising_model_speed_2.ipynb to markdown\n", - "[NbConvertApp] Writing 3067 bytes to ising_model_speed_2.md\n" + "[NbConvertApp] Writing 3064 bytes to ising_model_speed_2.md\n" ] } ], diff --git a/_includes/ising_model_speed_2.md b/_includes/ising_model_speed_2.md index 7f1177a2572..81d12a88087 100644 --- a/_includes/ising_model_speed_2.md +++ b/_includes/ising_model_speed_2.md @@ -26,7 +26,7 @@ fn random_spin_field(N: Int, M: Int) -> Tensor[data_type]: var t = rand[data_type](N, M) for i in range(N): for j in range(M): - if t[Index(i, j)] < 0.5: + if t[i, j] < 0.5: t[Index(i, j)] = -1 else: t[Index(i, j)] = 1 @@ -46,11 +46,11 @@ fn _ising_update(inout field: Tensor[data_type], n: Int, m: Int, beta: Float32) for j in range(m - 1, m + 2): if i == n and j == m: continue - total += field[Index(i % N, j % M)] - var dE = 2 * field[Index(n, m)] * total + total += field[i % N, j % M] + var dE = 2 * field[n, m] * total if dE <= 0: field[Index(n, m)] *= -1 - elif exp(-dE * beta) > rand[data_type](1)[Index(0)]: + elif exp(-dE * beta) > rand[data_type](1)[0]: field[Index(n, m)] *= -1 ``` @@ -87,10 +87,6 @@ fn bench() -> Report: return benchmark.run[ising_step_fn](max_runtime_secs=10) var report = bench() -``` - - -```python # Print a report in Milliseconds report.print("ms") ``` @@ -98,14 +94,14 @@ report.print("ms") --------------------- Benchmark Report (ms) --------------------- - Mean: 2.915081212121212 - Total: 2404.942 + Mean: 2.9266933333333331 + Total: 2414.5219999999999 Iters: 825 - Warmup Mean: 2.508 - Warmup Total: 5.016 + Warmup Mean: 2.5630000000000002 + Warmup Total: 5.1260000000000003 Warmup Iters: 2 - Fastest Mean: 2.915081212121212 - Slowest Mean: 2.915081212121212 + Fastest Mean: 2.9266933333333336 + Slowest Mean: 2.9266933333333336 diff --git a/_includes/ising_model_speed_2.md-e b/_includes/ising_model_speed_2.md-e index bf1311484c5..573b1cceb6e 100644 --- a/_includes/ising_model_speed_2.md-e +++ b/_includes/ising_model_speed_2.md-e @@ -26,7 +26,7 @@ fn random_spin_field(N: Int, M: Int) -> Tensor[data_type]: var t = rand[data_type](N, M) for i in range(N): for j in range(M): - if t[Index(i, j)] < 0.5: + if t[i, j] < 0.5: t[Index(i, j)] = -1 else: t[Index(i, j)] = 1 @@ -46,11 +46,11 @@ fn _ising_update(inout field: Tensor[data_type], n: Int, m: Int, beta: Float32) for j in range(m - 1, m + 2): if i == n and j == m: continue - total += field[Index(i % N, j % M)] - var dE = 2 * field[Index(n, m)] * total + total += field[i % N, j % M] + var dE = 2 * field[n, m] * total if dE <= 0: field[Index(n, m)] *= -1 - elif exp(-dE * beta) > rand[data_type](1)[Index(0)]: + elif exp(-dE * beta) > rand[data_type](1)[0]: field[Index(n, m)] *= -1 ``` @@ -87,10 +87,6 @@ fn bench() -> Report: return benchmark.run[ising_step_fn](max_runtime_secs=10) var report = bench() -``` - - -```mojo # Print a report in Milliseconds report.print("ms") ``` @@ -98,14 +94,14 @@ report.print("ms") --------------------- Benchmark Report (ms) --------------------- - Mean: 2.915081212121212 - Total: 2404.942 + Mean: 2.9266933333333331 + Total: 2414.5219999999999 Iters: 825 - Warmup Mean: 2.508 - Warmup Total: 5.016 + Warmup Mean: 2.5630000000000002 + Warmup Total: 5.1260000000000003 Warmup Iters: 2 - Fastest Mean: 2.915081212121212 - Slowest Mean: 2.915081212121212 + Fastest Mean: 2.9266933333333336 + Slowest Mean: 2.9266933333333336 diff --git a/_includes/ising_model_speed_3.ipynb b/_includes/ising_model_speed_3.ipynb index 537e27e5a2a..04747378438 100644 --- a/_includes/ising_model_speed_3.ipynb +++ b/_includes/ising_model_speed_3.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Julia ~ 3.2ms\n", + "## Julia ~ 850μs \n", "Lastly, we look at [Julia](https://docs.julialang.org/en/v1/), another member of the [LLVM](https://en.wikipedia.org/wiki/LLVM) family." ] }, @@ -64,13 +64,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Naive ~ 3.2ms\n", "Julia translates pretty closely from Python, just take note of 1-indexed arrays instead of 0-indexed arrays." ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -87,19 +86,16 @@ "function _ising_step(field::Matrix{Int8}, n::Integer, m::Integer, beta::Float32)\n", " total = 0\n", " N, M = size(field)\n", - "\n", - " # 1-indexed arrays cannot use the % trick from before.\n", - " nm1 = n - 1 == 0 ? N : n\n", - " np1 = n + 1 == N ? 1 : n\n", - " mm1 = m - 1 == 0 ? M : m\n", - " mp1 = m + 1 == M ? 1 : m\n", - "\n", - " for i in [nm1, n, np1]\n", - " for j in [mm1, m, mp1]\n", + " for i in n-1:n+1\n", + " for j in m-1:m+1\n", " if i == n && j == m\n", " continue\n", " end\n", - " total += field[i, j]\n", + " # Convert to 0-indexing\n", + " i -= 1\n", + " j -= 1\n", + " # Take the remainder and convert back to 1-indexing.\n", + " total += field[abs(i % N) + 1, abs(j % M) + 1]\n", " end\n", " end\n", " dE = 2 * field[n, m] * total\n", @@ -113,7 +109,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -133,14 +129,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " 3.282 ms (158404 allocations: 12.09 MiB)\n", + " 853.750 μs (0 allocations: 0 bytes)\n", "\n" ] } @@ -155,84 +151,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Unrolled ~ 1.3ms\n", - "We can also include the unrolled version from before." + "Which almost runs as fast as Cython" ] }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "_ising_step_unrolled (generic function with 1 method)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "function _ising_step_unrolled(field::Matrix{Int8}, n::Integer, m::Integer, beta::Float32)\n", - " total = 0\n", - " N, M = size(field)\n", - " nm1 = n - 1 == 0 ? N : n\n", - " np1 = n + 1 == N ? 1 : n\n", - " mm1 = m - 1 == 0 ? M : m\n", - " mp1 = m + 1 == M ? 1 : m\n", - " dE = (\n", - " 2\n", - " * field[n, m]\n", - " * (\n", - " field[nm1, mm1]\n", - " + field[nm1, m]\n", - " + field[nm1, mp1]\n", - " + field[n, mm1]\n", - " + field[n, mp1]\n", - " + field[np1, mm1]\n", - " + field[np1, m]\n", - " + field[np1, mp1]\n", - " )\n", - " )\n", - " if dE <= 0\n", - " field[n, m] *= -1\n", - " elseif exp(-dE * beta) > rand()\n", - " field[n, m] *= -1\n", - " end\n", - "end" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 1.302 ms (0 allocations: 0 bytes)\n", - "\n" - ] - } - ], - "source": [ - "@btime ising_step(field, 0.04f0, _ising_step_unrolled)\n", - "println(\"\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Which runs around the speed of Mojo when using for-loops and around the speed of Numba when using unrolled." - ] - }, - { - "cell_type": "code", - "execution_count": 8, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -246,7 +170,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[NbConvertApp] Writing 2981 bytes to ising_model_speed_3.md\n" + "[NbConvertApp] Writing 1923 bytes to ising_model_speed_3.md\n" ] }, { diff --git a/_includes/ising_model_speed_3.md b/_includes/ising_model_speed_3.md index 53c4ea5995a..52fb04c2729 100644 --- a/_includes/ising_model_speed_3.md +++ b/_includes/ising_model_speed_3.md @@ -1,4 +1,4 @@ -## Julia ~ 3.2ms +## Julia ~ 850μs Lastly, we look at [Julia](https://docs.julialang.org/en/v1/), another member of the [LLVM](https://en.wikipedia.org/wiki/LLVM) family. @@ -33,7 +33,6 @@ end ising_step (generic function with 1 method) -### Naive ~ 3.2ms Julia translates pretty closely from Python, just take note of 1-indexed arrays instead of 0-indexed arrays. @@ -41,19 +40,16 @@ Julia translates pretty closely from Python, just take note of 1-indexed arrays function _ising_step(field::Matrix{Int8}, n::Integer, m::Integer, beta::Float32) total = 0 N, M = size(field) - - # 1-indexed arrays cannot use the % trick from before. - nm1 = n - 1 == 0 ? N : n - np1 = n + 1 == N ? 1 : n - mm1 = m - 1 == 0 ? M : m - mp1 = m + 1 == M ? 1 : m - - for i in [nm1, n, np1] - for j in [mm1, m, mp1] + for i in n-1:n+1 + for j in m-1:m+1 if i == n && j == m continue end - total += field[i, j] + # Convert to 0-indexing + i -= 1 + j -= 1 + # Take the remainder and convert back to 1-indexing. + total += field[abs(i % N) + 1, abs(j % M) + 1] end end dE = 2 * field[n, m] * total @@ -87,59 +83,11 @@ using BenchmarkTools println("") ``` - 3.282 ms (158404 allocations: 12.09 MiB) - - - -### Unrolled ~ 1.3ms -We can also include the unrolled version from before. - - -```julia -function _ising_step_unrolled(field::Matrix{Int8}, n::Integer, m::Integer, beta::Float32) - total = 0 - N, M = size(field) - nm1 = n - 1 == 0 ? N : n - np1 = n + 1 == N ? 1 : n - mm1 = m - 1 == 0 ? M : m - mp1 = m + 1 == M ? 1 : m - dE = ( - 2 - * field[n, m] - * ( - field[nm1, mm1] - + field[nm1, m] - + field[nm1, mp1] - + field[n, mm1] - + field[n, mp1] - + field[np1, mm1] - + field[np1, m] - + field[np1, mp1] - ) - ) - if dE <= 0 - field[n, m] *= -1 - elseif exp(-dE * beta) > rand() - field[n, m] *= -1 - end -end -``` - - - _ising_step_unrolled (generic function with 1 method) - - - -```julia -@btime ising_step(field, 0.04f0, _ising_step_unrolled) -println("") -``` - - 1.302 ms (0 allocations: 0 bytes) + 853.750 μs (0 allocations: 0 bytes) -Which runs around the speed of Mojo when using for-loops and around the speed of Numba when using unrolled. +Which almost runs as fast as Cython ```julia diff --git a/_posts/2024-02-14-ising-model_speed.md b/_posts/2024-02-14-ising-model_speed.md index 8a5b49516e3..5b8c5d556b3 100644 --- a/_posts/2024-02-14-ising-model_speed.md +++ b/_posts/2024-02-14-ising-model_speed.md @@ -22,7 +22,7 @@ A post about the *speed* of the computation of the [Ising Model](https://en.wiki | Cython | 0.580ms | 5 | | Numba | 1.1ms | 2 | | Mojo | 3ms | 4 | -| Julia | 3.2ms | 3 | +| Julia | 0.850ms | 3 | {% include ising_model_speed.md %}