diff --git a/BENCHMARKS.md b/BENCHMARKS.md index 95b7ac6..a31973e 100644 --- a/BENCHMARKS.md +++ b/BENCHMARKS.md @@ -1,33 +1,44 @@ # CPU info ``` -Architecture: x86_64 -CPU op-mode(s): 32-bit, 64-bit -Byte Order: Little Endian -Address sizes: 39 bits physical, 48 bits virtual -CPU(s): 4 -On-line CPU(s) list: 0-3 -Thread(s) per core: 2 -Core(s) per socket: 2 -Socket(s): 1 -NUMA node(s): 1 -Vendor ID: GenuineIntel -CPU family: 6 -Model: 142 -Model name: Intel(R) Core(TM) i5-7200U CPU @ 2.50GHz -Stepping: 9 -CPU MHz: 2574.592 -CPU max MHz: 3100.0000 -CPU min MHz: 400.0000 -BogoMIPS: 5426.00 -Virtualization: VT-x -L1d cache: 32K -L1i cache: 32K -L2 cache: 256K -L3 cache: 3072K -NUMA node0 CPU(s): 0-3 -Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb invpcid_single pti ssbd ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx rdseed adx smap clflushopt intel_pt xsaveopt xsavec xgetbv1 xsaves dtherm ida arat pln pts hwp hwp_notify hwp_act_window hwp_epp flush_l1d +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Address sizes: 39 bits physical, 48 bits virtual +Byte Order: Little Endian +CPU(s): 16 +On-line CPU(s) list: 0-15 +Vendor ID: GenuineIntel +Model name: Intel(R) Core(TM) i7-10700K CPU @ 3.80GHz +CPU family: 6 +Model: 165 +Thread(s) per core: 2 +Core(s) per socket: 8 +Socket(s): 1 +Stepping: 5 +CPU(s) scaling MHz: 57% +CPU max MHz: 5100.0000 +CPU min MHz: 800.0000 +BogoMIPS: 7602.45 +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx 
fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx rdseed adx smap clflushopt intel_pt xsaveopt xsavec xgetbv1 xsaves dtherm ida arat pln pts pku ospke md_clear flush_l1d arch_capabilities +Virtualization: VT-x +L1d cache: 256 KiB (8 instances) +L1i cache: 256 KiB (8 instances) +L2 cache: 2 MiB (8 instances) +L3 cache: 16 MiB (1 instance) +NUMA node(s): 1 +NUMA node0 CPU(s): 0-15 +Vulnerability Itlb multihit: KVM: Mitigation: VMX disabled +Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Mmio stale data: Vulnerable: Clear CPU buffers attempted, no microcode; SMT vulnerable +Vulnerability Retbleed: Mitigation; Enhanced IBRS +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence +Vulnerability Srbds: Vulnerable: No microcode +Vulnerability Tsx async abort: Not affected ``` -Running on Linux 4.19.1-arch1-1-ARCH #1 SMP PREEMPT Sun Nov 4 16:49:26 UTC 2018 x86_64 GNU/Linux +Running on Linux 6.1.4-arch1-1 #1 SMP PREEMPT_DYNAMIC Sat, 07 Jan 2023 15:10:07 +0000 x86_64 GNU/Linux # Benchmarks Benchmark code under [benchmark](benchmark) directory. @@ -40,63 +51,63 @@ As you can see, there is a lot to improve! 
``` Result for 1 threads: Target 0 (std/global): -mean of 563244497.238 r/s (704108544 rounds in 1.250 seconds) +mean of 851216201.175 r/s (1064085504 rounds in 1.250 seconds) Target 1 (blocking): -mean of 127740592.346 r/s (159698944 rounds in 1.250 seconds) +mean of 433788599.293 r/s (542268416 rounds in 1.250 seconds) Target 2 (blocking with cached access): -mean of 322415766.353 r/s (403059712 rounds in 1.250 seconds) +mean of 432815147.726 r/s (541053952 rounds in 1.250 seconds) Target 3 (lockfree): -mean of 139950785.210 r/s (174964736 rounds in 1.250 seconds) +mean of 281114121.514 r/s (351413248 rounds in 1.250 seconds) Target 4 (lockfree with cached id): -mean of 204579727.638 r/s (255761408 rounds in 1.250 seconds) +mean of 303622538.540 r/s (379551744 rounds in 1.250 seconds) Result for 4 threads: Target 0 (std/global): -mean of 1254346568.165 r/s (1568060416 rounds in 1.250 seconds) +mean of 3363902641.766 r/s (4205216768 rounds in 1.250 seconds) Target 1 (blocking): -mean of 201892643.702 r/s (252385280 rounds in 1.250 seconds) +mean of 277761936.865 r/s (347229184 rounds in 1.250 seconds) Target 2 (blocking with cached access): -mean of 341823902.079 r/s (427312128 rounds in 1.250 seconds) +mean of 185943879.551 r/s (232444928 rounds in 1.250 seconds) Target 3 (lockfree): -mean of 279972809.868 r/s (349997056 rounds in 1.250 seconds) +mean of 1108497663.496 r/s (1385724928 rounds in 1.250 seconds) Target 4 (lockfree with cached id): -mean of 407243227.338 r/s (509092864 rounds in 1.250 seconds) +mean of 1171230609.323 r/s (1464145920 rounds in 1.250 seconds) Result for 16 threads: Target 0 (std/global): -mean of 1265788513.195 r/s (1582526464 rounds in 1.250 seconds) +mean of 8345282740.296 r/s (10433639424 rounds in 1.250 seconds) Target 1 (blocking): -mean of 146437704.110 r/s (183084032 rounds in 1.250 seconds) +mean of 676102912.883 r/s (845292544 rounds in 1.250 seconds) Target 2 (blocking with cached access): -mean of 187998457.204 r/s 
(235046912 rounds in 1.250 seconds) +mean of 681291406.219 r/s (851744768 rounds in 1.250 seconds) Target 3 (lockfree): -mean of 280079755.878 r/s (350157824 rounds in 1.250 seconds) +mean of 2359727923.404 r/s (2950123520 rounds in 1.250 seconds) Target 4 (lockfree with cached id): -mean of 410127128.874 r/s (512739328 rounds in 1.250 seconds) +mean of 2203697438.159 r/s (2755062784 rounds in 1.250 seconds) Result for 32 threads: Target 0 (std/global): -mean of 1253856389.371 r/s (1567800320 rounds in 1.250 seconds) +mean of 8311773286.023 r/s (10394966016 rounds in 1.251 seconds) Target 1 (blocking): -mean of 117252735.879 r/s (146627584 rounds in 1.251 seconds) +mean of 1465673720.646 r/s (1832720384 rounds in 1.250 seconds) Target 2 (blocking with cached access): -mean of 146574797.405 r/s (183299072 rounds in 1.251 seconds) +mean of 1624175423.440 r/s (2030846976 rounds in 1.250 seconds) Target 3 (lockfree): -mean of 279772786.027 r/s (349842432 rounds in 1.250 seconds) +mean of 2364188552.573 r/s (2956139520 rounds in 1.250 seconds) Target 4 (lockfree with cached id): -mean of 410133726.205 r/s (512842752 rounds in 1.250 seconds) +mean of 2205742548.857 r/s (2758129664 rounds in 1.250 seconds) Result for 128 threads: Target 0 (std/global): -mean of 1253819121.657 r/s (1569345536 rounds in 1.252 seconds) +mean of 8333737198.803 r/s (10431406080 rounds in 1.252 seconds) Target 1 (blocking): -mean of 56362911.831 r/s (70654976 rounds in 1.254 seconds) +mean of 2843529021.661 r/s (3559099392 rounds in 1.252 seconds) Target 2 (blocking with cached access): -mean of 61073798.426 r/s (76540928 rounds in 1.253 seconds) +mean of 2907274859.564 r/s (3638416384 rounds in 1.251 seconds) Target 3 (lockfree): -mean of 279925671.251 r/s (350430208 rounds in 1.252 seconds) +mean of 2342497340.538 r/s (2931552256 rounds in 1.251 seconds) Target 4 (lockfree with cached id): -mean of 409816050.587 r/s (512987136 rounds in 1.252 seconds) +mean of 2203641854.647 r/s (2757732352 
rounds in 1.251 seconds) ``` @@ -104,43 +115,43 @@ mean of 409816050.587 r/s (512987136 rounds in 1.252 seconds) ``` Result for 1 threads: Target 0 (mutex vector): -mean of 46009691.533 r/s (57522176 rounds in 1.250 seconds) +mean of 89099127.009 r/s (111380480 rounds in 1.250 seconds) Target 1 (mutex linked list): -mean of 29984570.170 r/s (37487616 rounds in 1.250 seconds) +mean of 50152567.538 r/s (62694400 rounds in 1.250 seconds) Target 2 (lockfree): -mean of 14504967.628 r/s (18134016 rounds in 1.250 seconds) +mean of 25788471.153 r/s (32237568 rounds in 1.250 seconds) Result for 2 threads: Target 0 (mutex vector): -mean of 11336923.499 r/s (14173184 rounds in 1.250 seconds) +mean of 10269221.414 r/s (12838912 rounds in 1.250 seconds) Target 1 (mutex linked list): -mean of 3333379.883 r/s (4167680 rounds in 1.250 seconds) +mean of 15027960.930 r/s (18787328 rounds in 1.250 seconds) Target 2 (lockfree): -mean of 5218179.185 r/s (6523904 rounds in 1.250 seconds) +mean of 6761175.851 r/s (8452096 rounds in 1.250 seconds) Result for 4 threads: Target 0 (mutex vector): -mean of 8587407.595 r/s (10736640 rounds in 1.250 seconds) +mean of 18438887.804 r/s (23052288 rounds in 1.250 seconds) Target 1 (mutex linked list): -mean of 5018169.267 r/s (6275072 rounds in 1.250 seconds) +mean of 11941267.117 r/s (14928896 rounds in 1.250 seconds) Target 2 (lockfree): -mean of 6167520.930 r/s (7711744 rounds in 1.250 seconds) +mean of 5866365.571 r/s (7334912 rounds in 1.250 seconds) Result for 8 threads: Target 0 (mutex vector): -mean of 8277195.290 r/s (10351616 rounds in 1.251 seconds) +mean of 10719935.758 r/s (13403136 rounds in 1.250 seconds) Target 1 (mutex linked list): -mean of 4920637.475 r/s (6155264 rounds in 1.251 seconds) +mean of 5805096.283 r/s (7259136 rounds in 1.250 seconds) Target 2 (lockfree): -mean of 5725833.254 r/s (7340032 rounds in 1.282 seconds) +mean of 5299406.812 r/s (6633472 rounds in 1.252 seconds) Result for 16 threads: Target 0 (mutex vector): 
-mean of 8258765.613 r/s (10331136 rounds in 1.251 seconds) +mean of 11043957.290 r/s (13813760 rounds in 1.251 seconds) Target 1 (mutex linked list): -mean of 4928023.480 r/s (6169600 rounds in 1.252 seconds) +mean of 6037878.001 r/s (7555072 rounds in 1.251 seconds) Target 2 (lockfree): -mean of 5764235.033 r/s (7392256 rounds in 1.282 seconds) +mean of 4700504.434 r/s (5966848 rounds in 1.269 seconds) ``` @@ -149,43 +160,43 @@ mean of 5764235.033 r/s (7392256 rounds in 1.282 seconds) Result for 1 threads: Target 0 (mutex vector): -mean of 48626690.729 r/s (60792832 rounds in 1.250 seconds) +mean of 93598777.390 r/s (117006336 rounds in 1.250 seconds) Target 1 (mutex linked list): -mean of 29654579.252 r/s (37074944 rounds in 1.250 seconds) +mean of 50316733.711 r/s (62900224 rounds in 1.250 seconds) Target 2 (lockfree): -mean of 19572942.644 r/s (24470528 rounds in 1.250 seconds) +mean of 32980961.200 r/s (41229312 rounds in 1.250 seconds) Result for 2 threads: Target 0 (mutex vector): -mean of 8048881.361 r/s (10061824 rounds in 1.250 seconds) +mean of 16577396.180 r/s (20723712 rounds in 1.250 seconds) Target 1 (mutex linked list): -mean of 4044680.737 r/s (5056512 rounds in 1.250 seconds) +mean of 15416987.390 r/s (19272704 rounds in 1.250 seconds) Target 2 (lockfree): -mean of 5921813.570 r/s (7403520 rounds in 1.250 seconds) +mean of 7796934.280 r/s (9748480 rounds in 1.250 seconds) Result for 4 threads: Target 0 (mutex vector): -mean of 7946160.211 r/s (9934848 rounds in 1.250 seconds) +mean of 20578554.254 r/s (25726976 rounds in 1.250 seconds) Target 1 (mutex linked list): -mean of 4890576.599 r/s (6115328 rounds in 1.250 seconds) +mean of 13290525.384 r/s (16615424 rounds in 1.250 seconds) Target 2 (lockfree): -mean of 6409920.374 r/s (8014848 rounds in 1.250 seconds) +mean of 6550535.360 r/s (8190976 rounds in 1.250 seconds) Result for 8 threads: Target 0 (mutex vector): -mean of 7650499.209 r/s (9567232 rounds in 1.251 seconds) +mean of 11936346.837 
r/s (14925824 rounds in 1.250 seconds) Target 1 (mutex linked list): -mean of 4813496.739 r/s (6020096 rounds in 1.251 seconds) +mean of 5336906.024 r/s (6673408 rounds in 1.250 seconds) Target 2 (lockfree): -mean of 6670965.273 r/s (8513536 rounds in 1.276 seconds) +mean of 6232121.974 r/s (7796736 rounds in 1.251 seconds) Result for 16 threads: Target 0 (mutex vector): -mean of 7615925.275 r/s (9525248 rounds in 1.251 seconds) +mean of 11595009.037 r/s (14504960 rounds in 1.251 seconds) Target 1 (mutex linked list): -mean of 4796021.841 r/s (6003712 rounds in 1.252 seconds) +mean of 6157158.225 r/s (7704576 rounds in 1.251 seconds) Target 2 (lockfree): -mean of 6442273.200 r/s (8230912 rounds in 1.278 seconds) +mean of 5123655.888 r/s (6430720 rounds in 1.255 seconds) ``` @@ -193,231 +204,279 @@ mean of 6442273.200 r/s (8230912 rounds in 1.278 seconds) ``` Result for 1 threads: Target 0 (mutex insert): -mean of 3266460.771 r/s (4520960 rounds in 1.384 seconds) +mean of 10846880.976 r/s (13559808 rounds in 1.250 seconds) Target 1 (lockfree insert): -mean of 2006847.396 r/s (2509824 rounds in 1.251 seconds) +mean of 3083291.818 r/s (3855360 rounds in 1.250 seconds) Result for 2 threads: Target 0 (mutex insert): -mean of 1998542.799 r/s (2498560 rounds in 1.250 seconds) +mean of 5545038.052 r/s (6932480 rounds in 1.250 seconds) Target 1 (lockfree insert): -mean of 2494350.353 r/s (3119104 rounds in 1.250 seconds) +mean of 4706173.460 r/s (5986304 rounds in 1.272 seconds) Result for 4 threads: Target 0 (mutex insert): -mean of 2295868.648 r/s (2873344 rounds in 1.252 seconds) +mean of 5346668.400 r/s (6686720 rounds in 1.251 seconds) Target 1 (lockfree insert): -mean of 3688243.156 r/s (4612096 rounds in 1.250 seconds) +mean of 4469036.132 r/s (5588992 rounds in 1.251 seconds) Result for 8 threads: Target 0 (mutex insert): -mean of 2505145.993 r/s (3135488 rounds in 1.252 seconds) +mean of 4228601.332 r/s (5289984 rounds in 1.251 seconds) Target 1 (lockfree insert): 
-mean of 3923203.772 r/s (4945920 rounds in 1.261 seconds) +mean of 4485519.649 r/s (5612544 rounds in 1.251 seconds) Result for 1 threads: Target 0 (mutex get): -mean of 6535583.089 r/s (8170496 rounds in 1.250 seconds) +mean of 8456796.039 r/s (10571776 rounds in 1.250 seconds) Target 1 (lockfree get): -mean of 4508820.420 r/s (5637120 rounds in 1.250 seconds) +mean of 5186747.966 r/s (6483968 rounds in 1.250 seconds) Result for 2 threads: Target 0 (mutex get): -mean of 2058094.237 r/s (2573312 rounds in 1.250 seconds) +mean of 5509905.538 r/s (6889472 rounds in 1.250 seconds) Target 1 (lockfree get): -mean of 7697488.160 r/s (9623552 rounds in 1.250 seconds) +mean of 8401315.266 r/s (10504192 rounds in 1.250 seconds) Result for 4 threads: Target 0 (mutex get): -mean of 2417756.745 r/s (3023872 rounds in 1.251 seconds) +mean of 5940744.451 r/s (7427072 rounds in 1.250 seconds) Target 1 (lockfree get): -mean of 10679469.481 r/s (13352960 rounds in 1.250 seconds) +mean of 11178689.147 r/s (13976576 rounds in 1.250 seconds) Result for 8 threads: Target 0 (mutex get): -mean of 2577102.331 r/s (3224576 rounds in 1.251 seconds) +mean of 4668769.695 r/s (5840896 rounds in 1.251 seconds) Target 1 (lockfree get): -mean of 10314698.163 r/s (12916736 rounds in 1.252 seconds) +mean of 12274648.553 r/s (15347712 rounds in 1.250 seconds) Result for 1 threads: Target 0 (mutex remove): -mean of 21127356.471 r/s (26413056 rounds in 1.250 seconds) +mean of 9078530.816 r/s (11348992 rounds in 1.250 seconds) Target 1 (lockfree remove): -mean of 3093448.918 r/s (3867648 rounds in 1.250 seconds) +mean of 2963957.668 r/s (3705856 rounds in 1.250 seconds) Result for 2 threads: Target 0 (mutex remove): -mean of 12228997.875 r/s (15288320 rounds in 1.250 seconds) +mean of 8176245.955 r/s (10221568 rounds in 1.250 seconds) Target 1 (lockfree remove): -mean of 9171024.742 r/s (11464704 rounds in 1.250 seconds) +mean of 13052455.433 r/s (16318464 rounds in 1.250 seconds) Result for 4 
threads: Target 0 (mutex remove): -mean of 10815691.241 r/s (13521920 rounds in 1.250 seconds) +mean of 6763708.250 r/s (8456192 rounds in 1.250 seconds) Target 1 (lockfree remove): -mean of 11237987.111 r/s (14049280 rounds in 1.250 seconds) +mean of 13651569.473 r/s (17068032 rounds in 1.250 seconds) Result for 8 threads: Target 0 (mutex remove): -mean of 10836043.306 r/s (13548544 rounds in 1.250 seconds) +mean of 6107615.974 r/s (7639040 rounds in 1.251 seconds) Target 1 (lockfree remove): -mean of 11590965.619 r/s (14494720 rounds in 1.251 seconds) +mean of 11677936.452 r/s (14601216 rounds in 1.250 seconds) Result for 1 threads: Target 0 (mutex mixed): -mean of 4552824.329 r/s (5692416 rounds in 1.250 seconds) +mean of 8923787.074 r/s (11155456 rounds in 1.250 seconds) Target 1 (lockfree mixed): -mean of 2001439.755 r/s (2502656 rounds in 1.250 seconds) +mean of 3198851.262 r/s (3999744 rounds in 1.250 seconds) Result for 2 threads: Target 0 (mutex mixed): -mean of 1520136.949 r/s (1901568 rounds in 1.251 seconds) +mean of 4703759.900 r/s (5880832 rounds in 1.250 seconds) Target 1 (lockfree mixed): -mean of 1099053.074 r/s (1376256 rounds in 1.252 seconds) +mean of 2210759.001 r/s (2764800 rounds in 1.251 seconds) Result for 4 threads: Target 0 (mutex mixed): -mean of 1297798.011 r/s (1624064 rounds in 1.251 seconds) +mean of 4431257.942 r/s (5541888 rounds in 1.251 seconds) Target 1 (lockfree mixed): -mean of 1868782.116 r/s (2338816 rounds in 1.252 seconds) +mean of 2673543.465 r/s (3344384 rounds in 1.251 seconds) Result for 8 threads: Target 0 (mutex mixed): -mean of 1422417.889 r/s (1782784 rounds in 1.253 seconds) +mean of 3281505.872 r/s (4107264 rounds in 1.252 seconds) Target 1 (lockfree mixed): -mean of 1966307.117 r/s (2547712 rounds in 1.296 seconds) +mean of 2563777.779 r/s (3209216 rounds in 1.252 seconds) ``` ## MPSC CHANNEL ``` -Mutexed VecDeque with 3 threads total time: 195.828541ms -Std's MPSC with 3 threads total time: 144.951974ms 
-Lockfree MPSC with 3 threads total time: 112.348453ms +Mutexed VecDeque with 3 threads total time: 99.229711ms +Std's MPSC with 3 threads total time: 37.291016ms +Lockfree MPSC with 3 threads total time: 78.990366ms -Mutexed VecDeque with 5 threads total time: 422.126165ms -Std's MPSC with 5 threads total time: 250.879545ms -Lockfree MPSC with 5 threads total time: 201.960734ms +Mutexed VecDeque with 5 threads total time: 250.676698ms +Std's MPSC with 5 threads total time: 165.328736ms +Lockfree MPSC with 5 threads total time: 174.561518ms -Mutexed VecDeque with 9 threads total time: 803.066437ms -Std's MPSC with 9 threads total time: 479.966615ms -Lockfree MPSC with 9 threads total time: 385.762984ms +Mutexed VecDeque with 9 threads total time: 533.562317ms +Std's MPSC with 9 threads total time: 449.719126ms +Lockfree MPSC with 9 threads total time: 406.557324ms -Mutexed VecDeque with 17 threads total time: 1.584427583s -Std's MPSC with 17 threads total time: 978.197558ms -Lockfree MPSC with 17 threads total time: 744.057727ms +Mutexed VecDeque with 17 threads total time: 972.406324ms +Std's MPSC with 17 threads total time: 984.550523ms +Lockfree MPSC with 17 threads total time: 1.406755476s -Mutexed VecDeque with 33 threads total time: 3.086542744s -Std's MPSC with 33 threads total time: 1.973712673s -Lockfree MPSC with 33 threads total time: 1.544679447s +Mutexed VecDeque with 33 threads total time: 1.996519495s +Std's MPSC with 33 threads total time: 1.97039955s +Lockfree MPSC with 33 threads total time: 2.491584752s ``` ## SPSC CHANNEL ``` -Mutexed VecDeque total time: 759.772544ms -Std's MPSC (as SPSC) total time: 96.751346ms -Lockfree SPSC total time: 384.928062ms +Mutexed VecDeque total time: 280.167002ms +Std's MPSC (as SPSC) total time: 56.16794ms +Lockfree SPSC total time: 288.539261ms ``` ## SPMC CHANNEL ``` -Mutexed VecDeque with 3 threads total time: 227.881661ms -Mutexed Std's MPSC (as SPMC) with 3 threads total time: 304.818219ms -Lockfree SPMC 
with 3 threads total time: 111.830327ms +Mutexed VecDeque with 3 threads total time: 77.228983ms +Mutexed Std's MPSC (as SPMC) with 3 threads total time: 45.99267ms +Lockfree SPMC with 3 threads total time: 96.200218ms -Mutexed VecDeque with 5 threads total time: 363.383696ms -Mutexed Std's MPSC (as SPMC) with 5 threads total time: 158.106779ms -Lockfree SPMC with 5 threads total time: 88.795666ms +Mutexed VecDeque with 5 threads total time: 226.588922ms +Mutexed Std's MPSC (as SPMC) with 5 threads total time: 70.179382ms +Lockfree SPMC with 5 threads total time: 85.865068ms -Mutexed VecDeque with 9 threads total time: 722.081453ms -Mutexed Std's MPSC (as SPMC) with 9 threads total time: 166.285144ms -Lockfree SPMC with 9 threads total time: 90.274454ms +Mutexed VecDeque with 9 threads total time: 450.579857ms +Mutexed Std's MPSC (as SPMC) with 9 threads total time: 130.481769ms +Lockfree SPMC with 9 threads total time: 114.333799ms -Mutexed VecDeque with 17 threads total time: 1.168055717s -Mutexed Std's MPSC (as SPMC) with 17 threads total time: 164.420737ms -Lockfree SPMC with 17 threads total time: 169.7209ms +Mutexed VecDeque with 17 threads total time: 815.07391ms +Mutexed Std's MPSC (as SPMC) with 17 threads total time: 125.530757ms +Lockfree SPMC with 17 threads total time: 133.102409ms -Mutexed VecDeque with 33 threads total time: 2.262141374s -Mutexed Std's MPSC (as SPMC) with 33 threads total time: 240.266825ms -Lockfree SPMC with 33 threads total time: 555.498619ms +Mutexed VecDeque with 33 threads total time: 1.618507497s +Mutexed Std's MPSC (as SPMC) with 33 threads total time: 133.219862ms +Lockfree SPMC with 33 threads total time: 142.728936ms ``` ## MPMC CHANNEL ``` -Mutexed VecDeque with 4 threads total time: 85.693921ms -Mutexed Std's MPSC (as MPMC) with 4 threads total time: 147.843224ms -Lockfree MPMC with 4 threads total time: 47.507121ms +Mutexed VecDeque with 4 threads total time: 44.44874ms +Mutexed Std's MPSC (as MPMC) with 4 threads total 
time: 24.819183ms +Lockfree MPMC with 4 threads total time: 38.809402ms -Mutexed VecDeque with 8 threads total time: 181.006411ms -Mutexed Std's MPSC (as MPMC) with 8 threads total time: 178.166068ms -Lockfree MPMC with 8 threads total time: 92.244031ms +Mutexed VecDeque with 8 threads total time: 127.893584ms +Mutexed Std's MPSC (as MPMC) with 8 threads total time: 69.969399ms +Lockfree MPMC with 8 threads total time: 96.48539ms -Mutexed VecDeque with 16 threads total time: 392.331879ms -Mutexed Std's MPSC (as MPMC) with 16 threads total time: 403.369533ms -Lockfree MPMC with 16 threads total time: 324.461572ms +Mutexed VecDeque with 16 threads total time: 241.13194ms +Mutexed Std's MPSC (as MPMC) with 16 threads total time: 259.731871ms +Lockfree MPMC with 16 threads total time: 221.155085ms ``` -## REQUEST PROGRAM +## SKIPLIST ``` -A program simulating a concurrent server. +Result for 1 threads: +Target 0 (mutex btree_map insert): +mean of 33394591.582 r/s (41746432 rounds in 1.250 seconds) +Target 1 (lockfree insert): +mean of 535996.169 r/s (670720 rounds in 1.251 seconds) -Mutexed HashMap and Std's MPSC with 2 threads total time: 793.554071ms -Lockfree structures with 2 threads total time: 490.944222ms +Result for 2 threads: +Target 0 (mutex btree_map insert): +mean of 9779213.134 r/s (12225536 rounds in 1.250 seconds) +Target 1 (lockfree insert): +mean of 122675.592 r/s (154624 rounds in 1.260 seconds) -Mutexed HashMap and Std's MPSC with 4 threads total time: 369.968509ms -Lockfree structures with 4 threads total time: 409.906746ms +Result for 4 threads: +Target 0 (mutex btree_map insert): +mean of 9550015.484 r/s (11940864 rounds in 1.250 seconds) +Target 1 (lockfree insert): +mean of 87827.288 r/s (111616 rounds in 1.271 seconds) -Mutexed HashMap and Std's MPSC with 8 threads total time: 390.376975ms -Lockfree structures with 8 threads total time: 412.722696ms +Result for 8 threads: +Target 0 (mutex btree_map insert): +mean of 5873525.523 r/s (7346176 
rounds in 1.251 seconds) +Target 1 (lockfree insert): +mean of 74706.574 r/s (97280 rounds in 1.302 seconds) -Mutexed HashMap and Std's MPSC with 16 threads total time: 386.407811ms -Lockfree structures with 16 threads total time: 325.562949ms -``` +Result for 16 threads: +Target 0 (mutex btree_map insert): +mean of 6428722.881 r/s (8048640 rounds in 1.252 seconds) +Target 1 (lockfree insert): +mean of 66287.957 r/s (92160 rounds in 1.390 seconds) -## MESSAGE REVERB PROGRAM -``` -A program which reverberates messages through a plain queue channel +Result for 32 threads: +Target 0 (mutex btree_map insert): +mean of 6397300.717 r/s (8016896 rounds in 1.253 seconds) +Target 1 (lockfree insert): +mean of 66126.691 r/s (97280 rounds in 1.471 seconds) -Mutexed VecDeque with 2 threads total time: 220.035769ms -Mutexed LinkedList with 2 threads total time: 586.576805ms -Lockfree Queue with 2 threads total time: 197.928229ms +Result for 1 threads: +Target 0 (mutex btree_map get): +mean of 33017271.809 r/s (41274368 rounds in 1.250 seconds) +Target 1 (lockfree get): +mean of 1179084.029 r/s (1474560 rounds in 1.251 seconds) -Mutexed VecDeque with 4 threads total time: 238.337063ms -Mutexed LinkedList with 4 threads total time: 453.672894ms -Lockfree Queue with 4 threads total time: 158.472864ms +Result for 2 threads: +Target 0 (mutex btree_map get): +mean of 9267551.222 r/s (11585536 rounds in 1.250 seconds) +Target 1 (lockfree get): +mean of 215240.748 r/s (269312 rounds in 1.251 seconds) -Mutexed VecDeque with 8 threads total time: 243.002558ms -Mutexed LinkedList with 8 threads total time: 477.545286ms -Lockfree Queue with 8 threads total time: 155.281614ms +Result for 4 threads: +Target 0 (mutex btree_map get): +mean of 10536458.831 r/s (13172736 rounds in 1.250 seconds) +Target 1 (lockfree get): +mean of 171996.438 r/s (216064 rounds in 1.256 seconds) -Mutexed VecDeque with 16 threads total time: 238.828929ms -Mutexed LinkedList with 16 threads total time: 476.347392ms 
-Lockfree Queue with 16 threads total time: 141.994415ms -``` +Result for 8 threads: +Target 0 (mutex btree_map get): +mean of 8212504.786 r/s (10269696 rounds in 1.250 seconds) +Target 1 (lockfree get): +mean of 147577.612 r/s (188416 rounds in 1.277 seconds) -## HASH MINING -``` -A program simulating a hash miner. +Result for 16 threads: +Target 0 (mutex btree_map get): +mean of 7855815.680 r/s (9828352 rounds in 1.251 seconds) +Target 1 (lockfree get): +mean of 129797.793 r/s (171008 rounds in 1.317 seconds) -Mutexed structures with 2 threads total time: 454.91082ms -Lockfree structures with 2 threads total time: 481.382395ms +Result for 32 threads: +Target 0 (mutex btree_map get): +mean of 7839423.390 r/s (9816064 rounds in 1.252 seconds) +Target 1 (lockfree get): +mean of 129421.077 r/s (176128 rounds in 1.361 seconds) + +Result for 1 threads: +Target 0 (mutex btree_map pop_first): +mean of 93689253.493 r/s (117118976 rounds in 1.250 seconds) +Target 1 (lockfree get pop_first): +mean of 35461165.484 r/s (44329984 rounds in 1.250 seconds) -Mutexed structures with 4 threads total time: 317.253501ms -Lockfree structures with 4 threads total time: 323.152586ms +Result for 2 threads: +Target 0 (mutex btree_map pop_first): +mean of 20534562.895 r/s (25670656 rounds in 1.250 seconds) +Target 1 (lockfree get pop_first): +mean of 6837216.726 r/s (8547328 rounds in 1.250 seconds) -Mutexed structures with 8 threads total time: 250.116503ms -Lockfree structures with 8 threads total time: 260.129446ms +Result for 4 threads: +Target 0 (mutex btree_map pop_first): +mean of 19646908.862 r/s (24561664 rounds in 1.250 seconds) +Target 1 (lockfree get pop_first): +mean of 5247058.236 r/s (6561792 rounds in 1.251 seconds) -Mutexed structures with 16 threads total time: 245.25719ms -Lockfree structures with 16 threads total time: 246.603846ms +Result for 8 threads: +Target 0 (mutex btree_map pop_first): +mean of 15501697.026 r/s (19382272 rounds in 1.250 seconds) +Target 1 
(lockfree get pop_first): +mean of 4519344.860 r/s (5653504 rounds in 1.251 seconds) -Mutexed structures with 32 threads total time: 247.683849ms -Lockfree structures with 32 threads total time: 245.1651ms +Result for 16 threads: +Target 0 (mutex btree_map pop_first): +mean of 16499415.992 r/s (20634624 rounds in 1.251 seconds) +Target 1 (lockfree get pop_first): +mean of 4097288.540 r/s (5129216 rounds in 1.252 seconds) -Mutexed structures with 64 threads total time: 249.568002ms -Lockfree structures with 64 threads total time: 261.512353ms +Result for 32 threads: +Target 0 (mutex btree_map pop_first): +mean of 16703976.673 r/s (20898816 rounds in 1.251 seconds) +Target 1 (lockfree get pop_first): +mean of 4103046.322 r/s (5147648 rounds in 1.255 seconds) -Mutexed structures with 128 threads total time: 327.423669ms -Lockfree structures with 128 threads total time: 336.425139ms ``` diff --git a/Cargo.toml b/Cargo.toml index d7974b7..5286ad2 100755 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,3 +11,4 @@ readme = "README.md" [dependencies] owned-alloc = "0.2" +rand = "0.8" diff --git a/benchmark/Cargo.toml b/benchmark/Cargo.toml index fce526e..072fa2d 100644 --- a/benchmark/Cargo.toml +++ b/benchmark/Cargo.toml @@ -8,6 +8,7 @@ publish = false lockfree = { path = "../" } benchsuite = { path = "benchsuite" } thread_local = "*" +rand = "0.8" [workspace] members = ["benchsuite", "."] @@ -55,3 +56,7 @@ path = "src/spmc.rs" [[bin]] name = "mpmc" path = "src/mpmc.rs" + +[[bin]] +name = "skiplist" +path = "src/skiplist.rs" diff --git a/benchmark/src/skiplist.rs b/benchmark/src/skiplist.rs new file mode 100644 index 0000000..33d943d --- /dev/null +++ b/benchmark/src/skiplist.rs @@ -0,0 +1,151 @@ +#[macro_use] +extern crate benchsuite; +extern crate lockfree; +extern crate rand; + +use benchsuite::exec::Target; +use lockfree::skiplist::SkipList; +use std::{ + collections::BTreeMap, + sync::{Arc, Mutex}, +}; + +type MutexBTreeMapInner = Arc>>; + +type LockfreeInner = Arc>; + +fn 
randomize(mut i: usize) -> usize { + i ^= i >> 12; + i ^= i << 25; + i ^ i >> 27 +} + +#[derive(Debug, Clone, Default)] +struct MutexBTreeMapInsert { + inner: MutexBTreeMapInner, + seed: usize, +} + +impl Target for MutexBTreeMapInsert { + #[inline(always)] + fn round(&mut self) { + let i = randomize(self.seed); + + self.inner.lock().unwrap().insert(i as u8, i as u8); + + self.seed = i; + } +} + +#[derive(Debug, Clone, Default)] +struct LockfreeInsert { + inner: LockfreeInner, + seed: usize, +} + +impl Target for LockfreeInsert { + #[inline(always)] + fn round(&mut self) { + let i = randomize(self.seed); + + self.inner.insert(i as u8, i as u8); + + self.seed = i; + } +} + +#[derive(Debug, Clone, Default)] +struct MutexBTreeMapGet { + inner: MutexBTreeMapInner, + seed: usize, +} + +impl Target for MutexBTreeMapGet { + #[inline(always)] + fn round(&mut self) { + let i = randomize(self.seed); + + self.inner.lock().unwrap().get(&(i as u8)); + + self.seed = i; + } +} + +#[derive(Debug, Clone, Default)] +struct LockfreeGet { + inner: LockfreeInner, + seed: usize, +} + +impl Target for LockfreeGet { + #[inline(always)] + fn round(&mut self) { + let i = randomize(self.seed); + + self.inner.get(&(i as u8)); + + self.seed = i; + } +} + +#[derive(Debug, Clone, Default)] +struct MutexBTreeMapPopFirst { + inner: MutexBTreeMapInner, +} + +impl Target for MutexBTreeMapPopFirst { + #[inline(always)] + fn round(&mut self) { + self.inner.lock().unwrap().pop_first(); + } +} + +#[derive(Debug, Clone, Default)] +struct LockfreePopFirst { + inner: LockfreeInner, +} + +impl Target for LockfreePopFirst { + #[inline(always)] + fn round(&mut self) { + self.inner.pop_first(); + } +} + +fn main() { + let mutex = MutexBTreeMapInner::default(); + let lockfree = LockfreeInner::default(); + bench! 
{ + levels 1, 2, 4, 8, 16, 32; + "mutex btree_map insert" => MutexBTreeMapInsert { + inner: mutex.clone(), + seed: rand::random::(), + }, + "lockfree insert" => LockfreeInsert { + inner: lockfree.clone(), + seed: rand::random::(), + }, + } + + bench! { + levels 1, 2, 4, 8, 16, 32; + "mutex btree_map get" => MutexBTreeMapGet { + inner: mutex.clone(), + seed: rand::random::(), + }, + "lockfree get" => LockfreeGet { + inner: lockfree.clone(), + seed: rand::random::(), + }, + } + + bench! { + levels 1, 2, 4, 8, 16, 32; + "mutex btree_map pop_first" => MutexBTreeMapPopFirst { + inner: mutex.clone(), + }, + "lockfree get pop_first" => LockfreePopFirst { + inner: lockfree.clone(), + }, + } +} diff --git a/benchmark/src/tls.rs b/benchmark/src/tls.rs index a29426f..90919f4 100644 --- a/benchmark/src/tls.rs +++ b/benchmark/src/tls.rs @@ -53,7 +53,7 @@ thread_local! { impl Target for BlockingTarget { #[inline(always)] fn round(&mut self) { - let cell = self.inner.get_or(|| Box::new(Cell::new(0))); + let cell = self.inner.get_or(|| *Box::new(Cell::new(0))); cell.set(cell.get().wrapping_add(1)); } } @@ -61,7 +61,7 @@ impl Target for BlockingTarget { impl Target for BlkCachedTarget { #[inline(always)] fn round(&mut self) { - let cell = self.inner.get_or(|| Box::new(Cell::new(0))); + let cell = self.inner.get_or(|| *Box::new(Cell::new(0))); cell.set(cell.get().wrapping_add(1)); } } diff --git a/build-benchmarks.sh b/build-benchmarks.sh index 129e077..24737c8 100755 --- a/build-benchmarks.sh +++ b/build-benchmarks.sh @@ -69,20 +69,26 @@ cargo run --bin mpmc --release >> $FILE || exit 1 echo '```' >> $FILE echo '' >> $FILE -echo '## REQUEST PROGRAM' >> $FILE -echo '```' >> $FILE -cargo run --bin request --release >> $FILE || exit 1 -echo '```' >> $FILE -echo '' >> $FILE +# echo '## REQUEST PROGRAM' >> $FILE +# echo '```' >> $FILE +# cargo run --bin request --release >> $FILE || exit 1 +# echo '```' >> $FILE +# echo '' >> $FILE -echo '## MESSAGE REVERB PROGRAM' >> $FILE -echo '```' 
>> $FILE -cargo run --bin reverb --release >> $FILE || exit 1 -echo '```' >> $FILE -echo '' >> $FILE +# echo '## MESSAGE REVERB PROGRAM' >> $FILE +# echo '```' >> $FILE +# cargo run --bin reverb --release >> $FILE || exit 1 +# echo '```' >> $FILE +# echo '' >> $FILE + +# echo '## HASH MINING' >> $FILE +# echo '```' >> $FILE +# cargo run --bin mining --release >> $FILE || exit 1 +# echo '```' >> $FILE +# echo '' >> $FILE -echo '## HASH MINING' >> $FILE +echo '## SKIPLIST' >> $FILE echo '```' >> $FILE -cargo run --bin mining --release >> $FILE || exit 1 +cargo run --bin skiplist --release >> $FILE || exit 1 echo '```' >> $FILE echo '' >> $FILE diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 1e611ba..ae8d61a 100755 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -46,5 +46,9 @@ path = "fuzz_targets/spmc.rs" name = "mpmc" path = "fuzz_targets/mpmc.rs" +[[bin]] +name = "skiplist" +path = "fuzz_targets/skiplist.rs" + [profile.release] lto = "off" diff --git a/fuzz/fuzz_targets/skiplist.rs b/fuzz/fuzz_targets/skiplist.rs new file mode 100644 index 0000000..32cc194 --- /dev/null +++ b/fuzz/fuzz_targets/skiplist.rs @@ -0,0 +1,51 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; +extern crate fuzzsuite; +extern crate lockfree; + +use fuzzsuite::*; +use lockfree::prelude::*; +use std::sync::Arc; + +#[derive(Debug, Clone, Default)] +struct SkipListMachine { + list: Arc, Box>>, +} + +impl Spawn for SkipListMachine { + fn spawn() -> Self { + Self::default() + } + + fn fork(&self) -> Self { + self.clone() + } +} + +impl Machine for SkipListMachine { + fn interpret(&mut self, mut byte: u8, bytecode: &mut Bytecode) { + loop { + match byte % 4 { + 0 | 1 => { + let val = ((bytecode.next().unwrap_or(0) as u16) << 8) + + bytecode.next().unwrap_or(0) as u16; + self.list.remove(&Box::new(val)); + break; + }, + + 2 | 3 => { + let val = ((bytecode.next().unwrap_or(0) as u16) << 8) + + bytecode.next().unwrap_or(0) as u16; + self.list.insert(Box::new(val), Box::new(val)); + 
break; + }, + _ => unreachable!(), + } + } + } +} + +fuzz_target!(|data: &[u8]| { + let _ = test::(Bytecode::no_symbols(data)); +}); diff --git a/src/lib.rs b/src/lib.rs index 9210005..bf635e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,6 +23,7 @@ //! - `[x]` [Set](set::Set) //! - `[x]` [Stack](stack::Stack) //! - `[x]` [Queue](queue::Queue) +//! - `[x]` [SkipList](skiplist::SkipList) //! - `[ ]` Deque //! //! # Performance Guide @@ -55,6 +56,9 @@ pub mod map; /// A lock-free set. pub mod set; +/// A lock-free skip list. +pub mod skiplist; + /// Collection of lock-free FIFO channels. These channels are fully asynchronous /// and their receivers do not provide any sort of `wait-for-message` operation. /// It would be blocking otherwise, thus not lock-free. If you need such a diff --git a/src/prelude.rs b/src/prelude.rs index 603d240..85bfaa5 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -2,5 +2,6 @@ pub use channel::{mpmc, mpsc, spmc, spsc}; pub use map::Map; pub use queue::Queue; pub use set::Set; +pub use skiplist::SkipList; pub use stack::Stack; pub use tls::ThreadLocal; diff --git a/src/skiplist/mod.rs b/src/skiplist/mod.rs new file mode 100644 index 0000000..25a3354 --- /dev/null +++ b/src/skiplist/mod.rs @@ -0,0 +1,1449 @@ +extern crate alloc; +extern crate rand; + +mod node; +mod padded; +mod tagged; + +use std::{ + fmt::Debug, + ptr::NonNull, + sync::atomic::{fence, AtomicUsize, Ordering}, +}; + +use self::{ + node::{Head, Node}, + padded::Padded, +}; + +const HEIGHT_BITS: usize = 5; +const HEIGHT: usize = 1 << HEIGHT_BITS; +const HEIGHT_MASK: usize = (1 << (HEIGHT_BITS + 1)) - 1; + +/// A lock-free skip list, similar in its applications to +/// [BTreeMap](std::collections::BTreeMap) +/// and [BTreeSet](std::collections::BTreeSet). Implemented using +/// dynamically-allocated, multi-leveled `Node` towers. 
+/// +/// # Design +/// The SkipList consists of a sorted, multi-leveled linked list, where not +/// every [Node](node::Node) is present at each level. On average, there are +/// twice as many [Nodes](node::Node) on level `i` than on level `i+1`, which is +/// our list **invariant**. This allows us to traverse the list in search for a +/// value and get the minimum value with average time complexities of +/// `O(log(n))` and `O(1)` respectively. +/// +/// We achieve this invariant of `|level[i]| +/// == 2 * |level[i+1]|` by randomizing the height of our [Nodes](node::Node) on +/// insertion. +/// +/// We get our claimed lookup speeds by starting our search on the highest level +/// of our list, dropping down a level when we reach a [Node](node::Node) with a +/// `key` larger than our search bound. +/// +/// For further, and much more thorough explanation and derivation of skip list +/// and their invariant, see also: +/// - [Skip List CS.CMU](https://www.cs.cmu.edu/~ckingsf/bioinfo-lectures/skiplists.pdf) +/// - [Skip List Data Structure](https://www.mydistributed.systems/2021/03/skip-list-data-structure.html) +/// - [Skip List Proposal/Priority Queue](https://tstentz.github.io/418proposal/) +pub struct SkipList { + head: NonNull>, + state: Padded, + incin: SharedIncin, +} + +make_shared_incin! { + { "[`SkipList`]" } + SharedIncin of DeallocOnDrop +} + +impl SkipList { + /// Create a new and empty [SkipList](SkipList). + pub fn new() -> Self { + SkipList { + head: Head::new(), + state: Padded::new(ListState::new()), + incin: SharedIncin::new(), + } + } + + /// Returns the length of the [SkipList](SkipList). This is more of an + /// estimate and there are no strong + /// validity guarantees. + pub fn len(&self) -> usize { + match self.state.len.load(Ordering::Relaxed) { + // The counter may transiently underflow (wrap around); report 0 then. + len if len > (isize::MAX as usize) => 0, + len => len, + } + } + + /// Returns true if the [SkipList](SkipList) is *about* empty. 
Similar to + /// [len](SkipList::len) this does not come with strong validity guarantees. + pub fn is_empty(&self) -> bool { + self.state.len.load(Ordering::Relaxed) < 1 + } + + /// Generates a random height for a [Node](node::Node) and updates the list + /// seed. + fn gen_height(&self) -> usize { + let mut seed = self.state.seed.load(Ordering::Relaxed); + seed ^= seed << 13; + seed ^= seed >> 17; + seed ^= seed << 5; + + self.state.seed.store(seed, Ordering::Relaxed); + + let mut height = + std::cmp::min(HEIGHT, seed.trailing_zeros() as usize + 1); + + let head = unsafe { &(*self.head.as_ptr()) }; + + while height >= 4 && head.levels[height - 2].load_ptr().is_null() { + height -= 1; + } + + if height > self.state.max_height.load(Ordering::Relaxed) { + self.state.max_height.store(height, Ordering::Relaxed); + } + + height + } +} + +impl SkipList +where + K: Ord + Send + Sync, + V: Send + Sync, +{ + /// Inserts a value in the list given a key. + pub fn insert<'a>(&'a self, key: K, val: V) -> Option> { + // After this check, whether we are holding the head or a regular Node + // will not impact the operation. + let mut insertion_point = self.find(&key, false); + let mut existing = None; + + while let Some(target) = insertion_point.target.take() { + if target.try_remove_and_tag().is_ok() { + existing = Some(target.clone()); + unsafe { + let _ = self.unlink( + target.clone(), + target.height(), + &insertion_point.prev, + ); + } + insertion_point = self.find(&key, false); + } + } + + let mut prev = insertion_point.prev; + + let new_node_raw = Node::new_rand_height(key, val, self); + + // Protects the new_node so concurrent removals do not invalidate our + // pointer. 
+ let new_node = self + .node_ref_with(|| new_node_raw) + .expect("new_node to not be null!"); + + let mut starting_height = 0; + + self.state.len.fetch_add(1, Ordering::AcqRel); + + unsafe { + while let Err(starting) = + self.link_nodes(&new_node, prev, starting_height) + { + let mut search = self.find(&new_node.key, false); + + while let Some(target) = search.target.take() { + if core::ptr::eq(target.as_ptr(), new_node.as_ptr()) { + break; + } + + if target.try_remove_and_tag().is_ok() { + existing = Some(target.clone()); + let _ = self.unlink( + target.clone(), + target.height(), + &search.prev, + ); + search = self.find(&new_node.key, false); + } + } + + (starting_height, prev) = (starting, search.prev); + } + } + + existing.map(|existing| existing.into()) + } + + /// This function is unsafe, as it does not check whether new_node or link + /// node are valid pointers. + /// + /// # Safety + /// + /// 1. `new_node` cannot be null + /// 2. A tower of sufficient height must eventually be reached, the list + /// head can be this tower + unsafe fn link_nodes<'a>( + &self, + new_node: &'a NodeRef<'a, K, V>, + previous_nodes: [NodeRef<'a, K, V>; HEIGHT], + start_height: usize, + ) -> Result<(), usize> { + // iterate over all the levels in the new nodes pointer tower + fence(Ordering::Release); + + for i in start_height .. new_node.height() { + let prev = &previous_nodes[i]; + + let next = self.node_ref_with(|| prev.levels[i].load_ptr()); + + let next_ptr = + next.as_ref().map_or(std::ptr::null_mut(), |n| n.as_ptr()); + + let curr_next = new_node.levels[i].load_ptr(); + + if new_node.removed() { + break; + } + + // We check if the next node is actually lower in key than our + // current node. If the key is not greater we stop + // building our node. 
+ match next.as_ref() { + Some(next) + if next.key <= new_node.key && !new_node.removed() => + { + break + }, + _ => (), + }; + + // Swap the previous' next node into the new_node's level + // It could be the case that we link ourselves to the previous node, + // but just as we do this `next` attempts to unlink + // itself and fails. So while we succeeded, `next` + // repeats its search and finds that we are the next + if new_node.levels[i] + .compare_exchange( + curr_next, + next_ptr, + Ordering::Acquire, + Ordering::Relaxed, + ) + .is_err() + { + return Err(i); + }; + + // If this is the base level, we simply increment the ref count, as + // we expect it to be 0. If it is not, we only increment + // if it > 0. + if i == 0 { + new_node.add_ref(); + } else if new_node.try_add_ref().is_err() { + break; + } + + // Swap the new_node into the previous' level. If the previous' + // level has changed since the search, we repeat the + // search from this level. + if prev.levels[i] + .compare_exchange( + next_ptr, + new_node.as_ptr(), + Ordering::AcqRel, + Ordering::Acquire, + ) + .is_err() + { + new_node.sub_ref(); + return Err(i); + } + } + + // IF we linked the node, yet it was removed during that process, there + // may be some levels that we linked and that were missed by the + // removers. We search to unlink those too. + if new_node.removed() { + self.find(&new_node.key, false); + } + + Ok(()) + } + + /// Removes a key-value pair from the [SkipList](SkipList) if the given + /// `key` is present and returns a protected *immutable* reference to the + /// pair. + pub fn remove<'a>(&'a self, key: &K) -> Option> + where + K: Send, + V: Send, + { + match self.find(key, false) { + SearchResult { target: Some(target), prev } => { + // Set the target state to being removed + // If this errors, it is already being removed by someone else + // and thus we exit early. + if target.set_removed().is_err() { + return None; + } + + // # Safety: + // 1. 
`key` and `val` will not be tempered with. + // TODO This works for now, yet once `Atomic` is used + // this may need to change. + let height = target.height(); + + if let Err(_) = target.tag_levels(1) { + panic!("SHOULD NOT BE TAGGED!") + }; + + // #Safety: + // 1. The height we got from the `node` guarantees it is a valid + // height for levels. + unsafe { + if self.unlink(target.clone(), height, &prev).is_err() { + self.find(&key, false); + } + } + + Some(target.into()) + }, + _ => None, + } + } + + /// Logically removes the node from the list by linking its adjacent nodes + /// to one-another. + /// + /// # Safety + /// 1. All indices in [0, height) are valid indices for `node.levels`. + unsafe fn unlink<'a>( + &self, + mut node: NodeRef<'a, K, V>, + height: usize, + previous_nodes: &[NodeRef<'a, K, V>; HEIGHT], + ) -> Result<(), usize> { + // safety check against UB caused by unlinking the head + if self.is_head(node.as_ptr()) { + panic!() + } + + // # Safety + // + // 1.-3. Some as method and covered by method caller. + // 4. We are not unlinking the head. - Covered by previous safety check. + for (i, prev) in previous_nodes.iter().enumerate().take(height).rev() { + let (new_next, _tag) = node.levels[i].load_decomposed(); + + // Performs a compare_exchange, expecting the old value of the + // pointer to be the current node. If it is not, we + // cannot make any reasonable progress, so we search again. + if prev.levels[i] + .compare_exchange( + node.as_ptr(), + new_next, + Ordering::AcqRel, + Ordering::Relaxed, + ) + .is_err() + { + return Err(i + 1); + } + + node = if let Some(node) = self.sub_ref(node) { + node + } else { + break; + }; + } + + self.state.len.fetch_sub(1, Ordering::Relaxed); + + drop(previous_nodes); + + // we see if we can drop some pointers in the list. + self.incin.inner.try_clear(); + Ok(()) + } + + /// Decrements the reference count of the `Node` by 1. If the reference + /// count is thus 0, we retire the node. 
+ fn sub_ref<'a>( + &self, + node: NodeRef<'a, K, V>, + ) -> Option> { + if node.sub_ref() == 0 { + let NodeRef { node, _pause, .. } = node; + + _pause.add_to_incin(DeallocOnDrop::from(node.as_ptr())); + None + } else { + Some(node) + } + } + + /// Unlink [Node](Node) `curr` at the given level of [Node](Node) `prev` by + /// exchanging the pointer for `next`. + /// + /// # Safety + /// + /// 1. `prev`, `curr`, are protected accesses. + #[allow(unused)] + unsafe fn unlink_level<'a>( + &'a self, + prev: &NodeRef<'a, K, V>, + curr: NodeRef<'a, K, V>, + next: Option>, + level: usize, + ) -> Result>, ()> { + // The pointer to `next` is tagged to signal unlinking. + let next_ptr = + next.as_ref().map_or(core::ptr::null_mut(), |n| n.as_ptr()); + + if let Ok(_) = prev.levels[level].compare_exchange( + curr.as_ptr(), + next_ptr, + Ordering::AcqRel, + Ordering::Relaxed, + ) { + self.sub_ref(curr); + + Ok(next) + } else { + Err(()) + } + } + + /// Find a `Node` given a `key`. If `search_closest` it may also return the + /// next greater `Node` if the `key` is not present. Additionally, it + /// returns an array holding the previous `Nodes` in the list that link + /// to the target node. + fn find<'a>( + &'a self, + key: &K, + search_closest: bool, + ) -> SearchResult<'a, K, V> { + let head = unsafe { &(*self.head.as_ptr()) }; + + // Initialize the `prev` array. 
+ let mut prev = unsafe { + let mut prev: [core::mem::MaybeUninit>; HEIGHT] = + core::mem::MaybeUninit::uninit().assume_init(); + + for level in prev.iter_mut() { + core::ptr::write( + level.as_mut_ptr(), + self.node_ref_with(|| { + self.head.cast::>().as_ptr() + }) + .expect("Head to not be null!"), + ) + } + + core::mem::transmute::<_, [NodeRef<'a, K, V>; HEIGHT]>(prev) + }; + + '_search: loop { + let mut level = self.state.max_height.load(Ordering::Relaxed); + // Find the first and highest node tower + while level > 1 && head.levels[level - 1].load_ptr().is_null() { + level -= 1; + } + + // We need not protect the head, as it will always be valid, as long + // as we are in a sane state. + let mut curr = self + .node_ref_with(|| self.head.as_ptr().cast::>()) + .expect("Head to not be null!"); + + // steps: + // 1. Go through each level until we reach a node with a key GEQ to + // ours or that is null 1.1 If we are equal, then + // the node must either be marked as removed or removed nodes + // are allowed in this search. + // Should this be the case, then we drop down a level while + // also protecting a pointer to the current node, + // in order to keep the `Level` valid in our `prev` array. + // 1.2 If we the `next` node is less or equal but removed and + // removed nodes are disallowed, then we set our + // current node to the next node. 
+ while level > 0 { + let next = unsafe { + let mut next = self + .node_ref_with(|| curr.levels[level - 1].load_ptr()); + + loop { + if next.is_none() { + break next; + } + + if let Some(n) = next.as_ref() { + if n.levels[level - 1].load_tag() == 0 { + break next; + } + } + + let n = next.unwrap(); + + let new_next = self + .node_ref_with(|| n.levels[level - 1].load_ptr()); + + let Ok(n) = self.unlink_level(&curr, n, new_next, level - 1) else { + continue '_search; + }; + + next = n + } + }; + + match next { + Some(next) if (*next).key < *key => { + prev[level - 1] = curr; + + curr = next; + }, + _ => { + // Update previous_nodes. + prev[level - 1] = curr.clone(); + + level -= 1; + }, + } + } + let next = self.node_ref_with(|| curr.levels[0].load_ptr()); + + return if search_closest { + SearchResult { prev, target: next } + } else { + match next { + Some(next) if next.key == *key && !next.removed() => { + SearchResult { prev, target: Some(next) } + }, + _ => SearchResult { prev, target: None }, + } + }; + } + } + + /// Get a reference to an [Entry](Entry) if one with the given key exists. + pub fn get<'a>(&'a self, key: &K) -> Option> { + if self.is_empty() { + return None; + } + + // Perform safety check for whether we are dealing with the head. + match self.find(key, false) { + SearchResult { target: Some(target), .. } => { + Some(Entry::from(target)) + }, + _ => None, + } + } + + fn is_head(&self, ptr: *const Node) -> bool { + std::ptr::eq(ptr, self.head.as_ptr().cast()) + } + + /// Returns the next [Node](Node) in the [SkipList](SkipList) if the given + /// [Node](Node) is not the last. + fn next_node<'a>( + &'a self, + node: &Entry<'a, K, V>, + ) -> Option> { + let node: &NodeRef<'_, _, _> = unsafe { core::mem::transmute(node) }; + + // This means we have a stale node and cannot return a sane answer! 
+ if node.levels[0].load_tag() == 1 { + return self.find(&node.key, true).target.map(|t| t.into()); + }; + + let mut next = self.node_ref_with(|| node.levels[0].load_ptr())?; + + // Unlink and skip all removed `Node`s we may encounter. + while next.levels[0].load_tag() == 1 { + let new = self.node_ref_with(|| next.levels[0].load_ptr()); + next = unsafe { + self.unlink_level(&node, next, new, 0) + .ok() + .unwrap_or_else(|| self.find(&node.key, true).target)? + }; + } + + Some(next.into()) + } + + /// Returns the first [Node](Node) in the [SkipList](SkipList) if the list + /// is not empty. + pub fn get_first<'a>(&'a self) -> Option> { + if self.is_empty() { + return None; + } + + let curr = self + .node_ref_with(|| self.head.as_ptr().cast::>()) + .expect("Head to not be null"); + + self.next_node(&curr.into()) + } + + /// Removes the first [Node](Node) (with the smallest key) from the list if + /// it is not empty. + pub fn pop_first<'a>(&'a self) -> Option> { + self.get_first()?.remove() + } + + /// Returns the last [Node](Node) in the [SkipList](SkipList) if the list + /// is not empty. Runtime is `O(n)` + pub fn get_last<'a>(&'a self) -> Option> { + let mut curr = self.get_first()?; + + while let Some(next) = self.next_node(&curr) { + curr = next; + } + + return Some(curr.into()); + } + + /// Removes the last [Node](Node) (with the largest key) from the list if + /// it is not empty. + pub fn pop_last<'a>(&'a self) -> Option> { + self.get_last()?.remove() + } + + /// Returns a borrowing iterator over the [SkipList](SkipList) that yields + /// [Entries](Entry) into the list. 
+ pub fn iter<'a>(&'a self) -> iter::Iter<'a, K, V> { + iter::Iter::from_list(self) + } + + fn node_ref_with(&self, f: F) -> Option> + where + F: FnOnce() -> *mut Node, + { + NodeRef::from_pause_with_in(self.incin.inner.pause(), self, f) + } +} + +impl Default for SkipList +where + K: Sync, + V: Sync, +{ + fn default() -> Self { + Self::new() + } +} + +unsafe impl Send for SkipList +where + K: Send + Sync, + V: Send + Sync, +{ +} + +unsafe impl Sync for SkipList +where + K: Send + Sync, + V: Send + Sync, +{ +} + +// TODO Verify this is sound for all variants of SkipList +/// Manual `Drop` implementation for all `SkipList`s +impl Drop for SkipList { + fn drop(&mut self) { + // To ensure this is safe, clear all `HazardPointer`s in the + // domain. We do not want to drop a node twice! + self.incin.clear(); + let mut node = unsafe { (*self.head.as_ptr()).levels[0].load_ptr() }; + + // # Safety + // + // We have an exclusive reference to `SkipList`. + unsafe { + while !node.is_null() { + let temp = node; + node = (*temp).levels[0].load_ptr(); + Node::::drop(temp); + } + + Head::::drop(self.head); + } + } +} + +impl Debug for SkipList { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SkipList").field("head", &self.head.as_ptr()).finish() + } +} + +/// Frequently accessed data in the [SkipList](SkipList). +struct ListState { + len: AtomicUsize, + max_height: AtomicUsize, + seed: AtomicUsize, +} + +impl ListState { + fn new() -> Self { + ListState { + len: AtomicUsize::new(0), + max_height: AtomicUsize::new(1), + seed: AtomicUsize::new(rand::random()), + } + } +} + +/// A protected and *shared* reference to a key-value pair from or in the +/// [SkipList](SkipList). +#[repr(C)] +pub struct Entry<'a, K, V> { + node: core::ptr::NonNull>, + list: &'a SkipList, + _pause: crate::incin::Pause<'a, DeallocOnDrop>, +} + +impl<'a, K, V> Entry<'a, K, V> { + /// Returns the value of the key-value pair. 
+ pub fn val(&self) -> &V { + // #Safety + // + // Our `HazardPointer` ensures that our pointers is valid. + unsafe { &self.node.as_ref().val } + } + + /// Returns the key of the key-value pair. + pub fn key(&self) -> &K { + // #Safety + // + // Our `HazardPointer` ensures that our pointers is valid. + unsafe { &self.node.as_ref().key } + } +} +impl<'a, K, V> Entry<'a, K, V> +where + K: Ord + Send + Sync, + V: Send + Sync, +{ + /// Removes the [Entry](Entry) from the [SkipList](SkipList) if + /// it is not already removed. + pub fn remove(self) -> Option> { + unsafe { + self.node.as_ref().set_removed().ok()?; + + self.node.as_ref().tag_levels(1).expect("no tags to exists"); + + self.list.find(&self.key, false); + + Some(self) + } + } +} + +impl<'a, K, V> core::ops::Deref for Entry<'a, K, V> { + type Target = Node; + + fn deref(&self) -> &Self::Target { + unsafe { self.node.as_ref() } + } +} + +struct SearchResult<'a, K, V> { + prev: [NodeRef<'a, K, V>; HEIGHT], + target: Option>, +} + +impl<'a, K, V> Debug for SearchResult<'a, K, V> +where + K: Debug + Default, + V: Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SearchResult").field("target", &self.target).finish() + } +} + +#[repr(C)] +struct NodeRef<'a, K, V> { + node: NonNull>, + list: &'a SkipList, + _pause: crate::incin::Pause<'a, DeallocOnDrop>, +} + +impl<'a, K, V> NodeRef<'a, K, V> { + fn from_pause_with_in( + pause: crate::incin::Pause<'a, DeallocOnDrop>, + list: &'a SkipList, + f: F, + ) -> Option + where + F: FnOnce() -> *mut Node, + { + let ptr = f(); + unsafe { + if !ptr.is_null() { + Some(NodeRef { + node: NonNull::new_unchecked(ptr), + list, + _pause: pause, + }) + } else { + None + } + } + } + + fn from_raw_and_pause( + list: &'a SkipList, + pause: crate::incin::Pause<'a, DeallocOnDrop>, + raw: *mut Node, + ) -> NodeRef<'a, K, V> { + unsafe { + NodeRef { node: NonNull::new_unchecked(raw), list, _pause: pause } + } + } + + fn as_ptr(&self) -> *mut 
Node { + self.node.as_ptr() + } +} + +impl<'a, K, V> AsRef> for NodeRef<'a, K, V> { + fn as_ref(&self) -> &Node { + unsafe { &(*self.as_ptr()) } + } +} + +impl<'a, K, V> core::ops::Deref for NodeRef<'a, K, V> { + type Target = Node; + fn deref(&self) -> &Self::Target { + self.as_ref() + } +} + +impl<'a, K, V> core::ops::DerefMut for NodeRef<'a, K, V> { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { &mut (*self.as_ptr()) } + } +} + +impl<'a, K, V> core::fmt::Debug for NodeRef<'a, K, V> +where + K: Debug, + V: Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + unsafe { + f.debug_struct("NodeRef").field("node", self.node.as_ref()).finish() + } + } +} + +impl<'a, K, V> From> for Entry<'a, K, V> { + fn from(value: NodeRef<'a, K, V>) -> Self { + unsafe { core::mem::transmute(value) } + } +} + +impl<'a, K, V> Clone for NodeRef<'a, K, V> { + fn clone(&self) -> Self { + NodeRef { + node: self.node.clone(), + list: self.list, + _pause: self._pause.clone(), + } + } +} + +impl<'a, K, V> core::cmp::PartialEq for NodeRef<'a, K, V> { + fn eq(&self, other: &Self) -> bool { + core::ptr::eq(self.node.as_ptr(), other.node.as_ptr()) + } +} + +#[repr(transparent)] +struct DeallocOnDrop(*mut Node); + +unsafe impl Send for DeallocOnDrop +where + K: Send + Sync, + V: Send + Sync, +{ +} + +unsafe impl Sync for DeallocOnDrop +where + K: Send + Sync, + V: Send + Sync, +{ +} + +impl From<*mut Node> for DeallocOnDrop { + fn from(node: *mut Node) -> Self { + DeallocOnDrop(node) + } +} + +impl Drop for DeallocOnDrop { + fn drop(&mut self) { + unsafe { Node::drop(self.0) } + } +} + +impl core::ops::Deref for DeallocOnDrop { + type Target = Node; + + fn deref(&self) -> &Self::Target { + unsafe { &(*self.0) } + } +} + +impl core::ops::DerefMut for DeallocOnDrop { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { &mut (*self.0) } + } +} + +/// A lock-free binary search tree that that currently only supports concurrent +/// pushing with removal 
for now only working through a mutable reference. + +pub mod iter { + use super::Node; + + use super::{Entry, SkipList}; + use std::iter::FromIterator; + + /// A borrowing [Iterator](std::iter::Iterator) over [Entries](Entry) in the + /// SkipList. + pub struct Iter<'a, K, V> { + list: &'a SkipList, + next: Option>, + } + + impl<'a, K, V> Iter<'a, K, V> + where + K: Ord + Send + Sync, + V: Send + Sync, + { + /// Creates an instance of [Iter](Iter) from a [SkipList](SkipList). + pub fn from_list(list: &'a SkipList) -> Self { + Self { list, next: list.get_first() } + } + } + + impl<'a, K, V> core::iter::Iterator for Iter<'a, K, V> + where + K: Ord + Send + Sync, + V: Send + Sync, + { + type Item = Entry<'a, K, V>; + fn next(&mut self) -> Option { + if let Some(next) = self.next.take() { + self.next = self.list.next_node(&next); + return Some(next); + } + + None + } + } + + impl IntoIterator for SkipList + where + K: Ord + Send + Sync, + V: Send + Sync, + { + type Item = (K, V); + type IntoIter = IntoIter; + fn into_iter(self) -> Self::IntoIter { + IntoIter::from_list(self) + } + } + + impl FromIterator<(K, V)> for SkipList + where + K: Ord + Send + Sync, + V: Send + Sync, + { + fn from_iter>(iter: T) -> Self { + let list = Self::new(); + for (k, v) in iter { + list.insert(k, v); + } + + list + } + } + + /// An owning [Iterator](std::iter::Iterator) over key-value pairs from + /// a [SkipList](SkipList). + pub struct IntoIter { + next: *mut Node, + } + + impl IntoIter + where + K: Ord + Send + Sync, + V: Send + Sync, + { + /// Creates an instance of [IntoIter] from a [SkipList](SkipList). 
+ pub fn from_list<'a>(mut list: SkipList) -> Self { + unsafe { + let next = list.head.as_ref().levels[0].load_ptr(); + for level in list.head.as_mut().levels.pointers.iter_mut() { + level.store_ptr(core::ptr::null_mut()); + } + + IntoIter { next } + } + } + } + + impl core::iter::Iterator for IntoIter + where + K: Ord + Send + Sync, + V: Send + Sync, + { + type Item = (K, V); + fn next(&mut self) -> Option { + if self.next.is_null() { + return None; + } + + let next = self.next; + + self.next = unsafe { (*next).levels[0].load_ptr() }; + + let (key, val) = unsafe { + (core::ptr::read(&(*next).key), core::ptr::read(&(*next).val)) + }; + + unsafe { + Node::dealloc(next); + } + + (key, val).into() + } + } +} + +#[cfg(test)] +mod skiplist_test { + use self::rand::Rng; + use super::*; + + #[test] + fn test_new_node_sync() { + let node = Node::new(100, "hello", 1); + let other = Node::new(100, "hello", 1); + unsafe { println!("node 1: {:?},", *node) }; + unsafe { println!("node 2: {:?},", *other) }; + let other = unsafe { + let node = Node::alloc(1); + core::ptr::write(&mut (*node).key, 100); + core::ptr::write(&mut (*node).val, "hello"); + node + }; + + unsafe { println!("node 1: {:?}, node 2: {:?}", *node, *other) }; + + unsafe { assert_eq!(*node, *other) }; + } + + #[test] + fn test_new_list_sync() { + let _: SkipList = SkipList::new(); + } + + #[test] + fn test_insert_sync() { + let list = SkipList::new(); + let mut rng: u16 = rand::random(); + + for _ in 0 .. 
10_000 { + rng ^= rng << 3; + rng ^= rng >> 12; + rng ^= rng << 7; + list.insert(rng, "hello there!"); + } + } + + #[test] + fn test_rand_height_sync() { + let list: SkipList<&str, &str> = SkipList::new(); + let node = Node::new_rand_height("Hello", "There!", &list); + + assert!(!node.is_null()); + let height = unsafe { (*node).levels.pointers.len() }; + + println!("height: {}", height); + + unsafe { + println!("{}", *node); + } + + unsafe { + let _ = Box::from_raw(node); + } + } + + #[test] + fn test_drop() { + struct CountOnDrop { + key: K, + counter: std::sync::Arc, + } + + impl CountOnDrop { + fn new( + key: K, + counter: std::sync::Arc, + ) -> Self { + CountOnDrop { key, counter } + } + + fn new_none(key: K) -> Self { + CountOnDrop { + key, + counter: std::sync::Arc::new( + std::sync::atomic::AtomicUsize::new(0), + ), + } + } + } + impl PartialEq for CountOnDrop { + fn eq(&self, other: &Self) -> bool { + self.key == other.key + } + } + + impl Eq for CountOnDrop {} + + impl PartialOrd for CountOnDrop { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.key.cmp(&other.key)) + } + } + + impl Ord for CountOnDrop { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.key.cmp(&other.key) + } + } + + impl Drop for CountOnDrop { + fn drop(&mut self) { + self.counter.fetch_add(1, Ordering::SeqCst); + } + } + + let counter = + std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0)); + + let list = SkipList::new(); + + list.insert(CountOnDrop::new(1, counter.clone()), ()); + + list.remove(&CountOnDrop::new_none(1)); + + // assert_eq!(counter.load(Ordering::SeqCst), 1); + + list.insert(CountOnDrop::new(1, counter.clone()), ()); + + list.insert(CountOnDrop::new(1, counter.clone()), ()); + + println!("length: {}", list.len()); + + list.incin.inner.try_clear(); + + core::sync::atomic::fence(Ordering::SeqCst); + + assert_eq!(counter.load(Ordering::SeqCst), 2); + + drop(list); + + assert_eq!(counter.load(Ordering::SeqCst), 3); + + let list = SkipList::>, 
()>::new(); + + std::thread::scope(|s| { + let list = &list; + for _ in 0 .. 16 { + let counter = counter.clone(); + s.spawn(move || { + for _ in 0 .. 10_000 { + match rand::random::() % 3 { + 0 => { + list.remove(&Box::new(CountOnDrop { + key: rand::random(), + counter: counter.clone(), + })); + }, + _ => { + list.insert( + Box::new(CountOnDrop { + key: rand::random(), + counter: counter.clone(), + }), + (), + ); + }, + }; + } + }); + } + }); + + drop(list); + + assert_eq!(counter.load(Ordering::SeqCst), 160000); + } + + #[test] + fn test_insert_verbose_sync() { + let list = SkipList::new(); + + list.insert(1, 1); + + list.iter().for_each(|n| println!("k: {},", n.key())); + + list.insert(2, 2); + + list.iter().for_each(|n| println!("k: {},", n.key())); + + list.insert(5, 3); + + list.iter().for_each(|n| println!("k: {},", n.key())); + } + + #[test] + fn test_remove() { + let list = SkipList::new(); + let mut rng: u16 = rand::random(); + + for _ in 0 .. 10_000 { + rng ^= rng << 3; + rng ^= rng >> 12; + rng ^= rng << 7; + list.insert(rng, "hello there!"); + } + for _ in 0 .. 
10_000 { + rng ^= rng << 3; + rng ^= rng >> 12; + rng ^= rng << 7; + list.remove(&rng); + } + } + + #[test] + fn test_verbose_remove() { + let list = SkipList::new(); + + list.insert(1, 1); + list.insert(2, 2); + list.insert(2, 2); + list.insert(5, 3); + + list.iter().for_each(|n| println!("k: {},", n.key())); + + assert!(list.remove(&1).is_some()); + + list.iter().for_each(|n| println!("k: {},", n.key())); + + println!("removing 6"); + assert!(list.remove(&6).is_none()); + println!("removing 1"); + assert!(list.remove(&1).is_none()); + println!("removing 5"); + assert!(list.remove(&5).is_some()); + println!("removing 2"); + assert!(list.remove(&2).is_some()); + + list.iter().for_each(|n| println!("k: {},", n.key())); + + assert_eq!(list.len(), 0); + } + + #[test] + fn test_find_removed() { + let list = SkipList::new(); + + list.insert(3, ()); + + list.insert(4, ()); + + list.insert(5, ()); + + assert!(list.find(&3, false).target.is_some()); + assert!(list.find(&4, false).target.is_some()); + + // manually get reference to the nodes + let node_3 = + unsafe { &mut (*(*list.head.as_ptr()).levels[0].load_ptr()) }; + let node_4 = unsafe { + &mut (*(*(*list.head.as_ptr()).levels[0].load_ptr()).levels[0] + .load_ptr()) + }; + let node_5 = unsafe { + &mut (*(*(*(*list.head.as_ptr()).levels[0].load_ptr()).levels[0] + .load_ptr()) + .levels[0] + .load_ptr()) + }; + + // make sure it is the right node + assert_eq!(node_3.key, 3); + println!("{:?}", node_3); + assert_eq!(node_4.key, 4); + println!("{:?}", node_4); + assert_eq!(node_5.key, 5); + println!("{:?}", node_5); + + // remove the node logically + let _ = node_4.set_removed(); + + assert!(list.find(&4, false).target.is_none()); + + println!("{:?}", list.find(&3, false)); + + assert!(!node_3.removed()); + + assert!(list.remove(&4).is_none()); + + // remove the node logically + node_4.height_and_removed.store( + node_4.height_and_removed.load(Ordering::SeqCst) + & (usize::MAX >> 1), + Ordering::SeqCst, + ); + + 
assert!(!node_4.removed()); + + assert!(list.remove(&4).is_some()); + } + + #[test] + fn test_sync_remove() { + use std::sync::Arc; + let list = Arc::new(SkipList::new()); + let mut rng = rand::thread_rng(); + + for _ in 0 .. 10_000 { + list.insert(rng.gen::(), ()); + } + let threads = (0 .. 20) + .map(|_| { + let list = list.clone(); + std::thread::spawn(move || { + let mut rng = rand::thread_rng(); + for _ in 0 .. 1_000 { + let target = &rng.gen::(); + list.remove(&target); + } + }) + }) + .collect::>(); + + for thread in threads { + thread.join().unwrap() + } + + list.iter().for_each(|e| println!("key: {}", e.key)); + } + + #[test] + fn test_sync_insert() { + use std::sync::Arc; + let list = Arc::new(SkipList::new()); + + let threads = (0 .. 20) + .map(|_| { + let list = list.clone(); + std::thread::spawn(move || { + let mut rng = rand::thread_rng(); + for _ in 0 .. 1_000 { + let target = rng.gen::(); + + list.insert(target, ()); + } + }) + }) + .collect::>(); + + for thread in threads { + thread.join().unwrap() + } + + list.iter().for_each(|e| println!("key: {}", e.key)); + } + + #[test] + fn test_sync_inmove() { + use std::sync::Arc; + let list = Arc::new(SkipList::new()); + + let threads = (0 .. 20) + .map(|_| { + let list = list.clone(); + std::thread::spawn(move || { + let mut rng = rand::thread_rng(); + for _ in 0 .. 5_000 { + let target = rng.gen::(); + if rng.gen::() % 5 == 0 { + list.remove(&target); + } else { + list.insert(target, ()); + } + } + }) + }) + .collect::>(); + + for thread in threads { + thread.join().unwrap() + } + + list.iter().for_each(|e| println!("key: {}", e.key)); + } + + #[test] + fn test_sync_iterate() { + use std::sync::Arc; + let list = Arc::new(SkipList::new()); + + let threads = (0 .. 20) + .map(|_| { + let list = list.clone(); + std::thread::spawn(move || { + let mut rng = rand::thread_rng(); + for _ in 0 .. 
1_000 { + let target = rng.gen::(); + if rng.gen::() % 5 == 0 { + list.remove(&target); + } else { + list.insert(target, ()); + } + } + }) + }) + .collect::>(); + + for _ in 0 .. 5 { + list.iter().for_each(|e| println!("key: {}", e.key())); + } + + for thread in threads { + thread.join().unwrap() + } + + let list = Arc::>::try_unwrap(list).unwrap(); + + list.into_iter().for_each(|(k, _)| println!("key: {}", k)) + } +} diff --git a/src/skiplist/node.rs b/src/skiplist/node.rs new file mode 100644 index 0000000..ef4eab5 --- /dev/null +++ b/src/skiplist/node.rs @@ -0,0 +1,285 @@ +use super::{ + alloc::alloc::{alloc, dealloc, handle_alloc_error, Layout}, + tagged::MaybeTagged, + SkipList, + HEIGHT, + HEIGHT_BITS, + HEIGHT_MASK, +}; + +const REMOVED_MASK: usize = !(usize::MAX >> 1); + +use std::{ + fmt::{Debug, Display}, + mem, + ops::Index, + ptr::{self, NonNull}, + sync::atomic::{AtomicUsize, Ordering}, +}; + +/// Head stores the first pointer tower at the beginning of the list. It is +/// always allocated with the maximum height (`HEIGHT`). +#[repr(C)] +pub(super) struct Head { + key: K, + val: V, + height_and_removed: AtomicUsize, + pub(super) levels: Levels, +} + +impl Head { + pub(super) fn new() -> NonNull { + let head_ptr = unsafe { Node::::alloc(super::HEIGHT).cast() }; + + if let Some(head) = NonNull::new(head_ptr) { + head + } else { + panic!() + } + } + + pub(super) unsafe fn drop(ptr: NonNull) { + Node::::dealloc(ptr.as_ptr().cast()); + } +} + +#[repr(C)] +pub(super) struct Levels { + pub(super) pointers: [MaybeTagged>; 1], +} + +impl Levels { + fn get_size(height: usize) -> usize { + assert!(height <= HEIGHT && height > 0); + + mem::size_of::() * (height - 1) + } +} + +impl Index for Levels { + type Output = MaybeTagged>; + + fn index(&self, index: usize) -> &Self::Output { + unsafe { self.pointers.get_unchecked(index) } + } +} + +#[repr(C)] +pub struct Node { + pub key: K, + pub val: V, + pub(super) height_and_removed: AtomicUsize, + pub(super) levels: Levels, +} + +impl Node { + pub(super) 
fn new(key: K, val: V, height: usize) -> *mut Self { + unsafe { + let node = Self::alloc(height); + ptr::write(&mut (*node).key, key); + ptr::write(&mut (*node).val, val); + node + } + } + + pub(super) fn new_rand_height( + key: K, + val: V, + list: &SkipList, + ) -> *mut Self { + // construct the node with the height returned by `list.gen_height()` + Self::new(key, val, list.gen_height()) + } + + pub(super) unsafe fn alloc(height: usize) -> *mut Self { + let layout = Self::get_layout(height); + + let ptr = alloc(layout).cast::(); + + if ptr.is_null() { + handle_alloc_error(layout); + } + + ptr::write(&mut (*ptr).height_and_removed, AtomicUsize::new(height)); + + ptr::write_bytes((*ptr).levels.pointers.as_mut_ptr(), 0, height); + + ptr + } + + pub(super) unsafe fn dealloc(ptr: *mut Self) { + let height = (*ptr).height(); + + let layout = Self::get_layout(height); + + dealloc(ptr.cast(), layout); + } + + unsafe fn get_layout(height: usize) -> Layout { + let size_self = mem::size_of::(); + let align = mem::align_of::(); + let size_levels = Levels::::get_size(height); + + Layout::from_size_align_unchecked(size_self + size_levels, align) + } + + pub(super) unsafe fn drop(ptr: *mut Self) { + ptr::drop_in_place(&mut (*ptr).key); + ptr::drop_in_place(&mut (*ptr).val); + + Node::dealloc(ptr); + } + + pub(super) fn height(&self) -> usize { + (self.height_and_removed.load(Ordering::Acquire) & HEIGHT_MASK) as usize + } + + pub(super) fn add_ref(&self) -> usize { + let refs = self + .height_and_removed + .fetch_add(1 << (HEIGHT_BITS + 1), Ordering::AcqRel) + as usize; + + refs + } + + pub(super) fn try_add_ref(&self) -> Result { + self.height_and_removed + .fetch_update(Ordering::AcqRel, Ordering::Acquire, |o| { + if (o & !REMOVED_MASK) >> (HEIGHT_BITS + 1) == 0 { + return None; + } + + Some(o + (1 << (HEIGHT_BITS + 1))) + }) + .map(|now| ((now & !REMOVED_MASK) >> (HEIGHT_BITS + 1)) + 1) + } + + pub(super) fn sub_ref(&self) -> usize { + let prev = self + .height_and_removed + .fetch_sub(1 << (HEIGHT_BITS + 1), 
Ordering::AcqRel); + ((prev & !REMOVED_MASK) >> (HEIGHT_BITS + 1)) - 1 + } + + pub(super) fn removed(&self) -> bool { + self.height_and_removed.load(Ordering::Acquire).leading_zeros() == 0 + } + + pub(crate) fn set_removed(&self) -> Result { + self.set_har_with(|old| old | REMOVED_MASK) + } + + fn set_har_with(&self, f: F) -> Result + where + F: Fn(usize) -> usize, + { + let height_and_removed = + self.height_and_removed.load(Ordering::Acquire); + + let new_height_and_removed = f(height_and_removed); + + if new_height_and_removed == height_and_removed { + return Err(()); + } + + // try to exchange + self.height_and_removed + .compare_exchange( + height_and_removed, + new_height_and_removed, + Ordering::AcqRel, + Ordering::Relaxed, + ) + .map_err(|_| ()) + } + + pub(super) fn tag_levels(&self, tag: usize) -> Result { + for level in (0 .. self.height()).rev() { + if let Err(o_tag) = self.levels[level].compare_exchange_tag( + 0, + tag, + Ordering::AcqRel, + Ordering::Relaxed, + ) { + return Err(o_tag); + } + } + Ok(self.height() - 1) + } + + pub(crate) fn try_remove_and_tag(&self) -> Result<(), ()> { + self.set_removed()?; + + self.tag_levels(1).map_err(|_| ())?; + + Ok(()) + } +} + +impl PartialEq for Node +where + K: PartialEq, + V: PartialEq, +{ + fn eq(&self, other: &Self) -> bool { + self.key == other.key && self.val == other.val + } +} + +impl Debug for Node +where + K: Debug, + V: Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Node") + .field("key", &self.key) + .field("val", &self.val) + .field("height", &self.height()) + .field( + "levels", + &(0 .. 
self.height()).fold(String::new(), |acc, level| { + format!("{}{:?}, ", acc, self.levels[level].as_std()) + }), + ) + .finish() + } +} + +impl Display for Node +where + K: Debug, + V: Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + (1 ..= self.height()).try_for_each(|level| { + writeln!( + f, + "[key: {:?}, val: {:?}, level: {}]", + self.key, self.val, level, + ) + }) + } +} + +mod node_test { + use super::*; + + #[test] + fn test_removed() { + unsafe { + let node = Node::new(1, (), 3); + + assert!(!(*node).removed()); + + assert!((*node).set_removed().is_ok()); + + assert!((*node).removed()); + + (*node).add_ref(); + + assert_eq!((*node).try_add_ref().unwrap(), 2); + } + } +} diff --git a/src/skiplist/padded.rs b/src/skiplist/padded.rs new file mode 100644 index 0000000..fc4e52f --- /dev/null +++ b/src/skiplist/padded.rs @@ -0,0 +1,88 @@ +//! Aligns the data to the appropriate cache line. + +use std::{ + fmt::{Debug, Display}, + ops::{Deref, DerefMut}, +}; + +#[cfg_attr( + any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "powerpc64", + ), + repr(align(128)) +)] +#[cfg_attr( + any( + target_arch = "arm", + target_arch = "mips", + target_arch = "mips64", + target_arch = "riscv64", + ), + repr(align(32)) +)] +#[cfg_attr(target_arch = "s390x", repr(align(256)))] +#[cfg_attr( + not(any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "powerpc64", + target_arch = "arm", + target_arch = "mips", + target_arch = "mips64", + target_arch = "riscv64", + target_arch = "s390x", + )), + repr(align(64)) +)] +#[derive(Clone, Copy, Default, Hash, PartialEq, Eq)] +pub(crate) struct Padded(T); + +impl Padded { + pub(crate) const fn new(t: T) -> Self { + Padded(t) + } + + pub(crate) fn into_inner(self) -> T { + self.0 + } +} + +impl Deref for Padded { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Padded { + fn deref_mut(&mut self) -> &mut 
Self::Target { + &mut self.0 + } +} + +impl Debug for Padded +where + T: Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{:?}", self.0)) + } +} + +impl Display for Padded +where + T: Display, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{}", self.0)) + } +} + +impl From for Padded { + fn from(value: T) -> Self { + Padded::new(value) + } +} diff --git a/src/skiplist/tagged.rs b/src/skiplist/tagged.rs new file mode 100644 index 0000000..9b6f137 --- /dev/null +++ b/src/skiplist/tagged.rs @@ -0,0 +1,114 @@ +use std::sync::atomic::{AtomicPtr, Ordering}; + +pub(crate) struct MaybeTagged(AtomicPtr); + +impl MaybeTagged { + pub(crate) fn load_ptr(&self) -> *mut T { + self.load_decomposed().0 + } + pub(crate) fn load_decomposed(&self) -> (*mut T, usize) { + let raw = self.0.load(Ordering::Acquire); + Self::decompose_raw(raw) + } + + #[inline] + fn decompose_raw(raw: *mut T) -> (*mut T, usize) { + ( + usize_to_ptr_with_provenance( + raw as usize & !unused_bits::(), + raw, + ), + raw as usize & unused_bits::(), + ) + } + + pub(crate) fn store_composed(&self, ptr: *mut T, tag: usize) { + let tagged = Self::compose_raw(ptr, tag); + + self.0.store(tagged, Ordering::Release); + } + + #[inline] + fn compose_raw(ptr: *mut T, tag: usize) -> *mut T { + usize_to_ptr_with_provenance( + (ptr as usize & !unused_bits::()) | (tag & unused_bits::()), + ptr, + ) + } + + pub(crate) fn store_ptr(&self, ptr: *mut T) { + self.store_composed(ptr, 0); + } + + pub(crate) fn compare_exchange( + &self, + expected: *mut T, + new: *mut T, + succes: Ordering, + failure: Ordering, + ) -> Result<(*mut T, usize), (*mut T, usize)> { + self.compare_exchange_with_tag(expected, 0, new, 0, succes, failure) + } + + pub(crate) fn compare_exchange_with_tag( + &self, + expected: *mut T, + e_tag: usize, + new: *mut T, + n_tag: usize, + succes: Ordering, + failure: Ordering, + ) -> Result<(*mut T, 
usize), (*mut T, usize)> { + match self.0.compare_exchange( + Self::compose_raw(expected, e_tag), + Self::compose_raw(new, n_tag), + succes, + failure, + ) { + Ok(new) => Ok(Self::decompose_raw(new)), + Err(other) => Err(Self::decompose_raw(other)), + } + } + + pub(crate) fn compare_exchange_tag( + &self, + e_tag: usize, + tag: usize, + succes: Ordering, + failure: Ordering, + ) -> Result { + let mut ptr = self.load_ptr(); + while let Err((other_ptr, other_tag)) = self + .compare_exchange_with_tag(ptr, e_tag, ptr, tag, succes, failure) + { + if other_tag != e_tag { + return Err(other_tag); + } + + ptr = other_ptr; + } + + Ok(tag) + } + + pub(crate) fn load_tag(&self) -> usize { + self.load_decomposed().1 + } + + pub(crate) fn as_std(&self) -> &AtomicPtr { + &self.0 + } +} + +const fn align() -> usize { + core::mem::align_of::() +} + +const fn unused_bits() -> usize { + (1 << align::().trailing_zeros()) - 1 +} + +fn usize_to_ptr_with_provenance(addr: usize, prov: *mut T) -> *mut T { + let ptr = prov.cast::(); + ptr.wrapping_add(addr.wrapping_sub(ptr as usize)).cast() +} diff --git a/src/tls/mod.rs b/src/tls/mod.rs index ad4f777..77634e4 100644 --- a/src/tls/mod.rs +++ b/src/tls/mod.rs @@ -264,7 +264,7 @@ impl ThreadLocal { // pointer. (new_tbl_ptr.as_ptr() as usize | 1) as *mut (), AcqRel, - Release, + Acquire, ) { Ok(_) => { // If the old node was still stored, we succeeded.