Skip to content

Commit

Permalink
Merge pull request #167 from ClickHouse/glaredb
Browse files Browse the repository at this point in the history
GlareDB
  • Loading branch information
alexey-milovidov authored Feb 5, 2024
2 parents d81c922 + 3dd7b77 commit 27b2caf
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 3 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ We also introduced the [Hardware Benchmark](https://benchmark.clickhouse.com/har
- [x] Hydra
- [ ] Impala
- [ ] Hyper
- [x] Umbra
- [x] SQLite
- [x] Redshift
- [x] Redshift Serverless
Expand All @@ -253,7 +254,7 @@ We also introduced the [Hardware Benchmark](https://benchmark.clickhouse.com/har
- [ ] ScyllaDB
- [x] Elasticsearch
- [ ] Apache Ignite
- [ ] MotherDuck
- [x] MotherDuck
- [x] Infobright
- [ ] Actian Vector
- [ ] Manticore Search
Expand All @@ -275,11 +276,11 @@ We also introduced the [Hardware Benchmark](https://benchmark.clickhouse.com/har
- [ ] Boilingdata
- [x] ByConity
- [ ] DolphinDB
- [ ] Oxla
- [x] Oxla
- [ ] Quickwit
- [x] AlloyDB
- [x] ParadeDB
- [ ] GlareDB
- [x] GlareDB
- [ ] Seafowl
- [ ] Sneller

Expand Down
20 changes: 20 additions & 0 deletions glaredb/benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash

# Benchmark driver for GlareDB: installs the CLI, downloads the dataset as a
# single Parquet file, runs every line of queries.sql three times, and prints
# the collected timings as one bracketed row of three values per query.

# Install

sudo apt-get install -y unzip
curl https://glaredb.com/install.sh | sh

# The queries read this file directly from the working directory.
wget https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits_compatible/athena/hits.parquet

# Each line of queries.sql is one query.
# `read -r` is required: without it the shell strips backslashes, which turns
# the regex escapes '\.' and '\1' of the REGEXP_REPLACE query into '.' and '1'.
# The `|| [ -n ... ]` part still runs a final line that lacks a trailing newline.
while read -r query || [ -n "${query}" ]
do
    # Flush dirty pages, then drop the OS caches before the first of the
    # three runs of this query.
    sync
    echo 3 | sudo tee /proc/sys/vm/drop_caches

    for i in $(seq 1 3); do
        ./glaredb --timing --query "${query}"
    done
done < queries.sql 2>&1 | tee log.txt

# Pull the timings (or "Error") out of the log, map errors to null, and group
# every three values into one "[a,b,c]," row.
# `printf "%s", $1` prints the value as data instead of using it as a format string.
grep -oP 'Time: \d+\.\d+s|Error' log.txt | sed -r -e 's/Time: ([0-9]+\.[0-9]+)s/\1/; s/Error/null/' | awk '{ if (i % 3 == 0) { printf "[" }; printf "%s", $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
43 changes: 43 additions & 0 deletions glaredb/queries.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
SELECT COUNT(*) FROM 'hits.parquet';
SELECT COUNT(*) FROM 'hits.parquet' WHERE "AdvEngineID" <> 0;
SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM 'hits.parquet';
SELECT AVG("UserID") FROM 'hits.parquet';
SELECT COUNT(DISTINCT "UserID") FROM 'hits.parquet';
SELECT COUNT(DISTINCT "SearchPhrase") FROM 'hits.parquet';
SELECT MIN("EventDate"), MAX("EventDate") FROM 'hits.parquet';
SELECT "AdvEngineID", COUNT(*) FROM 'hits.parquet' WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC;
SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM 'hits.parquet' GROUP BY "RegionID" ORDER BY u DESC LIMIT 10;
SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM 'hits.parquet' GROUP BY "RegionID" ORDER BY c DESC LIMIT 10;
SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM 'hits.parquet' WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM 'hits.parquet' WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
SELECT "SearchPhrase", COUNT(*) AS c FROM 'hits.parquet' WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM 'hits.parquet' WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10;
SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM 'hits.parquet' WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10;
SELECT "UserID", COUNT(*) FROM 'hits.parquet' GROUP BY "UserID" ORDER BY COUNT(*) DESC LIMIT 10;
SELECT "UserID", "SearchPhrase", COUNT(*) FROM 'hits.parquet' GROUP BY "UserID", "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
SELECT "UserID", "SearchPhrase", COUNT(*) FROM 'hits.parquet' GROUP BY "UserID", "SearchPhrase" LIMIT 10;
SELECT "UserID", extract(minute FROM arrow_cast("EventTime", 'Timestamp(Second, None)')) AS m, "SearchPhrase", COUNT(*) FROM 'hits.parquet' GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
SELECT "UserID" FROM 'hits.parquet' WHERE "UserID" = 435090932899640449;
SELECT COUNT(*) FROM 'hits.parquet' WHERE "URL" LIKE '%google%';
SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM 'hits.parquet' WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM 'hits.parquet' WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
SELECT * FROM 'hits.parquet' WHERE "URL" LIKE '%google%' ORDER BY "EventTime" LIMIT 10;
SELECT "SearchPhrase" FROM 'hits.parquet' WHERE "SearchPhrase" <> '' ORDER BY "EventTime" LIMIT 10;
SELECT "SearchPhrase" FROM 'hits.parquet' WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10;
SELECT "SearchPhrase" FROM 'hits.parquet' WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10;
SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM 'hits.parquet' WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM 'hits.parquet' WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM 'hits.parquet';
SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM 'hits.parquet' WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10;
SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM 'hits.parquet' WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM 'hits.parquet' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
SELECT "URL", COUNT(*) AS c FROM 'hits.parquet' GROUP BY "URL" ORDER BY c DESC LIMIT 10;
SELECT 1, "URL", COUNT(*) AS c FROM 'hits.parquet' GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10;
SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c FROM 'hits.parquet' GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10;
SELECT "URL", COUNT(*) AS "PageViews" FROM 'hits.parquet' WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 10;
SELECT "Title", COUNT(*) AS "PageViews" FROM 'hits.parquet' WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY "PageViews" DESC LIMIT 10;
SELECT "URL", COUNT(*) AS "PageViews" FROM 'hits.parquet' WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 10 OFFSET 1000;
SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, COUNT(*) AS "PageViews" FROM 'hits.parquet' WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY "PageViews" DESC LIMIT 10 OFFSET 1000;
SELECT "URLHash", "EventDate", COUNT(*) AS "PageViews" FROM 'hits.parquet' WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate" ORDER BY "PageViews" DESC LIMIT 10 OFFSET 100;
SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS "PageViews" FROM 'hits.parquet' WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY "PageViews" DESC LIMIT 10 OFFSET 10000;
SELECT DATE_TRUNC('minute', arrow_cast("EventTime", 'Timestamp(Second, None)')) AS M, COUNT(*) AS "PageViews" FROM 'hits.parquet' WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-14' AND "EventDate" <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', arrow_cast("EventTime", 'Timestamp(Second, None)')) ORDER BY M LIMIT 10 OFFSET 1000;
55 changes: 55 additions & 0 deletions glaredb/results/c6a.4xlarge.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"system": "GlareDB",
"date": "2024-02-02",
"machine": "c6a.4xlarge, 500gb gp2",
"cluster_size": 1,
"comment": "",
"tags": ["Rust", "serverless"],
"load_time": 0,
"data_size": 14779976446,
"result": [
[0.221,0.070,0.069],
[0.509,0.425,0.424],
[0.569,0.462,0.466],
[0.787,0.483,0.470],
[1.339,1.220,1.200],
[1.800,1.644,1.649],
[0.490,0.418,0.410],
[0.509,0.423,0.423],
[2.009,1.880,1.920],
[3.383,3.134,3.147],
[0.950,0.695,0.702],
[0.891,0.743,0.721],
[1.787,1.646,1.654],
[4.032,3.007,2.974],
[1.911,1.746,1.762],
[1.538,1.321,1.360],
[3.746,3.335,3.327],
[3.611,3.064,2.993],
[7.835,6.293,6.414],
[0.882,0.469,0.470],
[9.903,1.943,1.947],
[11.653,2.182,2.172],
[22.503,4.410,4.416],
[56.481,11.754,11.769],
[3.039,0.925,0.917],
[1.132,0.854,0.855],
[2.939,0.991,0.973],
[9.958,2.688,2.695],
[9.431,5.639,5.614],
[1.027,0.872,0.814],
[2.611,1.508,1.497],
[6.177,1.887,1.960],
[9.675,9.095,8.891],
[12.268,7.139,7.063],
[12.675,7.661,7.671],
[2.418,2.250,2.210],
[9.998,2.095,2.066],
[9.273,2.782,2.722],
[10.015,2.085,2.079],
[18.876,3.284,3.317],
[2.963,0.939,0.917],
[2.165,0.973,0.936],
[1.380,0.901,0.864]
]
}
55 changes: 55 additions & 0 deletions glaredb/results/c6a.metal.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"system": "GlareDB",
"date": "2024-02-02",
"machine": "c6a.metal, 500gb gp2",
"cluster_size": 1,
"comment": "",
"tags": ["Rust", "serverless"],
"load_time": 0,
"data_size": 14779976446,
"result": [
[0.213,0.070,0.071],
[4.066,4.060,3.978],
[3.994,3.812,4.046],
[4.287,4.122,4.218],
[6.016,6.482,6.210],
[5.260,5.308,5.242],
[3.968,4.054,4.054],
[4.013,3.961,4.045],
[7.124,6.742,6.934],
[5.632,5.625,5.433],
[4.546,4.463,4.577],
[4.540,4.616,4.675],
[5.438,5.302,5.243],
[6.286,6.115,6.202],
[5.460,5.259,5.531],
[6.064,6.451,6.458],
[7.461,7.655,7.337],
[6.920,7.538,7.684],
[11.688,11.362,11.378],
[4.264,4.085,4.092],
[12.973,4.585,4.449],
[14.739,4.562,4.676],
[25.511,5.088,5.323],
[59.374,8.895,8.542],
[6.076,4.352,4.324],
[4.305,4.233,4.334],
[6.083,4.450,4.398],
[12.983,4.812,4.721],
[11.651,5.245,5.281],
[4.275,4.214,4.175],
[5.561,5.378,5.630],
[9.220,5.983,6.112],
[17.147,15.575,15.934],
[14.016,9.356,8.974],
[14.274,9.454,9.371],
[6.675,6.473,6.510],
[13.348,4.684,4.587],
[12.473,4.655,4.580],
[13.153,4.471,4.637],
[21.939,4.932,4.951],
[6.033,4.320,4.282],
[5.181,4.242,4.286],
[4.503,4.312,4.215]
]
}

0 comments on commit 27b2caf

Please sign in to comment.