Export csv (#92)

* dump report csv file
* add extra line
* fix build k6
* add accuracy and rowsize
nebula-contrib · Dec 14, 2023 · 668df57 · 668df57
1 parent d66301c
commit 668df57
Show file tree

Hide file tree

Showing 18 changed files with 199 additions and 116 deletions.
diff --git a/.github/workflows/nebula-bench.yaml b/.github/workflows/nebula-bench.yaml
@@ -48,7 +48,7 @@ jobs:
 
       - uses: actions/setup-go@v1
         with:
-          go-version: 1.16.4
+          go-version: 1.20.3
 
       - name: Cache Hadoop
         uses: actions/cache@v2

diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
 
 
 format:
-	black -l 100 .
+	black -l 100 nebula_bench
diff --git a/README.md b/README.md
@@ -23,7 +23,7 @@ The main features:
 |       [v1.0.0](https://github.com/vesoft-inc/nebula-bench/releases/tag/v1.0.0)      |    [v2.6.0](https://github.com/vesoft-inc/nebula/releases/tag/v2.6.0) / [v2.6.1](https://github.com/vesoft-inc/nebula/releases/tag/v2.6.1) / [v2.6.2](https://github.com/vesoft-inc/nebula/releases/tag/v2.6.2)    |     [v2.6.0](https://github.com/vesoft-inc/nebula-importer/releases/tag/v2.6.0)      |    [v0.0.8](https://github.com/vesoft-inc/k6-plugin/releases/tag/v0.0.8)    |      [v0.3.3](https://github.com/ldbc/ldbc_snb_datagen_spark/tree/v0.3.3)         |     [v2.6.0](https://github.com/vesoft-inc/nebula-go/releases/tag/v2.6.0)     |
 |       [v1.1.0](https://github.com/vesoft-inc/nebula-bench/releases/tag/v1.1.0)      |    [v3.0.0](https://github.com/vesoft-inc/nebula/releases/tag/v3.0.0) /  [v3.0.1](https://github.com/vesoft-inc/nebula/releases/tag/v3.0.1) |     [v3.0.0](https://github.com/vesoft-inc/nebula-importer/releases/tag/v3.0.0)      |    [v0.0.9](https://github.com/vesoft-inc/k6-plugin/releases/tag/v0.0.9)    |        [v0.3.3](https://github.com/ldbc/ldbc_snb_datagen_spark/tree/v0.3.3)        |     [v3.0.0](https://github.com/vesoft-inc/nebula-go/releases/tag/v3.0.0)     |
 |       [v1.2.0](https://github.com/vesoft-inc/nebula-bench/releases/tag/v1.2.0)      |    [v3.1.0](https://github.com/vesoft-inc/nebula/releases/tag/v3.1.0)     |     [v3.1.0](https://github.com/vesoft-inc/nebula-importer/releases/tag/v3.1.0)      |    [v1.0.0](https://github.com/vesoft-inc/k6-plugin/releases/tag/v1.0.0)    |        [v0.3.3](https://github.com/ldbc/ldbc_snb_datagen_spark/tree/v0.3.3)        |     NONE       |
-|       master      |    nightly    |     [v3.2.0](https://github.com/vesoft-inc/nebula/releases/tag/v3.2.0)      |    [v1.0.1](https://github.com/vesoft-inc/k6-plugin/releases/tag/v1.0.1)    |     [v0.3.3](https://github.com/ldbc/ldbc_snb_datagen_spark/tree/v0.3.3)      |     NONE       |
+|       master      |    nightly    |     [v3.2.0](https://github.com/vesoft-inc/nebula/releases/tag/v3.2.0)      |    [v1.1.6](https://github.com/vesoft-inc/k6-plugin/releases/tag/v1.1.6)    |     [v0.3.3](https://github.com/ldbc/ldbc_snb_datagen_spark/tree/v0.3.3)      |     NONE       |
 
 ## How to use
 

diff --git a/README_cn.md b/README_cn.md
@@ -18,7 +18,8 @@
 |       [v1.0.0](https://github.com/vesoft-inc/nebula-bench/releases/tag/v1.0.0)      |    [v2.6.0](https://github.com/vesoft-inc/nebula/releases/tag/v2.6.0) / [v2.6.1](https://github.com/vesoft-inc/nebula/releases/tag/v2.6.1) / [v2.6.2](https://github.com/vesoft-inc/nebula/releases/tag/v2.6.2)    |     [v2.6.0](https://github.com/vesoft-inc/nebula-importer/releases/tag/v2.6.0)      |    [v0.0.8](https://github.com/vesoft-inc/k6-plugin/releases/tag/v0.0.8)    |      [v0.3.3](https://github.com/ldbc/ldbc_snb_datagen_spark/tree/v0.3.3)         |     [v2.6.0](https://github.com/vesoft-inc/nebula-go/releases/tag/v2.6.0)     |
 |       [v1.1.0](https://github.com/vesoft-inc/nebula-bench/releases/tag/v1.1.0)      |    [v3.0.0](https://github.com/vesoft-inc/nebula/releases/tag/v3.0.0) /  [v3.0.1](https://github.com/vesoft-inc/nebula/releases/tag/v3.0.1) |     [v3.0.0](https://github.com/vesoft-inc/nebula-importer/releases/tag/v3.0.0)      |    [v0.0.9](https://github.com/vesoft-inc/k6-plugin/releases/tag/v0.0.9)    |        [v0.3.3](https://github.com/ldbc/ldbc_snb_datagen_spark/tree/v0.3.3)        |     [v3.0.0](https://github.com/vesoft-inc/nebula-go/releases/tag/v3.0.0)     |
 |       [v1.2.0](https://github.com/vesoft-inc/nebula-bench/releases/tag/v1.2.0)      |    [v3.1.0](https://github.com/vesoft-inc/nebula/releases/tag/v3.1.0)     |     [v3.1.0](https://github.com/vesoft-inc/nebula-importer/releases/tag/v3.1.0)      |    [v1.0.0](https://github.com/vesoft-inc/k6-plugin/releases/tag/v1.0.0)    |        [v0.3.3](https://github.com/ldbc/ldbc_snb_datagen_spark/tree/v0.3.3)        |     NONE       |
-|       master      |    nightly    |     [v3.2.0](https://github.com/vesoft-inc/nebula/releases/tag/v3.2.0)      |    [v1.0.1](https://github.com/vesoft-inc/k6-plugin/releases/tag/v1.0.1)    |     [v0.3.3](https://github.com/ldbc/ldbc_snb_datagen_spark/tree/v0.3.3)      |     NONE       |
+|       master      |    nightly    |     [v3.2.0](https://github.com/vesoft-inc/nebula/releases/tag/v3.2.0)      |    [v1.1.6](https://github.com/vesoft-inc/k6-plugin/releases/tag/v1.1.6)    |     [v0.3.3](https://github.com/ldbc/ldbc_snb_datagen_spark/tree/v0.3.3)      |     NONE       |
+
 ## 使用说明
 
 ### 安装准备

diff --git a/nebula_bench/cli.py b/nebula_bench/cli.py
@@ -14,16 +14,13 @@
 
 def common(f):
     f = click.option(
-        "-f", "--folder", help="ldbc data folder, default: target/data/test_data"
-    )(f)
-
-    f = click.option(
-        "-a", "--address", help="Nebula Graph address, default: 127.0.0.1:9669"
+        "-f",
+        "--folder",
+        help="ldbc data folder, default: target/data/test_data",
     )(f)
+    f = click.option("-a", "--address", help="Nebula Graph address, default: 127.0.0.1:9669")(f)
     f = click.option("-u", "--user", help="Nebula Graph address, default: root")(f)
-    f = click.option("-p", "--password", help="Nebula Graph address, default: nebula")(
-        f
-    )
+    f = click.option("-p", "--password", help="Nebula Graph address, default: nebula")(f)
     f = click.option(
         "-s",
         "--space",
@@ -40,10 +37,17 @@ def cli():
 
 @cli.command(help="generate and split ldbc data")
 @click.option(
-    "-s", "--scale-factor", default="1", help="scale factor for ldbc, default: 1"
+    "-s",
+    "--scale-factor",
+    default="1",
+    help="scale factor for ldbc, default: 1",
 )
 @click.option(
-    "-og", "--only-generate", default=False, is_flag=True, help="only generate data"
+    "-og",
+    "--only-generate",
+    default=False,
+    is_flag=True,
+    help="only generate data",
 )
 @click.option(
     "-os",
@@ -55,17 +59,26 @@ def cli():
 def data(scale_factor, only_generate, only_split):
     my_env = {"scaleFactor": str(scale_factor)}
     if only_generate:
-        command = [SH_COMMAND, setting.WORKSPACE_PATH / "scripts/generate-data.sh"]
+        command = [
+            SH_COMMAND,
+            setting.WORKSPACE_PATH / "scripts/generate-data.sh",
+        ]
         c = run_process(command, my_env)
 
     elif only_split:
         command = [SH_COMMAND, setting.WORKSPACE_PATH / "scripts/split-data.sh"]
         c = run_process(command)
     else:
-        command = [SH_COMMAND, setting.WORKSPACE_PATH / "scripts/generate-data.sh"]
+        command = [
+            SH_COMMAND,
+            setting.WORKSPACE_PATH / "scripts/generate-data.sh",
+        ]
         c = run_process(command, my_env)
         if c == 0:
-            command = [SH_COMMAND, setting.WORKSPACE_PATH / "scripts/split-data.sh"]
+            command = [
+                SH_COMMAND,
+                setting.WORKSPACE_PATH / "scripts/split-data.sh",
+            ]
             b = run_process(command)
 
     exit(c)
@@ -80,14 +93,10 @@ def nebula():
 @click.option("-a", "--address", help="Nebula Graph address, default: 127.0.0.1:9669")
 @click.option("-u", "--user", help="Nebula Graph address, default: root")
 @click.option("-p", "--password", help="Nebula Graph address, default: nebula")
-@click.option(
-    "-k", "--keep", help="keep spaces that not be dropped, e.g. space1,space2"
-)
+@click.option("-k", "--keep", help="keep spaces that not be dropped, e.g. space1,space2")
 def clean(address, user, password, keep):
     sc = NebulaController(user=user, password=password, address=address)
-    value = click.confirm(
-        "Will delete all spaces in Nebula Graph. Continue?", abort=True
-    )
+    value = click.confirm("Will delete all spaces in Nebula Graph. Continue?", abort=True)
     sc.clean_spaces(keep)
 
 
@@ -114,10 +123,11 @@ def clean(address, user, password, keep):
     help="enable add prefix in vid, vid type should be string",
 )
 def importer(folder, address, user, password, space, vid_type, enable_prefix, dry_run):
-    assert vid_type in ["int", "string"], 'the vid type should be "ini" or "string" '
-    nc = NebulaController(
-        folder, space, user, password, address, vid_type, enable_prefix
-    )
+    assert vid_type in [
+        "int",
+        "string",
+    ], 'the vid type should be "ini" or "string" '
+    nc = NebulaController(folder, space, user, password, address, vid_type, enable_prefix)
     c = nc.import_space(dry_run)
     if c != 0:
         exit(c)
@@ -188,7 +198,7 @@ def report():
     pass
 
 
-@report.command(help="dump the html report")
+@report.command(help="dump the report")
 @click.option(
     "-f",
     "--folder",
@@ -199,12 +209,18 @@ def report():
     "--output",
     help="dump the html to file, default: report.html",
 )
-def export(folder, output):
+@click.option(
+    "-t",
+    "--filetype",
+    default="html",
+    help="dump the report type, values should be [html, csv], default: html",
+)
+def export(folder, output, filetype):
     controller = DumpController()
     if folder is None:
         folder = controller.get_latest_output()
 
-    controller.export(folder=folder,output=output)
+    controller.export(folder=folder, output=output, filetype=filetype)
 
 
 @report.command(help="launch the http report server")

diff --git a/nebula_bench/common/base.py b/nebula_bench/common/base.py
@@ -1,5 +1,6 @@
 # -*- encoding: utf-8 -*-
 
+
 class ScenarioMeta(type):
     def __new__(cls, name, bases, attrs, *args, **kwargs):
         # super(ScenarioMeta, cls).__new__(cls, name, bases, attrs, *args, **kwargs)
@@ -14,7 +15,7 @@ def __new__(cls, name, bases, attrs, *args, **kwargs):
 class BaseScenario(metaclass=ScenarioMeta):
     abstract = True
     is_insert_scenario = False
-    nGQL: str =""
+    nGQL: str = ""
     value: str = ""
     stage: dict
     csv_path: str

diff --git a/nebula_bench/controller.py b/nebula_bench/controller.py
@@ -11,7 +11,12 @@
 
 class BaseController(object):
     def __init__(
-        self, data_folder=None, space=None, user=None, password=None, address=None
+        self,
+        data_folder=None,
+        space=None,
+        user=None,
+        password=None,
+        address=None,
     ):
         self.workspace_path = setting.WORKSPACE_PATH
         self.data_folder = data_folder or setting.DATA_FOLDER
@@ -52,7 +57,7 @@ def import_space(self, dry_run=False):
 
     def dump_nebula_importer(self):
         kwargs = {}
-        if self.enable_prefix and self.vid_type == 'int':
+        if self.enable_prefix and self.vid_type == "int":
             raise Exception("must use prefix with vid type string")
         else:
             kwargs["enable_prefix"] = self.enable_prefix
@@ -73,10 +78,19 @@ class DumpController(object):
     def __init__(self):
         pass
 
-    def export(self, folder, output):
-        utils.jinja_dump(
-            "report.html.j2", output, {"data": self.get_data(folder)}
-        )
+    def export(self, folder, output, filetype):
+        if filetype == "html":
+            self._export_html(folder, output)
+        elif filetype == "csv":
+            self._export_csv(folder, output)
+        else:
+            raise Exception("not support filetype: %s" % filetype)
+
+    def _export_html(self, folder, output):
+        utils.jinja_dump("report.html.j2", output, {"data": self.get_data(folder)})
+
+    def _export_csv(self, folder, output):
+        utils.csv_dump(output, self.get_data(folder))
 
     def get_data(self, folder):
         # [
@@ -95,9 +109,7 @@ def get_data(self, folder):
         if folder is None:
             return
         package_name = "nebula_bench.scenarios"
-        scenarios = utils.load_class(
-            package_name, load_all=True, base_class=BaseScenario
-        )
+        scenarios = utils.load_class(package_name, load_all=True, base_class=BaseScenario)
 
         paths = sorted(Path(folder).iterdir(), key=os.path.getmtime)
         case = None
@@ -138,9 +150,7 @@ def get_data(self, folder):
     def serve(self, port=5000):
         import flask
 
-        app = flask.Flask(
-            __name__, template_folder=setting.WORKSPACE_PATH / "templates"
-        )
+        app = flask.Flask(__name__, template_folder=setting.WORKSPACE_PATH / "templates")
 
         @app.route("/", methods=["GET"])
         def index():

diff --git a/nebula_bench/parser.py b/nebula_bench/parser.py
@@ -69,7 +69,10 @@ def __init__(self, name=None, index=None):
 
 
 class Parser(object):
-    except_csv_file = ["person_email_emailaddress.csv", "person_speaks_language.csv"]
+    except_csv_file = [
+        "person_email_emailaddress.csv",
+        "person_speaks_language.csv",
+    ]
     delimiter = "|"
 
     def __init__(self, dump_class, data_path):
@@ -119,9 +122,7 @@ def parse_vertex(self, file_path):
 
         assert len(header_list) == len(
             data_list
-        ), "header length should be equle to data length, error file is {}".format(
-            file_path
-        )
+        ), "header length should be equle to data length, error file is {}".format(file_path)
 
         for index, h in enumerate(header_list):
             if h.strip().lower() == "id":
@@ -163,9 +164,7 @@ def parse_edge(self, file_path):
 
         assert len(header_list) == len(
             data_list
-        ), "header length should be equle to data length, error file is {}".format(
-            file_path
-        )
+        ), "header length should be equle to data length, error file is {}".format(file_path)
 
         flag = True
         for index, h in enumerate(header_list):
@@ -242,9 +241,7 @@ def dump(self, *args, **kwargs):
         if vid_type == "int":
             self.template_file = self.template_file or "nebula-import-vid-int.yaml.j2"
         elif vid_type == "string":
-            self.template_file = (
-                self.template_file or "nebula-import-vid-string.yaml.j2"
-            )
+            self.template_file = self.template_file or "nebula-import-vid-string.yaml.j2"
 
         kwargs["vertex_list"] = self._parser.vertex_list
         kwargs["edge_list"] = self._parser.edge_list

diff --git a/nebula_bench/scenarios/fetch.py b/nebula_bench/scenarios/fetch.py
@@ -4,10 +4,11 @@
 
 class FetchTag(BaseScenario):
     abstract = False
-    nGQL = 'FETCH PROP ON Person {0} YIELD Person.firstName, Person.lastName, Person.gender, Person.birthday, Person.creationDate, Person.locationIP, Person.browserUsed'
+    nGQL = "FETCH PROP ON Person {0} YIELD Person.firstName, Person.lastName, Person.gender, Person.birthday, Person.creationDate, Person.locationIP, Person.browserUsed"
     csv_path = "social_network/dynamic/person.csv"
 
+
 class FetchEdge(BaseScenario):
     abstract = False
-    nGQL ='FETCH PROP ON KNOWS {0} -> {1} YIELD KNOWS.creationDate'
-    csv_path = "social_network/dynamic/person_knows_person.csv"	
+    nGQL = "FETCH PROP ON KNOWS {0} -> {1} YIELD KNOWS.creationDate"
+    csv_path = "social_network/dynamic/person_knows_person.csv"
diff --git a/nebula_bench/scenarios/find_path.py b/nebula_bench/scenarios/find_path.py
@@ -8,11 +8,12 @@ class BaseFindShortestPath(BaseScenario):
     csv_path = "social_network/dynamic/person_knows_person.csv"
     rank = 100
 
+
 class FindShortestPath(BaseFindShortestPath):
     abstract = False
     nGQL = "FIND SHORTEST PATH FROM {0} TO {1} OVER * YIELD path as p"
 
+
 class FindShortestNoVidPath(BaseFindShortestPath):
     abstract = False
     nGQL = "FIND SHORTEST PATH FROM {0} TO -1 OVER * YIELD path as p"
-
diff --git a/nebula_bench/scenarios/insert.py b/nebula_bench/scenarios/insert.py
@@ -4,7 +4,7 @@
 
 class BatchInsertVertexScenario(BaseScenario):
     is_insert_scenario = True
-    nGQL = 'INSERT VERTEX Person(firstName, lastName, gender, birthday, creationDate, locationIP, browserUsed) VALUES '
+    nGQL = "INSERT VERTEX Person(firstName, lastName, gender, birthday, creationDate, locationIP, browserUsed) VALUES "
     value = '{0}:("{1}", "{2}", "{3}", "{4}", datetime("{5}"), "{6}", "{7}")'
     abstract = False
     csv_path = "social_network/dynamic/person.csv"
@@ -14,27 +14,26 @@ class BatchInsertVertexScenario(BaseScenario):
 class InsertVertexScenario(BaseScenario):
     is_insert_scenario = False
     nGQL = (
-        'INSERT VERTEX Person(firstName, lastName, gender, birthday, creationDate, locationIP, browserUsed) VALUES '
+        "INSERT VERTEX Person(firstName, lastName, gender, birthday, creationDate, locationIP, browserUsed) VALUES "
         '{0}:("{1}", "{2}", "{3}", "{4}", datetime("{5}"), "{6}", "{7}")'
     )
     abstract = False
     csv_path = "social_network/dynamic/person.csv"
     rank = 9999
 
+
 class BatchInsertEdgeScenario(BaseScenario):
     is_insert_scenario = True
-    nGQL = 'INSERT EDGE LIKES (creationDate) VALUES '
+    nGQL = "INSERT EDGE LIKES (creationDate) VALUES "
     value = '{0}->{1}:(datetime("{2}"))'
     abstract = False
     csv_path = "social_network/dynamic/person_likes_comment.csv"
     rank = 9999
 
+
 class InsertEdgeScenario(BaseScenario):
     is_insert_scenario = False
-    nGQL = (
-        'INSERT EDGE LIKES (creationDate) VALUES {0}->{1}:(datetime("{2}"))'
-    )
+    nGQL = 'INSERT EDGE LIKES (creationDate) VALUES {0}->{1}:(datetime("{2}"))'
     abstract = False
     csv_path = "social_network/dynamic/person_likes_comment.csv"
     rank = 9999
-
diff --git a/nebula_bench/scenarios/match.py b/nebula_bench/scenarios/match.py
@@ -8,4 +8,4 @@ class BaseMatchScenario(BaseScenario):
 
 
 class Match1Hop(BaseMatchScenario):
-    nGQL = 'MATCH (v1:Person)-[e:KNOWS]->(v2:Person) WHERE id(v1) == {0} RETURN v2'
+    nGQL = "MATCH (v1:Person)-[e:KNOWS]->(v2:Person) WHERE id(v1) == {0} RETURN v2"
Original file line number	Diff line number	Diff line change
Expand Up		@@ -8,4 +8,4 @@ class BaseMatchScenario(BaseScenario):


 class Match1Hop(BaseMatchScenario):
-    nGQL = 'MATCH (v1:Person)-[e:KNOWS]->(v2:Person) WHERE id(v1) == {0} RETURN v2'
+    nGQL = "MATCH (v1:Person)-[e:KNOWS]->(v2:Person) WHERE id(v1) == {0} RETURN v2"