Skip to content

Commit

Permalink
Merge branch 'main' into 3457-fix-louvain
Browse files Browse the repository at this point in the history
  • Loading branch information
acezen authored Jan 12, 2024
2 parents a290372 + e67f8d7 commit 9b75cea
Show file tree
Hide file tree
Showing 30 changed files with 2,437 additions and 172 deletions.
70 changes: 70 additions & 0 deletions docs/interactive_engine/optimizer.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,73 @@ This rule optimizes to retain only necessary data, significantly reducing the vo

## Cost-based Optimizer (CBO)
TODO

## Graph Type Inference
Traditional SQL only provides checks and inference for basic data types, such as int, double, string, boolean, etc. However, graph databases encompass more complex types. Beyond basic data types, they introduce special vertex and edge types. Vertex types constrain the range of entity types within the graph, while edge types constrain relationships between entities. While the introduction of vertex and edge types in graph databases provides a more flexible data model, it simultaneously adds complexity to the data, necessitating type checks and inference for graph data.

We address this complexity through Graph Type Inference, offering functionality for type checks and inference for graph data. This feature examines whether vertex and edge types in graph data adhere to user-defined schemas and can infer vertex and edge types, laying the groundwork for subsequent optimizations in graph queries.

Taking the example of a modern graph schema, which includes the following vertex and edge types:
```
# Vertex types
person (name, age)
software (name, lang)
# Edge types
knows (weight)
created (weight)
```
Graph Type Inference provides the following capabilities:
- Checking if vertex and edge types in the graph conform to user-defined schemas:
- Reporting errors for nonexistent vertex or edge types:
```bash
# 'per' is a nonexistent vertex type
Match (a:per) Return a;
```
```bash
# 'kno' is a nonexistent edge type
Match (a:person)-[b:kno]->(c) Return a, b, c;
```
- Reporting errors for nonexistent properties or mismatched property types in vertices or edges:
```bash
# The 'lang' property does not exist in vertices of type 'person'
Match (a:person {lang: 'java'}) Return a;
```
```bash
# The 'name' property in vertices of type 'person' is of type string, cannot perform addition
Match (a:person) Return a.name + 1;
```
- Reporting errors for invalid paths composed of vertices and edges:
```bash
# There is no edge of type 'created' between vertices of type 'person'
Match (a:person)-[:created]->(b:person) Return a, b;
```
```bash
# Vertices of type 'software' have no outgoing edges
Match (a:software)-[b]->(c) Return a, b, c;
```
- Inferring vertex and edge types in the graph:
- Inferring vertex types given edge types:
```bash
# (?)-[knows]->(?) => (person)-[knows]->(person)
Match (a)-[:knows]->(b) Return labels(a), labels(b);
```
```bash
# (?)-[knows*1..4]->(?) => (person)-[knows*1..4]->(person)
Match (a)-[:knows*1..4]->(b) Return labels(a), labels(b);
```
- Inferring edge types given vertex types:
```bash
# (person)-[?]->(person) => (person)-[knows]->(person)
Match (a:person)-[b]->(c:person) Return type(b);
```
- Inferring all vertex and edge types given a path:
```bash
# (?)-[?]->(?)-[?]->(software) => (person)-[knows]->(person)-[created]->(software)
Match (a)-[b]->(c)-[d]->(:software) Return labels(a), type(b), labels(c), type(d);
```
- Inferring across multiple sentences:
```bash
# (?)-[?]-(?), (?)-[knows]-(?) => (person)-[knows]-(person), (person)-[knows]-(person)
Match (a)-[b]-(c), (a)-[:knows]-(c) Return labels(a), type(b), labels(c);
```
7 changes: 5 additions & 2 deletions flex/codegen/src/hqps/hqps_group_by_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,12 @@ class GroupByOpBuilder {
prop_name = prop_key.name();
prop_type = single_common_data_type_pb_2_str(
key_alias_key.node_type().data_type());
} else if (prop.item_case() == common::Property::kLabel) {
prop_type = "LabelKey";
} else {
LOG(FATAL)
<< "Current only support key_alias on internal id or property";
LOG(FATAL) << "Current only support key_alias on internal id or "
"property, but got: "
<< prop.DebugString();
}
} else {
VLOG(10) << "Apply internal id since no property provided";
Expand Down
8 changes: 7 additions & 1 deletion flex/engines/hqps_db/core/operator/edge_expand.h
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,10 @@ class EdgeExpand {
dst_label = state.other_label_;
}

VLOG(10) << "src label: " << (int) src_label
<< ", dst label: " << (int) dst_label
<< ", edge label: " << (int) state.edge_label_;

auto adj_list_array = state.graph_.template GetEdges<T>(
src_label, dst_label, state.edge_label_, cur_set.GetVertices(),
gs::to_string(state.direction_), state.limit_, prop_names);
Expand All @@ -1333,7 +1337,9 @@ class EdgeExpand {
offset.reserve(cur_set.Size() + 1);
size_t size = 0;
offset.emplace_back(size);
CHECK(cur_set.Size() == adj_list_array.size());
CHECK(cur_set.Size() == adj_list_array.size())
<< "cur_set.Size(): " << cur_set.Size()
<< ", adj_list_array.size():" << adj_list_array.size();
std::vector<std::tuple<vertex_id_t, vertex_id_t, std::tuple<T>>>
prop_tuples;
prop_tuples.reserve(cur_set.Size() + 1);
Expand Down
118 changes: 115 additions & 3 deletions flex/engines/hqps_db/core/utils/keyed.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#define ENGINES_HQPS_ENGINE_KEYED_UTILS_H_

#include "flex/engines/hqps_db/core/utils/props.h"
#include "flex/engines/hqps_db/database/mutable_csr_interface.h"
#include "flex/engines/hqps_db/structures/collection.h"
#include "flex/engines/hqps_db/structures/multi_edge_set/adj_edge_set.h"
#include "flex/engines/hqps_db/structures/multi_edge_set/untyped_edge_set.h"
Expand Down Expand Up @@ -73,6 +74,49 @@ struct KeyedT<RowVertexSet<LabelT, VID_T, T...>,
}
};

// Group By TwoLabelVertexSet's internal id.
//
// Specialization of KeyedT selected when a TwoLabelVertexSet is keyed with a
// PropertySelector<grape::EmptyType>. The keyed result is again a
// TwoLabelVertexSet, so both builder aliases are instantiated with the same
// <VID_T, LabelT, T...> argument order as the set itself.
template <typename LabelT, typename VID_T, typename... T>
struct KeyedT<TwoLabelVertexSet<VID_T, LabelT, T...>,
              PropertySelector<grape::EmptyType>> {
  using keyed_set_t = TwoLabelVertexSet<VID_T, LabelT, T...>;
  // The builder types.
  using keyed_builder_t = TwoLabelVertexSetImplBuilder<VID_T, LabelT, T...>;
  // Fixed: the template arguments were previously swapped (<LabelT, VID_T>),
  // which named a different type than keyed_builder_t and the set use.
  // NOTE(review): assumes set.CreateBuilder() returns this builder type --
  // confirm against TwoLabelVertexSet.
  using unkeyed_builder_t = TwoLabelVertexSetImplBuilder<VID_T, LabelT, T...>;

  // Creates the keyed builder from the input set; the selector is unused.
  static keyed_builder_t create_keyed_builder(
      const TwoLabelVertexSet<VID_T, LabelT, T...>& set,
      const PropertySelector<grape::EmptyType>& selector) {
    return keyed_builder_t(set);
  }

  // Creates the unkeyed builder by delegating to the set; the selector is
  // unused.
  static unkeyed_builder_t create_unkeyed_builder(
      const TwoLabelVertexSet<VID_T, LabelT, T...>& set,
      const PropertySelector<grape::EmptyType>& selector) {
    return set.CreateBuilder();
  }
};

// Group By TwoLabelVertexSet's other properties.
//
// Specialization of KeyedT selected when a TwoLabelVertexSet is keyed with a
// PropertySelector<PropT>: the keyed result is a Collection<PropT>, and both
// builders are default-constructed.
template <typename LabelT, typename VID_T, typename... T, typename PropT>
struct KeyedT<TwoLabelVertexSet<VID_T, LabelT, T...>, PropertySelector<PropT>> {
  using keyed_set_t = Collection<PropT>;
  // The builder types.
  using keyed_builder_t = KeyedCollectionBuilder<PropT>;
  using unkeyed_builder_t = CollectionBuilder<PropT>;

  // Neither argument is consulted; a fresh keyed builder is returned.
  static keyed_builder_t create_keyed_builder(
      const TwoLabelVertexSet<VID_T, LabelT, T...>& set,
      const PropertySelector<PropT>& selector) {
    return keyed_builder_t{};
  }

  // Neither argument is consulted; a fresh unkeyed builder is returned.
  // NOTE(review): the selector is typed PropertySelector<grape::EmptyType>
  // here, unlike create_keyed_builder -- confirm this is intended.
  static unkeyed_builder_t create_unkeyed_builder(
      const TwoLabelVertexSet<VID_T, LabelT, T...>& set,
      const PropertySelector<grape::EmptyType>& selector) {
    return unkeyed_builder_t{};
  }
};

// group by the vertex set' property
template <typename LabelT, typename VID_T, typename... T, typename PropT>
struct KeyedT<RowVertexSet<LabelT, VID_T, T...>, PropertySelector<PropT>> {
Expand Down Expand Up @@ -169,12 +213,34 @@ template <typename GI, typename LabelT, typename VID_T, typename... T,
struct KeyedAggT<GI, RowVertexSet<LabelT, VID_T, T...>, AggFunc::COUNT,
std::tuple<PropT>, std::integer_sequence<int32_t, tag_id>> {
using agg_res_t = Collection<size_t>;
using index_ele_t =
typename RowVertexSet<LabelT, VID_T, T...>::index_ele_tuple_t;
using prop_getter_t = RowVertexSetPropGetter<
tag_id, gs::mutable_csr_graph_impl::SinglePropGetter<PropT>, index_ele_t>;
// build a counter array.
using aggregate_res_builder_t = CountBuilder<tag_id>;
using aggregate_res_builder_t = PropCountBuilder<tag_id, prop_getter_t>;

static aggregate_res_builder_t create_agg_builder(
const RowVertexSet<LabelT, VID_T, T...>& set, const GI& graph,
std::tuple<PropertySelector<PropT>>& selector) {
auto prop_getter = create_prop_getter_impl<tag_id, PropT>(
set, graph, std::get<0>(selector).prop_name_);
return aggregate_res_builder_t(std::move(prop_getter));
}
};

// COUNT over a RowVertexSet with no property selected
// (std::tuple<grape::EmptyType>): every inserted element is counted, so a
// plain CountBuilder is used.
template <typename GI, typename LabelT, typename VID_T, typename... T,
          int tag_id>
struct KeyedAggT<GI, RowVertexSet<LabelT, VID_T, T...>, AggFunc::COUNT,
                 std::tuple<grape::EmptyType>,
                 std::integer_sequence<int32_t, tag_id>> {
  // The aggregation result is one counter per group.
  using agg_res_t = Collection<size_t>;
  using aggregate_res_builder_t = CountBuilder<tag_id>;

  // None of the arguments are consulted; a fresh counter builder is returned.
  static aggregate_res_builder_t create_agg_builder(
      const RowVertexSet<LabelT, VID_T, T...>& set, const GI& graph,
      std::tuple<PropertySelector<grape::EmptyType>>& selector) {
    return aggregate_res_builder_t();
  }
};
Expand Down Expand Up @@ -202,12 +268,34 @@ template <typename GI, typename VID_T, typename LabelT, typename... T,
struct KeyedAggT<GI, TwoLabelVertexSet<VID_T, LabelT, T...>, AggFunc::COUNT,
std::tuple<PropT>, std::integer_sequence<int32_t, tag_id>> {
using agg_res_t = Collection<size_t>;
using index_ele_t =
typename TwoLabelVertexSet<VID_T, LabelT, T...>::index_ele_tuple_t;
// build a counter array.
using aggregate_res_builder_t = CountBuilder<tag_id>;
using prop_getter_t = TwoLabelVertexSetImplPropGetter<
tag_id, gs::mutable_csr_graph_impl::SinglePropGetter<PropT>, index_ele_t>;
using aggregate_res_builder_t = PropCountBuilder<tag_id, prop_getter_t>;

static aggregate_res_builder_t create_agg_builder(
const TwoLabelVertexSet<VID_T, LabelT, T...>& set, const GI& graph,
std::tuple<PropertySelector<PropT>>& selectors) {
auto prop_getter = create_prop_getter_impl<tag_id, PropT>(
set, graph, std::get<0>(selectors).prop_name_);
return aggregate_res_builder_t(std::move(prop_getter));
}
};

// COUNT over a TwoLabelVertexSet with no property selected
// (std::tuple<grape::EmptyType>): every inserted element is counted, so a
// plain CountBuilder is used.
template <typename GI, typename VID_T, typename LabelT, typename... T,
          int tag_id>
struct KeyedAggT<GI, TwoLabelVertexSet<VID_T, LabelT, T...>, AggFunc::COUNT,
                 std::tuple<grape::EmptyType>,
                 std::integer_sequence<int32_t, tag_id>> {
  // The aggregation result is one counter per group.
  using agg_res_t = Collection<size_t>;
  using aggregate_res_builder_t = CountBuilder<tag_id>;

  // None of the arguments are consulted; a fresh counter builder is returned.
  static aggregate_res_builder_t create_agg_builder(
      const TwoLabelVertexSet<VID_T, LabelT, T...>& set, const GI& graph,
      std::tuple<PropertySelector<grape::EmptyType>>& selectors) {
    return aggregate_res_builder_t();
  }
};
Expand Down Expand Up @@ -236,12 +324,36 @@ template <typename GI, typename VID_T, typename LabelT, typename... SET_T,
struct KeyedAggT<GI, GeneralVertexSet<VID_T, LabelT, SET_T...>, AggFunc::COUNT,
std::tuple<PropT>, std::integer_sequence<int32_t, tag_id>> {
using agg_res_t = Collection<size_t>;
using index_ele_t =
typename GeneralVertexSet<VID_T, LabelT, SET_T...>::index_ele_tuple_t;
using prop_getter_t = GeneralVertexSetPropGetter<
tag_id, gs::mutable_csr_graph_impl::SinglePropGetter<PropT>, index_ele_t>;

// build a counter array.
using aggregate_res_builder_t = CountBuilder<tag_id>;
using aggregate_res_builder_t = PropCountBuilder<tag_id, prop_getter_t>;

static aggregate_res_builder_t create_agg_builder(
const GeneralVertexSet<VID_T, LabelT, SET_T...>& set, const GI& graph,
std::tuple<PropertySelector<PropT>>& selectors) {
auto prop_getter = create_prop_getter_impl<tag_id, PropT>(
set, graph, std::get<0>(selectors).prop_name_);
return aggregate_res_builder_t(std::move(prop_getter));
}
};

// Count on internal id for a general vertex set.
//
// COUNT over a GeneralVertexSet with no property selected
// (std::tuple<grape::EmptyType>): every inserted element is counted, so a
// plain CountBuilder is used.
template <typename GI, typename VID_T, typename LabelT, typename... SET_T,
          int tag_id>
struct KeyedAggT<GI, GeneralVertexSet<VID_T, LabelT, SET_T...>, AggFunc::COUNT,
                 std::tuple<grape::EmptyType>,
                 std::integer_sequence<int32_t, tag_id>> {
  // The aggregation result is one counter per group.
  using agg_res_t = Collection<size_t>;
  using aggregate_res_builder_t = CountBuilder<tag_id>;

  // None of the arguments are consulted; a fresh counter builder is returned.
  static aggregate_res_builder_t create_agg_builder(
      const GeneralVertexSet<VID_T, LabelT, SET_T...>& set, const GI& graph,
      std::tuple<PropertySelector<grape::EmptyType>>& selectors) {
    return aggregate_res_builder_t();
  }
};
Expand Down
10 changes: 10 additions & 0 deletions flex/engines/hqps_db/database/adj_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,11 @@ class AdjListArray<T> {
slices_.emplace_back(
std::make_pair(casted_csr->get_edges(v), slice_t()));
}
} else {
LOG(WARNING) << "csr is null";
for (auto v : vids) {
slices_.emplace_back(std::make_pair(slice_t(), slice_t()));
}
}
}
AdjListArray(const csr_base_t* csr0, const csr_base_t* csr1,
Expand Down Expand Up @@ -567,6 +572,11 @@ class AdjListArray<grape::EmptyType> {
slices_.emplace_back(
std::make_pair(casted_csr->get_edges(v), slice_t()));
}
} else {
LOG(ERROR) << "csr is null";
for (auto v : vids) {
slices_.emplace_back(std::make_pair(slice_t(), slice_t()));
}
}
}

Expand Down
38 changes: 38 additions & 0 deletions flex/engines/hqps_db/structures/collection.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,44 @@ class CountBuilder {
std::vector<size_t> vec_;
};

// Prop Count Builder.
//
// Builds a Collection<size_t> of per-index counters. For every inserted row
// the element at position `tag` of the element tuple is looked up through the
// property getter, and the counter at the given index is incremented only when
// the fetched property differs from the null record for its type.
template <int tag, typename PropGetterT>
class PropCountBuilder {
 public:
  // Takes ownership of the property getter used to fetch each element's
  // property.
  PropCountBuilder(PropGetterT&& prop_getter)
      : prop_getter_(std::move(prop_getter)) {}

  // Accounts for one row at index `ind`.
  //   ind:   index of the counter to update; the counter array is grown with
  //          zeros so that `ind` is always a valid position.
  //   tuple: element tuple; only the element at position `tag` is inspected.
  //   data:  unused.
  // The counter is left untouched (but still exists) when the element's
  // property equals the null record.
  template <typename ELE_TUPLE, typename DATA_TUPLE>
  void insert(size_t ind, const ELE_TUPLE& tuple, const DATA_TUPLE& data) {
    // Make sure a zero-initialized counter exists for `ind`.
    if (vec_.size() <= ind) {
      vec_.resize(ind + 1, 0);
    }

    auto& cur_ele = gs::get_from_tuple<tag>(tuple);
    // Fetch the property of the current element through the getter.
    auto props = prop_getter_.get_view(cur_ele);
    using props_t = decltype(props);

    if (props != NullRecordCreator<props_t>::GetNull()) {
      ++vec_[ind];
    } else {
      VLOG(10) << "ele is null, ind: " << ind
               << "ele:" << gs::to_string(cur_ele);
    }
  }

  // Moves the accumulated counters into the resulting collection; the builder
  // should not be reused afterwards.
  Collection<size_t> Build() { return Collection<size_t>(std::move(vec_)); }

 private:
  std::vector<size_t> vec_;
  PropGetterT prop_getter_;
};

template <int... tag>
class MultiColCountBuilder {
public:
Expand Down
2 changes: 2 additions & 0 deletions interactive_engine/compiler/conf/ir.compiler.properties
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,5 @@ neo4j.bolt.server.disabled: false
calcite.default.charset: UTF-8

# frontend.query.per.second.limit: 2147483647

# graph.type.inference.enabled: true
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,18 @@

import com.alibaba.graphscope.common.utils.FileUtils;

import org.apache.calcite.plan.Context;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.lang3.StringUtils;
import org.checkerframework.checker.nullness.qual.Nullable;

import java.io.FileInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;

public class Configs {
public class Configs implements Context {
protected Properties properties;

public Configs(String file) throws IOException {
Expand Down Expand Up @@ -86,6 +88,11 @@ public String toString() {
return this.properties.toString();
}

/**
 * Exposes this configuration object through the Calcite {@code Context} lookup.
 *
 * @param aClass the type the caller wants to obtain
 * @return this instance cast to {@code aClass} when it is an instance of that type,
 *     otherwise {@code null}
 */
@Override
public <C> @Nullable C unwrap(Class<C> aClass) {
    if (aClass.isInstance(this)) {
        return aClass.cast(this);
    }
    return null;
}

public static class Factory {
public static Configs create(String file) throws Exception {
switch (FileUtils.getFormatType(file)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,7 @@ public class FrontendConfig {

public static final Config<Integer> QUERY_PER_SECOND_LIMIT =
Config.intConfig("frontend.query.per.second.limit", 2147483647);

public static final Config<Boolean> GRAPH_TYPE_INFERENCE_ENABLED =
Config.boolConfig("graph.type.inference.enabled", true);
}
Loading

0 comments on commit 9b75cea

Please sign in to comment.