Skip to content

Commit

Permalink
Enable date and time types support in vineyard-graph (and GIE)
Browse files Browse the repository at this point in the history
Signed-off-by: Tao He <[email protected]>
  • Loading branch information
sighingnow committed Jan 20, 2024
1 parent a383a34 commit 0cdeb5a
Show file tree
Hide file tree
Showing 23 changed files with 983 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-graphscope-wheels-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ jobs:
run: |
. ~/.graphscope_env
python3 -m pip install libclang
git clone --single-branch -b v0.18.2 --depth=1 https://github.com/v6d-io/v6d.git /tmp/v6d
git clone --single-branch -b v0.20.1 --depth=1 https://github.com/v6d-io/v6d.git /tmp/v6d
cd /tmp/v6d
git submodule update --init
cmake . -DCMAKE_INSTALL_PREFIX=/usr/local \
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/gae.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
runs-on: ubuntu-20.04
if: ${{ github.repository == 'alibaba/GraphScope' }}
container:
image: registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:v0.18.2
image: registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:v0.20.1
steps:
- uses: actions/checkout@v3

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/networkx-forward-algo-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
run:
shell: bash --noprofile --norc -eo pipefail {0}
container:
image: registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:v0.18.2
image: registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:v0.20.1
options:
--shm-size 4096m

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
sudo mkdir /opt/graphscope
sudo chown -R $(id -u):$(id -g) /opt/graphscope
python3 -m pip install click
python3 gsctl.py install-deps dev --v6d-version v0.18.2
python3 gsctl.py install-deps dev --v6d-version v0.20.1
- name: Setup tmate session
if: false
Expand Down
72 changes: 72 additions & 0 deletions analytical_engine/core/object/fragment_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,46 @@ gs::rpc::graph::DataTypePb PropertyTypeToPb(vineyard::PropertyType type) {
return gs::rpc::graph::DataTypePb::STRING;
} else if (arrow::large_utf8()->Equals(type)) {
return gs::rpc::graph::DataTypePb::STRING;
} else if (arrow::date32()->Equals(type)) {
return gs::rpc::graph::DataTypePb::DATE32;
} else if (arrow::date64()->Equals(type)) {
return gs::rpc::graph::DataTypePb::DATE64;
} else if (type->id() == arrow::Type::TIME32) {
auto time32_type = std::dynamic_pointer_cast<arrow::Time32Type>(type);
switch (time32_type->unit()) {
case arrow::TimeUnit::SECOND:
return gs::rpc::graph::DataTypePb::TIME32_S;
case arrow::TimeUnit::MILLI:
return gs::rpc::graph::DataTypePb::TIME32_MS;
case arrow::TimeUnit::MICRO:
return gs::rpc::graph::DataTypePb::TIME32_US;
case arrow::TimeUnit::NANO:
return gs::rpc::graph::DataTypePb::TIME32_NS;
}
} else if (type->id() == arrow::Type::TIME64) {
auto time64_type = std::dynamic_pointer_cast<arrow::Time64Type>(type);
switch (time64_type->unit()) {
case arrow::TimeUnit::SECOND:
return gs::rpc::graph::DataTypePb::TIME64_S;
case arrow::TimeUnit::MILLI:
return gs::rpc::graph::DataTypePb::TIME64_MS;
case arrow::TimeUnit::MICRO:
return gs::rpc::graph::DataTypePb::TIME64_US;
case arrow::TimeUnit::NANO:
return gs::rpc::graph::DataTypePb::TIME64_NS;
}
} else if (type->id() == arrow::Type::TIMESTAMP) {
auto timestamp_type = std::dynamic_pointer_cast<arrow::TimestampType>(type);
switch (timestamp_type->unit()) {
case arrow::TimeUnit::SECOND:
return gs::rpc::graph::DataTypePb::TIMESTAMP_S;
case arrow::TimeUnit::MILLI:
return gs::rpc::graph::DataTypePb::TIMESTAMP_MS;
case arrow::TimeUnit::MICRO:
return gs::rpc::graph::DataTypePb::TIMESTAMP_US;
case arrow::TimeUnit::NANO:
return gs::rpc::graph::DataTypePb::TIMESTAMP_NS;
}
} else if (arrow::large_list(arrow::int32())->Equals(type)) {
return gs::rpc::graph::DataTypePb::INT_LIST;
} else if (arrow::large_list(arrow::int64())->Equals(type)) {
Expand Down Expand Up @@ -138,6 +178,38 @@ gs::rpc::graph::DataTypePb PropertyTypeToPb(const std::string& type) {
return gs::rpc::graph::DataTypePb::LONG_LIST;
} else if (type == "float_list") {
return gs::rpc::graph::DataTypePb::FLOAT_LIST;
} else if (type == "date32[day]") {
return gs::rpc::graph::DataTypePb::DATE32;
} else if (type == "date64[ms]") {
return gs::rpc::graph::DataTypePb::DATE64;
} else if (type == "time32[s]") {
return gs::rpc::graph::DataTypePb::TIME32_S;
} else if (type == "time32[ms]") {
return gs::rpc::graph::DataTypePb::TIME32_MS;
} else if (type == "time32[us]") {
return gs::rpc::graph::DataTypePb::TIME32_US;
} else if (type == "time32[ns]") {
return gs::rpc::graph::DataTypePb::TIME32_NS;
} else if (type == "time64[s]") {
return gs::rpc::graph::DataTypePb::TIME64_S;
} else if (type == "time64[ms]") {
return gs::rpc::graph::DataTypePb::TIME64_MS;
} else if (type == "time64[us]") {
return gs::rpc::graph::DataTypePb::TIME64_US;
} else if (type == "time64[ns]") {
return gs::rpc::graph::DataTypePb::TIME64_NS;
} else if (type.substr(0, std::string("timestamp[s]").length()) ==
"timestamp[s]") {
return gs::rpc::graph::DataTypePb::TIMESTAMP_S;
} else if (type.substr(0, std::string("timestamp[ms]").length()) ==
"timestamp[ms]") {
return gs::rpc::graph::DataTypePb::TIMESTAMP_MS;
} else if (type.substr(0, std::string("timestamp[us]").length()) ==
"timestamp[us]") {
return gs::rpc::graph::DataTypePb::TIMESTAMP_US;
} else if (type.substr(0, std::string("timestamp[ns]").length()) ==
"timestamp[ns]") {
return gs::rpc::graph::DataTypePb::TIMESTAMP_NS;
} else if (type == "double_list") {
return gs::rpc::graph::DataTypePb::DOUBLE_LIST;
} else if (type == "string_list" || type == "str_list") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@ public class DataType {
public static final DataType DOUBLE = new DataType(InternalDataType.DOUBLE);
public static final DataType BYTES = new DataType(InternalDataType.BYTES);
public static final DataType STRING = new DataType(InternalDataType.STRING);

// See also: `Date32` in common.proto.
public static final DataType DATE = new DataType(InternalDataType.DATE);
// See also: `Time32` in common.proto.
public static final DataType TIME = new DataType(InternalDataType.TIME);
// See also: `Timestamp` in common.proto.
public static final DataType TIMESTAMP = new DataType(InternalDataType.TIMESTAMP);

// For LIST, SET and MAP
@JsonProperty private String expression;
Expand All @@ -58,6 +64,15 @@ public static DataType toDataType(int i) {
}

public static DataType valueOf(String typeName) {
if (typeName.startsWith("DATE")) {
return DATE;
}
if (typeName.startsWith("TIME")) {
return TIME;
}
if (typeName.startsWith("TIMESTAMP")) {
return TIMESTAMP;
}
return new DataType(InternalDataType.valueOf(typeName));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,19 @@ public enum InternalDataType {
* STRING data type in InteractiveEngine, map to String in Java
*/
STRING,

/**
* INT data type in InteractiveEngine, map to Integer(int) in Java
* Date data type in InteractiveEngine, map to DateValue in Java
*/
DATE,
/**
* Date data type in InteractiveEngine, map to TimeValue in Java
*/
TIME,
/**
* Date data type in InteractiveEngine, map to DateTimeValue in Java
*/
TIMESTAMP,

/**
* SET data type, Collection Type, can mixed with list and map, example:Set, value: List<Map<List<String>,List<String>>>
Expand Down Expand Up @@ -94,6 +103,8 @@ public enum InternalDataType {
&& value != UNKNOWN
&& value != CHAR
&& value != DATE
&& value != TIME
&& value != TIMESTAMP
&& value != SHORT) {
primitiveTypes.add(value.name());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,12 @@ public static DataType parseFromDataType(
case DATE:
return DataType.STRING;

case TIME:
return DataType.STRING;

case TIMESTAMP:
return DataType.STRING;

case BYTES:
return DataType.BYTES;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,17 @@
import com.alibaba.graphscope.gaia.proto.IrResult;
import com.alibaba.graphscope.gremlin.exception.GremlinResultParserException;

import com.google.common.base.Preconditions;
import org.apache.tinkerpop.gremlin.structure.Edge;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedEdge;
import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedVertex;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.sql.Timestamp;
import java.time.*;
import java.util.*;
import java.util.stream.Collectors;

public class ParserUtils {
Expand Down Expand Up @@ -91,6 +91,36 @@ private static Object parseCommonValue(Common.Value value) {
return value.getF64();
case STR:
return value.getStr();
case DATE:
Preconditions.checkArgument(value.getDate().getItem() >= 0,
"Date prior to 1970-00-00 is not supported, got %d", value.getDate().getItem());
return new Date(((long) value.getDate().getItem()) * 24 * 60 * 60 * 1000);
case TIME:
Preconditions.checkArgument(value.getTime().getItem() >= 0,
"Time of day must be greater than 00:00:00, got %d", value.getTime().getItem());
// gremlin-python doesn't support local time
//
// see also: https://github.com/apache/tinkerpop/blob/master/gremlin-python/src/main/python/gremlin_python/structure/io/graphbinaryV1.py#L105-L107
OffsetTime time = LocalTime.ofNanoOfDay(((long) value.getTime().getItem()) * 1000_000L).atOffset(ZoneOffset.UTC);
// to ISO-8601 formats: HH:mm:ss.SSSSSS
//
// see also:
// - https://docs.oracle.com/javase/8/docs/api/java/time/LocalTime.html#toString--
// - https://docs.oracle.com/javase/8/docs/api/java/time/OffsetTime.html#toString--
return time.toString();
case TIMESTAMP:
Preconditions.checkArgument(value.getTimestamp().getItem() >= 0,
"Timestamp prior to 1970-00-00 00:00:00 is not supported, got %d", value.getTimestamp().getItem());
// gremlin-python will convert timestamp to float, that isn't what we want
//
// see also: https://github.com/apache/tinkerpop/blob/master/gremlin-python/src/main/python/gremlin_python/statics.py#L48
//
// We use java.util.Instant rather than java.sql.Timestamp for a UTC timestamp value
OffsetDateTime ts = Instant.ofEpochSecond(value.getTimestamp().getItem() / 1000L, value.getTimestamp().getItem() % 1000L * 1000_000L).atOffset(ZoneOffset.UTC);
// to ISO-8601 format: uuuu-MM-dd'T'HH:mm:ss.SSSSSSXXXXX
//
// see also: https://docs.oracle.com/javase/8/docs/api/java/time/OffsetDateTime.html#toString--
return ts.toString();
case PAIR_ARRAY:
Common.PairArray pairs = value.getPairArray();
Map pairInMap = new HashMap();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
use std::fmt;

use ahash::HashMap;
use chrono::{NaiveDate, NaiveDateTime, NaiveTime, Datelike, Timelike};

use dyn_type::DateTimeFormats;
use dyn_type::Object;
use dyn_type::Primitives;
use global_query::store_api::prelude::Property;
Expand Down Expand Up @@ -52,6 +55,18 @@ fn encode_runtime_prop_val(prop_val: Property) -> Object {
Property::Double(d) => Object::Primitive(Primitives::Float(d)),
Property::Bytes(v) => Object::Blob(v.into_boxed_slice()),
Property::String(s) => Object::String(s),
Property::Date(s) => {
match NaiveDate::parse_from_str(&s, "%Y-%m-%d") {
Ok(date) => Object::DateFormat(DateTimeFormats::Date(date)),
Err(_) => match NaiveTime::parse_from_str(&s, "%H:%M:%S.%6f") {
Ok(time) => Object::DateFormat(DateTimeFormats::Time(time)),
Err(_) => match NaiveDateTime::parse_from_str(&s, "%Y-%m-%d %H:%M:%S.%6f") {
Ok(datetime) => Object::DateFormat(DateTimeFormats::DateTime(datetime)),
Err(_) => unimplemented!("Failed to parse the datetime/timestamp property value: '{}'", s),
}
}
}
},
_ => unimplemented!(),
}
}
Expand Down
1 change: 1 addition & 0 deletions interactive_engine/executor/store/global_query/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
chrono = "0.4"
log = "0.4"
itertools = "0.10"
byteorder = "1.4.3"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,52 @@ int v6d_get_property_as_string_list(Property* property, const char*** out,
return htap_impl::get_property_as_string_list(property, out, out_len,
out_num);
}
int v6d_get_property_as_date32(struct Property* property, int32_t *out) {
return htap_impl::get_property_as_date32(property, out);
}
int v6d_get_property_as_date64(struct Property* property, int64_t *out) {
return htap_impl::get_property_as_date64(property, out);
}
int v6d_get_property_as_time32_s(struct Property* property, int32_t *out) {
return htap_impl::get_property_as_time32_s(property, out);
}
int v6d_get_property_as_time32_ms(struct Property* property, int32_t *out) {
return htap_impl::get_property_as_time32_ms(property, out);
}
int v6d_get_property_as_time32_us(struct Property* property, int32_t *out) {
return htap_impl::get_property_as_time32_us(property, out);
}
int v6d_get_property_as_time32_ns(struct Property* property, int32_t *out) {
return htap_impl::get_property_as_time32_ns(property, out);
}
int v6d_get_property_as_time64_s(struct Property* property, int64_t *out) {
return htap_impl::get_property_as_time64_s(property, out);
}
int v6d_get_property_as_time64_ms(struct Property* property, int64_t *out) {
return htap_impl::get_property_as_time64_ms(property, out);
}
int v6d_get_property_as_time64_us(struct Property* property, int64_t *out) {
return htap_impl::get_property_as_time64_us(property, out);
}
int v6d_get_property_as_time64_ns(struct Property* property, int64_t *out) {
return htap_impl::get_property_as_time64_ns(property, out);
}
int v6d_get_property_as_timestamp_s(struct Property* property, int64_t *out,
const char **out_timezone, int *out_timezone_len) {
return htap_impl::get_property_as_timestamp_s(property, out, out_timezone, out_timezone_len);
}
int v6d_get_property_as_timestamp_ms(struct Property* property, int64_t *out,
const char **out_timezone, int *out_timezone_len) {
return htap_impl::get_property_as_timestamp_ms(property, out, out_timezone, out_timezone_len);
}
int v6d_get_property_as_timestamp_us(struct Property* property, int64_t *out,
const char **out_timezone, int *out_timezone_len) {
return htap_impl::get_property_as_timestamp_us(property, out, out_timezone, out_timezone_len);
}
int v6d_get_property_as_timestamp_ns(struct Property* property, int64_t *out,
const char **out_timezone, int *out_timezone_len) {
return htap_impl::get_property_as_timestamp_ns(property, out, out_timezone, out_timezone_len);
}

void v6d_free_property(Property* property) {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,20 @@ enum PropertyType {
FLOAT_LIST = 12,
DOUBLE_LIST = 13,
STRING_LIST = 14,
DATE32 = 15,
DATE64 = 16,
TIME32_S = 17,
TIME32_MS = 18,
TIME32_US = 19,
TIME32_NS = 20,
TIME64_S = 21,
TIME64_MS = 22,
TIME64_US = 23,
TIME64_NS = 24,
TIMESTAMP_S = 25,
TIMESTAMP_MS = 26,
TIMESTAMP_US = 27,
TIMESTAMP_NS = 28,
};

struct Property {
Expand Down Expand Up @@ -240,6 +254,24 @@ int v6d_get_property_as_double_list(struct Property* property, const double** ou
int* out_len);
int v6d_get_property_as_string_list(struct Property* property, const char*** out,
const int** out_len, int* out_num);
int v6d_get_property_as_date32(struct Property* property, int32_t *out);
int v6d_get_property_as_date64(struct Property* property, int64_t *out);
int v6d_get_property_as_time32_s(struct Property* property, int32_t *out);
int v6d_get_property_as_time32_ms(struct Property* property, int32_t *out);
int v6d_get_property_as_time32_us(struct Property* property, int32_t *out);
int v6d_get_property_as_time32_ns(struct Property* property, int32_t *out);
int v6d_get_property_as_time64_s(struct Property* property, int64_t *out);
int v6d_get_property_as_time64_ms(struct Property* property, int64_t *out);
int v6d_get_property_as_time64_us(struct Property* property, int64_t *out);
int v6d_get_property_as_time64_ns(struct Property* property, int64_t *out);
int v6d_get_property_as_timestamp_s(struct Property* property, int64_t *out,
const char **out_timezone, int *out_timezone_len);
int v6d_get_property_as_timestamp_ms(struct Property* property, int64_t *out,
const char **out_timezone, int *out_timezone_len);
int v6d_get_property_as_timestamp_us(struct Property* property, int64_t *out,
const char **out_timezone, int *out_timezone_len);
int v6d_get_property_as_timestamp_ns(struct Property* property, int64_t *out,
const char **out_timezone, int *out_timezone_len);

// *out_num为string的个数
// (*out_len)[i]为第i个string的长度
Expand Down
Loading

0 comments on commit 0cdeb5a

Please sign in to comment.