Skip to content

Commit

Permalink
Replace BTreeSet with IndexSet in type to order more intuitively (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
jpschorr authored Jul 11, 2024
1 parent 1b22fb6 commit c91c5a4
Show file tree
Hide file tree
Showing 10 changed files with 71 additions and 46 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- *BREAKING:* partiql-ast: changed modeling of `GroupByExpr` `strategy` field to be an `Option`
- *BREAKING:* partiql-ast: changed modeling of `PathStep` to split `PathExpr` to `PathIndex` (e.g., `[2]`) and `PathProject` (e.g., `.a`)
- *BREAKING:* partiql-ast: changed modeling of `PathStep` to rename `PathWildcard` to `PathForEach` (for `[*]`)
- *BREAKING:* partiql-types: changed type ordering to match specification order

### Added
- partiql-ast: Pretty-printing of AST via `ToPretty` trait
Expand Down
2 changes: 2 additions & 0 deletions extension/partiql-extension-ddl/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ thiserror = "1.0"
miette = { version = "7.2", features = ["fancy"] }
time = { version = "0.3", features = ["formatting", "parsing", "serde"] }

indexmap = "2.2"

[dev-dependencies]
criterion = "0.4"

Expand Down
16 changes: 8 additions & 8 deletions extension/partiql-extension-ddl/src/ddl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,8 @@ impl PartiqlDdlEncoder for PartiqlBasicDdlEncoder {
#[cfg(test)]
mod tests {
use super::*;
use indexmap::IndexSet;
use partiql_types::{array, bag, f64, int8, r#struct, str, struct_fields, StructConstraint};
use std::collections::BTreeSet;

#[test]
fn ddl_test() {
Expand All @@ -240,7 +240,7 @@ mod tests {
("b", array![str![]]),
("c", f64!()),
];
let details = r#struct![BTreeSet::from([nested_attrs])];
let details = r#struct![IndexSet::from([nested_attrs])];

let fields = struct_fields![
("employee_id", int8![]),
Expand All @@ -249,17 +249,17 @@ mod tests {
("details", details),
("dependents", array![str![]])
];
let ty = bag![r#struct![BTreeSet::from([
let ty = bag![r#struct![IndexSet::from([
fields,
StructConstraint::Open(false)
])]];

let expected_compact = r#""dependents" ARRAY<VARCHAR>,"details" STRUCT<"a": UNION<TINYINT,DECIMAL(5, 4)>,"b": ARRAY<VARCHAR>,"c": DOUBLE>,"employee_id" TINYINT,"full_name" VARCHAR,"salary" DECIMAL(8, 2)"#;
let expected_pretty = r#""dependents" ARRAY<VARCHAR>,
"details" STRUCT<"a": UNION<TINYINT,DECIMAL(5, 4)>,"b": ARRAY<VARCHAR>,"c": DOUBLE>,
"employee_id" TINYINT,
let expected_compact = r#""employee_id" TINYINT,"full_name" VARCHAR,"salary" DECIMAL(8, 2),"details" STRUCT<"a": UNION<DECIMAL(5, 4),TINYINT>,"b": ARRAY<VARCHAR>,"c": DOUBLE>,"dependents" ARRAY<VARCHAR>"#;
let expected_pretty = r#""employee_id" TINYINT,
"full_name" VARCHAR,
"salary" DECIMAL(8, 2)"#;
"salary" DECIMAL(8, 2),
"details" STRUCT<"a": UNION<DECIMAL(5, 4),TINYINT>,"b": ARRAY<VARCHAR>,"c": DOUBLE>,
"dependents" ARRAY<VARCHAR>"#;

let ddl_compact = PartiqlBasicDdlEncoder::new(DdlFormat::Compact);
assert_eq!(ddl_compact.ddl(&ty).expect("write shape"), expected_compact);
Expand Down
8 changes: 4 additions & 4 deletions extension/partiql-extension-ddl/tests/ddl-tests.rs
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
use indexmap::IndexSet;
use partiql_extension_ddl::ddl::{DdlFormat, PartiqlBasicDdlEncoder, PartiqlDdlEncoder};
use partiql_types::{bag, int, r#struct, str, struct_fields, StructConstraint, StructField};
use partiql_types::{BagType, PartiqlShape, Static, StructType};
use std::collections::BTreeSet;

#[test]
fn basic_ddl_test() {
let details_fields = struct_fields![("age", int!())];
let details = r#struct![BTreeSet::from([details_fields])];
let details = r#struct![IndexSet::from([details_fields])];
let fields = [
StructField::new("id", int!()),
StructField::new("name", str!()),
StructField::new("address", PartiqlShape::new_non_nullable(Static::String)),
StructField::new_optional("details", details.clone()),
]
.into();
let shape = bag![r#struct![BTreeSet::from([
let shape = bag![r#struct![IndexSet::from([
StructConstraint::Fields(fields),
StructConstraint::Open(false)
])]];

let ddl_compact = PartiqlBasicDdlEncoder::new(DdlFormat::Compact);
let actual = ddl_compact.ddl(&shape).expect("ddl_output");
let expected = r#""address" VARCHAR NOT NULL,"id" INT,"name" VARCHAR,"details" OPTIONAL STRUCT<"age": INT>"#;
let expected = r#""id" INT,"name" VARCHAR,"address" VARCHAR NOT NULL,"details" OPTIONAL STRUCT<"age": INT>"#;

println!("Actual: {actual}");
println!("Expected: {expected}");
Expand Down
2 changes: 1 addition & 1 deletion partiql-ast-passes/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ partiql-types = { path = "../partiql-types", version = "0.8.*" }

assert_matches = "1.5.*"
fnv = "1"
indexmap = "1.9"
indexmap = "2.2"
thiserror = "1.0"

[dev-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion partiql-ast/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ path = "src/lib.rs"
bench = false

[dependencies]
indexmap = { version = "1.9", default-features = false }
indexmap = "2.2"
rust_decimal = { version = "1.25.0", default-features = false, features = ["std"] }
serde = { version = "1.*", features = ["derive"], optional = true }
pretty = "0.12"
Expand Down
2 changes: 1 addition & 1 deletion partiql-logical-planner/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ ion-rs = "0.18"
ordered-float = "3.*"
itertools = "0.10.*"
unicase = "2.6"
indexmap = "1.9"
indexmap = "2.2"
petgraph = "0.6.*"
num = "0.4"
fnv = "1"
Expand Down
28 changes: 14 additions & 14 deletions partiql-logical-planner/src/typer.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::typer::LookupOrder::{GlobalLocal, LocalGlobal};
use indexmap::IndexMap;
use indexmap::{IndexMap, IndexSet};
use partiql_ast::ast::{CaseSensitivity, SymbolPrimitive};
use partiql_catalog::Catalog;
use partiql_logical::{BindingsOp, LogicalPlan, OpId, PathComponent, ValueExpr, VarRefType};
Expand All @@ -11,7 +11,7 @@ use partiql_value::{BindingsName, Value};
use petgraph::algo::toposort;
use petgraph::graph::NodeIndex;
use petgraph::prelude::StableGraph;
use std::collections::{BTreeSet, HashMap};
use std::collections::HashMap;
use thiserror::Error;

#[macro_export]
Expand Down Expand Up @@ -218,7 +218,7 @@ impl<'c> PlanTyper<'c> {
StructField::new(k.as_str(), self.get_singleton_type_from_env())
});

let ty = PartiqlShape::new_struct(StructType::new(BTreeSet::from([
let ty = PartiqlShape::new_struct(StructType::new(IndexSet::from([
StructConstraint::Fields(fields.collect()),
])));

Expand Down Expand Up @@ -686,7 +686,7 @@ mod tests {

// Open Schema with `Strict` typing mode and `age` in nested attribute.
let details_fields = struct_fields![("age", int!())];
let details = r#struct![BTreeSet::from([details_fields])];
let details = r#struct![IndexSet::from([details_fields])];

assert_query_typing(
TypingMode::Strict,
Expand Down Expand Up @@ -731,7 +731,7 @@ mod tests {
fn simple_sfw_with_alias() {
// Open Schema with `Strict` typing mode and `age` in nested attribute.
let details_fields = struct_fields![("age", int!())];
let details = r#struct![BTreeSet::from([details_fields])];
let details = r#struct![IndexSet::from([details_fields])];

// TODO Revise this behavior once the following discussion is conclusive and spec. is
// in place: https://github.com/partiql/partiql-spec/discussions/65
Expand Down Expand Up @@ -775,7 +775,7 @@ mod tests {
#[test]
fn simple_sfw_err() {
// Closed Schema with `Strict` typing mode and `age` non-existent projection.
let err1 = r#"No Typing Information for SymbolPrimitive { value: "age", case: CaseInsensitive } in closed Schema Static(StaticType { ty: Struct(StructType { constraints: {Open(false), Fields({StructField { optional: false, name: "id", ty: Static(StaticType { ty: Int, nullable: true }) }, StructField { optional: false, name: "name", ty: Static(StaticType { ty: String, nullable: true }) }})} }), nullable: true })"#;
let err1 = r#"No Typing Information for SymbolPrimitive { value: "age", case: CaseInsensitive } in closed Schema Static(StaticType { ty: Struct(StructType { constraints: {Fields({StructField { optional: false, name: "id", ty: Static(StaticType { ty: Int, nullable: true }) }, StructField { optional: false, name: "name", ty: Static(StaticType { ty: String, nullable: true }) }}), Open(false)} }), nullable: true })"#;

assert_err(
assert_query_typing(
Expand All @@ -792,7 +792,7 @@ mod tests {
vec![],
),
vec![TypingError::TypeCheck(err1.to_string())],
Some(bag![r#struct![BTreeSet::from([StructConstraint::Fields(
Some(bag![r#struct![IndexSet::from([StructConstraint::Fields(
[
StructField::new("id", int!()),
StructField::new("name", str!()),
Expand All @@ -804,12 +804,12 @@ mod tests {

// Closed Schema with `Strict` typing mode and `bar` non-existent projection from closed nested `details`.
let details_fields = struct_fields![("age", int!())];
let details = r#struct![BTreeSet::from([
let details = r#struct![IndexSet::from([
details_fields,
StructConstraint::Open(false)
])];

let err1 = r#"No Typing Information for SymbolPrimitive { value: "details", case: CaseInsensitive } in closed Schema Static(StaticType { ty: Struct(StructType { constraints: {Open(false), Fields({StructField { optional: false, name: "age", ty: Static(StaticType { ty: Int, nullable: true }) }})} }), nullable: true })"#;
let err1 = r#"No Typing Information for SymbolPrimitive { value: "details", case: CaseInsensitive } in closed Schema Static(StaticType { ty: Struct(StructType { constraints: {Fields({StructField { optional: false, name: "age", ty: Static(StaticType { ty: Int, nullable: true }) }}), Open(false)} }), nullable: true })"#;
let err2 = r"Illegal Derive Type Undefined";

assert_err(
Expand All @@ -831,7 +831,7 @@ mod tests {
TypingError::TypeCheck(err1.to_string()),
TypingError::IllegalState(err2.to_string()),
],
Some(bag![r#struct![BTreeSet::from([StructConstraint::Fields(
Some(bag![r#struct![IndexSet::from([StructConstraint::Fields(
[
StructField::new("id", int!()),
StructField::new("name", str!()),
Expand Down Expand Up @@ -863,8 +863,8 @@ mod tests {
};
}

fn create_customer_schema(is_open: bool, fields: BTreeSet<StructField>) -> PartiqlShape {
bag![r#struct![BTreeSet::from([
fn create_customer_schema(is_open: bool, fields: IndexSet<StructField>) -> PartiqlShape {
bag![r#struct![IndexSet::from([
StructConstraint::Fields(fields),
StructConstraint::Open(is_open)
])]]
Expand All @@ -876,7 +876,7 @@ mod tests {
schema: PartiqlShape,
expected_fields: Vec<StructField>,
) -> Result<(), TypeErr> {
let expected_fields: BTreeSet<_> = expected_fields.into_iter().collect();
let expected_fields: IndexSet<_> = expected_fields.into_iter().collect();
let actual = type_query(mode, query, TypeEnvEntry::new("customers", &[], schema))?
.expect_static()?;

Expand All @@ -887,7 +887,7 @@ mod tests {

let f: Vec<_> = expected_fields
.iter()
.filter(|f| !fields.contains(f))
.filter(|f| !fields.contains(*f))
.collect();
assert!(f.is_empty());
assert_eq!(expected_fields.len(), fields.len());
Expand Down
4 changes: 4 additions & 0 deletions partiql-types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,9 @@ unicase = "2.6"
miette = { version ="7.2.*", features = ["fancy"] }
thiserror = "1.*"

indexmap = "2.2"

derivative = "2.2"

[dev-dependencies]
criterion = "0.4"
52 changes: 35 additions & 17 deletions partiql-types/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
#![deny(rust_2018_idioms)]
#![deny(clippy::all)]

use derivative::Derivative;
use indexmap::IndexSet;
use itertools::Itertools;
use miette::Diagnostic;
use std::collections::BTreeSet;
use std::fmt::{Debug, Display, Formatter};
use std::hash::Hash;
use std::hash::{Hash, Hasher};
use thiserror::Error;

#[derive(Debug, Clone, Eq, PartialEq, Hash, Error, Diagnostic)]
Expand All @@ -23,6 +24,16 @@ pub trait Type {}

impl Type for StaticType {}

fn indexset_hash<H, T>(set: &IndexSet<T>, state: &mut H)
where
H: Hasher,
T: Hash,
{
for val in set {
val.hash(state)
}
}

#[macro_export]
macro_rules! dynamic {
() => {
Expand Down Expand Up @@ -140,7 +151,7 @@ macro_rules! undefined {
}

/// Represents a PartiQL Shape
#[derive(Debug, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
// With this implementation `Dynamic` and `AnyOf` cannot have `nullability`; this does not mean their
// `null` value at runtime cannot belong to their domain.
// TODO adopt the correct model Pending PartiQL Types semantics finalization: https://github.com/partiql/partiql-lang/issues/18
Expand All @@ -151,13 +162,13 @@ pub enum PartiqlShape {
Undefined,
}

#[derive(Debug, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct StaticType {
ty: Static,
nullable: bool,
}

#[derive(Debug, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub enum Static {
// Scalar Types
Int,
Expand Down Expand Up @@ -506,15 +517,17 @@ impl Display for PartiqlShape {
}
}

#[derive(Hash, Eq, PartialEq, Debug, Clone, Ord, PartialOrd)]
#[derive(Derivative, Eq, Debug, Clone)]
#[derivative(PartialEq, Hash)]
#[allow(dead_code)]
pub struct AnyOf {
types: BTreeSet<PartiqlShape>,
#[derivative(Hash(hash_with = "indexset_hash"))]
types: IndexSet<PartiqlShape>,
}

impl AnyOf {
#[must_use]
pub const fn new(types: BTreeSet<PartiqlShape>) -> Self {
pub const fn new(types: IndexSet<PartiqlShape>) -> Self {
AnyOf { types }
}

Expand All @@ -531,15 +544,17 @@ impl FromIterator<PartiqlShape> for AnyOf {
}
}

#[derive(Debug, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]
#[derive(Derivative, Eq, Debug, Clone)]
#[derivative(PartialEq, Hash)]
#[allow(dead_code)]
pub struct StructType {
constraints: BTreeSet<StructConstraint>,
#[derivative(Hash(hash_with = "indexset_hash"))]
constraints: IndexSet<StructConstraint>,
}

impl StructType {
#[must_use]
pub fn new(constraints: BTreeSet<StructConstraint>) -> Self {
pub fn new(constraints: IndexSet<StructConstraint>) -> Self {
StructType { constraints }
}

Expand All @@ -551,7 +566,7 @@ impl StructType {
}

#[must_use]
pub fn fields(&self) -> BTreeSet<StructField> {
pub fn fields(&self) -> IndexSet<StructField> {
self.constraints
.iter()
.flat_map(|c| {
Expand All @@ -575,17 +590,18 @@ impl StructType {
}
}

#[derive(Debug, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
#[derive(Derivative, Eq, Debug, Clone)]
#[derivative(PartialEq, Hash)]
#[allow(dead_code)]
#[non_exhaustive]
pub enum StructConstraint {
Open(bool),
Ordered(bool),
DuplicateAttrs(bool),
Fields(BTreeSet<StructField>),
Fields(#[derivative(Hash(hash_with = "indexset_hash"))] IndexSet<StructField>),
}

#[derive(Debug, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
#[allow(dead_code)]
pub struct StructField {
optional: bool,
Expand Down Expand Up @@ -648,7 +664,8 @@ impl From<(&str, PartiqlShape, bool)> for StructField {
}
}

#[derive(Debug, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
#[derive(Derivative, Eq, Debug, Clone)]
#[derivative(PartialEq, Hash)]
#[allow(dead_code)]
pub struct BagType {
element_type: Box<PartiqlShape>,
Expand All @@ -671,7 +688,8 @@ impl BagType {
}
}

#[derive(Debug, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
#[derive(Derivative, Eq, Debug, Clone)]
#[derivative(PartialEq, Hash)]
#[allow(dead_code)]
pub struct ArrayType {
element_type: Box<PartiqlShape>,
Expand Down

1 comment on commit c91c5a4

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PartiQL (rust) Benchmark

Benchmark suite Current: c91c5a4 Previous: 1b22fb6 Ratio
arith_agg-avg 774408 ns/iter (± 10165) 775078 ns/iter (± 8705) 1.00
arith_agg-avg_distinct 861458 ns/iter (± 2997) 862652 ns/iter (± 6626) 1.00
arith_agg-count 820770 ns/iter (± 11846) 830846 ns/iter (± 7811) 0.99
arith_agg-count_distinct 864445 ns/iter (± 2515) 855765 ns/iter (± 2496) 1.01
arith_agg-min 823916 ns/iter (± 39653) 843309 ns/iter (± 3129) 0.98
arith_agg-min_distinct 861591 ns/iter (± 12547) 871031 ns/iter (± 11411) 0.99
arith_agg-max 832754 ns/iter (± 8948) 840150 ns/iter (± 3401) 0.99
arith_agg-max_distinct 873721 ns/iter (± 13503) 870457 ns/iter (± 1793) 1.00
arith_agg-sum 828017 ns/iter (± 3246) 833829 ns/iter (± 2930) 0.99
arith_agg-sum_distinct 864254 ns/iter (± 3454) 863680 ns/iter (± 2889) 1.00
arith_agg-avg-count-min-max-sum 969776 ns/iter (± 3143) 971146 ns/iter (± 7271) 1.00
arith_agg-avg-count-min-max-sum-group_by 1218335 ns/iter (± 82167) 1228144 ns/iter (± 5068) 0.99
arith_agg-avg-count-min-max-sum-group_by-group_as 1832486 ns/iter (± 13544) 1825750 ns/iter (± 17664) 1.00
arith_agg-avg_distinct-count_distinct-min_distinct-max_distinct-sum_distinct 1293061 ns/iter (± 12846) 1171155 ns/iter (± 7797) 1.10
arith_agg-avg_distinct-count_distinct-min_distinct-max_distinct-sum_distinct-group_by 1592716 ns/iter (± 16316) 1475823 ns/iter (± 17342) 1.08
arith_agg-avg_distinct-count_distinct-min_distinct-max_distinct-sum_distinct-group_by-group_as 2189565 ns/iter (± 8129) 2067865 ns/iter (± 12643) 1.06
parse-1 4635 ns/iter (± 92) 4149 ns/iter (± 24) 1.12
parse-15 43481 ns/iter (± 258) 39532 ns/iter (± 211) 1.10
parse-30 85428 ns/iter (± 349) 76008 ns/iter (± 562) 1.12
compile-1 4322 ns/iter (± 16) 4432 ns/iter (± 13) 0.98
compile-15 32995 ns/iter (± 153) 35043 ns/iter (± 104) 0.94
compile-30 69154 ns/iter (± 356) 71338 ns/iter (± 331) 0.97
plan-1 66539 ns/iter (± 327) 69546 ns/iter (± 702) 0.96
plan-15 1046584 ns/iter (± 28576) 1078664 ns/iter (± 86226) 0.97
plan-30 2098596 ns/iter (± 23287) 2153387 ns/iter (± 13338) 0.97
eval-1 13184607 ns/iter (± 397271) 12766894 ns/iter (± 60043) 1.03
eval-15 89455403 ns/iter (± 395638) 86920998 ns/iter (± 1270499) 1.03
eval-30 171764736 ns/iter (± 1745204) 167856739 ns/iter (± 489213) 1.02
join 9850 ns/iter (± 21) 9891 ns/iter (± 173) 1.00
simple 2462 ns/iter (± 12) 2474 ns/iter (± 27) 1.00
simple-no 425 ns/iter (± 1) 427 ns/iter (± 2) 1.00
numbers 57 ns/iter (± 0) 57 ns/iter (± 0) 1
parse-simple 573 ns/iter (± 2) 586 ns/iter (± 3) 0.98
parse-ion 1731 ns/iter (± 3) 1762 ns/iter (± 4) 0.98
parse-group 5897 ns/iter (± 24) 5772 ns/iter (± 35) 1.02
parse-complex 15037 ns/iter (± 67) 15496 ns/iter (± 188) 0.97
parse-complex-fexpr 22022 ns/iter (± 108) 22579 ns/iter (± 111) 0.98

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.