Skip to content

Commit

Permalink
feat: Support GROUP BY one or more expression
Browse files Browse the repository at this point in the history
  • Loading branch information
AmrDeveloper committed Jun 15, 2024
1 parent bc7b841 commit ca81fca
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 34 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ SELECT @arr[1], @arr[2], @arr[3], ARRAY_LENGTH(@arr);
SELECT @arr[1:2], @arr[2:], @arr[:2];

SELECT DISTINCT title AS tt FROM commits
SELECT name, COUNT(name) AS commit_num FROM commits GROUP BY name ORDER BY commit_num DESC LIMIT 10
SELECT name, COUNT(name) AS commit_num FROM commits GROUP BY name, email ORDER BY commit_num DESC LIMIT 10
SELECT commit_count FROM branches WHERE commit_count BETWEEN 0 .. 10

SELECT * FROM refs WHERE type = "branch"
Expand Down
2 changes: 1 addition & 1 deletion crates/gitql-ast/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ impl Statement for OrderByStatement {
}

pub struct GroupByStatement {
pub field_name: String,
pub values: Vec<Box<dyn Expression>>,
}

impl Statement for GroupByStatement {
Expand Down
36 changes: 22 additions & 14 deletions crates/gitql-engine/src/engine_executor.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
use std::cmp;
use std::cmp::Ordering;
use std::collections::hash_map::Entry::Vacant;
use std::collections::HashMap;
use std::hash::DefaultHasher;
use std::hash::Hash;
use std::hash::Hasher;

use gitql_ast::statement::AggregateValue;
use gitql_ast::statement::AggregationsStatement;
Expand Down Expand Up @@ -88,7 +92,7 @@ pub fn execute_statement(
.as_any()
.downcast_ref::<GroupByStatement>()
.unwrap();
execute_group_by_statement(statement, gitql_object)
execute_group_by_statement(env, statement, gitql_object)
}
AggregateFunction => {
let statement = statement
Expand Down Expand Up @@ -338,6 +342,7 @@ fn execute_order_by_statement(
}

fn execute_group_by_statement(
env: &mut Environment,
statement: &GroupByStatement,
gitql_object: &mut GitQLObject,
) -> Result<(), String> {
Expand All @@ -351,31 +356,34 @@ fn execute_group_by_statement(
}

// Mapping each unique value to its group index
let mut groups_map: HashMap<String, usize> = HashMap::new();
let mut groups_map: HashMap<u64, usize> = HashMap::new();

// Track current group index
let mut next_group_index = 0;
let values_count = statement.values.len();

for object in main_group.rows.into_iter() {
let field_index = gitql_object
.titles
.iter()
.position(|r| r.eq(&statement.field_name))
.unwrap();
let mut row_values: Vec<String> = Vec::with_capacity(values_count);

let field_value = &object.values[field_index];
for expression in &statement.values {
let value = evaluate_expression(env, expression, &gitql_object.titles, &object.values)?;
row_values.push(value.to_string());
}

// If there is an existing group for this value, append current object to it
if let std::collections::hash_map::Entry::Vacant(e) =
groups_map.entry(field_value.as_text())
{
// Compute the hash for the row of values
let mut hasher = DefaultHasher::new();
row_values.hash(&mut hasher);
let values_hash = hasher.finish();

// Push a new group for this unique value and update the next index
if let Vacant(e) = groups_map.entry(values_hash) {
e.insert(next_group_index);
next_group_index += 1;
gitql_object.groups.push(Group { rows: vec![object] });
}
// Push a new group for this unique value and update the next index
// If there is an existing group for this value, append current object to it
else {
let index = *groups_map.get(&field_value.as_text()).unwrap();
let index = *groups_map.get(&values_hash).unwrap();
let target_group = &mut gitql_object.groups[index];
target_group.rows.push(object);
}
Expand Down
33 changes: 15 additions & 18 deletions crates/gitql-parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -691,7 +691,9 @@ fn parse_group_by_statement(
tokens: &[Token],
position: &mut usize,
) -> Result<Box<dyn Statement>, Box<Diagnostic>> {
// Consume `Group` keyword
*position += 1;

if *position >= tokens.len() || tokens[*position].kind != TokenKind::By {
return Err(
Diagnostic::error("Expect keyword `by` after keyword `group`")
Expand All @@ -700,31 +702,26 @@ fn parse_group_by_statement(
.as_boxed(),
);
}

// Consume `By` keyword
*position += 1;
if *position >= tokens.len() || tokens[*position].kind != TokenKind::Symbol {
return Err(Diagnostic::error("Expect field name after `group by`")
.with_location(get_safe_location(tokens, *position - 1))
.as_boxed());
}

let field_name = tokens[*position].literal.to_string();
if !context.selected_fields.contains(&field_name) {
context.hidden_selections.push(field_name.to_string());
}
// Parse one or more expressions
let mut values: Vec<Box<dyn Expression>> = vec![];
while *position < tokens.len() {
values.push(parse_expression(context, env, tokens, position)?);

*position += 1;
if *position < tokens.len() && tokens[*position].kind == TokenKind::Comma {
// Consume Comma `,`
*position += 1;
continue;
}

if !env.contains(&field_name) {
return Err(
Diagnostic::error("Current table not contains field with this name")
.add_help("Check the documentations to see available fields for each tables")
.with_location(get_safe_location(tokens, *position - 1))
.as_boxed(),
);
break;
}

context.has_group_by_statement = true;
Ok(Box::new(GroupByStatement { field_name }))
Ok(Box::new(GroupByStatement { values }))
}

fn parse_having_statement(
Expand Down
4 changes: 4 additions & 0 deletions docs/statement/group_by.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
### Group By Statement

The `GROUP BY` statement groups rows that have the same values into summary rows, like "find the number of commits for each username or email".

```SQL
SELECT * FROM commits GROUP BY name
SELECT * FROM commits GROUP BY name, email
SELECT * FROM commits GROUP BY LEN(name)
```

0 comments on commit ca81fca

Please sign in to comment.