Skip to content

Commit

Permalink
Merge pull request #17 from cherryramatisdev/feature/multiple-parameters
Browse files Browse the repository at this point in the history
Feature: Multiple Parameters
  • Loading branch information
cherryramatisdev authored Dec 29, 2023
2 parents eff8c8d + 2ed1665 commit e9cc69c
Show file tree
Hide file tree
Showing 4 changed files with 249 additions and 79 deletions.
25 changes: 21 additions & 4 deletions docs/language.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
### 08/12/2023

The initial idea here is to make the first "language" that comes to my mind in a javascripty way
The initial idea here is to make the first "language" that comes to my mind in a
javascripty way

```
.letter(upcase).glob(rest=True).whitespace().number() => [A-Z].*\s[0-9]
.letters(upcase).glob(rest=True).whitespace().numbers() => [A-Z]+.*\s[0-9]+
.group(letters(upcase).glob(rest=True)).whitespace().group(numbers()) => ([A-Z]+.*)\s([0-9]+)
.group(letters(upcase).glob(rest=True)).whitespace().group(numbers()) =>
([A-Z]+.*)\s([0-9]+)
```

### 09/12/2023
Expand All @@ -38,8 +40,10 @@ because we can in the future support more data types, not only booleans.

```
letter(upcase=True) | glob(rest=True) | whitespace | number => [A-Z].*\s[0-9]
letters(upcase=True) | glob(rest=True) | whitespace | numbers => [A-Z]+.*\s[0-9]+
group(letters(upcase=True) | glob(rest=True)) | whitespace | group(numbers) => ([A-Z]+.*)\s([0-9]+)
letters(upcase=True) | glob(rest=True) | whitespace | numbers =>
[A-Z]+.*\s[0-9]+
group(letters(upcase=True) | glob(rest=True)) | whitespace | group(numbers) =>
([A-Z]+.*)\s([0-9]+)
```

**Possibilities for a group**:
Expand All @@ -55,3 +59,16 @@ group(letter | whitespace)
```
group(letter | glob(rest=True))
```

### 29/12/2023

For every identifier like `numbers`, `letters`, `letter`, `number`, we should be
able to define a selector that forces the query to match **exactly** the
specified number of characters. This can be achieved by using the `{3}`
quantifier from regex.

```
letters(upcase=True, select=3) => [A-Z]{3}
letters(upcase=True, select=3) | numbers(select=4) => [A-Z]{3}[0-9]{4}
group(letters(upcase=True, select=3)) | numbers(select=4) => ([A-Z]{3})[0-9]{4}
```
188 changes: 122 additions & 66 deletions regexer/src/lexer/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,20 @@ pub enum Casing {

#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Function {
Letter { casing: Casing },
Letters { casing: Casing },
Glob { rest: bool },
Letter {
casing: Option<Casing>,
select: Option<u32>,
},
Letters {
casing: Option<Casing>,
},
Glob {
rest: bool,
},
Whitespace,
Number,
Number {
select: Option<u32>,
},
Numbers,
Group(Box<Vec<Function>>),
}
Expand Down Expand Up @@ -51,6 +60,63 @@ fn slice_until_end_group<'a>(
return (&[], 0);
}

/// Looks up an integer-valued parameter inside a function's token slice.
///
/// Scans every run of three consecutive tokens for the shape
/// `Parameter(name) Equal Int(value)` where `name` equals `parameter`, and
/// returns the integer of the first such run. Returns `None` when no run
/// matches.
fn find_int_parameter(tokens: &[tokens::Token], parameter: String) -> Option<u32> {
    tokens.windows(3).find_map(|triple| match triple {
        [tokens::Token::Parameter(name), tokens::Token::Equal, tokens::Token::Int(value)]
            if *name == parameter =>
        {
            Some(*value)
        }
        _ => None,
    })
}

/// Resolves a boolean casing parameter inside a function's token slice.
///
/// Scans every run of three consecutive tokens for `Parameter(name) Equal True`
/// and `Parameter(name) Equal False` where `name` equals `parameter`.
///
/// Returns `Some(Casing::Upcase)` when only the `True` form is present,
/// `Some(Casing::Downcase)` when only the `False` form is present, and `None`
/// when neither form or both forms appear.
fn find_casing_parameter(tokens: &[tokens::Token], parameter: String) -> Option<Casing> {
    let mut saw_true = false;
    let mut saw_false = false;

    for triple in tokens.windows(3) {
        match triple {
            [tokens::Token::Parameter(name), tokens::Token::Equal, tokens::Token::True]
                if *name == parameter =>
            {
                saw_true = true;
            }
            [tokens::Token::Parameter(name), tokens::Token::Equal, tokens::Token::False]
                if *name == parameter =>
            {
                saw_false = true;
            }
            _ => {}
        }
    }

    // Conflicting assignments (both True and False) are treated the same as
    // no assignment at all.
    match (saw_true, saw_false) {
        (true, false) => Some(Casing::Upcase),
        (false, true) => Some(Casing::Downcase),
        _ => None,
    }
}

// TODO: define a `consume` function to not keep repeating the peeks_tokens.next() all the time
pub fn parse(tokens: Vec<tokens::Token>) -> Vec<Function> {
let mut peeks_tokens = tokens.clone().into_iter().enumerate().peekable();
Expand All @@ -59,70 +125,42 @@ pub fn parse(tokens: Vec<tokens::Token>) -> Vec<Function> {
while let Some((index, token)) = peeks_tokens.peek() {
if let tokens::Token::Identifier(identifier) = token {
match identifier.as_str() {
"letter" => {
"letter" | "letters" => {
let (func_tokens, right_pos_idx) = slice_until_end_func(&tokens, &index);
if func_tokens
== [
tokens::Token::LeftParen,
tokens::Token::Parameter("upcase".to_string()),
tokens::Token::Equal,
tokens::Token::True,
tokens::Token::RightParen,
]
{

let casing = find_casing_parameter(func_tokens, "upcase".to_string());
let select = find_int_parameter(func_tokens, "select".to_string());

if identifier == "letter" {
functions.push(Function::Letter {
casing: Casing::Upcase,
casing: casing.clone(),
select: select.clone(),
});
}

if func_tokens
== [
tokens::Token::LeftParen,
tokens::Token::Parameter("upcase".to_string()),
tokens::Token::Equal,
tokens::Token::False,
tokens::Token::RightParen,
]
{
functions.push(Function::Letter {
casing: Casing::Downcase,
if identifier == "letters" {
functions.push(Function::Letters {
casing: casing.clone(),
});
}

peeks_tokens.nth(right_pos_idx + 1);
}
"letters" => {
"number" => {
let (func_tokens, right_pos_idx) = slice_until_end_func(&tokens, &index);
if func_tokens
== [
tokens::Token::LeftParen,
tokens::Token::Parameter("upcase".to_string()),
tokens::Token::Equal,
tokens::Token::True,
tokens::Token::RightParen,
]
{
functions.push(Function::Letters {
casing: Casing::Upcase,
});
}

if func_tokens
== [
tokens::Token::LeftParen,
tokens::Token::Parameter("upcase".to_string()),
tokens::Token::Equal,
tokens::Token::False,
tokens::Token::RightParen,
]
{
functions.push(Function::Letters {
casing: Casing::Downcase,
});
}
let select = find_int_parameter(func_tokens, "select".to_string());

functions.push(Function::Number {
select: select.clone(),
});

peeks_tokens.nth(right_pos_idx + 1);
}
"numbers" => {
peeks_tokens.next();
functions.push(Function::Numbers)
}
"glob" => {
let (func_tokens, right_pos_idx) = slice_until_end_func(&tokens, &index);
if func_tokens
Expand Down Expand Up @@ -155,14 +193,6 @@ pub fn parse(tokens: Vec<tokens::Token>) -> Vec<Function> {
peeks_tokens.next();
functions.push(Function::Whitespace)
}
"number" => {
peeks_tokens.next();
functions.push(Function::Number)
}
"numbers" => {
peeks_tokens.next();
functions.push(Function::Numbers)
}
"group" => {
let (group_tokens, right_pos_idx) = slice_until_end_group(&tokens, &index);

Expand Down Expand Up @@ -200,7 +230,7 @@ mod tests {
vec![
Function::Group(Box::new(vec![
Function::Letters {
casing: Casing::Upcase
casing: Some(Casing::Upcase),
},
Function::Glob { rest: true }
])),
Expand All @@ -220,15 +250,17 @@ mod tests {
parse(tokens::tokenize(input)),
vec![
Function::Letter {
casing: Casing::Upcase
casing: Some(Casing::Upcase),
select: None,
},
Function::Letter {
casing: Casing::Downcase
casing: Some(Casing::Downcase),
select: None
},
Function::Glob { rest: true },
Function::Glob { rest: false },
Function::Whitespace,
Function::Number,
Function::Number { select: None },
]
);

Expand All @@ -238,12 +270,36 @@ mod tests {
parse(tokens::tokenize(input)),
vec![
Function::Letters {
casing: Casing::Upcase
casing: Some(Casing::Upcase),
},
Function::Glob { rest: true },
Function::Whitespace,
Function::Numbers,
]
);
}

#[test]
fn test_select_parameters() {
    // Exercises `select` on `letter` and `number`, including the case where
    // `select` is written before `upcase` in the parameter list.
    let source = String::from(
        "letter(select=3, upcase=True) | letters(upcase=False) | glob(rest=True) | glob(rest=False) | whitespace | number(select=99)",
    );

    let parsed = parse(tokens::tokenize(source));

    let expected = vec![
        Function::Letter {
            casing: Some(Casing::Upcase),
            select: Some(3),
        },
        Function::Letters {
            casing: Some(Casing::Downcase),
        },
        Function::Glob { rest: true },
        Function::Glob { rest: false },
        Function::Whitespace,
        Function::Number { select: Some(99) },
    ];

    assert_eq!(parsed, expected);
}
}
Loading

0 comments on commit e9cc69c

Please sign in to comment.