Skip to content

Commit

Permalink
Merge #194
Browse files Browse the repository at this point in the history
194: Remove the pervasive assumption that the start state is 0. r=ptersilie a=ltratt

There's no good reason for us exposing an assumption about which state in the 
stategraph/statetable is the start state, and it may reduce our flexibility later. This commit abstracts this assumption away such that the start state is specified concretely just once (in pager::pager_stategraph), thus giving us a single place to consider if we ever do want to change this assumption.

This addresses one small part of #191.

Co-authored-by: Laurence Tratt <[email protected]>
  • Loading branch information
bors[bot] and ltratt authored Jun 1, 2020
2 parents ce09c90 + 895cc3a commit 28c1bdf
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 43 deletions.
12 changes: 6 additions & 6 deletions lrpar/src/lib/mf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -900,8 +900,8 @@ mod test {
yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind},
Symbol,
};
use lrtable::{from_yacc, Minimiser, StIdx, StIdxStorageT};
use num_traits::{AsPrimitive, PrimInt, ToPrimitive, Unsigned, Zero};
use lrtable::{from_yacc, Minimiser};
use num_traits::{AsPrimitive, PrimInt, ToPrimitive, Unsigned};

use crate::{
lex::Lexeme,
Expand Down Expand Up @@ -947,7 +947,7 @@ A: '(' A ')'
let grm = YaccGrammar::new(YaccKind::Original(YaccOriginalActionKind::GenericParseTree), grms).unwrap();
let (sgraph, stable) = from_yacc(&grm, Minimiser::Pager).unwrap();
let d = Dist::new(&grm, &sgraph, &stable, |_| 1);
let s0 = StIdx::from(StIdxStorageT::zero());
let s0 = sgraph.start_state();
assert_eq!(d.dist(s0, grm.token_idx("(").unwrap()), 0);
assert_eq!(d.dist(s0, grm.token_idx(")").unwrap()), 1);
assert_eq!(d.dist(s0, grm.token_idx("a").unwrap()), 0);
Expand Down Expand Up @@ -1014,7 +1014,7 @@ U: 'B';
// This only tests a subset of all the states and distances but, I believe, it tests all
// more interesting edge cases that the example from the Kim/Yi paper.

let s0 = StIdx::from(StIdxStorageT::zero());
let s0 = sgraph.start_state();
assert_eq!(d.dist(s0, grm.token_idx("A").unwrap()), 0);
assert_eq!(d.dist(s0, grm.token_idx("B").unwrap()), 1);
assert_eq!(d.dist(s0, grm.token_idx("C").unwrap()), 2);
Expand Down Expand Up @@ -1072,7 +1072,7 @@ Factor: '(' Expr ')'
// This only tests a subset of all the states and distances but, I believe, it tests all
// more interesting edge cases that the example from the Kim/Yi paper.

let s0 = StIdx::from(StIdxStorageT::zero());
let s0 = sgraph.start_state();
assert_eq!(d.dist(s0, grm.token_idx("+").unwrap()), 1);
assert_eq!(d.dist(s0, grm.token_idx("*").unwrap()), 1);
assert_eq!(d.dist(s0, grm.token_idx("(").unwrap()), 0);
Expand Down Expand Up @@ -1134,7 +1134,7 @@ W: 'b' ;
let (sgraph, stable) = from_yacc(&grm, Minimiser::Pager).unwrap();
let d = Dist::new(&grm, &sgraph, &stable, |_| 1);

let s0 = StIdx::from(StIdxStorageT::zero());
let s0 = sgraph.start_state();
assert_eq!(d.dist(s0, grm.token_idx("a").unwrap()), 0);
assert_eq!(d.dist(s0, grm.token_idx("b").unwrap()), 1);
assert_eq!(d.dist(s0, grm.eof_token_idx()), 0);
Expand Down
10 changes: 5 additions & 5 deletions lrpar/src/lib/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use std::{

use cactus::Cactus;
use cfgrammar::{yacc::YaccGrammar, RIdx, TIdx};
use lrtable::{Action, StIdx, StIdxStorageT, StateGraph, StateTable};
use num_traits::{AsPrimitive, PrimInt, Unsigned, Zero};
use lrtable::{Action, StIdx, StateGraph, StateTable};
use num_traits::{AsPrimitive, PrimInt, Unsigned};

use crate::{
cpctplus,
Expand Down Expand Up @@ -125,7 +125,7 @@ where
lexemes,
actions: actions.as_slice(),
};
let mut pstack = vec![StIdx::from(StIdxStorageT::zero())];
let mut pstack = vec![sgraph.start_state()];
let mut astack = Vec::new();
let mut errors = Vec::new();
let mut spans = Vec::new();
Expand Down Expand Up @@ -180,7 +180,7 @@ where
lexemes,
actions: actions.as_slice(),
};
let mut pstack = vec![StIdx::from(StIdxStorageT::zero())];
let mut pstack = vec![sgraph.start_state()];
let mut astack = Vec::new();
let mut errors = Vec::new();
let mut spans = Vec::new();
Expand Down Expand Up @@ -231,7 +231,7 @@ where
lexemes,
actions,
};
let mut pstack = vec![StIdx::from(StIdxStorageT::zero())];
let mut pstack = vec![sgraph.start_state()];
let mut astack = Vec::new();
let mut errors = Vec::new();
let mut spans = Vec::new();
Expand Down
49 changes: 26 additions & 23 deletions lrtable/src/lib/pager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::{
};

use cfgrammar::{yacc::YaccGrammar, SIdx, Symbol};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use num_traits::{AsPrimitive, PrimInt, Unsigned, Zero};
use vob::Vob;

use crate::{itemset::Itemset, stategraph::StateGraph, StIdx, StIdxStorageT};
Expand Down Expand Up @@ -130,6 +130,7 @@ where
let mut core_states = Vec::new();
let mut edges: Vec<HashMap<Symbol<StorageT>, StIdx>> = Vec::new();

let start_state = StIdx::from(StIdxStorageT::zero());
let mut state0 = Itemset::new(grm);
let mut ctx = Vob::from_elem(usize::from(grm.tokens_len()), false);
ctx.set(usize::from(grm.eof_token_idx()), true);
Expand Down Expand Up @@ -286,15 +287,17 @@ where
.drain(..)
.zip(closed_states.drain(..).map(Option::unwrap))
.collect(),
start_state,
edges,
);
StateGraph::new(gc_states, gc_edges)
StateGraph::new(gc_states, start_state, gc_edges)
}

/// Garbage collect `zip_states` (of `(core_states, closed_state)`) and `edges`. Returns a new pair
/// with unused states and their corresponding edges removed.
fn gc<StorageT: Eq + Hash + PrimInt>(
mut states: Vec<(Itemset<StorageT>, Itemset<StorageT>)>,
start_state: StIdx,
mut edges: Vec<HashMap<Symbol<StorageT>, StIdx>>,
) -> (
Vec<(Itemset<StorageT>, Itemset<StorageT>)>,
Expand All @@ -303,7 +306,7 @@ fn gc<StorageT: Eq + Hash + PrimInt>(
// First of all, do a simple pass over all states. All state indexes reachable from the
// start state will be inserted into the 'seen' set.
let mut todo = HashSet::new();
todo.insert(StIdx(0));
todo.insert(start_state);
let mut seen = HashSet::new();
while !todo.is_empty() {
// XXX This is the clumsy way we're forced to do what we'd prefer to be:
Expand Down Expand Up @@ -448,12 +451,12 @@ mod test {
assert_eq!(sg.all_states_len(), StIdx(10));
assert_eq!(sg.all_edges_len(), 10);

assert_eq!(sg.closed_state(StIdx(0)).items.len(), 3);
state_exists(&grm, &sg.closed_state(StIdx(0)), "^", 0, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "S", 0, SIdx(0), vec!["$", "b"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "S", 1, SIdx(0), vec!["$", "b"]);
assert_eq!(sg.closed_state(sg.start_state()).items.len(), 3);
state_exists(&grm, &sg.closed_state(sg.start_state()), "^", 0, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "S", 0, SIdx(0), vec!["$", "b"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "S", 1, SIdx(0), vec!["$", "b"]);

let s1 = sg.edge(StIdx(0), Symbol::Rule(grm.rule_idx("S").unwrap())).unwrap();
let s1 = sg.edge(sg.start_state(), Symbol::Rule(grm.rule_idx("S").unwrap())).unwrap();
assert_eq!(sg.closed_state(s1).items.len(), 2);
state_exists(&grm, &sg.closed_state(s1), "^", 0, SIdx(1), vec!["$"]);
state_exists(&grm, &sg.closed_state(s1), "S", 0, SIdx(1), vec!["$", "b"]);
Expand All @@ -462,7 +465,7 @@ mod test {
assert_eq!(sg.closed_state(s2).items.len(), 1);
state_exists(&grm, &sg.closed_state(s2), "S", 0, SIdx(2), vec!["$", "b"]);

let s3 = sg.edge(StIdx(0), Symbol::Token(grm.token_idx("b").unwrap())).unwrap();
let s3 = sg.edge(sg.start_state(), Symbol::Token(grm.token_idx("b").unwrap())).unwrap();
assert_eq!(sg.closed_state(s3).items.len(), 4);
state_exists(&grm, &sg.closed_state(s3), "S", 1, SIdx(1), vec!["$", "b", "c"]);
state_exists(&grm, &sg.closed_state(s3), "A", 0, SIdx(0), vec!["a"]);
Expand Down Expand Up @@ -529,16 +532,16 @@ mod test {
assert_eq!(sg.all_edges_len(), 27);

// State 0
assert_eq!(sg.closed_state(StIdx(0)).items.len(), 7);
state_exists(&grm, &sg.closed_state(StIdx(0)), "^", 0, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 0, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 1, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 2, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 3, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 4, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 5, SIdx(0), vec!["$"]);

let s1 = sg.edge(StIdx(0), Symbol::Token(grm.token_idx("a").unwrap())).unwrap();
assert_eq!(sg.closed_state(sg.start_state()).items.len(), 7);
state_exists(&grm, &sg.closed_state(sg.start_state()), "^", 0, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 0, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 1, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 2, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 3, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 4, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 5, SIdx(0), vec!["$"]);

let s1 = sg.edge(sg.start_state(), Symbol::Token(grm.token_idx("a").unwrap())).unwrap();
assert_eq!(sg.closed_state(s1).items.len(), 7);
state_exists(&grm, &sg.closed_state(s1), "X", 0, SIdx(1), vec!["a", "d", "e", "$"]);
state_exists(&grm, &sg.closed_state(s1), "X", 1, SIdx(1), vec!["a", "d", "e", "$"]);
Expand All @@ -548,7 +551,7 @@ mod test {
state_exists(&grm, &sg.closed_state(s1), "Z", 0, SIdx(0), vec!["c"]);
state_exists(&grm, &sg.closed_state(s1), "T", 0, SIdx(0), vec!["a", "d", "e", "$"]);

let s7 = sg.edge(StIdx(0), Symbol::Token(grm.token_idx("b").unwrap())).unwrap();
let s7 = sg.edge(sg.start_state(), Symbol::Token(grm.token_idx("b").unwrap())).unwrap();
assert_eq!(sg.closed_state(s7).items.len(), 7);
state_exists(&grm, &sg.closed_state(s7), "X", 3, SIdx(1), vec!["a", "d", "e", "$"]);
state_exists(&grm, &sg.closed_state(s7), "X", 4, SIdx(1), vec!["a", "d", "e", "$"]);
Expand Down Expand Up @@ -609,7 +612,7 @@ mod test {
// Ommitted successors from the graph in Fig.3

// X-successor of S0
let s0x = sg.edge(StIdx(0), Symbol::Rule(grm.rule_idx("X").unwrap())).unwrap();
let s0x = sg.edge(sg.start_state(), Symbol::Rule(grm.rule_idx("X").unwrap())).unwrap();
state_exists(&grm, &sg.closed_state(s0x), "^", 0, SIdx(1), vec!["$"]);

// Y-successor of S1 (and it's d-successor)
Expand Down Expand Up @@ -678,7 +681,7 @@ mod test {
let sg = pager_stategraph(&grm);

// State 0
assert_eq!(sg.core_state(StIdx(0)).items.len(), 1);
state_exists(&grm, &sg.core_state(StIdx(0)), "^", 0, SIdx(0), vec!["$"]);
assert_eq!(sg.core_state(sg.start_state()).items.len(), 1);
state_exists(&grm, &sg.core_state(sg.start_state()), "^", 0, SIdx(0), vec!["$"]);
}
}
15 changes: 13 additions & 2 deletions lrtable/src/lib/stategraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use crate::{itemset::Itemset, StIdx, StIdxStorageT};
pub struct StateGraph<StorageT: Eq + Hash> {
/// A vector of `(core_states, closed_states)` tuples.
states: Vec<(Itemset<StorageT>, Itemset<StorageT>)>,
start_state: StIdx,
/// For each state in `states`, edges is a hashmap from symbols to state offsets.
edges: Vec<HashMap<Symbol<StorageT>, StIdx>>,
}
Expand All @@ -23,12 +24,22 @@ where
{
pub(crate) fn new(
states: Vec<(Itemset<StorageT>, Itemset<StorageT>)>,
start_state: StIdx,
edges: Vec<HashMap<Symbol<StorageT>, StIdx>>,
) -> Self {
// states.len() needs to fit into StIdxStorageT; however we don't need to worry about
// edges.len() (which merely needs to fit in a usize)
assert!(StIdxStorageT::try_from(states.len()).is_ok());
StateGraph { states, edges }
StateGraph {
states,
start_state,
edges,
}
}

/// Return this state graph's start state.
pub fn start_state(&self) -> StIdx {
self.start_state
}

/// Return an iterator which produces (in order from `0..self.rules_len()`) all this
Expand Down Expand Up @@ -113,7 +124,7 @@ where

let mut o = String::new();
for (stidx, &(ref core_st, ref closed_st)) in self.iter_stidxs().zip(self.states.iter()) {
if StIdxStorageT::from(stidx) > 0 {
if stidx != self.start_state {
o.push_str(&"\n");
}
{
Expand Down
21 changes: 14 additions & 7 deletions lrtable/src/lib/statetable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ pub struct StateTable<StorageT> {
actions: SparseVec<usize>,
state_actions: Vob,
gotos: SparseVec<usize>,
start_state: StIdx,
core_reduces: Vob,
state_shifts: Vob,
reduce_states: Vob,
Expand Down Expand Up @@ -342,6 +343,7 @@ where
actions: actions_sv,
state_actions,
gotos: gotos_sv,
start_state: sg.start_state(),
state_shifts,
core_reduces,
reduce_states,
Expand Down Expand Up @@ -454,6 +456,11 @@ where
}
}

/// Return this state table's start state.
pub fn start_state(&self) -> StIdx {
self.start_state
}

/// Return a struct containing all conflicts or `None` if there aren't any.
pub fn conflicts(&self) -> Option<&Conflicts<StorageT>> {
self.conflicts.as_ref()
Expand Down Expand Up @@ -593,7 +600,7 @@ mod test {
let sg = pager_stategraph(&grm);
assert_eq!(sg.all_states_len(), StIdx(9));

let s0 = StIdx(0);
let s0 = sg.start_state();
let s1 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Expr").unwrap())).unwrap();
let s2 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Term").unwrap())).unwrap();
let s3 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Factor").unwrap())).unwrap();
Expand Down Expand Up @@ -676,7 +683,7 @@ mod test {
assert_eq!(st.actions.len(), len);

// We only extract the states necessary to test those rules affected by the reduce/reduce.
let s0 = StIdx(0);
let s0 = sg.start_state();
let s4 = sg.edge(s0, Symbol::Token(grm.token_idx("a").unwrap())).unwrap();

assert_eq!(st.action(s4, grm.token_idx("x").unwrap()),
Expand All @@ -698,7 +705,7 @@ mod test {
let len = usize::from(grm.tokens_len()) * usize::from(sg.all_states_len());
assert_eq!(st.actions.len(), len);

let s0 = StIdx(0);
let s0 = sg.start_state();
let s1 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Expr").unwrap())).unwrap();
let s3 = sg.edge(s1, Symbol::Token(grm.token_idx("+").unwrap())).unwrap();
let s4 = sg.edge(s1, Symbol::Token(grm.token_idx("*").unwrap())).unwrap();
Expand Down Expand Up @@ -728,7 +735,7 @@ mod test {
").unwrap();
let sg = pager_stategraph(&grm);
let st = StateTable::new(&grm, &sg).unwrap();
let s0 = StIdx(0);
let s0 = sg.start_state();
let s1 = sg.edge(s0, Symbol::Token(grm.token_idx("a").unwrap())).unwrap();
let s2 = sg.edge(s0, Symbol::Token(grm.token_idx("b").unwrap())).unwrap();

Expand All @@ -755,7 +762,7 @@ mod test {
let len = usize::from(grm.tokens_len()) * usize::from(sg.all_states_len());
assert_eq!(st.actions.len(), len);

let s0 = StIdx(0);
let s0 = sg.start_state();
let s1 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Expr").unwrap())).unwrap();
let s3 = sg.edge(s1, Symbol::Token(grm.token_idx("+").unwrap())).unwrap();
let s4 = sg.edge(s1, Symbol::Token(grm.token_idx("*").unwrap())).unwrap();
Expand Down Expand Up @@ -796,7 +803,7 @@ mod test {
let len = usize::from(grm.tokens_len()) * usize::from(sg.all_states_len());
assert_eq!(st.actions.len(), len);

let s0 = StIdx(0);
let s0 = sg.start_state();
let s1 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Expr").unwrap())).unwrap();
let s3 = sg.edge(s1, Symbol::Token(grm.token_idx("+").unwrap())).unwrap();
let s4 = sg.edge(s1, Symbol::Token(grm.token_idx("*").unwrap())).unwrap();
Expand Down Expand Up @@ -854,7 +861,7 @@ mod test {
let len = usize::from(grm.tokens_len()) * usize::from(sg.all_states_len());
assert_eq!(st.actions.len(), len);

let s0 = StIdx(0);
let s0 = sg.start_state();
let s1 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Expr").unwrap())).unwrap();
let s3 = sg.edge(s1, Symbol::Token(grm.token_idx("+").unwrap())).unwrap();
let s4 = sg.edge(s1, Symbol::Token(grm.token_idx("*").unwrap())).unwrap();
Expand Down

0 comments on commit 28c1bdf

Please sign in to comment.