Skip to content

Commit

Permalink
Remove the pervasive assumption that the start state is 0.
Browse files Browse the repository at this point in the history
There's no good reason for us exposing an assumption about which state in the
stategraph/statetable is the start state, and it may reduce our flexibility
later. This commit abstracts this assumption away such that the start state is
specified concretely just once (in pager::pager_stategraph), thus giving us a
single place to consider if we ever do want to change this assumption.
  • Loading branch information
ltratt committed May 30, 2020
1 parent 59965f0 commit 895cc3a
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 43 deletions.
12 changes: 6 additions & 6 deletions lrpar/src/lib/mf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -900,8 +900,8 @@ mod test {
yacc::{YaccGrammar, YaccKind, YaccOriginalActionKind},
Symbol,
};
use lrtable::{from_yacc, Minimiser, StIdx, StIdxStorageT};
use num_traits::{AsPrimitive, PrimInt, ToPrimitive, Unsigned, Zero};
use lrtable::{from_yacc, Minimiser};
use num_traits::{AsPrimitive, PrimInt, ToPrimitive, Unsigned};

use crate::{
lex::Lexeme,
Expand Down Expand Up @@ -947,7 +947,7 @@ A: '(' A ')'
let grm = YaccGrammar::new(YaccKind::Original(YaccOriginalActionKind::GenericParseTree), grms).unwrap();
let (sgraph, stable) = from_yacc(&grm, Minimiser::Pager).unwrap();
let d = Dist::new(&grm, &sgraph, &stable, |_| 1);
let s0 = StIdx::from(StIdxStorageT::zero());
let s0 = sgraph.start_state();
assert_eq!(d.dist(s0, grm.token_idx("(").unwrap()), 0);
assert_eq!(d.dist(s0, grm.token_idx(")").unwrap()), 1);
assert_eq!(d.dist(s0, grm.token_idx("a").unwrap()), 0);
Expand Down Expand Up @@ -1014,7 +1014,7 @@ U: 'B';
// This only tests a subset of all the states and distances but, I believe, it tests all
// more interesting edge cases that the example from the Kim/Yi paper.

let s0 = StIdx::from(StIdxStorageT::zero());
let s0 = sgraph.start_state();
assert_eq!(d.dist(s0, grm.token_idx("A").unwrap()), 0);
assert_eq!(d.dist(s0, grm.token_idx("B").unwrap()), 1);
assert_eq!(d.dist(s0, grm.token_idx("C").unwrap()), 2);
Expand Down Expand Up @@ -1072,7 +1072,7 @@ Factor: '(' Expr ')'
// This only tests a subset of all the states and distances but, I believe, it tests all
// more interesting edge cases that the example from the Kim/Yi paper.

let s0 = StIdx::from(StIdxStorageT::zero());
let s0 = sgraph.start_state();
assert_eq!(d.dist(s0, grm.token_idx("+").unwrap()), 1);
assert_eq!(d.dist(s0, grm.token_idx("*").unwrap()), 1);
assert_eq!(d.dist(s0, grm.token_idx("(").unwrap()), 0);
Expand Down Expand Up @@ -1134,7 +1134,7 @@ W: 'b' ;
let (sgraph, stable) = from_yacc(&grm, Minimiser::Pager).unwrap();
let d = Dist::new(&grm, &sgraph, &stable, |_| 1);

let s0 = StIdx::from(StIdxStorageT::zero());
let s0 = sgraph.start_state();
assert_eq!(d.dist(s0, grm.token_idx("a").unwrap()), 0);
assert_eq!(d.dist(s0, grm.token_idx("b").unwrap()), 1);
assert_eq!(d.dist(s0, grm.eof_token_idx()), 0);
Expand Down
10 changes: 5 additions & 5 deletions lrpar/src/lib/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use std::{

use cactus::Cactus;
use cfgrammar::{yacc::YaccGrammar, RIdx, TIdx};
use lrtable::{Action, StIdx, StIdxStorageT, StateGraph, StateTable};
use num_traits::{AsPrimitive, PrimInt, Unsigned, Zero};
use lrtable::{Action, StIdx, StateGraph, StateTable};
use num_traits::{AsPrimitive, PrimInt, Unsigned};

use crate::{
cpctplus,
Expand Down Expand Up @@ -125,7 +125,7 @@ where
lexemes,
actions: actions.as_slice(),
};
let mut pstack = vec![StIdx::from(StIdxStorageT::zero())];
let mut pstack = vec![sgraph.start_state()];
let mut astack = Vec::new();
let mut errors = Vec::new();
let mut spans = Vec::new();
Expand Down Expand Up @@ -180,7 +180,7 @@ where
lexemes,
actions: actions.as_slice(),
};
let mut pstack = vec![StIdx::from(StIdxStorageT::zero())];
let mut pstack = vec![sgraph.start_state()];
let mut astack = Vec::new();
let mut errors = Vec::new();
let mut spans = Vec::new();
Expand Down Expand Up @@ -231,7 +231,7 @@ where
lexemes,
actions,
};
let mut pstack = vec![StIdx::from(StIdxStorageT::zero())];
let mut pstack = vec![sgraph.start_state()];
let mut astack = Vec::new();
let mut errors = Vec::new();
let mut spans = Vec::new();
Expand Down
49 changes: 26 additions & 23 deletions lrtable/src/lib/pager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::{
};

use cfgrammar::{yacc::YaccGrammar, SIdx, Symbol};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use num_traits::{AsPrimitive, PrimInt, Unsigned, Zero};
use vob::Vob;

use crate::{itemset::Itemset, stategraph::StateGraph, StIdx, StIdxStorageT};
Expand Down Expand Up @@ -130,6 +130,7 @@ where
let mut core_states = Vec::new();
let mut edges: Vec<HashMap<Symbol<StorageT>, StIdx>> = Vec::new();

let start_state = StIdx::from(StIdxStorageT::zero());
let mut state0 = Itemset::new(grm);
let mut ctx = Vob::from_elem(usize::from(grm.tokens_len()), false);
ctx.set(usize::from(grm.eof_token_idx()), true);
Expand Down Expand Up @@ -286,15 +287,17 @@ where
.drain(..)
.zip(closed_states.drain(..).map(Option::unwrap))
.collect(),
start_state,
edges,
);
StateGraph::new(gc_states, gc_edges)
StateGraph::new(gc_states, start_state, gc_edges)
}

/// Garbage collect `zip_states` (of `(core_states, closed_state)`) and `edges`. Returns a new pair
/// with unused states and their corresponding edges removed.
fn gc<StorageT: Eq + Hash + PrimInt>(
mut states: Vec<(Itemset<StorageT>, Itemset<StorageT>)>,
start_state: StIdx,
mut edges: Vec<HashMap<Symbol<StorageT>, StIdx>>,
) -> (
Vec<(Itemset<StorageT>, Itemset<StorageT>)>,
Expand All @@ -303,7 +306,7 @@ fn gc<StorageT: Eq + Hash + PrimInt>(
// First of all, do a simple pass over all states. All state indexes reachable from the
// start state will be inserted into the 'seen' set.
let mut todo = HashSet::new();
todo.insert(StIdx(0));
todo.insert(start_state);
let mut seen = HashSet::new();
while !todo.is_empty() {
// XXX This is the clumsy way we're forced to do what we'd prefer to be:
Expand Down Expand Up @@ -448,12 +451,12 @@ mod test {
assert_eq!(sg.all_states_len(), StIdx(10));
assert_eq!(sg.all_edges_len(), 10);

assert_eq!(sg.closed_state(StIdx(0)).items.len(), 3);
state_exists(&grm, &sg.closed_state(StIdx(0)), "^", 0, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "S", 0, SIdx(0), vec!["$", "b"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "S", 1, SIdx(0), vec!["$", "b"]);
assert_eq!(sg.closed_state(sg.start_state()).items.len(), 3);
state_exists(&grm, &sg.closed_state(sg.start_state()), "^", 0, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "S", 0, SIdx(0), vec!["$", "b"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "S", 1, SIdx(0), vec!["$", "b"]);

let s1 = sg.edge(StIdx(0), Symbol::Rule(grm.rule_idx("S").unwrap())).unwrap();
let s1 = sg.edge(sg.start_state(), Symbol::Rule(grm.rule_idx("S").unwrap())).unwrap();
assert_eq!(sg.closed_state(s1).items.len(), 2);
state_exists(&grm, &sg.closed_state(s1), "^", 0, SIdx(1), vec!["$"]);
state_exists(&grm, &sg.closed_state(s1), "S", 0, SIdx(1), vec!["$", "b"]);
Expand All @@ -462,7 +465,7 @@ mod test {
assert_eq!(sg.closed_state(s2).items.len(), 1);
state_exists(&grm, &sg.closed_state(s2), "S", 0, SIdx(2), vec!["$", "b"]);

let s3 = sg.edge(StIdx(0), Symbol::Token(grm.token_idx("b").unwrap())).unwrap();
let s3 = sg.edge(sg.start_state(), Symbol::Token(grm.token_idx("b").unwrap())).unwrap();
assert_eq!(sg.closed_state(s3).items.len(), 4);
state_exists(&grm, &sg.closed_state(s3), "S", 1, SIdx(1), vec!["$", "b", "c"]);
state_exists(&grm, &sg.closed_state(s3), "A", 0, SIdx(0), vec!["a"]);
Expand Down Expand Up @@ -529,16 +532,16 @@ mod test {
assert_eq!(sg.all_edges_len(), 27);

// State 0
assert_eq!(sg.closed_state(StIdx(0)).items.len(), 7);
state_exists(&grm, &sg.closed_state(StIdx(0)), "^", 0, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 0, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 1, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 2, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 3, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 4, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(StIdx(0)), "X", 5, SIdx(0), vec!["$"]);

let s1 = sg.edge(StIdx(0), Symbol::Token(grm.token_idx("a").unwrap())).unwrap();
assert_eq!(sg.closed_state(sg.start_state()).items.len(), 7);
state_exists(&grm, &sg.closed_state(sg.start_state()), "^", 0, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 0, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 1, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 2, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 3, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 4, SIdx(0), vec!["$"]);
state_exists(&grm, &sg.closed_state(sg.start_state()), "X", 5, SIdx(0), vec!["$"]);

let s1 = sg.edge(sg.start_state(), Symbol::Token(grm.token_idx("a").unwrap())).unwrap();
assert_eq!(sg.closed_state(s1).items.len(), 7);
state_exists(&grm, &sg.closed_state(s1), "X", 0, SIdx(1), vec!["a", "d", "e", "$"]);
state_exists(&grm, &sg.closed_state(s1), "X", 1, SIdx(1), vec!["a", "d", "e", "$"]);
Expand All @@ -548,7 +551,7 @@ mod test {
state_exists(&grm, &sg.closed_state(s1), "Z", 0, SIdx(0), vec!["c"]);
state_exists(&grm, &sg.closed_state(s1), "T", 0, SIdx(0), vec!["a", "d", "e", "$"]);

let s7 = sg.edge(StIdx(0), Symbol::Token(grm.token_idx("b").unwrap())).unwrap();
let s7 = sg.edge(sg.start_state(), Symbol::Token(grm.token_idx("b").unwrap())).unwrap();
assert_eq!(sg.closed_state(s7).items.len(), 7);
state_exists(&grm, &sg.closed_state(s7), "X", 3, SIdx(1), vec!["a", "d", "e", "$"]);
state_exists(&grm, &sg.closed_state(s7), "X", 4, SIdx(1), vec!["a", "d", "e", "$"]);
Expand Down Expand Up @@ -609,7 +612,7 @@ mod test {
// Ommitted successors from the graph in Fig.3

// X-successor of S0
let s0x = sg.edge(StIdx(0), Symbol::Rule(grm.rule_idx("X").unwrap())).unwrap();
let s0x = sg.edge(sg.start_state(), Symbol::Rule(grm.rule_idx("X").unwrap())).unwrap();
state_exists(&grm, &sg.closed_state(s0x), "^", 0, SIdx(1), vec!["$"]);

// Y-successor of S1 (and it's d-successor)
Expand Down Expand Up @@ -678,7 +681,7 @@ mod test {
let sg = pager_stategraph(&grm);

// State 0
assert_eq!(sg.core_state(StIdx(0)).items.len(), 1);
state_exists(&grm, &sg.core_state(StIdx(0)), "^", 0, SIdx(0), vec!["$"]);
assert_eq!(sg.core_state(sg.start_state()).items.len(), 1);
state_exists(&grm, &sg.core_state(sg.start_state()), "^", 0, SIdx(0), vec!["$"]);
}
}
15 changes: 13 additions & 2 deletions lrtable/src/lib/stategraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use crate::{itemset::Itemset, StIdx, StIdxStorageT};
pub struct StateGraph<StorageT: Eq + Hash> {
/// A vector of `(core_states, closed_states)` tuples.
states: Vec<(Itemset<StorageT>, Itemset<StorageT>)>,
start_state: StIdx,
/// For each state in `states`, edges is a hashmap from symbols to state offsets.
edges: Vec<HashMap<Symbol<StorageT>, StIdx>>,
}
Expand All @@ -23,12 +24,22 @@ where
{
pub(crate) fn new(
states: Vec<(Itemset<StorageT>, Itemset<StorageT>)>,
start_state: StIdx,
edges: Vec<HashMap<Symbol<StorageT>, StIdx>>,
) -> Self {
// states.len() needs to fit into StIdxStorageT; however we don't need to worry about
// edges.len() (which merely needs to fit in a usize)
assert!(StIdxStorageT::try_from(states.len()).is_ok());
StateGraph { states, edges }
StateGraph {
states,
start_state,
edges,
}
}

/// Return this state graph's start state.
pub fn start_state(&self) -> StIdx {
self.start_state
}

/// Return an iterator which produces (in order from `0..self.rules_len()`) all this
Expand Down Expand Up @@ -113,7 +124,7 @@ where

let mut o = String::new();
for (stidx, &(ref core_st, ref closed_st)) in self.iter_stidxs().zip(self.states.iter()) {
if StIdxStorageT::from(stidx) > 0 {
if stidx != self.start_state {
o.push_str(&"\n");
}
{
Expand Down
21 changes: 14 additions & 7 deletions lrtable/src/lib/statetable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ pub struct StateTable<StorageT> {
actions: SparseVec<usize>,
state_actions: Vob,
gotos: SparseVec<usize>,
start_state: StIdx,
core_reduces: Vob,
state_shifts: Vob,
reduce_states: Vob,
Expand Down Expand Up @@ -350,6 +351,7 @@ where
actions: actions_sv,
state_actions,
gotos: gotos_sv,
start_state: sg.start_state(),
state_shifts,
core_reduces,
reduce_states,
Expand Down Expand Up @@ -463,6 +465,11 @@ where
}
}

/// Return this state table's start state.
pub fn start_state(&self) -> StIdx {
self.start_state
}

/// Return a struct containing all conflicts or `None` if there aren't any.
pub fn conflicts(&self) -> Option<&Conflicts<StorageT>> {
self.conflicts.as_ref()
Expand Down Expand Up @@ -602,7 +609,7 @@ mod test {
let sg = pager_stategraph(&grm);
assert_eq!(sg.all_states_len(), StIdx(9));

let s0 = StIdx(0);
let s0 = sg.start_state();
let s1 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Expr").unwrap())).unwrap();
let s2 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Term").unwrap())).unwrap();
let s3 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Factor").unwrap())).unwrap();
Expand Down Expand Up @@ -685,7 +692,7 @@ mod test {
assert_eq!(st.actions.len(), len);

// We only extract the states necessary to test those rules affected by the reduce/reduce.
let s0 = StIdx(0);
let s0 = sg.start_state();
let s4 = sg.edge(s0, Symbol::Token(grm.token_idx("a").unwrap())).unwrap();

assert_eq!(st.action(s4, grm.token_idx("x").unwrap()),
Expand All @@ -707,7 +714,7 @@ mod test {
let len = usize::from(grm.tokens_len()) * usize::from(sg.all_states_len());
assert_eq!(st.actions.len(), len);

let s0 = StIdx(0);
let s0 = sg.start_state();
let s1 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Expr").unwrap())).unwrap();
let s3 = sg.edge(s1, Symbol::Token(grm.token_idx("+").unwrap())).unwrap();
let s4 = sg.edge(s1, Symbol::Token(grm.token_idx("*").unwrap())).unwrap();
Expand Down Expand Up @@ -737,7 +744,7 @@ mod test {
").unwrap();
let sg = pager_stategraph(&grm);
let st = StateTable::new(&grm, &sg).unwrap();
let s0 = StIdx(0);
let s0 = sg.start_state();
let s1 = sg.edge(s0, Symbol::Token(grm.token_idx("a").unwrap())).unwrap();
let s2 = sg.edge(s0, Symbol::Token(grm.token_idx("b").unwrap())).unwrap();

Expand All @@ -764,7 +771,7 @@ mod test {
let len = usize::from(grm.tokens_len()) * usize::from(sg.all_states_len());
assert_eq!(st.actions.len(), len);

let s0 = StIdx(0);
let s0 = sg.start_state();
let s1 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Expr").unwrap())).unwrap();
let s3 = sg.edge(s1, Symbol::Token(grm.token_idx("+").unwrap())).unwrap();
let s4 = sg.edge(s1, Symbol::Token(grm.token_idx("*").unwrap())).unwrap();
Expand Down Expand Up @@ -805,7 +812,7 @@ mod test {
let len = usize::from(grm.tokens_len()) * usize::from(sg.all_states_len());
assert_eq!(st.actions.len(), len);

let s0 = StIdx(0);
let s0 = sg.start_state();
let s1 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Expr").unwrap())).unwrap();
let s3 = sg.edge(s1, Symbol::Token(grm.token_idx("+").unwrap())).unwrap();
let s4 = sg.edge(s1, Symbol::Token(grm.token_idx("*").unwrap())).unwrap();
Expand Down Expand Up @@ -863,7 +870,7 @@ mod test {
let len = usize::from(grm.tokens_len()) * usize::from(sg.all_states_len());
assert_eq!(st.actions.len(), len);

let s0 = StIdx(0);
let s0 = sg.start_state();
let s1 = sg.edge(s0, Symbol::Rule(grm.rule_idx("Expr").unwrap())).unwrap();
let s3 = sg.edge(s1, Symbol::Token(grm.token_idx("+").unwrap())).unwrap();
let s4 = sg.edge(s1, Symbol::Token(grm.token_idx("*").unwrap())).unwrap();
Expand Down

0 comments on commit 895cc3a

Please sign in to comment.