From 46dc70648d70278e21503a187690294d25cb5cbd Mon Sep 17 00:00:00 2001 From: benlipkin Date: Wed, 17 Jan 2024 13:37:25 -0500 Subject: [PATCH] update CFGFSM documentation to reflect updated logic --- outlines/fsm/fsm.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/outlines/fsm/fsm.py b/outlines/fsm/fsm.py index c2672bac5..62b75198b 100644 --- a/outlines/fsm/fsm.py +++ b/outlines/fsm/fsm.py @@ -220,18 +220,22 @@ def allowed_token_ids(self, state: FSMState) -> List[int]: """Generate a list of allowed tokens for the next step. Upon initialization, the CFG incremental parser is used to determine the - first regex. - - This regex is used for proposals until either: - - - The regex is exhausted, and its only remaining option is the EOS - token, in which case we always transition to the next regex - - The regex can be exhausted, but the EOS token is not the only - remaining option, in which case we transition to the next regex with - probability P (TODO) or remove the possibility of generating the EOS - token and continue with the current regex - - The CFG incremental parser is allowed to propose the EOS token from any final state, + first regex and construct the first FSM to generate the first terminal. + + This FSM is used for proposals until either: + + - The FSM is exhausted, and its only remaining option is the EOS + token, in which case we feed the generated terminal to the + CFG incremental parser and allow it to propose the next regex + corresponding to the next set of valid terminals. + - The current FSM can be exhausted, but the EOS token is not the only + remaining option. In this case we allow proposal of current terminal extensions, + store the current FSM and its state, then also use the CFG parser + to propose a new regex corresponding to terminating the current terminal + and starting the next one. 
The model can then sample from either of these two proposal sets + to determine whether to extend the current terminal or to terminate it and start the next one. + + The CFG incremental parser is allowed to propose the EOS token from any accepting state, and once it is generated, the FSM will continue to always generate the EOS token. Parameters