This repository has been archived by the owner on Dec 29, 2022. It is now read-only.

Modify shlex to remove the charRuneClass. This makes it behave more like shlex.py with whitespace_split=True
Steven Thurgood committed Jan 27, 2015
1 parent 6f9e655 commit 6f45313
Showing 2 changed files with 16 additions and 46 deletions.
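In practical terms, runes that previously had no classification (anything outside charRunes, e.g. '=' or '/') no longer abort lexing with an "Uknown rune" error; they are simply treated as word characters, matching Python's shlex with whitespace_split=True. Below is a minimal usage sketch of the new behavior; it assumes the package is imported as github.com/google/shlex, and the input string is illustrative:

    package main

    import (
    	"fmt"

    	"github.com/google/shlex"
    )

    func main() {
    	// '=' and '/' were not in the old charRunes whitelist, so this input
    	// used to fail with an "Uknown rune" error; after this commit every
    	// unclassified rune is accepted as part of a word.
    	words, err := shlex.Split(`thirteen=13 fourteen/14 "three four"`)
    	if err != nil {
    		panic(err)
    	}
    	fmt.Println(words) // [thirteen=13 fourteen/14 three four]
    }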
51 changes: 10 additions & 41 deletions shlex.go
@@ -49,7 +49,7 @@ import (
 // TokenType is a top-level token classification: A word, space, comment, unknown.
 type TokenType int
 
-// runeTokenClass is the type of a UTF-8 character classification: A character, quote, space, escape.
+// runeTokenClass is the type of a UTF-8 character classification: A quote, space, escape.
 type runeTokenClass int
 
 // the internal state used by the lexer state machine
@@ -76,7 +76,6 @@ func (a *Token) Equal(b *Token) bool {
 
 // Named classes of UTF-8 runes
 const (
-	charRunes             = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._-,|"
 	spaceRunes            = " \t\r\n"
 	escapingQuoteRunes    = `"`
 	nonEscapingQuoteRunes = "'"
@@ -87,7 +86,6 @@ const (
 // Classes of rune token
 const (
 	unknownRuneClass runeTokenClass = iota
-	charRuneClass
 	spaceRuneClass
 	escapingQuoteRuneClass
 	nonEscapingQuoteRuneClass
@@ -127,7 +125,6 @@ func (typeMap tokenClassifier) addRuneClass(runes string, tokenType runeTokenClass) {
 // newDefaultClassifier creates a new classifier for ASCII characters.
 func newDefaultClassifier() tokenClassifier {
 	t := tokenClassifier{}
-	t.addRuneClass(charRunes, charRuneClass)
 	t.addRuneClass(spaceRunes, spaceRuneClass)
 	t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass)
 	t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass)
@@ -213,12 +210,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 					{
 						return nil, io.EOF
 					}
-				case charRuneClass:
-					{
-						tokenType = WordToken
-						value = append(value, nextRune)
-						state = inWordState
-					}
 				case spaceRuneClass:
 					{
 					}
@@ -244,7 +235,9 @@
 					}
 				default:
 					{
-						return nil, fmt.Errorf("Uknown rune: %v", nextRune)
+						tokenType = WordToken
+						value = append(value, nextRune)
+						state = inWordState
 					}
 				}
 			}
@@ -258,10 +251,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 							value: string(value)}
 						return token, err
 					}
-				case charRuneClass, commentRuneClass:
-					{
-						value = append(value, nextRune)
-					}
 				case spaceRuneClass:
 					{
 						t.input.UnreadRune()
@@ -284,7 +273,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 					}
 				default:
 					{
-						return nil, fmt.Errorf("Uknown rune: %v", nextRune)
+						value = append(value, nextRune)
 					}
 				}
 			}
@@ -299,15 +288,11 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 							value: string(value)}
 						return token, err
 					}
-				case charRuneClass, spaceRuneClass, escapingQuoteRuneClass, nonEscapingQuoteRuneClass, escapeRuneClass, commentRuneClass:
+				default:
 					{
 						state = inWordState
 						value = append(value, nextRune)
 					}
-				default:
-					{
-						return nil, fmt.Errorf("Uknown rune: %v", nextRune)
-					}
 				}
 			}
 		case escapingQuotedState: // the next rune after an escape character, in double quotes
@@ -321,15 +306,11 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 							value: string(value)}
 						return token, err
 					}
-				case charRuneClass, spaceRuneClass, escapingQuoteRuneClass, nonEscapingQuoteRuneClass, escapeRuneClass, commentRuneClass:
+				default:
 					{
 						state = quotingEscapingState
 						value = append(value, nextRune)
 					}
-				default:
-					{
-						return nil, fmt.Errorf("Uknown rune: %v", nextRune)
-					}
 				}
 			}
 		case quotingEscapingState: // in escaping double quotes
@@ -343,10 +324,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 							value: string(value)}
 						return token, err
 					}
-				case charRuneClass, spaceRuneClass, nonEscapingQuoteRuneClass, commentRuneClass:
-					{
-						value = append(value, nextRune)
-					}
 				case escapingQuoteRuneClass:
 					{
 						state = inWordState
@@ -357,7 +334,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 					}
 				default:
 					{
-						return nil, fmt.Errorf("Uknown rune: %v", nextRune)
+						value = append(value, nextRune)
 					}
 				}
 			}
@@ -372,17 +349,13 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 							value: string(value)}
 						return token, err
 					}
-				case charRuneClass, spaceRuneClass, escapingQuoteRuneClass, escapeRuneClass, commentRuneClass:
-					{
-						value = append(value, nextRune)
-					}
 				case nonEscapingQuoteRuneClass:
 					{
 						state = inWordState
 					}
 				default:
 					{
-						return nil, fmt.Errorf("Uknown rune: %v", nextRune)
+						value = append(value, nextRune)
 					}
 				}
 			}
@@ -396,10 +369,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 							value: string(value)}
 						return token, err
 					}
-				case charRuneClass, escapingQuoteRuneClass, escapeRuneClass, commentRuneClass, nonEscapingQuoteRuneClass:
-					{
-						value = append(value, nextRune)
-					}
 				case spaceRuneClass:
 					{
 						if nextRune == '\n' {
@@ -414,7 +383,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 					}
 				default:
 					{
-						return nil, fmt.Errorf("Uknown rune: %v", nextRune)
+						value = append(value, nextRune)
 					}
 				}
 			}
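Taken together, the shlex.go changes invert the classifier's contract: instead of whitelisting word runes via charRunes and erroring on anything unknown, the classifier now lists only the special runes (whitespace, quotes, escape, comment), and every state's default branch appends the rune to the current word. A standalone sketch of that pattern (the types and names below are illustrative, not the package's actual code):

    package main

    import "fmt"

    type runeClass int

    const (
    	wordClass  runeClass = iota // zero value: any rune with no entry in the map
    	spaceClass
    	quoteClass
    )

    // Only special runes are listed; everything else falls through to wordClass,
    // mirroring how unknownRuneClass now reaches the default branches above.
    var special = map[rune]runeClass{
    	' ': spaceClass, '\t': spaceClass, '\n': spaceClass,
    	'"': quoteClass, '\'': quoteClass,
    }

    func classify(r rune) runeClass { return special[r] } // absent key yields wordClass

    func main() {
    	for _, r := range "a=/ \"" {
    		fmt.Printf("%q -> %d\n", r, classify(r))
    	}
    }

The map's zero value does the work: deleting charRunes is safe because an absent entry already means "word character", which is exactly what the new default branches implement.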
11 changes: 6 additions & 5 deletions shlex_test.go
@@ -24,13 +24,12 @@ import (
 var (
 	// one two "three four" "five \"six\"" seven#eight # nine # ten
 	// eleven 'twelve\'
-	testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten\n eleven 'twelve\\'"
+	testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten\n eleven 'twelve\\' thirteen=13 fourteen/14"
 )
 
 func TestClassifier(t *testing.T) {
 	classifier := newDefaultClassifier()
 	tests := map[rune]runeTokenClass{
-		'a':  charRuneClass,
 		' ':  spaceRuneClass,
 		'"':  escapingQuoteRuneClass,
 		'\'': nonEscapingQuoteRuneClass,
@@ -53,7 +52,9 @@ func TestTokenizer(t *testing.T) {
 		&Token{WordToken, "seven#eight"},
 		&Token{CommentToken, " nine # ten"},
 		&Token{WordToken, "eleven"},
-		&Token{WordToken, "twelve\\"}}
+		&Token{WordToken, "twelve\\"},
+		&Token{WordToken, "thirteen=13"},
+		&Token{WordToken, "fourteen/14"}}
 
 	tokenizer := NewTokenizer(testInput)
 	for i, want := range expectedTokens {
@@ -69,7 +70,7 @@ func TestTokenizer(t *testing.T) {
 
 func TestLexer(t *testing.T) {
 	testInput := strings.NewReader(testString)
-	expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\"}
+	expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
 
 	lexer := NewLexer(testInput)
 	for i, want := range expectedStrings {
@@ -84,7 +85,7 @@ func TestLexer(t *testing.T) {
 }
 
 func TestSplit(t *testing.T) {
-	want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\"}
+	want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
 	got, err := Split(testString)
 	if err != nil {
 		t.Error(err)
