From 6f45313302b9c56850fc17f99e40caebce98c716 Mon Sep 17 00:00:00 2001 From: Steven Thurgood Date: Tue, 27 Jan 2015 13:39:51 +0000 Subject: [PATCH] Modify shlex to remove the charRuneClass. This makes it behave more like shlex.py with whitespace_split=True --- shlex.go | 51 ++++++++++----------------------------------------- shlex_test.go | 11 ++++++----- 2 files changed, 16 insertions(+), 46 deletions(-) diff --git a/shlex.go b/shlex.go index 9d31cc6..3cb37b7 100644 --- a/shlex.go +++ b/shlex.go @@ -49,7 +49,7 @@ import ( // TokenType is a top-level token classification: A word, space, comment, unknown. type TokenType int -// runeTokenClass is the type of a UTF-8 character classification: A character, quote, space, escape. +// runeTokenClass is the type of a UTF-8 character classification: A quote, space, escape. type runeTokenClass int // the internal state used by the lexer state machine @@ -76,7 +76,6 @@ func (a *Token) Equal(b *Token) bool { // Named classes of UTF-8 runes const ( - charRunes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._-,|" spaceRunes = " \t\r\n" escapingQuoteRunes = `"` nonEscapingQuoteRunes = "'" @@ -87,7 +86,6 @@ const ( // Classes of rune token const ( unknownRuneClass runeTokenClass = iota - charRuneClass spaceRuneClass escapingQuoteRuneClass nonEscapingQuoteRuneClass @@ -127,7 +125,6 @@ func (typeMap tokenClassifier) addRuneClass(runes string, tokenType runeTokenCla // newDefaultClassifier creates a new classifier for ASCII characters. func newDefaultClassifier() tokenClassifier { t := tokenClassifier{} - t.addRuneClass(charRunes, charRuneClass) t.addRuneClass(spaceRunes, spaceRuneClass) t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass) t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass) @@ -213,12 +210,6 @@ func (t *Tokenizer) scanStream() (*Token, error) { { return nil, io.EOF } - case charRuneClass: - { - tokenType = WordToken - value = append(value, nextRune) - state = inWordState - } case spaceRuneClass: { } @@ -244,7 +235,9 @@ func (t *Tokenizer) scanStream() (*Token, error) { } default: { - return nil, fmt.Errorf("Uknown rune: %v", nextRune) + tokenType = WordToken + value = append(value, nextRune) + state = inWordState } } } @@ -258,10 +251,6 @@ func (t *Tokenizer) scanStream() (*Token, error) { value: string(value)} return token, err } - case charRuneClass, commentRuneClass: - { - value = append(value, nextRune) - } case spaceRuneClass: { t.input.UnreadRune() @@ -284,7 +273,7 @@ func (t *Tokenizer) scanStream() (*Token, error) { } default: { - return nil, fmt.Errorf("Uknown rune: %v", nextRune) + value = append(value, nextRune) } } } @@ -299,15 +288,11 @@ func (t *Tokenizer) scanStream() (*Token, error) { value: string(value)} return token, err } - case charRuneClass, spaceRuneClass, escapingQuoteRuneClass, nonEscapingQuoteRuneClass, escapeRuneClass, commentRuneClass: + default: { state = inWordState value = append(value, nextRune) } - default: - { - return nil, fmt.Errorf("Uknown rune: %v", nextRune) - } } } case escapingQuotedState: // the next rune after an escape character, in double quotes @@ -321,15 +306,11 @@ func (t *Tokenizer) scanStream() (*Token, error) { value: string(value)} return token, err } - case charRuneClass, spaceRuneClass, escapingQuoteRuneClass, nonEscapingQuoteRuneClass, escapeRuneClass, commentRuneClass: + default: { state = quotingEscapingState value = append(value, nextRune) } - default: - { - return nil, fmt.Errorf("Uknown rune: %v", nextRune) - } } } case quotingEscapingState: // in escaping double quotes @@ -343,10 +324,6 @@ func (t *Tokenizer) scanStream() (*Token, error) { value: string(value)} return token, err } - case charRuneClass, spaceRuneClass, nonEscapingQuoteRuneClass, commentRuneClass: - { - value = append(value, nextRune) - } case escapingQuoteRuneClass: { state = inWordState @@ -357,7 +334,7 @@ func (t *Tokenizer) scanStream() (*Token, error) { } default: { - return nil, fmt.Errorf("Uknown rune: %v", nextRune) + value = append(value, nextRune) } } } @@ -372,17 +349,13 @@ func (t *Tokenizer) scanStream() (*Token, error) { value: string(value)} return token, err } - case charRuneClass, spaceRuneClass, escapingQuoteRuneClass, escapeRuneClass, commentRuneClass: - { - value = append(value, nextRune) - } case nonEscapingQuoteRuneClass: { state = inWordState } default: { - return nil, fmt.Errorf("Uknown rune: %v", nextRune) + value = append(value, nextRune) } } } @@ -396,10 +369,6 @@ func (t *Tokenizer) scanStream() (*Token, error) { value: string(value)} return token, err } - case charRuneClass, escapingQuoteRuneClass, escapeRuneClass, commentRuneClass, nonEscapingQuoteRuneClass: - { - value = append(value, nextRune) - } case spaceRuneClass: { if nextRune == '\n' { @@ -414,7 +383,7 @@ func (t *Tokenizer) scanStream() (*Token, error) { } default: { - return nil, fmt.Errorf("Uknown rune: %v", nextRune) + value = append(value, nextRune) } } } diff --git a/shlex_test.go b/shlex_test.go index eaafa33..f9f9e0c 100644 --- a/shlex_test.go +++ b/shlex_test.go @@ -24,13 +24,12 @@ import ( var ( // one two "three four" "five \"six\"" seven#eight # nine # ten // eleven 'twelve\' - testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten\n eleven 'twelve\\'" + testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten\n eleven 'twelve\\' thirteen=13 fourteen/14" ) func TestClassifier(t *testing.T) { classifier := newDefaultClassifier() tests := map[rune]runeTokenClass{ - 'a': charRuneClass, ' ': spaceRuneClass, '"': escapingQuoteRuneClass, '\'': nonEscapingQuoteRuneClass, @@ -53,7 +52,9 @@ func TestTokenizer(t *testing.T) { &Token{WordToken, "seven#eight"}, &Token{CommentToken, " nine # ten"}, &Token{WordToken, "eleven"}, - &Token{WordToken, "twelve\\"}} + &Token{WordToken, "twelve\\"}, + &Token{WordToken, "thirteen=13"}, + &Token{WordToken, "fourteen/14"}} tokenizer := NewTokenizer(testInput) for i, want := range expectedTokens { @@ -69,7 +70,7 @@ func TestTokenizer(t *testing.T) { func TestLexer(t *testing.T) { testInput := strings.NewReader(testString) - expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\"} + expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"} lexer := NewLexer(testInput) for i, want := range expectedStrings { @@ -84,7 +85,7 @@ func TestLexer(t *testing.T) { } func TestSplit(t *testing.T) { - want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\"} + want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"} got, err := Split(testString) if err != nil { t.Error(err)