diff --git a/halfpike.go b/halfpike.go index 3d0ac7a..32feab5 100644 --- a/halfpike.go +++ b/halfpike.go @@ -24,6 +24,14 @@ import ( "unicode/utf8" ) +const ( + // eof represents an eof character that we return. However, its + // not really the EOF. UTF-8 doesn't do EOF, we simply know how + // big the file is. We simply use this control character as a rune + // to symbolize EOF. We do not return this to the user. + eof = '\x01' +) + // stateFn is used to process some part of an input line either emitting tokens and // returning the next stateFn or nil if terminating. // The last token should be ItemEOL. @@ -184,8 +192,18 @@ func (l *lexer) run() { func (l *lexer) emit(t ItemType, ri ...rawInfo) ItemType { var item Item switch t { - case ItemEOL, ItemEOF: - item = Item{t, l.input[l.start:l.pos], ri[0].num, ri[0].str} + case ItemEOL: + item = Item{ + Type: t, + Val: l.input[l.start:l.pos], + lineNum: ri[0].num, + raw: ri[0].str, + } + case ItemEOF: + item = Item{ + Type: t, + lineNum: ri[0].num, + } default: item = Item{Type: t, Val: l.input[l.start:l.pos]} } @@ -229,7 +247,7 @@ func (l *lexer) backup() { func (l *lexer) next() rune { if l.pos >= len(l.input) { l.width = 0 - return rune(ItemEOF) + return eof } var r rune r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) @@ -278,7 +296,24 @@ func untilEOF(l *lexer) stateFn { raw.Reset() lineNum++ - case r == rune(ItemEOF): + case r == eof: + l.backup() // backup before the EOF. + if len(l.current()) > 0 { + switch { + case isInt(l.current()): + l.emit(ItemInt) + case isFloat(l.current()): + l.emit(ItemFloat) + case last == itemSpace: + // do nothing + default: + l.emit(ItemText) + } + } + + // Emit the EOF. 
+ l.next() + raw.Reset() l.emit(ItemEOF, rawInfo{raw.String(), lineNum}) raw.Reset() return nil diff --git a/halfpike_test.go b/halfpike_test.go index 3e5c4ae..4782631 100644 --- a/halfpike_test.go +++ b/halfpike_test.go @@ -7,7 +7,6 @@ import ( "testing" "github.com/kylelemons/godebug/pretty" - //"fmt" ) const str = ` @@ -33,7 +32,12 @@ func TestLexer(t *testing.T) { {Type: ItemText, Val: "Flags:"}, {Type: ItemText, Val: ""}, {Type: ItemEOL, Val: "\n", lineNum: 2, raw: " Type: External State: Established Flags: \n"}, - {Type: ItemEOF, lineNum: 3, raw: "\x01"}, + {Type: ItemEOF, lineNum: 3, raw: ""}, + } + + config := pretty.Config{ + IncludeUnexported: true, + PrintStringers: true, } l := newLexer(context.Background(), str, untilEOF) @@ -44,7 +48,7 @@ func TestLexer(t *testing.T) { got = append(got, item) } - if diff := pretty.Compare(want, got); diff != "" { + if diff := config.Compare(want, got); diff != "" { t.Errorf("TestLexer: -want/+got:\n%s", diff) } } @@ -81,7 +85,7 @@ func TestNext(t *testing.T) { }, { LineNum: 3, - Raw: "\x01", + Raw: "", Items: []Item{ {Type: ItemEOF}, }, @@ -369,3 +373,34 @@ func TestRegressionRawStartsWithCarriageReturn(t *testing.T) { t.Fatalf("TestRegressionRawStartsWithCarriageReturn: got err == %s", err) } } + +// TestRegressionEOLOnLastLine tests a bug where if we have a EOF after a single character in a line, +// that the item comes out as a single item with type EOF, where it should be two items with EOF at the end. 
+func TestRegressionEOLOnLastLine(t *testing.T) {
+	// The input has no trailing newline, so the final "}" is followed directly by EOF.
+	input := "a\n}"
+
+	// lineNum and raw are left unset: the comparison below sets IncludeUnexported to false.
+	want := []Item{
+		{Type: ItemText, Val: "a"},
+		{Type: ItemEOL, Val: "\n"},
+		{Type: ItemText, Val: "}"},
+		{Type: ItemEOF},
+	}
+
+	lex := newLexer(context.Background(), input, untilEOF)
+	go lex.run()
+
+	// Collect every emitted item; ranging over the channel stops when it is closed.
+	got := make([]Item, 0, len(want))
+	for it := range lex.items {
+		got = append(got, it)
+	}
+
+	cfg := pretty.Config{
+		IncludeUnexported: false,
+		PrintStringers:    true,
+	}
+	if diff := cfg.Compare(want, got); diff != "" {
+		t.Errorf("TestRegressionEOLOnLastLine: -want/+got:\n%s", diff)
+	}
+}