Skip to content

Commit

Permalink
Fixing bug with EOF parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
John Doak authored and John Doak committed Aug 14, 2022
1 parent 5cdf7a3 commit 5df5c74
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 8 deletions.
43 changes: 39 additions & 4 deletions halfpike.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ import (
"unicode/utf8"
)

const (
// eof is the rune that next returns once the lexer position has reached the
// end of the input. It is not a real EOF: UTF-8 has no EOF character, we
// simply know how big the input is. The control character '\x01' is used
// as a stand-in rune to symbolize EOF. We do not return this to the user.
//
// NOTE(review): an input that itself contains '\x01' would presumably be
// treated as end of input by code that compares against eof — confirm.
eof = '\x01'
)

// stateFn is used to process some part of an input line either emitting tokens and
// returning the next stateFn or nil if terminating.
// The last token should be ItemEOL.
Expand Down Expand Up @@ -184,8 +192,18 @@ func (l *lexer) run() {
func (l *lexer) emit(t ItemType, ri ...rawInfo) ItemType {
var item Item
switch t {
case ItemEOL, ItemEOF:
item = Item{t, l.input[l.start:l.pos], ri[0].num, ri[0].str}
case ItemEOL:
item = Item{
Type: t,
Val: l.input[l.start:l.pos],
lineNum: ri[0].num,
raw: ri[0].str,
}
case ItemEOF:
item = Item{
Type: t,
lineNum: ri[0].num,
}
default:
item = Item{Type: t, Val: l.input[l.start:l.pos]}
}
Expand Down Expand Up @@ -229,7 +247,7 @@ func (l *lexer) backup() {
func (l *lexer) next() rune {
if l.pos >= len(l.input) {
l.width = 0
return rune(ItemEOF)
return eof
}
var r rune
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
Expand Down Expand Up @@ -278,7 +296,24 @@ func untilEOF(l *lexer) stateFn {
raw.Reset()

lineNum++
case r == rune(ItemEOF):
case r == eof:
l.backup() // backup before the EOF.
if len(l.current()) > 0 {
switch {
case isInt(l.current()):
l.emit(ItemInt)
case isFloat(l.current()):
l.emit(ItemFloat)
case last == itemSpace:
// do nothing
default:
l.emit(ItemText)
}
}

// Emit the EOF.
l.next()
raw.Reset()
l.emit(ItemEOF, rawInfo{raw.String(), lineNum})
raw.Reset()
return nil
Expand Down
43 changes: 39 additions & 4 deletions halfpike_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"testing"

"github.com/kylelemons/godebug/pretty"
//"fmt"
)

const str = `
Expand All @@ -33,7 +32,12 @@ func TestLexer(t *testing.T) {
{Type: ItemText, Val: "Flags:"},
{Type: ItemText, Val: "<Sync>"},
{Type: ItemEOL, Val: "\n", lineNum: 2, raw: " Type: External State: Established Flags: <Sync>\n"},
{Type: ItemEOF, lineNum: 3, raw: "\x01"},
{Type: ItemEOF, lineNum: 3, raw: ""},
}

config := pretty.Config{
IncludeUnexported: true,
PrintStringers: true,
}

l := newLexer(context.Background(), str, untilEOF)
Expand All @@ -44,7 +48,7 @@ func TestLexer(t *testing.T) {
got = append(got, item)
}

if diff := pretty.Compare(want, got); diff != "" {
if diff := config.Compare(want, got); diff != "" {
t.Errorf("TestLexer: -want/+got:\n%s", diff)
}
}
Expand Down Expand Up @@ -81,7 +85,7 @@ func TestNext(t *testing.T) {
},
{
LineNum: 3,
Raw: "\x01",
Raw: "",
Items: []Item{
{Type: ItemEOF},
},
Expand Down Expand Up @@ -369,3 +373,34 @@ func TestRegressionRawStartsWithCarriageReturn(t *testing.T) {
t.Fatalf("TestRegressionRawStartsWithCarriageReturn: got err == %s", err)
}
}

// TestRegressionEOLOnLastLine tests a bug where input whose final line holds a
// single character followed directly by EOF (no trailing newline) was lexed as
// one item of type ItemEOF. The character must come out as its own item, with
// ItemEOF emitted after it as the last item.
func TestRegressionEOLOnLastLine(t *testing.T) {
	// The closing brace must stay at column 0: it is part of the raw string.
	text := `a
}`

	want := []Item{
		{Type: ItemText, Val: "a"},
		{Type: ItemEOL, Val: "\n"},
		{Type: ItemText, Val: "}"},
		{Type: ItemEOF},
	}

	// Unexported Item fields are left out of the comparison, so want only
	// needs to spell out Type and Val.
	config := pretty.Config{
		IncludeUnexported: false,
		PrintStringers:    true,
	}

	l := newLexer(context.Background(), text, untilEOF)
	go l.run()

	// Drain the lexer; the loop ends when the items channel is closed.
	var got []Item
	for item := range l.items {
		got = append(got, item)
	}

	if diff := config.Compare(want, got); diff != "" {
		t.Errorf("TestRegressionEOLOnLastLine: -want/+got:\n%s", diff)
	}
}

0 comments on commit 5df5c74

Please sign in to comment.