Skip to content

Commit

Permalink
x/text/internal/colltab: Improve numeric sorting
Browse files Browse the repository at this point in the history
Sorts zero (0) as the first number instead of the last
Sorts numbers with leading zeros after numbers with less leading zeros

Fixes golang/go#25554
  • Loading branch information
lordwelch committed May 5, 2024
1 parent 8d533a0 commit 496c61d
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 22 deletions.
10 changes: 9 additions & 1 deletion collate/collate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,15 @@ func TestNumeric(t *testing.T) {
{"A-1", "A-2", -1},
{"A-2", "A-12", -1},
{"A-12", "A-2", 1},
{"A-0001", "A-1", 0},
{"A-0001", "A-1", 1},
{"A-0000", "A-1", -1},
{"0000-", "1-", -1},
{"00001", "1", 1},
{"00", "01", -1},
{"0", "00", -1},
{"01", "001", -1},
{"01", "1", 1},
{"1", "01", -1},
} {
if got := c.CompareString(tt.a, tt.b); got != tt.want {
t.Errorf("%d: CompareString(%s, %s) = %d; want %d", i, tt.a, tt.b, got, tt.want)
Expand Down
21 changes: 19 additions & 2 deletions internal/colltab/numeric.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,20 @@ func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) {
}
// ce might have been grown already, so take it instead of buf.
nc.init(ce, len(buf), isZero)
old_index := len(nc.elems)
for n < len(s) {
ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:])
nc.b = s
n += sz
if !nc.update(ce) {
break
}
old_index = len(nc.elems)
}
nc.elems = append(nc.elems, 0)
copy(nc.elems[old_index+1:], nc.elems[old_index:])
nc.elems[old_index], _ = MakeElem(nc.zero+1, defaultSecondary, defaultTertiary, 0)

return nc.result(), n
}

Expand All @@ -105,14 +111,20 @@ func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n
return ce, n
}
nc.init(ce, len(buf), isZero)
old_index := len(nc.elems)
for n < len(s) {
ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:])
nc.s = s
n += sz
if !nc.update(ce) {
break
}
old_index = len(nc.elems)
}
nc.elems = append(nc.elems, 0)
copy(nc.elems[old_index+1:], nc.elems[old_index:])
nc.elems[old_index], _ = MakeElem(nc.zero+1, defaultSecondary, defaultTertiary, 0)

return nc.result(), n
}

Expand All @@ -122,6 +134,7 @@ type numberConverter struct {
elems []Elem
nDigits int
lenIndex int
zero int

s string // set if the input was of type string
b []byte // set if the input was of type []byte
Expand All @@ -133,6 +146,7 @@ func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) {
// Insert a marker indicating the start of a number and a placeholder
// for the number of digits.
if isZero {
nc.zero++
elems = append(elems[:oldLen], nc.w.numberStart, 0)
} else {
elems = append(elems, 0, 0)
Expand Down Expand Up @@ -217,20 +231,23 @@ const maxDigits = 1<<maxPrimaryBits - 1
func (nc *numberConverter) update(elems []Elem) bool {
isZero, ok := nc.checkNextDigit(elems)
if nc.nDigits == 0 && isZero {
if nc.zero+1 < maxDigits {
nc.zero++
}
return true
}
nc.elems = elems
if !ok {
return false
}
nc.nDigits++
return nc.nDigits < maxDigits
return nc.nDigits+1 < maxDigits
}

// result fills in the length element for the digit sequence and returns the
// completed collation elements.
func (nc *numberConverter) result() []Elem {
e, _ := MakeElem(nc.nDigits, defaultSecondary, defaultTertiary, 0)
e, _ := MakeElem(nc.nDigits+1, defaultSecondary, defaultTertiary, 0)
nc.elems[nc.lenIndex] = e
return nc.elems
}
59 changes: 40 additions & 19 deletions internal/colltab/numeric_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,34 +78,39 @@ func TestNumericAppendNext(t *testing.T) {
{"a", p(5)},
{"klm", p(99)},
{"aa", p(5, 5)},
{"1", p(120, 1, 101)},
{"0", p(120, 0)},
{"01", p(120, 1, 101)},
{"0001", p(120, 1, 101)},
{"10", p(120, 2, 101, 100)},
{"99", p(120, 2, 119, 119)},
{"9999", p(120, 4, 119, 119, 119, 119)},
{"1a", p(120, 1, 101, 5)},
{"0b", p(120, 0, 6)},
{"01c", p(120, 1, 101, 8, 2)},
{"10x", p(120, 2, 101, 100, 200)},
{"99y", p(120, 2, 119, 119, 201)},
{"9999nop", p(120, 4, 119, 119, 119, 119, 121)},
{"1", p(120, 2, 101, 1)},
{"0", p(120, 1, 2)},
{"00", p(120, 1, 3)},
{"01", p(120, 2, 101, 2)},
{"02", p(120, 2, 102, 2)},
{"0001", p(120, 2, 101, 4)},
{"10", p(120, 3, 101, 100, 1)},
{"99", p(120, 3, 119, 119, 1)},
{"9999", p(120, 5, 119, 119, 119, 119, 1)},
{"1a", p(120, 2, 101, 1, 5)},
{"0b", p(120, 1, 2, 6)},
{"01c", p(120, 2, 101, 2, 8, 2)},
{"10x", p(120, 3, 101, 100, 1, 200)},
{"99y", p(120, 3, 119, 119, 1, 201)},
{"9999nop", p(120, 5, 119, 119, 119, 119, 1, 121)},
{"a0001", p(5, 120, 2, 101, 4)},
{"a1", p(5, 120, 2, 101, 1)},

// Allow follow-up collation elements if they have a zero non-primary.
{"١٢٩", []Elem{e(120), e(3), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3}},
{"١٢٩", []Elem{e(120), e(4), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3, e(1)}},
{
"129",
[]Elem{
e(120), e(3),
e(120), e(4),
e(101, digSec, digTert+1),
e(102, digSec, digTert+3),
e(119, digSec, digTert+1),
e(1),
},
},

// Ensure AppendNext* adds to the given buffer.
{"a10", p(5, 120, 2, 101, 100)},
{"a10", p(5, 120, 3, 101, 100, 1)},
} {
nw := NewNumericWeighter(numWeighter)

Expand Down Expand Up @@ -137,12 +142,28 @@ func TestNumericOverflow(t *testing.T) {

got, n := nw.AppendNextString(nil, manyDigits)

if n != maxDigits {
t.Errorf("n: got %d; want %d", n, maxDigits)
if n != maxDigits-1 { // A digit is lost because we can't use elem.Primary() == 0 as it sorts after elem.Primary() == 1
t.Errorf("n: got %d; want %d", n, maxDigits-1)
}

if got[1].Primary() != maxDigits {
t.Errorf("primary(e[1]): got %d; want %d", n, maxDigits)
t.Errorf("primary(e[1]): got %d; want %d", got[1].Primary(), maxDigits)
}
}

func TestNumericZeroOverflow(t *testing.T) {
manyDigits := strings.Repeat("0", maxDigits+1) + "a"

nw := NewNumericWeighter(numWeighter)

got, n := nw.AppendNextString(nil, manyDigits)

if n != maxDigits+2 { // Zeros after maxDigits-1 are ignored but we still consume them so that the number with leading zeros is ordered after a number with less leading zeros
t.Errorf("n: got %d; want %d", n, maxDigits+2)
}

if got[2].Primary() != maxDigits {
t.Errorf("primary(e[2]): got %d; want %d", got[1].Primary(), maxDigits)
}
}

Expand Down

0 comments on commit 496c61d

Please sign in to comment.