diff --git a/go/mysql/collations/charset/convert_test.go b/go/mysql/collations/charset/convert_test.go index 6d737a91faf..df44f961743 100644 --- a/go/mysql/collations/charset/convert_test.go +++ b/go/mysql/collations/charset/convert_test.go @@ -17,13 +17,11 @@ limitations under the License. package charset import ( - "fmt" "testing" "github.com/stretchr/testify/assert" ) -// TODO: These types can be removed, use binary_charset instead. type testCharset1 struct{} func (c *testCharset1) Name() string { @@ -75,26 +73,290 @@ func (c *testCharset2) EncodeRune([]byte, rune) int { return 0 } -func (c *testCharset2) DecodeRune([]byte) (rune, int) { - return 1, 1 +func (c *testCharset2) DecodeRune(bytes []byte) (rune, int) { + if len(bytes) < 1 { + return RuneError, 0 + } + return rune(bytes[0]), 1 +} + +func (c *testCharset2) Convert(_, src []byte, from Charset) ([]byte, error) { + return src, nil } func TestConvert(t *testing.T) { - dstCharset := &testCharset1{} - srcCharset := &testCharset2{} - src := []byte("src") + testCases := []struct { + src []byte + srcCharset Charset + dst []byte + dstCharset Charset + want []byte + err string + }{ + { + src: []byte("testSrc"), + srcCharset: Charset_utf8mb3{}, + dst: []byte("testDst"), + dstCharset: Charset_utf8mb4{}, + want: []byte("testDsttestSrc"), + }, + { + src: []byte("testSrc"), + srcCharset: Charset_utf8mb3{}, + dst: nil, + dstCharset: Charset_utf8mb4{}, + want: []byte("testSrc"), + }, + { + src: []byte("testSrc"), + srcCharset: Charset_utf8mb4{}, + dst: nil, + dstCharset: Charset_utf8mb3{}, + want: []byte("testSrc"), + }, + { + src: []byte("testSrc"), + srcCharset: Charset_utf8mb4{}, + dst: []byte("testDst"), + dstCharset: Charset_utf8mb3{}, + want: []byte("testDsttestSrc"), + }, + { + src: []byte("😊😂🤒"), + srcCharset: Charset_utf8mb4{}, + dst: []byte("testDst"), + dstCharset: Charset_utf8mb3{}, + want: []byte("testDst???"), + err: "Cannot convert string", + }, + { + src: []byte("testSrc"), + srcCharset: 
Charset_binary{}, + dst: []byte("testDst"), + dstCharset: Charset_utf8mb3{}, + want: []byte("testDsttestSrc"), + }, + { + src: []byte{00, 65, 00, 66}, + srcCharset: Charset_ucs2{}, + dst: []byte("testDst"), + dstCharset: Charset_utf8mb3{}, + want: []byte("testDstAB"), + }, + { + src: []byte{00, 65, 00, 66}, + srcCharset: Charset_ucs2{}, + dst: nil, + dstCharset: Charset_utf8mb3{}, + want: []byte("AB"), + }, + { + src: []byte("😊😂🤒"), + srcCharset: Charset_utf8mb3{}, + dst: nil, + dstCharset: &testCharset2{}, + want: []byte("😊😂🤒"), + }, + } - res, err := Convert(nil, dstCharset, src, srcCharset) - assert.NoError(t, err) - assert.Equal(t, src, res) + for _, tc := range testCases { + res, err := Convert(tc.dst, tc.dstCharset, tc.src, tc.srcCharset) + if tc.err != "" { + assert.ErrorContains(t, err, tc.err) + assert.Equal(t, tc.want, res) + } else { + assert.NoError(t, err) + assert.Equal(t, tc.want, res) + } + } +} + +func TestExpand(t *testing.T) { + testCases := []struct { + dst []rune + src []byte + srcCharset Charset + want []rune + }{ + { + dst: []rune("testDst"), + src: []byte("testSrc"), + srcCharset: Charset_utf8mb3{}, + want: []rune("testSrc"), + }, + { + dst: nil, + src: []byte("testSrc"), + srcCharset: Charset_utf8mb3{}, + want: []rune("testSrc"), + }, + { + dst: nil, + src: []byte("testSrc"), + srcCharset: Charset_binary{}, + want: []rune("testSrc"), + }, + { + dst: []rune("testDst"), + src: []byte("testSrc"), + srcCharset: Charset_binary{}, + want: []rune("testDsttestSrc"), + }, + { + dst: []rune("testDst"), + src: []byte{0, 0, 0, 0x41}, + srcCharset: Charset_utf32{}, + want: []rune("testDstA"), + }, + { + dst: nil, + src: []byte{0xFF}, + srcCharset: Charset_latin1{}, + want: []rune("ÿ"), + }, + // multibyte case + { + dst: []rune("testDst"), + src: []byte("😊😂🤒"), + srcCharset: Charset_utf8mb4{}, + want: []rune("😊😂🤒"), + }, + } + + for _, tc := range testCases { + res := Expand(tc.dst, tc.src, tc.srcCharset) + + 
assert.Equal(t, tc.want, res) + } +} + +func TestCollapse(t *testing.T) { + testCases := []struct { + dst []byte + src []rune + dstCharset Charset + want []byte + }{ + { + dst: []byte("testDst"), + src: []rune("testSrc"), + dstCharset: Charset_utf8mb3{}, + want: []byte("testDsttestSrc"), + }, + { + dst: nil, + src: []rune("testSrc"), + dstCharset: Charset_utf8mb3{}, + want: []byte("testSrc"), + }, + { + dst: []byte("testDst"), + src: []rune("testSrc"), + dstCharset: Charset_utf8mb4{}, + want: []byte("testDsttestSrc"), + }, + { + dst: []byte("testDst"), + src: []rune("testSrc"), + dstCharset: Charset_binary{}, + want: []byte("testDsttestSrc"), + }, + { + dst: nil, + src: []rune("testSrc"), + dstCharset: Charset_binary{}, + want: []byte("testSrc"), + }, + { + dst: []byte("dst"), + src: []rune("src"), + dstCharset: Charset_ucs2{}, + want: []byte{100, 115, 116, 0, 115, 0, 114, 0, 99}, + }, + { + dst: nil, + src: []rune("src"), + dstCharset: Charset_ucs2{}, + want: []byte{0, 115, 0, 114, 0, 99}, + }, + // unsupported encoding case + { + dst: nil, + src: []rune{0xffff1}, + dstCharset: Charset_ucs2{}, + want: []byte{0, 63}, + }, + } + + for _, tc := range testCases { + res := Collapse(tc.dst, tc.src, tc.dstCharset) + + assert.Equal(t, tc.want, res) + } +} + +func TestConvertFromUTF8(t *testing.T) { dst := []byte("dst") - res, err = Convert(dst, dstCharset, src, srcCharset) - assert.NoError(t, err) - assert.Equal(t, []byte("dstsrc"), res) + src := []byte("😊😂🤒") - // TODO: Write more tests - res, err = Convert(nil, &testCharset2{}, src, &testCharset1{}) + res, err := ConvertFromUTF8(dst, Charset_utf8mb4{}, src) assert.NoError(t, err) - fmt.Println(res) + assert.Equal(t, []byte("dst😊😂🤒"), res) + + res, err = ConvertFromUTF8(dst, Charset_utf8mb3{}, src) + assert.ErrorContains(t, err, "Cannot convert string") + assert.Equal(t, []byte("dst???"), res) +} + +func TestConvertFromBinary(t *testing.T) { + testCases := []struct { + dst []byte + cs Charset + in 
[]byte + want []byte + err string + }{ + { + dst: []byte("testDst"), + cs: Charset_utf8mb4{}, + in: []byte("testString"), + want: []byte("testDsttestString"), + }, + { + cs: Charset_utf16le{}, + in: []byte("testForOddLen"), + want: append([]byte{0}, []byte("testForOddLen")...), + }, + { + cs: Charset_utf16{}, + in: []byte("testForEvenLen"), + want: []byte("testForEvenLen"), + }, + // multibyte case + { + dst: []byte("testDst"), + cs: Charset_utf8mb4{}, + in: []byte("😊😂🤒"), + want: []byte("testDst😊😂🤒"), + }, + // unsupported encoding case + { + cs: Charset_utf32{}, + in: []byte{0xff}, + err: "Cannot convert string", + }, + } + + for _, tc := range testCases { + got, err := ConvertFromBinary(tc.dst, tc.cs, tc.in) + + if tc.want == nil { + assert.ErrorContains(t, err, tc.err) + assert.Nil(t, got) + } else { + assert.NoError(t, err) + assert.Equal(t, tc.want, got) + } + } } diff --git a/go/mysql/collations/charset/helpers_test.go b/go/mysql/collations/charset/helpers_test.go index 2e4e040e3e0..4f8d367e880 100644 --- a/go/mysql/collations/charset/helpers_test.go +++ b/go/mysql/collations/charset/helpers_test.go @@ -23,49 +23,87 @@ import ( ) func TestSlice(t *testing.T) { - s := Slice(Charset_binary{}, []byte("testString"), 1, 4) - assert.Equal(t, []byte("est"), s) - - s = Slice(&testCharset1{}, []byte("testString"), 2, 5) - assert.Equal(t, []byte("stS"), s) - - s = Slice(&testCharset1{}, []byte("testString"), 2, 20) - assert.Equal(t, []byte("stString"), s) - - // Multibyte tests - s = Slice(Charset_utf8mb4{}, []byte("😊😂🤒"), 1, 3) - assert.Equal(t, []byte("😂🤒"), s) - - s = Slice(Charset_utf8mb4{}, []byte("😊😂🤒"), -2, 4) - assert.Equal(t, []byte("😊😂🤒"), s) + testCases := []struct { + in []byte + cs Charset + from int + to int + want []byte + }{ + { + in: []byte("testString"), + cs: Charset_binary{}, + from: 1, + to: 4, + want: []byte("est"), + }, + { + in: []byte("testString"), + cs: &testCharset1{}, + from: 2, + to: 5, 
+ want: []byte("stS"), + }, + { + in: []byte("testString"), + cs: &testCharset1{}, + from: 2, + to: 20, + want: []byte("stString"), + }, + // Multibyte cases + { + in: []byte("😊😂🤒"), + cs: Charset_utf8mb4{}, + from: 1, + to: 3, + want: []byte("😂🤒"), + }, + { + in: []byte("😊😂🤒"), + cs: Charset_utf8mb4{}, + from: -2, + to: 4, + want: []byte("😊😂🤒"), + }, + } + + for _, tc := range testCases { + s := Slice(tc.cs, tc.in, tc.from, tc.to) + assert.Equal(t, tc.want, s) + } } func TestValidate(t *testing.T) { - // TODO: Add more tests - in := "testString" ok := Validate(Charset_binary{}, []byte(in)) - assert.True(t, ok, "'%s' should be validated from binary charset", in) + assert.True(t, ok, "%q should be valid for binary charset", in) ok = Validate(&testCharset1{}, nil) assert.True(t, ok, "Validate should return true for empty string irrespective of charset") ok = Validate(&testCharset1{}, []byte(in)) - assert.True(t, ok) + assert.True(t, ok, "%q should be valid for testCharset1", in) + + ok = Validate(Charset_utf16le{}, []byte{0x41}) + assert.False(t, ok, "%v should not be valid for utf16le charset", []byte{0x41}) } func TestLength(t *testing.T) { - in := "testString" - l := Length(Charset_binary{}, []byte(in)) - assert.Equal(t, 10, l) - - l = Length(&testCharset1{}, []byte(in)) - assert.Equal(t, 10, l) - - // Multibyte tests - l = Length(Charset_utf8mb4{}, []byte("😊😂🤒")) - assert.Equal(t, 3, l) - - l = Length(Charset_utf8mb4{}, []byte("한국어 시험")) - assert.Equal(t, 6, l) + testCases := []struct { + in []byte + cs Charset + want int + }{ + {[]byte("testString"), Charset_binary{}, 10}, + {[]byte("testString"), &testCharset1{}, 10}, + // Multibyte cases + {[]byte("😊😂🤒"), Charset_utf8mb4{}, 3}, + {[]byte("한국어 시험"), Charset_utf8mb4{}, 6}, + } + + for _, tc := range testCases { + l := Length(tc.cs, tc.in) + assert.Equal(t, tc.want, l) + } }