Skip to content

Commit

Permalink
Add a charset converter to convert non-UTF-8 characters
Browse files Browse the repository at this point in the history
Signed-off-by: Anisur Rahman <[email protected]>
  • Loading branch information
anisurrahman75 authored and tamalsaha committed May 24, 2024
1 parent f3eddb5 commit 4b07f1d
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 17 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ require (
github.com/prometheus/client_golang v1.13.0
github.com/prometheus/client_model v0.2.0
golang.org/x/mod v0.9.0
golang.org/x/net v0.17.0
golang.org/x/sys v0.13.0
gopkg.in/yaml.v3 v3.0.1
k8s.io/apimachinery v0.25.3
Expand Down Expand Up @@ -187,7 +188,6 @@ require (
github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82 // indirect
go.opencensus.io v0.24.0 // indirect
go.uber.org/atomic v1.9.0 // indirect
golang.org/x/net v0.17.0 // indirect
golang.org/x/oauth2 v0.13.0 // indirect
golang.org/x/term v0.13.0 // indirect
golang.org/x/text v0.13.0 // indirect
Expand Down
20 changes: 4 additions & 16 deletions internal/databases/sqlserver/blob/xml.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@ package blob

import (
"bytes"
"encoding/binary"
"encoding/xml"
"fmt"
"golang.org/x/net/html/charset"
"io"

"strings"
"unicode/utf16"
"unicode/utf8"
)

const (
Expand Down Expand Up @@ -121,9 +119,11 @@ func (bl *XBlockListIn) MarshalXML(e *xml.Encoder, start xml.StartElement) error

func ParseBlocklistXML(data []byte) (*XBlockListIn, error) {
bl := &XBlockListIn{}
data = utf16utf8(data, binary.LittleEndian)
d := xml.NewDecoder(bytes.NewBuffer(data))
d.CharsetReader = func(s string, r io.Reader) (io.Reader, error) {
if s == "utf-16" {
return charset.NewReader(r, "charset=utf-16")
}
return r, nil
}
err := d.Decode(bl)
Expand All @@ -133,18 +133,6 @@ func ParseBlocklistXML(data []byte) (*XBlockListIn, error) {
return bl, nil
}

// utf16utf8 re-encodes b as UTF-8, reading b as a sequence of 16-bit UTF-16
// code units stored in byte order o.
//
// Each complete pair of bytes yields one code unit. When len(b) is odd, the
// dangling final byte cannot form a code unit, so utf8.RuneError (U+FFFD) is
// used as the last code unit instead. Surrogate pairs are combined by
// utf16.Decode.
func utf16utf8(b []byte, o binary.ByteOrder) []byte {
	pairs := len(b) / 2
	hasDangling := len(b)%2 != 0

	// Reserve one extra slot for the replacement unit of a dangling byte.
	units := make([]uint16, 0, pairs+1)
	for k := 0; k < pairs; k++ {
		units = append(units, o.Uint16(b[2*k:]))
	}
	if hasDangling {
		units = append(units, utf8.RuneError)
	}

	runes := utf16.Decode(units)
	return []byte(string(runes))
}

type XBlockListOut struct {
XMLName xml.Name `xml:"BlockList"`
CommittedBlocks struct {
Expand Down

0 comments on commit 4b07f1d

Please sign in to comment.