diff --git a/go.mod b/go.mod index c605e152602..6b6d6ed49a8 100644 --- a/go.mod +++ b/go.mod @@ -19,7 +19,7 @@ require ( github.com/dhowden/tag v0.0.0-20230630033851-978a0926ee25 github.com/dutchcoders/go-clamd v0.0.0-20170520113014-b970184f4d9e github.com/egirna/icap-client v0.1.1 - github.com/gabriel-vasile/mimetype v1.4.3 + github.com/gabriel-vasile/mimetype v1.4.4 github.com/ggwhite/go-masker v1.1.0 github.com/go-chi/chi/v5 v5.0.12 github.com/go-chi/cors v1.2.1 diff --git a/go.sum b/go.sum index 347a8aa4b01..2f2232593ff 100644 --- a/go.sum +++ b/go.sum @@ -1124,8 +1124,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= -github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= -github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk= +github.com/gabriel-vasile/mimetype v1.4.4 h1:QjV6pZ7/XZ7ryI2KuyeEDE8wnh7fHP9YnQy+R0LnH8I= +github.com/gabriel-vasile/mimetype v1.4.4/go.mod h1:JwLei5XPtWdGiMFB5Pjle1oEeoSeEuJfJE+TtfvdB/s= github.com/gdexlab/go-render v1.0.1 h1:rxqB3vo5s4n1kF0ySmoNeSPRYkEsyHgln4jFIQY7v0U= github.com/gdexlab/go-render v1.0.1/go.mod h1:wRi5nW2qfjiGj4mPukH4UV0IknS1cHD4VgFTmJX5JzM= github.com/getkin/kin-openapi v0.13.0/go.mod h1:WGRs2ZMM1Q8LR1QBEwUxC6RJEfaBcD0s+pcEVXFuAjw= diff --git a/vendor/github.com/gabriel-vasile/mimetype/LICENSE b/vendor/github.com/gabriel-vasile/mimetype/LICENSE index 6aac070c78f..13b61daa594 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/LICENSE +++ b/vendor/github.com/gabriel-vasile/mimetype/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2018-2020 Gabriel Vasile +Copyright (c) 2018 Gabriel Vasile Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/vendor/github.com/gabriel-vasile/mimetype/README.md b/vendor/github.com/gabriel-vasile/mimetype/README.md index 231b29190fd..fd6c533e4ac 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/README.md +++ b/vendor/github.com/gabriel-vasile/mimetype/README.md @@ -16,9 +16,6 @@ Go report card - - Code coverage - License diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/archive.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/archive.go index fec11f080a6..554ac4d4a61 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/archive.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/archive.go @@ -3,6 +3,7 @@ package magic import ( "bytes" "encoding/binary" + "strconv" ) var ( @@ -74,51 +75,87 @@ func CRX(raw []byte, limit uint32) bool { } // Tar matches a (t)ape (ar)chive file. +// Tar files are divided into 512 bytes records. First record contains a 257 +// bytes header padded with NUL. func Tar(raw []byte, _ uint32) bool { - // The "magic" header field for files in in UStar (POSIX IEEE P1003.1) archives - // has the prefix "ustar". The values of the remaining bytes in this field vary - // by archiver implementation. - if len(raw) >= 512 && bytes.HasPrefix(raw[257:], []byte{0x75, 0x73, 0x74, 0x61, 0x72}) { - return true + const sizeRecord = 512 + + // The structure of a tar header: + // type TarHeader struct { + // Name [100]byte + // Mode [8]byte + // Uid [8]byte + // Gid [8]byte + // Size [12]byte + // Mtime [12]byte + // Chksum [8]byte + // Linkflag byte + // Linkname [100]byte + // Magic [8]byte + // Uname [32]byte + // Gname [32]byte + // Devmajor [8]byte + // Devminor [8]byte + // } + + if len(raw) < sizeRecord { + return false } + raw = raw[:sizeRecord] - if len(raw) < 256 { + // First 100 bytes of the header represent the file name. + // Check if file looks like Gentoo GLEP binary package. + if bytes.Contains(raw[:100], []byte("/gpkg-1\x00")) { return false } - // The older v7 format has no "magic" field, and therefore must be identified - // with heuristics based on legal ranges of values for other header fields: - // https://www.nationalarchives.gov.uk/PRONOM/Format/proFormatSearch.aspx?status=detailReport&id=385&strPageToDisplay=signatures - rules := []struct { - min, max uint8 - i int - }{ - {0x21, 0xEF, 0}, - {0x30, 0x37, 105}, - {0x20, 0x37, 106}, - {0x00, 0x00, 107}, - {0x30, 0x37, 113}, - {0x20, 0x37, 114}, - {0x00, 0x00, 115}, - {0x30, 0x37, 121}, - {0x20, 0x37, 122}, - {0x00, 0x00, 123}, - {0x30, 0x37, 134}, - {0x30, 0x37, 146}, - {0x30, 0x37, 153}, - {0x00, 0x37, 154}, + // Get the checksum recorded into the file. + recsum, err := tarParseOctal(raw[148:156]) + if err != nil { + return false } - for _, r := range rules { - if raw[r.i] < r.min || raw[r.i] > r.max { - return false - } + sum1, sum2 := tarChksum(raw) + return recsum == sum1 || recsum == sum2 +} + +// tarParseOctal converts octal string to decimal int. +func tarParseOctal(b []byte) (int64, error) { + // Because unused fields are filled with NULs, we need to skip leading NULs. + // Fields may also be padded with spaces or NULs. + // So we remove leading and trailing NULs and spaces to be sure. + b = bytes.Trim(b, " \x00") + + if len(b) == 0 { + return 0, nil + } + x, err := strconv.ParseUint(tarParseString(b), 8, 64) + if err != nil { + return 0, err } + return int64(x), nil +} - for _, i := range []uint8{135, 147, 155} { - if raw[i] != 0x00 && raw[i] != 0x20 { - return false - } +// tarParseString converts a NUL ended bytes slice to a string. +func tarParseString(b []byte) string { + if i := bytes.IndexByte(b, 0); i >= 0 { + return string(b[:i]) } + return string(b) +} - return true +// tarChksum computes the checksum for the header block b. +// The actual checksum is written to same b block after it has been calculated. +// Before calculation the bytes from b reserved for checksum have placeholder +// value of ASCII space 0x20. +// POSIX specifies a sum of the unsigned byte values, but the Sun tar used +// signed byte values. We compute and return both. +func tarChksum(b []byte) (unsigned, signed int64) { + for i, c := range b { + if 148 <= i && i < 156 { + c = ' ' // Treat the checksum field itself as all spaces. + } + unsigned += int64(c) + signed += int64(int8(c)) + } + return unsigned, signed } diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/magic.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/magic.go index 34b84f401bd..3ce1de113ba 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/magic.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/magic.go @@ -153,8 +153,11 @@ func ftyp(sigs ...[]byte) Detector { if len(raw) < 12 { return false } + if !bytes.Equal(raw[4:8], []byte("ftyp")) { + return false + } for _, s := range sigs { - if bytes.Equal(raw[4:12], append([]byte("ftyp"), s...)) { + if bytes.Equal(raw[8:12], s) { return true } } diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go index e2a03caf50a..9f1a637ba1c 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text.go @@ -1,7 +1,6 @@ package magic import ( - "bufio" "bytes" "strings" "time" @@ -234,9 +233,10 @@ func GeoJSON(raw []byte, limit uint32) bool { // types. func NdJSON(raw []byte, limit uint32) bool { lCount, hasObjOrArr := 0, false - sc := bufio.NewScanner(dropLastLine(raw, limit)) - for sc.Scan() { - l := sc.Bytes() + raw = dropLastLine(raw, limit) + var l []byte + for len(raw) != 0 { + l, raw = scanLine(raw) // Empty lines are allowed in NDJSON. if l = trimRWS(trimLWS(l)); len(l) == 0 { continue @@ -301,20 +301,15 @@ func Svg(raw []byte, limit uint32) bool { } // Srt matches a SubRip file. -func Srt(in []byte, _ uint32) bool { - s := bufio.NewScanner(bytes.NewReader(in)) - if !s.Scan() { - return false - } - // First line must be 1. - if s.Text() != "1" { - return false - } +func Srt(raw []byte, _ uint32) bool { + line, raw := scanLine(raw) - if !s.Scan() { + // First line must be 1. + if string(line) != "1" { return false } - secondLine := s.Text() + line, raw = scanLine(raw) + secondLine := string(line) // Timestamp format (e.g: 00:02:16,612 --> 00:02:19,376) limits secondLine // length to exactly 29 characters. if len(secondLine) != 29 { @@ -325,14 +320,12 @@ func Srt(in []byte, _ uint32) bool { if strings.Contains(secondLine, ".") { return false } - // For Go <1.17, comma is not recognised as a decimal separator by `time.Parse`. - secondLine = strings.ReplaceAll(secondLine, ",", ".") // Second line must be a time range. ts := strings.Split(secondLine, " --> ") if len(ts) != 2 { return false } - const layout = "15:04:05.000" + const layout = "15:04:05,000" t0, err := time.Parse(layout, ts[0]) if err != nil { return false @@ -345,8 +338,9 @@ func Srt(in []byte, _ uint32) bool { return false } + line, _ = scanLine(raw) // A third line must exist and not be empty. This is the actual subtitle text. - return s.Scan() && len(s.Bytes()) != 0 + return len(line) != 0 } // Vtt matches a Web Video Text Tracks (WebVTT) file. See @@ -373,3 +367,15 @@ func Vtt(raw []byte, limit uint32) bool { return bytes.Equal(raw, []byte{0xEF, 0xBB, 0xBF, 0x57, 0x45, 0x42, 0x56, 0x54, 0x54}) || // UTF-8 BOM and "WEBVTT" bytes.Equal(raw, []byte{0x57, 0x45, 0x42, 0x56, 0x54, 0x54}) // "WEBVTT" } + +// dropCR drops a terminal \r from the data. +func dropCR(data []byte) []byte { + if len(data) > 0 && data[len(data)-1] == '\r' { + return data[0 : len(data)-1] + } + return data +} +func scanLine(b []byte) (line, remainder []byte) { + line, remainder, _ = bytes.Cut(b, []byte("\n")) + return dropCR(line), remainder +} diff --git a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text_csv.go b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text_csv.go index 84ed6492840..af2564381b5 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text_csv.go +++ b/vendor/github.com/gabriel-vasile/mimetype/internal/magic/text_csv.go @@ -18,7 +18,7 @@ func Tsv(raw []byte, limit uint32) bool { } func sv(in []byte, comma rune, limit uint32) bool { - r := csv.NewReader(dropLastLine(in, limit)) + r := csv.NewReader(bytes.NewReader(dropLastLine(in, limit))) r.Comma = comma r.ReuseRecord = true r.LazyQuotes = true @@ -44,20 +44,14 @@ func sv(in []byte, comma rune, limit uint32) bool { // mimetype limits itself to ReadLimit bytes when performing a detection. // This means, for file formats like CSV for NDJSON, the last line of the input // can be an incomplete line. -func dropLastLine(b []byte, cutAt uint32) io.Reader { - if cutAt == 0 { - return bytes.NewReader(b) +func dropLastLine(b []byte, readLimit uint32) []byte { + if readLimit == 0 || uint32(len(b)) < readLimit { + return b } - if uint32(len(b)) >= cutAt { - for i := cutAt - 1; i > 0; i-- { - if b[i] == '\n' { - return bytes.NewReader(b[:i]) - } + for i := len(b) - 1; i > 0; i-- { + if b[i] == '\n' { + return b[:i] } - - // No newline was found between the 0 index and cutAt. - return bytes.NewReader(b[:cutAt]) } - - return bytes.NewReader(b) + return b } diff --git a/vendor/github.com/gabriel-vasile/mimetype/mimetype.go b/vendor/github.com/gabriel-vasile/mimetype/mimetype.go index 1b5909b751b..d8d512b8062 100644 --- a/vendor/github.com/gabriel-vasile/mimetype/mimetype.go +++ b/vendor/github.com/gabriel-vasile/mimetype/mimetype.go @@ -7,14 +7,15 @@ package mimetype import ( "io" - "io/ioutil" "mime" "os" "sync/atomic" ) +var defaultLimit uint32 = 3072 + // readLimit is the maximum number of bytes from the input used when detecting. -var readLimit uint32 = 3072 +var readLimit uint32 = defaultLimit // Detect returns the MIME type found from the provided byte slice. // @@ -48,7 +49,7 @@ func DetectReader(r io.Reader) (*MIME, error) { // Using atomic because readLimit can be written at the same time in other goroutine. l := atomic.LoadUint32(&readLimit) if l == 0 { - in, err = ioutil.ReadAll(r) + in, err = io.ReadAll(r) if err != nil { return errMIME, err } @@ -103,6 +104,7 @@ func EqualsAny(s string, mimes ...string) bool { // SetLimit sets the maximum number of bytes read from input when detecting the MIME type. // Increasing the limit provides better detection for file formats which store // their magical numbers towards the end of the file: docx, pptx, xlsx, etc. +// During detection data is read in a single block of size limit, i.e. it is not buffered. // A limit of 0 means the whole input file will be used. func SetLimit(limit uint32) { // Using atomic because readLimit can be read at the same time in other goroutine. diff --git a/vendor/modules.txt b/vendor/modules.txt index e71769596a6..f11d077c5cf 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -799,7 +799,7 @@ github.com/felixge/httpsnoop # github.com/fsnotify/fsnotify v1.7.0 ## explicit; go 1.17 github.com/fsnotify/fsnotify -# github.com/gabriel-vasile/mimetype v1.4.3 +# github.com/gabriel-vasile/mimetype v1.4.4 ## explicit; go 1.20 github.com/gabriel-vasile/mimetype github.com/gabriel-vasile/mimetype/internal/charset