diff --git a/.gitignore b/.gitignore index 638d06c..43ecbe9 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ ## Build generated build/ DerivedData/ +development/ ## Various settings *.pbxuser diff --git a/DataCompression.podspec b/DataCompression.podspec index f8a8218..4ee1ef4 100644 --- a/DataCompression.podspec +++ b/DataCompression.podspec @@ -1,12 +1,14 @@ Pod::Spec.new do |s| s.name = "DataCompression" - s.version = "2.0.1" - s.summary = "Swift libcompression wrapper as an extension for the Data type (ZLIB, LZFSE, LZMA, LZ4, deflate, RFC-1950, RFC-1951)" - s.authors = "Markus Wanke" + s.version = "3.0.0" + s.summary = "Swift libcompression wrapper as an extension for the Data type (GZIP, ZLIB, LZFSE, LZMA, LZ4, deflate, RFC-1950, RFC-1951, RFC-1952)" + s.authors = { "Markus Wanke" => "mw99@users.noreply.github.com" } s.homepage = "https://github.com/mw99/DataCompression" s.license = { :type => 'Apache 2.0', :file => 'LICENSE' } s.source = { :git => "https://github.com/mw99/DataCompression.git", :tag => s.version } + s.swift_version = '4.1' + s.ios.deployment_target = '9.0' s.osx.deployment_target = '10.11' s.tvos.deployment_target = '9.0' diff --git a/README.md b/README.md index 8269449..749e552 100644 --- a/README.md +++ b/README.md @@ -2,50 +2,108 @@ ### A libcompression wrapper extension for the `Data` type -##### Supported compression algorithms are: +#### Supported compression algorithms are: + * GZIP format (.gz files) [RFC-1952](https://www.ietf.org/rfc/rfc1952.txt) * ZLIB deflate stream [RFC-1950](https://www.ietf.org/rfc/rfc1950.txt) * ZLIB deflate raw [RFC-1951](https://www.ietf.org/rfc/rfc1951.txt) * [LZMA](https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Markov_chain_algorithm) * [LZFSE](https://github.com/lzfse/lzfse) * [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) -##### Requirements - * iOS deployment target >= **9.0** - * macOS deployment target >= **10.11** - * tvOS deployment target >= **9.0** - * watchOS deployment target >= **2.0** +#### Requirements + * iOS deployment target **>= 9.0** + * macOS deployment target **>= 10.11** + * tvOS deployment target **>= 9.0** + * watchOS deployment target **>= 2.0** ## Usage example +### Try all algorithms and compare the compression ratio ```swift - let s = "A string that really needs to loose some weight..." - var res: Data? = s.data(using: .utf8) +let raw: Data! = String(repeating: "There is no place like 127.0.0.1", count: 25).data(using: .utf8) - res = res?.deflate() - res = res?.inflate() +print("raw => \(raw.count) bytes") - res = res?.zip() - res = res?.unzip() +for algo: Data.CompressionAlgorithm in [.zlib, .lzfse, .lz4, .lzma] { + let compressedData: Data! = raw.compress(withAlgorithm: algo) - res = res?.compress(withAlgorithm: .LZFSE) - res = res?.decompress(withAlgorithm: .LZFSE) + let ratio = Double(raw.count) / Double(compressedData.count) + print("\(algo) => \(compressedData.count) bytes, ratio: \(ratio)") + + assert(compressedData.decompress(withAlgorithm: algo)! == raw) +} +``` - res = res?.compress(withAlgorithm: .LZ4) - res = res?.decompress(withAlgorithm: .LZ4) +Will print something like: +``` +raw => 800 bytes +zlib => 40 bytes, ratio: 20.00 +lzfse => 69 bytes, ratio: 11.59 +lz4 => 181 bytes, ratio: 4.42 +lzma => 100 bytes, ratio: 8.00 +``` - res = res?.compress(withAlgorithm: .LZMA) - res = res?.decompress(withAlgorithm: .LZMA) +### Container formats - res = res?.compress(withAlgorithm: .ZLIB) - res = res?.decompress(withAlgorithm: .ZLIB) - assert(res != nil) - let t = String(data: res!, encoding: .utf8) - assert(s == t) +##### The famous zlib deflate algorithm ([RFC-1951](https://www.ietf.org/rfc/rfc1951.txt)) can also be used with the shortcuts `.deflate()` and `.inflate()` +``` +let data: Data! = "https://www.ietf.org/rfc/rfc1951.txt".data(using: .utf8) +let deflated: Data! = data.deflate() +let inflated: Data? = deflated?.inflate() +assert(data == inflated) +``` + +##### Data in gzip format ([RFC-1952](https://www.ietf.org/rfc/rfc1952.txt)) can be handled with `.gzip()` and `.gunzip()` +``` +let data: Data! = "https://www.ietf.org/rfc/rfc1952.txt".data(using: .utf8) +let gzipped: Data! = data.zip() +let gunzipped: Data? = gzipped.unzip() +assert(data == gunzipped) +``` +*Note: Compressed data in gzip format will always be 18 bytes larger than raw deflated data and will append/perform a crc32 checksum based data integrity test .* + +##### Data in zip format ([RFC-1950](https://www.ietf.org/rfc/rfc1950.txt)) can be handled with `.zip()` and `.unzip()` +``` +let data: Data! = "https://www.ietf.org/rfc/rfc1950.txt".data(using: .utf8) +let zipped: Data! = data.zip() +let unzipped: Data? = zipped.unzip() +assert(data == unzipped) +``` +*Note: Compressed data in zip format will always be 6 bytes larger than raw deflated data and will append/perform a adler32 checksum based data integrity test .* + + +### Compress a file on the command line and decompress it in Swift +The easiest way is using the already installed **gzip** command line tool. Assuming you have a file called **file.txt**, after calling +```bash +gzip -9 file.txt +``` +the file should have been compressed to **file.txt.gz**. You can now load and uncompress the contents of your file with: +``` +let compressedData = try? Data(contentsOf: URL(fileURLWithPath: "/path/to/your/file.txt.gz")) + +if let uncompressedData = compressedData?.gunzip() { + print(String(data: uncompressedData, encoding: .utf8) ?? "Can't decode UTF-8") +} ``` +### Checksum extensions +Unrelated to compression but for convenience **Crc32** and **Adler32** methods are also exposed on the `Data` type which may come in handy. +``` +let classicExample = "The quick brown fox jumps over the lazy dog".data(using: .utf8)! +let crc32 = classicExample.crc32() +let adler32 = classicExample.adler32() +print("crc32: \(crc32), adler32: \(adler32)") +``` +Will print: +``` +crc32: 414fa339, adler32: 5bdc0fda +``` + + + ## Install #### Cocoa Pods @@ -77,7 +135,7 @@ import PackageDescription let package = Package( name: "", dependencies: [ - .Package(url: "https://github.com/mw99/DataCompression.git", majorVersion: 2) + .Package(url: "https://github.com/mw99/DataCompression.git", majorVersion: 3) ] ) ``` @@ -93,7 +151,29 @@ $ swift build -Xswiftc -target -Xswiftc x86_64-apple-macosx10.11 #### Or just copy the file into your project -You only need one file located in `Sources/DataCompression.swift`. Drag and drop it into the Xcode project navigator. +You only need one file located at `Sources/DataCompression.swift`. Drag and drop it into the Xcode project navigator. + + +## Change log / Upgrading guide + +##### Version `2.0.X` to `3.0.0` + +- The encoded data in zip format is not copied anymore, which should improve performance. +- Checksum validation is now always performed with libz and way faster. +- The `skipCheckSumValidation:` parameter of `.unzip()` was removed. +- Items of the algorithm enum type are now Swift like lowercase, e.g. `.LZMA` → `.lzma` + + +## License + + +##### Apache License, Version 2.0 + +##### Copyright 2016, Markus Wanke + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at +[http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0) +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. diff --git a/Sources/DataCompression.swift b/Sources/DataCompression.swift index 58ee3cd..f06c9b9 100644 --- a/Sources/DataCompression.swift +++ b/Sources/DataCompression.swift @@ -1,8 +1,8 @@ /// /// DataCompression /// -/// libcompression wrapper as an extension for the `Data` type -/// (ZLIB, LZFSE, LZMA, LZ4, deflate, RFC-1950, RFC-1951) +/// A libcompression wrapper as an extension for the `Data` type +/// (GZIP, ZLIB, LZFSE, LZMA, LZ4, deflate, RFC-1950, RFC-1951, RFC-1952) /// /// Created by Markus Wanke, 2016/12/05 /// @@ -56,16 +56,16 @@ public extension Data /// Please consider the [libcompression documentation](https://developer.apple.com/reference/compression/1665429-data_compression) /// for further details. Short info: - /// ZLIB : Fast with a very solid compression rate. There is a reason it is used everywhere. - /// LZFSE : Apples proprietary compression algorithm. Claims to compress as good as ZLIB but 2 to 3 times faster. - /// LZMA : Horribly slow. Compression as well as decompression. Normally you will regret choosing LZMA. - /// LZ4 : Fast, but depending on the data the compression rate can be really bad. Which is often the case. + /// zlib : Aka deflate. Fast with a good compression rate. Proved itself ofer time and is supported everywhere. + /// lzfse : Apples custom Lempel-Ziv style compression algorithm. Claims to compress as good as zlib but 2 to 3 times faster. + /// lzma : Horribly slow. Compression as well as decompression. Compresses better than zlib though. + /// lz4 : Fast, but compression rate is very bad. Apples lz4 implementation often to not compress at all. public enum CompressionAlgorithm { - case ZLIB - case LZFSE - case LZMA - case LZ4 + case zlib + case lzfse + case lzma + case lz4 } /// Compresses the data using the zlib deflate algorithm. @@ -79,7 +79,7 @@ public extension Data } } - /// Deompresses the data using the zlib deflate algorithm. Self is expected to be a raw deflate + /// Decompresses the data using the zlib deflate algorithm. Self is expected to be a raw deflate /// stream according to [RFC-1951](https://tools.ietf.org/html/rfc1951). /// - returns: uncompressed data public func inflate() -> Data? @@ -90,23 +90,27 @@ public extension Data } } - /// Compresses the data using the zlib deflate algorithm. - /// - returns: zlib deflated data according to [RFC-1950](https://tools.ietf.org/html/rfc1950) + /// Compresses the data using the deflate algorithm and makes it comply to the zlib format. + /// - returns: deflated data in zlib format [RFC-1950](https://tools.ietf.org/html/rfc1950) /// - note: Fixed at compression level 5 (best trade off between speed and time) public func zip() -> Data? { - var res = Data(bytes: [0x78, 0x5e]) + let header = Data(bytes: [0x78, 0x5e]) - guard let deflated = self.deflate() else { return nil } - res.append(deflated) + let deflated = self.withUnsafeBytes { (sourcePtr: UnsafePointer) -> Data? in + let config = (operation: COMPRESSION_STREAM_ENCODE, algorithm: COMPRESSION_ZLIB) + return perform(config, source: sourcePtr, sourceSize: count, preload: header) + } - var adler = self.adler32().bigEndian - res.append(Data(bytes: &adler, count: MemoryLayout.size)) + guard var result = deflated else { return nil } - return res + var adler = self.adler32().checksum.bigEndian + result.append(Data(bytes: &adler, count: MemoryLayout.size)) + + return result } - /// Deompresses the data using the zlib deflate algorithm. Self is expected to be a zlib deflate + /// Decompresses the data using the zlib deflate algorithm. Self is expected to be a zlib deflate /// stream according to [RFC-1950](https://tools.ietf.org/html/rfc1950). /// - returns: uncompressed data public func unzip(skipCheckSumValidation: Bool = true) -> Data? @@ -141,50 +145,319 @@ public extension Data } } - return cksum == inflated.adler32() ? inflated : nil + return cksum == inflated.adler32().checksum ? inflated : nil + } + + /// Compresses the data using the deflate algorithm and makes it comply to the gzip stream format. + /// - returns: deflated data in gzip format [RFC-1952](https://tools.ietf.org/html/rfc1952) + /// - note: Fixed at compression level 5 (best trade off between speed and time) + public func gzip() -> Data? + { + var header = Data(bytes: [0x1f, 0x8b, 0x08, 0x00]) // magic, magic, deflate, noflags + + var unixtime = UInt32(Date().timeIntervalSince1970).littleEndian + header.append(Data(bytes: &unixtime, count: MemoryLayout.size)) + + header.append(contentsOf: [0x00, 0x03]) // normal compression level, unix file type + + let deflated = self.withUnsafeBytes { (sourcePtr: UnsafePointer) -> Data? in + let config = (operation: COMPRESSION_STREAM_ENCODE, algorithm: COMPRESSION_ZLIB) + return perform(config, source: sourcePtr, sourceSize: count, preload: header) + } + + guard var result = deflated else { return nil } + + // append checksum + var crc32: UInt32 = self.crc32().checksum.littleEndian + result.append(Data(bytes: &crc32, count: MemoryLayout.size)) + + // append size of original data + var isize: UInt32 = UInt32(truncatingIfNeeded: count).littleEndian + result.append(Data(bytes: &isize, count: MemoryLayout.size)) + + return result + } + + /// Decompresses the data using the gzip deflate algorithm. Self is expected to be a gzip deflate + /// stream according to [RFC-1952](https://tools.ietf.org/html/rfc1952). + /// - returns: uncompressed data + public func gunzip() -> Data? + { + // 10 byte header + data + 8 byte footer. See https://tools.ietf.org/html/rfc1952#section-2 + let overhead = 10 + 8 + guard count >= overhead else { return nil } + + + typealias GZipHeader = (id1: UInt8, id2: UInt8, cm: UInt8, flg: UInt8, xfl: UInt8, os: UInt8) + let hdr: GZipHeader = withUnsafeBytes { (ptr: UnsafePointer) -> GZipHeader in + // +---+---+---+---+---+---+---+---+---+---+ + // |ID1|ID2|CM |FLG| MTIME |XFL|OS | + // +---+---+---+---+---+---+---+---+---+---+ + return (id1: ptr[0], id2: ptr[1], cm: ptr[2], flg: ptr[3], xfl: ptr[8], os: ptr[9]) + } + + typealias GZipFooter = (crc32: UInt32, isize: UInt32) + let ftr: GZipFooter = withUnsafeBytes { (bptr: UnsafePointer) -> GZipFooter in + // +---+---+---+---+---+---+---+---+ + // | CRC32 | ISIZE | + // +---+---+---+---+---+---+---+---+ + return bptr.advanced(by: count - 8).withMemoryRebound(to: UInt32.self, capacity: 2) { ptr in + return (ptr[0].littleEndian, ptr[1].littleEndian) + } + } + + // Wrong gzip magic or unsupported compression method + guard hdr.id1 == 0x1f && hdr.id2 == 0x8b && hdr.cm == 0x08 else { return nil } + + let has_crc16: Bool = hdr.flg & 0b00010 != 0 + let has_extra: Bool = hdr.flg & 0b00100 != 0 + let has_fname: Bool = hdr.flg & 0b01000 != 0 + let has_cmmnt: Bool = hdr.flg & 0b10000 != 0 + + let cresult: Data? = withUnsafeBytes { (ptr: UnsafePointer) -> Data? in + var pos = 10 ; let limit = count - 8 + + if has_extra { + pos += ptr.advanced(by: pos).withMemoryRebound(to: UInt16.self, capacity: 1) { + return Int($0.pointee.littleEndian) + 2 // +2 for xlen + } + } + if has_fname { + while pos < limit && ptr[pos] != 0x0 { pos += 1 } + pos += 1 // skip null byte as well + } + if has_cmmnt { + while pos < limit && ptr[pos] != 0x0 { pos += 1 } + pos += 1 // skip null byte as well + } + if has_crc16 { + pos += 2 // ignoring header crc16 + } + + guard pos < limit else { return nil } + let config = (operation: COMPRESSION_STREAM_DECODE, algorithm: COMPRESSION_ZLIB) + return perform(config, source: ptr.advanced(by: pos), sourceSize: limit - pos) + } + + guard let inflated = cresult else { return nil } + guard ftr.isize == UInt32(truncatingIfNeeded: inflated.count) else { return nil } + guard ftr.crc32 == inflated.crc32().checksum else { return nil } + return inflated + } + + /// Calculate the Adler32 checksum of the data. + /// - returns: Adler32 checksum type. Can still be further advanced. + public func adler32() -> Adler32 + { + var res = Adler32() + res.advance(withChunk: self) + return res } - /// Rudimentary implementation of the adler32 checksum. + /// Calculate the Crc32 checksum of the data. + /// - returns: Crc32 checksum type. Can still be further advanced. + public func crc32() -> Crc32 + { + var res = Crc32() + res.advance(withChunk: self) + return res + } +} + + + + +/// Struct based type representing a Crc32 checksum. +public struct Crc32: CustomStringConvertible +{ + private static let zLibCrc32: ZLibCrc32FuncPtr? = loadCrc32fromZLib() + + public init() {} + + // C convention function pointer type matching the signature of `libz::crc32` + private typealias ZLibCrc32FuncPtr = @convention(c) ( + _ cks: UInt32, + _ buf: UnsafePointer, + _ len: UInt32 + ) -> UInt32 + + /// Raw checksum. Updated after a every call to `advance(withChunk:)` + public var checksum: UInt32 = 0 + + /// Advance the current checksum with a chunk of data. Designed t be called multiple times. + /// - parameter chunk: data to advance the checksum + public mutating func advance(withChunk chunk: Data) + { + if let fastCrc32 = Crc32.zLibCrc32 { + checksum = chunk.withUnsafeBytes({ (ptr: UnsafePointer) -> UInt32 in + return fastCrc32(checksum, ptr, UInt32(chunk.count)) + }) + } + else { + checksum = slowCrc32(start: checksum, data: chunk) + } + } + + /// Formatted checksum. + public var description: String + { + return String(format: "%08x", checksum) + } + + /// Load `crc32()` from '/usr/lib/libz.dylib' if libz is installed. + /// - returns: A function pointer to crc32() of zlib or nil if zlib can't be found + private static func loadCrc32fromZLib() -> ZLibCrc32FuncPtr? + { + guard let libz = dlopen("/usr/lib/libz.dylib", RTLD_NOW) else { return nil } + guard let fptr = dlsym(libz, "crc32") else { return nil } + return unsafeBitCast(fptr, to: ZLibCrc32FuncPtr.self) + } + + /// Rudimentary fallback implementation of the crc32 checksum. This is only a backup used + /// when zlib can't be found under '/usr/lib/libz.dylib'. + /// - returns: crc32 checksum (4 byte) + private func slowCrc32(start: UInt32, data: Data) -> UInt32 + { + return ~data.reduce(~start) { (crc: UInt32, next: UInt8) -> UInt32 in + let tableOffset = (crc ^ UInt32(next)) & 0xff + return lookUpTable[Int(tableOffset)] ^ crc >> 8 + } + } + + /// Lookup table for faster crc32 calculation. + /// table source: http://web.mit.edu/freebsd/head/sys/libkern/crc32.c + private let lookUpTable: [UInt32] = [ + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d, + ] +} + + + + + +/// Struct based type representing a Adler32 checksum. +public struct Adler32: CustomStringConvertible +{ + private static let zLibAdler32: ZLibAdler32FuncPtr? = loadAdler32fromZLib() + + public init() {} + + // C convention function pointer type matching the signature of `libz::adler32` + private typealias ZLibAdler32FuncPtr = @convention(c) ( + _ cks: UInt32, + _ buf: UnsafePointer, + _ len: UInt32 + ) -> UInt32 + + /// Raw checksum. Updated after a every call to `advance(withChunk:)` + public var checksum: UInt32 = 1 + + /// Advance the current checksum with a chunk of data. Designed t be called multiple times. + /// - parameter chunk: data to advance the checksum + public mutating func advance(withChunk chunk: Data) + { + if let fastAdler32 = Adler32.zLibAdler32 { + checksum = chunk.withUnsafeBytes({ (ptr: UnsafePointer) -> UInt32 in + return fastAdler32(checksum, ptr, UInt32(chunk.count)) + }) + } + else { + checksum = slowAdler32(start: checksum, data: chunk) + } + } + + /// Formatted checksum. + public var description: String + { + return String(format: "%08x", checksum) + } + + /// Load `adler32()` from '/usr/lib/libz.dylib' if libz is installed. + /// - returns: A function pointer to adler32() of zlib or nil if zlib can't be found + private static func loadAdler32fromZLib() -> ZLibAdler32FuncPtr? + { + guard let libz = dlopen("/usr/lib/libz.dylib", RTLD_NOW) else { return nil } + guard let fptr = dlsym(libz, "adler32") else { return nil } + return unsafeBitCast(fptr, to: ZLibAdler32FuncPtr.self) + } + + /// Rudimentary fallback implementation of the adler32 checksum. This is only a backup used + /// when zlib can't be found under '/usr/lib/libz.dylib'. /// - returns: adler32 checksum (4 byte) - public func adler32() -> UInt32 + private func slowAdler32(start: UInt32, data: Data) -> UInt32 { - var s1: UInt32 = 1 - var s2: UInt32 = 0 + var s1: UInt32 = start & 0xffff + var s2: UInt32 = (start >> 16) & 0xffff let prime: UInt32 = 65521 - for byte in self { + for byte in data { s1 += UInt32(byte) if s1 >= prime { s1 = s1 % prime } s2 += s1 if s2 >= prime { s2 = s2 % prime } } - return (s2 << 16) | s1 } } + + + + fileprivate extension Data.CompressionAlgorithm { var lowLevelType: compression_algorithm { switch self { - case .ZLIB : return COMPRESSION_ZLIB - case .LZFSE : return COMPRESSION_LZFSE - case .LZ4 : return COMPRESSION_LZ4 - case .LZMA : return COMPRESSION_LZMA + case .zlib : return COMPRESSION_ZLIB + case .lzfse : return COMPRESSION_LZFSE + case .lz4 : return COMPRESSION_LZ4 + case .lzma : return COMPRESSION_LZMA } } } + fileprivate typealias Config = (operation: compression_stream_operation, algorithm: compression_algorithm) -fileprivate func perform(_ config: Config, source: UnsafePointer, sourceSize: Int) -> Data? +fileprivate func perform(_ config: Config, source: UnsafePointer, sourceSize: Int, preload: Data = Data()) -> Data? { guard config.operation == COMPRESSION_STREAM_ENCODE || sourceSize > 0 else { return nil } let streamBase = UnsafeMutablePointer.allocate(capacity: 1) - defer { streamBase.deallocate(capacity: 1) } + defer { streamBase.deallocate() } var stream = streamBase.pointee let status = compression_stream_init(&stream, config.operation, config.algorithm) @@ -193,14 +466,14 @@ fileprivate func perform(_ config: Config, source: UnsafePointer, sourceS let bufferSize = Swift.max( Swift.min(sourceSize, 64 * 1024), 64) let buffer = UnsafeMutablePointer.allocate(capacity: bufferSize) - defer { buffer.deallocate(capacity: bufferSize) } + defer { buffer.deallocate() } stream.dst_ptr = buffer stream.dst_size = bufferSize stream.src_ptr = source stream.src_size = sourceSize - var res = Data() + var res = preload let flags: Int32 = Int32(COMPRESSION_STREAM_FINALIZE.rawValue) while true { diff --git a/Tests/DataCompressionTests/CompressionTest.swift b/Tests/DataCompressionTests/CompressionTest.swift index d088eb1..e0c74d8 100644 --- a/Tests/DataCompressionTests/CompressionTest.swift +++ b/Tests/DataCompressionTests/CompressionTest.swift @@ -49,22 +49,34 @@ class CompressionTest: XCTestCase var tmp: Data? = d tmp = tmp?.deflate() + XCTAssertNotNil(tmp, "deflate() failed") tmp = tmp?.inflate() + XCTAssertNotNil(tmp, "inflate() failed") tmp = tmp?.zip() + XCTAssertNotNil(tmp, "zip() failed") tmp = tmp?.unzip() + XCTAssertNotNil(tmp, "unzip() failed") - tmp = tmp?.compress(withAlgorithm: .LZFSE) - tmp = tmp?.decompress(withAlgorithm: .LZFSE) + tmp = tmp?.gzip() + XCTAssertNotNil(tmp, "gzip() failed") + tmp = tmp?.gunzip() + XCTAssertNotNil(tmp, "gunzip() failed") - tmp = tmp?.compress(withAlgorithm: .LZ4) - tmp = tmp?.decompress(withAlgorithm: .LZ4) + tmp = tmp?.compress(withAlgorithm: .lzfse) + XCTAssertNotNil(tmp, "compress .lzfse() failed") + tmp = tmp?.decompress(withAlgorithm: .lzfse) + XCTAssertNotNil(tmp, "decompress .lzfse() failed") - tmp = tmp?.compress(withAlgorithm: .LZMA) - tmp = tmp?.decompress(withAlgorithm: .LZMA) + tmp = tmp?.compress(withAlgorithm: .lz4) + XCTAssertNotNil(tmp, "compress .lz4() failed") + tmp = tmp?.decompress(withAlgorithm: .lz4) + XCTAssertNotNil(tmp, "decompress .lz4() failed") - tmp = tmp?.compress(withAlgorithm: .ZLIB) - tmp = tmp?.decompress(withAlgorithm: .ZLIB) + tmp = tmp?.compress(withAlgorithm: .lzma) + XCTAssertNotNil(tmp, "compress .lzma() failed") + tmp = tmp?.decompress(withAlgorithm: .lzma) + XCTAssertNotNil(tmp, "decompress .lzma() failed") return tmp } @@ -78,4 +90,23 @@ class CompressionTest: XCTestCase return String(data: res, encoding: .utf8) } + func testAdler32() + { + var adler = Adler32() + XCTAssertEqual(adler.checksum, 0x1) + adler.advance(withChunk: "The quick brown ".data(using: .ascii)!) + adler.advance(withChunk: "fox jumps over ".data(using: .ascii)!) + adler.advance(withChunk: "the lazy dog.".data(using: .ascii)!) + XCTAssertEqual(adler.checksum, 0x6be41008) + } + + func testCrc32() + { + var crc = Crc32() + XCTAssertEqual(crc.checksum, 0x0) + crc.advance(withChunk: "The quick brown ".data(using: .ascii)!) + crc.advance(withChunk: "fox jumps over ".data(using: .ascii)!) + crc.advance(withChunk: "the lazy dog.".data(using: .ascii)!) + XCTAssertEqual(crc.checksum, 0x519025e9) + } } diff --git a/Tests/DataCompressionTests/XCTestManifests.swift b/Tests/DataCompressionTests/XCTestManifests.swift index b76f8fd..5c53ac9 100644 --- a/Tests/DataCompressionTests/XCTestManifests.swift +++ b/Tests/DataCompressionTests/XCTestManifests.swift @@ -7,5 +7,7 @@ extension CompressionTest ("testAsciiNumbers", testAsciiNumbers), ("testRandomDataChunks", testRandomDataChunks), ("testRandomDataBlob", testRandomDataBlob), + ("testAdler32", testAdler32), + ("testCrc32", testCrc32), ] }