diff --git a/Cargo.toml b/Cargo.toml index 300dda3..0b61e6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lzma-rs" -description = "A codec for LZMA, LZMA2 and XZ written in pure Rust" +description = "A codec for LZMA, LZMA2, XZ, and lzip written in pure Rust" version = "0.3.0" license = "MIT" authors = ["Guillaume Endignoux "] diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 6cae9de..3de6567 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -34,6 +34,10 @@ path = "fuzz_targets/roundtrip_lzma2.rs" name = "roundtrip_xz" path = "fuzz_targets/roundtrip_xz.rs" +[[bin]] +name = "roundtrip_lzip" +path = "fuzz_targets/roundtrip_lzip.rs" + [[bin]] name = "decompress_lzma" path = "fuzz_targets/decompress_lzma.rs" diff --git a/fuzz/fuzz_targets/roundtrip_lzip.rs b/fuzz/fuzz_targets/roundtrip_lzip.rs new file mode 100644 index 0000000..57517b5 --- /dev/null +++ b/fuzz/fuzz_targets/roundtrip_lzip.rs @@ -0,0 +1,20 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; + +use lzma_rs::error::Result; + +fn round_trip_lzip(x: &[u8]) -> Result> { + let mut compressed: Vec = Vec::new(); + lzma_rs::lzip_compress(&mut std::io::BufReader::new(x), &mut compressed)?; + let mut bf = std::io::BufReader::new(compressed.as_slice()); + + let mut decomp: Vec = Vec::new(); + lzma_rs::lzip_decompress(&mut bf, &mut decomp)?; + Ok(decomp) +} + +fuzz_target!(|data: &[u8]| { + let decomp = round_trip_lzip(data).expect("Can't decompress what we just compressed"); + assert_eq!(decomp, data); +}); diff --git a/src/decode/lzip.rs b/src/decode/lzip.rs new file mode 100644 index 0000000..382616b --- /dev/null +++ b/src/decode/lzip.rs @@ -0,0 +1,64 @@ +//! Decoder for the `.lz` file format. + +use crate::decode::lzma::{LzmaDecoder, LzmaParams, LzmaProperties}; +use crate::error; +use crate::lzip::crc::CRC32; +use crate::lzip::header; +use byteorder::{ByteOrder, LittleEndian}; +use std::io; + +pub fn decode_stream(input: &mut R, output: &mut W) -> error::Result<()> +where + R: io::BufRead, + W: io::Write, +{ + let mut header_buf = [0; 6]; + input.read_exact(&mut header_buf)?; + let header = header::Header::parse(header_buf)?; + let mut buf = Vec::new(); + input.read_to_end(&mut buf)?; + let footer = buf.split_off(buf.len() - 20); + let unpacked_size = LittleEndian::read_u64(&footer[4..12]); + // See . + let properties = LzmaProperties { + lc: 3, + lp: 0, + pb: 2, + }; + let params = LzmaParams::new(properties, header.dict_size, Some(unpacked_size)); + let mut uncompressed_data = Vec::new(); + LzmaDecoder::new(params, None)?.decompress(&mut buf.as_slice(), &mut uncompressed_data)?; + + let crc32 = CRC32.checksum(&uncompressed_data); + let expected_crc32 = LittleEndian::read_u32(&footer[..4]); + if crc32 != expected_crc32 { + return Err(error::Error::LzipError(format!( + "Invalid uncompressed data CRC32: expected 0x{:08x} but got 0x{:08x}", + expected_crc32, crc32 + ))); + } + + if uncompressed_data.len() as u64 != unpacked_size { + return Err(error::Error::LzipError(format!( + "Invalid uncompressed data size: expected {} but got {}", + unpacked_size, + uncompressed_data.len() + ))); + } + + let member_size = (header_buf.len() + buf.len() + footer.len()) as u64; + let expected_member_size = LittleEndian::read_u64(&footer[12..]); + if member_size > (1 << 51) { + return Err(error::Error::LzipError(String::from( + "member size too large, must be less than 2 PiB", + ))); + } + if member_size != expected_member_size { + return Err(error::Error::LzipError(format!( + "Invalid member size: expected {} but got {}", + expected_member_size, member_size + ))); + } + output.write_all(&uncompressed_data)?; + Ok(()) +} diff --git a/src/decode/lzma.rs b/src/decode/lzma.rs index bbb98e1..d30d38c 100644 --- a/src/decode/lzma.rs +++ b/src/decode/lzma.rs @@ -79,7 +79,6 @@ pub struct LzmaParams { impl LzmaParams { /// Create an new instance of LZMA parameters. - #[cfg(feature = "raw_decoder")] pub fn new( properties: LzmaProperties, dict_size: u32, diff --git a/src/decode/mod.rs b/src/decode/mod.rs index 2a7b0b8..30e4d00 100644 --- a/src/decode/mod.rs +++ b/src/decode/mod.rs @@ -1,6 +1,7 @@ //! Decoding logic. pub mod lzbuffer; +pub mod lzip; pub mod lzma; pub mod lzma2; pub mod options; diff --git a/src/decode/stream.rs b/src/decode/stream.rs index a570f94..5968488 100644 --- a/src/decode/stream.rs +++ b/src/decode/stream.rs @@ -299,7 +299,7 @@ where Err(e) => { return Err(match e { Error::IoError(e) | Error::HeaderTooShort(e) => e, - Error::LzmaError(e) | Error::XzError(e) => { + Error::LzmaError(e) | Error::XzError(e) | Error::LzipError(e) => { io::Error::new(io::ErrorKind::Other, e) } }); diff --git a/src/encode/lzip.rs b/src/encode/lzip.rs new file mode 100644 index 0000000..42c4d08 --- /dev/null +++ b/src/encode/lzip.rs @@ -0,0 +1,60 @@ +use crate::lzip::crc::CRC32; +use crate::lzip::header; +use crate::lzma_compress; +use byteorder::{LittleEndian, WriteBytesExt}; +use std::io; + +pub fn encode_stream(input: &mut R, output: &mut W) -> io::Result<()> +where + R: io::BufRead, + W: io::Write, +{ + // Header + write_header(output)?; + + let mut buf = Vec::new(); + input.read_to_end(&mut buf)?; + // Raw LZMA stream + let compressed_data_size = write_stream(&buf, output)?; + + // Footer + write_footer(&buf, output, compressed_data_size) +} + +fn write_header(output: &mut W) -> io::Result<()> +where + W: io::Write, +{ + output.write_all(header::LZIP_MAGIC)?; + output.write_u8(header::LZIP_VERSION_NUMBER)?; + // Pre-computed coded dictionary size which represents 0x00800000 (8388608 in + // decimal). TODO: It is recommended that this be fixed when it becomes + // possible to specify the dictionary size when compressing. + let dict_size = 0x17; + output.write_u8(dict_size) +} + +fn write_footer(input: &[u8], output: &mut W, compressed_data_size: u64) -> io::Result<()> +where + W: io::Write, +{ + let digest = CRC32.checksum(input); + output.write_u32::(digest)?; + output.write_u64::(input.len() as u64)?; + // header_size + compressed_data_size + footer_size + output.write_u64::(6 + compressed_data_size + 20) +} + +fn write_stream(mut input: &[u8], output: &mut W) -> io::Result +where + W: io::Write, +{ + let mut buf = Vec::new(); + lzma_compress(&mut input, &mut buf)?; + // Drop the LZMA header. + // TODO: It is recommended that this be fixed when it becomes possible to + // generate the LZMA stream without the header. + buf = buf.split_off(13); + output.write_all(&buf)?; + Ok(buf.len() as u64) +} diff --git a/src/encode/mod.rs b/src/encode/mod.rs index 98a0e84..1f4a8b4 100644 --- a/src/encode/mod.rs +++ b/src/encode/mod.rs @@ -1,6 +1,7 @@ //! Encoding logic. pub mod dumbencoder; +pub mod lzip; pub mod lzma2; pub mod options; mod rangecoder; diff --git a/src/error.rs b/src/error.rs index 2ee0fbe..70fb03c 100644 --- a/src/error.rs +++ b/src/error.rs @@ -14,6 +14,8 @@ pub enum Error { LzmaError(String), /// XZ error. XzError(String), + /// lzip error. + LzipError(String), } /// Library result alias. @@ -32,6 +34,7 @@ impl Display for Error { Error::HeaderTooShort(e) => write!(fmt, "header too short: {}", e), Error::LzmaError(e) => write!(fmt, "lzma error: {}", e), Error::XzError(e) => write!(fmt, "xz error: {}", e), + Error::LzipError(e) => write!(fmt, "lzip error: {}", e), } } } @@ -40,7 +43,7 @@ impl std::error::Error for Error { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { Error::IoError(e) | Error::HeaderTooShort(e) => Some(e), - Error::LzmaError(_) | Error::XzError(_) => None, + Error::LzmaError(_) | Error::XzError(_) | Error::LzipError(_) => None, } } } @@ -67,5 +70,9 @@ mod test { Error::XzError("this is an error".to_string()).to_string(), "xz error: this is an error" ); + assert_eq!( + Error::LzipError("this is an error".to_string()).to_string(), + "lzip error: this is an error" + ); } } diff --git a/src/lib.rs b/src/lib.rs index deb849e..31a2f48 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,4 @@ -//! Pure-Rust codecs for LZMA, LZMA2, and XZ. +//! Pure-Rust codecs for LZMA, LZMA2, XZ, and lzip. #![cfg_attr(docsrs, feature(doc_cfg, doc_cfg_hide))] #![deny(missing_docs)] #![deny(missing_debug_implementations)] @@ -12,6 +12,7 @@ mod encode; pub mod error; +mod lzip; mod util; mod xz; @@ -108,3 +109,21 @@ pub fn xz_decompress( pub fn xz_compress(input: &mut R, output: &mut W) -> io::Result<()> { encode::xz::encode_stream(input, output) } + +/// Decompress lzip data with default +/// [`Options`](decompress/struct.Options.html). +pub fn lzip_decompress( + input: &mut R, + output: &mut W, +) -> error::Result<()> { + decode::lzip::decode_stream(input, output) +} + +/// Compress data with lzip and default +/// [`Options`](compress/struct.Options.html). +pub fn lzip_compress( + input: &mut R, + output: &mut W, +) -> io::Result<()> { + encode::lzip::encode_stream(input, output) +} diff --git a/src/lzip.rs b/src/lzip.rs new file mode 100644 index 0000000..43b94c8 --- /dev/null +++ b/src/lzip.rs @@ -0,0 +1,8 @@ +//! Logic for handling `.lz` file format. +//! +//! Format specifications are at or [draft-diaz-lzip-09]. +//! +//! [draft-diaz-lzip-09]: https://datatracker.ietf.org/doc/html/draft-diaz-lzip-09#section-2 + +pub(crate) mod crc; +pub(crate) mod header; diff --git a/src/lzip/crc.rs b/src/lzip/crc.rs new file mode 100644 index 0000000..3ec294c --- /dev/null +++ b/src/lzip/crc.rs @@ -0,0 +1,3 @@ +use crc::{Crc, CRC_32_ISO_HDLC}; + +pub const CRC32: Crc = Crc::::new(&CRC_32_ISO_HDLC); diff --git a/src/lzip/header.rs b/src/lzip/header.rs new file mode 100644 index 0000000..a0f656c --- /dev/null +++ b/src/lzip/header.rs @@ -0,0 +1,60 @@ +//! lzip header. + +use crate::error; + +/// File format magic header signature. +pub(crate) const LZIP_MAGIC: &[u8] = b"LZIP"; + +/// File format version number, 1 for now. +pub(crate) const LZIP_VERSION_NUMBER: u8 = 1; + +/// lzip header. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Header { + pub(crate) dict_size: u32, +} + +impl Header { + /// Parses the lzip header from a buffered reader. + pub(crate) fn parse(input: [u8; 6]) -> error::Result { + if &input[..4] != LZIP_MAGIC { + return Err(error::Error::LzipError(format!( + "Invalid lzip magic, expected {:?}", + LZIP_MAGIC + ))); + } + + match input[4] { + LZIP_VERSION_NUMBER => {} + 0 => { + return Err(error::Error::LzipError(String::from( + "lzip version 0 is not supported", + ))); + } + _ => { + return Err(error::Error::LzipError(format!( + "Unknown lzip version number, expected {:?}", + LZIP_VERSION_NUMBER + ))); + } + } + + let mut dict_size = 1 << (input[5] & 0x1f); + dict_size -= (dict_size / 16) * ((input[5] >> 5) & 0x07) as u32; + match dict_size { + ds if ds < (1 << 12) => { + return Err(error::Error::LzipError(String::from( + "dictionary size too small, must be at least 4 KiB", + ))); + } + ds if ds > (1 << 29) => { + return Err(error::Error::LzipError(String::from( + "dictionary size too large, must be less than 512 MiB", + ))); + } + _ => {} + } + let header = Self { dict_size }; + Ok(header) + } +} diff --git a/tests/files/block-check-crc32.txt.lz b/tests/files/block-check-crc32.txt.lz new file mode 100644 index 0000000..7756e74 Binary files /dev/null and b/tests/files/block-check-crc32.txt.lz differ diff --git a/tests/files/empty.txt.lz b/tests/files/empty.txt.lz new file mode 100644 index 0000000..ec60725 Binary files /dev/null and b/tests/files/empty.txt.lz differ diff --git a/tests/files/foo.txt.lz b/tests/files/foo.txt.lz new file mode 100644 index 0000000..813b58a Binary files /dev/null and b/tests/files/foo.txt.lz differ diff --git a/tests/files/good-1-lzma2-1.lz b/tests/files/good-1-lzma2-1.lz new file mode 100644 index 0000000..b44014a Binary files /dev/null and b/tests/files/good-1-lzma2-1.lz differ diff --git a/tests/files/good-1-lzma2-2.lz b/tests/files/good-1-lzma2-2.lz new file mode 100644 index 0000000..b44014a Binary files /dev/null and b/tests/files/good-1-lzma2-2.lz differ diff --git a/tests/files/good-1-lzma2-3.lz b/tests/files/good-1-lzma2-3.lz new file mode 100644 index 0000000..b44014a Binary files /dev/null and b/tests/files/good-1-lzma2-3.lz differ diff --git a/tests/files/good-1-lzma2-4.lz b/tests/files/good-1-lzma2-4.lz new file mode 100644 index 0000000..b44014a Binary files /dev/null and b/tests/files/good-1-lzma2-4.lz differ diff --git a/tests/files/hello.txt.lz b/tests/files/hello.txt.lz new file mode 100644 index 0000000..03b8d16 Binary files /dev/null and b/tests/files/hello.txt.lz differ diff --git a/tests/files/small.txt.lz b/tests/files/small.txt.lz new file mode 100644 index 0000000..5476881 Binary files /dev/null and b/tests/files/small.txt.lz differ diff --git a/tests/lzip.rs b/tests/lzip.rs new file mode 100644 index 0000000..dd00f39 --- /dev/null +++ b/tests/lzip.rs @@ -0,0 +1,107 @@ +#[cfg(feature = "enable_logging")] +use log::{debug, info}; +use std::io::{BufReader, Read}; + +/// Utility function to read a file into memory +fn read_all_file(filename: &str) -> std::io::Result> { + let mut data = Vec::new(); + std::fs::File::open(filename).and_then(|mut file| file.read_to_end(&mut data))?; + Ok(data) +} + +fn round_trip(x: &[u8]) { + let mut compressed: Vec = Vec::new(); + lzma_rs::lzip_compress(&mut std::io::BufReader::new(x), &mut compressed).unwrap(); + #[cfg(feature = "enable_logging")] + info!("Compressed {} -> {} bytes", x.len(), compressed.len()); + #[cfg(feature = "enable_logging")] + debug!("Compressed content: {:?}", compressed); + let mut bf = BufReader::new(compressed.as_slice()); + let mut decomp: Vec = Vec::new(); + lzma_rs::lzip_decompress(&mut bf, &mut decomp).unwrap(); + assert_eq!(decomp, x) +} + +fn round_trip_file(filename: &str) { + let x = read_all_file(filename).unwrap(); + round_trip(x.as_slice()); +} + +#[test] +fn round_trip_basics() { + #[cfg(feature = "enable_logging")] + let _ = env_logger::try_init(); + round_trip(b""); + // Note: we use vec! to avoid storing the slice in the binary + round_trip(vec![0x00; 1_000_000].as_slice()); + round_trip(vec![0xFF; 1_000_000].as_slice()); +} + +#[test] +fn round_trip_hello() { + #[cfg(feature = "enable_logging")] + let _ = env_logger::try_init(); + round_trip(b"Hello world"); +} + +#[test] +fn round_trip_files() { + #[cfg(feature = "enable_logging")] + let _ = env_logger::try_init(); + round_trip_file("tests/files/foo.txt"); +} + +fn decomp_big_file(compfile: &str, plainfile: &str) { + let expected = read_all_file(plainfile).unwrap(); + let mut f = BufReader::new(std::fs::File::open(compfile).unwrap()); + let mut decomp: Vec = Vec::new(); + lzma_rs::lzip_decompress(&mut f, &mut decomp).unwrap(); + assert!(decomp == expected) +} + +#[test] +fn big_file() { + #[cfg(feature = "enable_logging")] + let _ = env_logger::try_init(); + decomp_big_file("tests/files/foo.txt.lz", "tests/files/foo.txt"); + decomp_big_file( + "tests/files/good-1-lzma2-1.lz", + "tests/files/good-1-lzma2-1", + ); + decomp_big_file( + "tests/files/good-1-lzma2-2.lz", + "tests/files/good-1-lzma2-2", + ); + decomp_big_file( + "tests/files/good-1-lzma2-3.lz", + "tests/files/good-1-lzma2-3", + ); + decomp_big_file( + "tests/files/good-1-lzma2-4.lz", + "tests/files/good-1-lzma2-4", + ); +} + +#[test] +fn decompress_empty_world() { + #[cfg(feature = "enable_logging")] + let _ = env_logger::try_init(); + let mut x: &[u8] = b"\x4c\x5a\x49\x50\x01\x0c\x00\x83\xff\xfb\xff\xff\xc0\x00\x00\x00\ + \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x24\x00\x00\x00\ + \x00\x00\x00\x00"; + let mut decomp: Vec = Vec::new(); + lzma_rs::lzip_decompress(&mut x, &mut decomp).unwrap(); + assert_eq!(decomp, b"") +} + +#[test] +fn decompress_hello_world() { + #[cfg(feature = "enable_logging")] + let _ = env_logger::try_init(); + let mut x: &[u8] = b"\x4c\x5a\x49\x50\x01\x0c\x00\x24\x19\x49\x98\x6f\x10\x19\xc6\xd7\ + \x31\xeb\x36\x50\xb2\x98\x48\xff\xfe\xa5\xb0\x00\xd5\xe0\x39\xb7\ + \x0c\x00\x00\x00\x00\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00"; + let mut decomp: Vec = Vec::new(); + lzma_rs::lzip_decompress(&mut x, &mut decomp).unwrap(); + assert_eq!(decomp, b"Hello world\x0a") +}