Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add initial support for lzip #111

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "lzma-rs"
description = "A codec for LZMA, LZMA2 and XZ written in pure Rust"
description = "A codec for LZMA, LZMA2, XZ, and lzip written in pure Rust"
version = "0.3.0"
license = "MIT"
authors = ["Guillaume Endignoux <[email protected]>"]
Expand Down
4 changes: 4 additions & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ path = "fuzz_targets/roundtrip_lzma2.rs"
name = "roundtrip_xz"
path = "fuzz_targets/roundtrip_xz.rs"

[[bin]]
name = "roundtrip_lzip"
path = "fuzz_targets/roundtrip_lzip.rs"

[[bin]]
name = "decompress_lzma"
path = "fuzz_targets/decompress_lzma.rs"
Expand Down
20 changes: 20 additions & 0 deletions fuzz/fuzz_targets/roundtrip_lzip.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#![no_main]
#[macro_use]
extern crate libfuzzer_sys;

use lzma_rs::error::Result;

/// Compresses `x` with lzip, decompresses the result again, and returns the
/// decompressed bytes so the caller can compare them with the input.
fn round_trip_lzip(x: &[u8]) -> Result<Vec<u8>> {
    // Compression pass: raw bytes -> `.lz` member.
    let mut packed = Vec::<u8>::new();
    let mut source = std::io::BufReader::new(x);
    lzma_rs::lzip_compress(&mut source, &mut packed)?;

    // Decompression pass: `.lz` member -> raw bytes.
    let mut unpacked = Vec::<u8>::new();
    let mut packed_reader = std::io::BufReader::new(packed.as_slice());
    lzma_rs::lzip_decompress(&mut packed_reader, &mut unpacked)?;

    Ok(unpacked)
}

// Fuzz entry point: every input must survive a compress/decompress round trip
// unchanged.
fuzz_target!(|data: &[u8]| {
    let round_tripped =
        round_trip_lzip(data).expect("Can't decompress what we just compressed");
    assert_eq!(round_tripped, data);
});
64 changes: 64 additions & 0 deletions src/decode/lzip.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//! Decoder for the `.lz` file format.

use crate::decode::lzma::{LzmaDecoder, LzmaParams, LzmaProperties};
use crate::error;
use crate::lzip::crc::CRC32;
use crate::lzip::header;
use byteorder::{ByteOrder, LittleEndian};
use std::io;

/// Decodes one lzip member read from `input` and writes the decompressed
/// bytes to `output`.
///
/// The 6-byte header is parsed first, then the rest of `input` is read into
/// memory and split into the LZMA stream and the 20-byte trailer (CRC32 of the
/// uncompressed data, uncompressed size, member size; all little-endian).
/// Nothing is written to `output` unless the CRC32, the uncompressed size and
/// the member size recorded in the trailer all match the decoded data.
///
/// # Errors
///
/// Returns an error if reading from `input` fails, the header is rejected by
/// `header::Header::parse`, the input is too short to contain the trailer, the
/// LZMA stream cannot be decoded, any trailer field does not match, or writing
/// to `output` fails.
pub fn decode_stream<R, W>(input: &mut R, output: &mut W) -> error::Result<()>
where
    R: io::BufRead,
    W: io::Write,
{
    // CRC32 (4 bytes) + uncompressed size (8 bytes) + member size (8 bytes).
    const TRAILER_SIZE: usize = 20;

    let mut header_buf = [0; 6];
    input.read_exact(&mut header_buf)?;
    let header = header::Header::parse(header_buf)?;
    let mut buf = Vec::new();
    input.read_to_end(&mut buf)?;
    // Reject truncated input explicitly: `buf.len() - TRAILER_SIZE` would
    // underflow and `Vec::split_off` would panic on such data.
    if buf.len() < TRAILER_SIZE {
        return Err(error::Error::LzipError(format!(
            "Input too short: expected at least {} trailer bytes after the header but got {}",
            TRAILER_SIZE,
            buf.len()
        )));
    }
    // After the split, `buf` holds only the LZMA stream.
    let footer = buf.split_off(buf.len() - TRAILER_SIZE);
    let unpacked_size = LittleEndian::read_u64(&footer[4..12]);
    // See <https://www.nongnu.org/lzip/manual/lzip_manual.html#Stream-format>.
    let properties = LzmaProperties {
        lc: 3,
        lp: 0,
        pb: 2,
    };
    let params = LzmaParams::new(properties, header.dict_size, Some(unpacked_size));
    let mut uncompressed_data = Vec::new();
    LzmaDecoder::new(params, None)?.decompress(&mut buf.as_slice(), &mut uncompressed_data)?;

    // Trailer bytes 0..4: CRC32 of the uncompressed data.
    let crc32 = CRC32.checksum(&uncompressed_data);
    let expected_crc32 = LittleEndian::read_u32(&footer[..4]);
    if crc32 != expected_crc32 {
        return Err(error::Error::LzipError(format!(
            "Invalid uncompressed data CRC32: expected 0x{:08x} but got 0x{:08x}",
            expected_crc32, crc32
        )));
    }

    // Trailer bytes 4..12: size of the uncompressed data.
    if uncompressed_data.len() as u64 != unpacked_size {
        return Err(error::Error::LzipError(format!(
            "Invalid uncompressed data size: expected {} but got {}",
            unpacked_size,
            uncompressed_data.len()
        )));
    }

    // Trailer bytes 12..20: total member size (header + LZMA stream + trailer).
    let member_size = (header_buf.len() + buf.len() + footer.len()) as u64;
    let expected_member_size = LittleEndian::read_u64(&footer[12..]);
    if member_size > (1 << 51) {
        return Err(error::Error::LzipError(String::from(
            "member size too large, must be less than 2 PiB",
        )));
    }
    if member_size != expected_member_size {
        return Err(error::Error::LzipError(format!(
            "Invalid member size: expected {} but got {}",
            expected_member_size, member_size
        )));
    }

    // Only emit data once every integrity check has passed.
    output.write_all(&uncompressed_data)?;
    Ok(())
}
1 change: 0 additions & 1 deletion src/decode/lzma.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ pub struct LzmaParams {

impl LzmaParams {
/// Create a new instance of LZMA parameters.
#[cfg(feature = "raw_decoder")]
pub fn new(
properties: LzmaProperties,
dict_size: u32,
Expand Down
1 change: 1 addition & 0 deletions src/decode/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Decoding logic.

pub mod lzbuffer;
pub mod lzip;
pub mod lzma;
pub mod lzma2;
pub mod options;
Expand Down
2 changes: 1 addition & 1 deletion src/decode/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ where
Err(e) => {
return Err(match e {
Error::IoError(e) | Error::HeaderTooShort(e) => e,
Error::LzmaError(e) | Error::XzError(e) => {
Error::LzmaError(e) | Error::XzError(e) | Error::LzipError(e) => {
io::Error::new(io::ErrorKind::Other, e)
}
});
Expand Down
60 changes: 60 additions & 0 deletions src/encode/lzip.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
use crate::lzip::crc::CRC32;
use crate::lzip::header;
use crate::lzma_compress;
use byteorder::{LittleEndian, WriteBytesExt};
use std::io;

/// Compresses everything readable from `input` into a single lzip member
/// written to `output`: header, raw LZMA stream, then footer.
///
/// The header is written before `input` is read; the whole input is then
/// buffered in memory because the footer needs its checksum and length.
pub fn encode_stream<R, W>(input: &mut R, output: &mut W) -> io::Result<()>
where
    R: io::BufRead,
    W: io::Write,
{
    write_header(output)?;

    let mut uncompressed = Vec::new();
    input.read_to_end(&mut uncompressed)?;

    let lzma_stream_len = write_stream(&uncompressed, output)?;
    write_footer(&uncompressed, output, lzma_stream_len)
}

/// Writes the lzip header to `output`: magic bytes, version number and the
/// coded dictionary size byte.
fn write_header<W>(output: &mut W) -> io::Result<()>
where
    W: io::Write,
{
    // Pre-computed coded dictionary size which represents 0x00800000 (8388608
    // in decimal). TODO: It is recommended that this be fixed when it becomes
    // possible to specify the dictionary size when compressing.
    const CODED_DICT_SIZE: u8 = 0x17;

    output.write_all(header::LZIP_MAGIC)?;
    output.write_u8(header::LZIP_VERSION_NUMBER)?;
    output.write_u8(CODED_DICT_SIZE)
}

/// Writes the lzip footer to `output`, all fields little-endian: CRC32 of the
/// uncompressed `input`, the length of `input`, and the total member size.
fn write_footer<W>(input: &[u8], output: &mut W, compressed_data_size: u64) -> io::Result<()>
where
    W: io::Write,
{
    // Fixed sizes of the parts surrounding the compressed data in a member.
    const HEADER_SIZE: u64 = 6;
    const FOOTER_SIZE: u64 = 20;

    let crc = CRC32.checksum(input);
    output.write_u32::<LittleEndian>(crc)?;

    let uncompressed_len = input.len() as u64;
    output.write_u64::<LittleEndian>(uncompressed_len)?;

    // Member size = header + compressed data + footer.
    output.write_u64::<LittleEndian>(HEADER_SIZE + compressed_data_size + FOOTER_SIZE)
}

/// Compresses `input` with `lzma_compress`, strips the leading 13-byte LZMA
/// header, writes the remaining bytes to `output` and returns how many bytes
/// were written.
fn write_stream<W>(mut input: &[u8], output: &mut W) -> io::Result<u64>
where
    W: io::Write,
{
    // Number of leading bytes of the `lzma_compress` output that are dropped.
    const LZMA_HEADER_SIZE: usize = 13;

    let mut lzma = Vec::new();
    lzma_compress(&mut input, &mut lzma)?;

    // Drop the LZMA header.
    // TODO: It is recommended that this be fixed when it becomes possible to
    // generate the LZMA stream without the header.
    let raw_stream = lzma.split_off(LZMA_HEADER_SIZE);

    output.write_all(&raw_stream)?;
    Ok(raw_stream.len() as u64)
}
1 change: 1 addition & 0 deletions src/encode/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Encoding logic.

pub mod dumbencoder;
pub mod lzip;
pub mod lzma2;
pub mod options;
mod rangecoder;
Expand Down
9 changes: 8 additions & 1 deletion src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ pub enum Error {
LzmaError(String),
/// XZ error.
XzError(String),
/// lzip error.
LzipError(String),
}

/// Library result alias.
Expand All @@ -32,6 +34,7 @@ impl Display for Error {
Error::HeaderTooShort(e) => write!(fmt, "header too short: {}", e),
Error::LzmaError(e) => write!(fmt, "lzma error: {}", e),
Error::XzError(e) => write!(fmt, "xz error: {}", e),
Error::LzipError(e) => write!(fmt, "lzip error: {}", e),
}
}
}
Expand All @@ -40,7 +43,7 @@ impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Error::IoError(e) | Error::HeaderTooShort(e) => Some(e),
Error::LzmaError(_) | Error::XzError(_) => None,
Error::LzmaError(_) | Error::XzError(_) | Error::LzipError(_) => None,
}
}
}
Expand All @@ -67,5 +70,9 @@ mod test {
Error::XzError("this is an error".to_string()).to_string(),
"xz error: this is an error"
);
assert_eq!(
Error::LzipError("this is an error".to_string()).to_string(),
"lzip error: this is an error"
);
}
}
21 changes: 20 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! Pure-Rust codecs for LZMA, LZMA2, and XZ.
//! Pure-Rust codecs for LZMA, LZMA2, XZ, and lzip.
#![cfg_attr(docsrs, feature(doc_cfg, doc_cfg_hide))]
#![deny(missing_docs)]
#![deny(missing_debug_implementations)]
Expand All @@ -12,6 +12,7 @@ mod encode;

pub mod error;

mod lzip;
mod util;
mod xz;

Expand Down Expand Up @@ -108,3 +109,21 @@ pub fn xz_decompress<R: io::BufRead, W: io::Write>(
pub fn xz_compress<R: io::BufRead, W: io::Write>(input: &mut R, output: &mut W) -> io::Result<()> {
encode::xz::encode_stream(input, output)
}

/// Decompress lzip data with default
/// [`Options`](decompress/struct.Options.html).
pub fn lzip_decompress<R: io::BufRead, W: io::Write>(
input: &mut R,
output: &mut W,
) -> error::Result<()> {
decode::lzip::decode_stream(input, output)
}

/// Compress the data read from `input` with lzip, writing the compressed
/// bytes to `output`.
pub fn lzip_compress<R, W>(input: &mut R, output: &mut W) -> io::Result<()>
where
    R: io::BufRead,
    W: io::Write,
{
    encode::lzip::encode_stream(input, output)
}
8 changes: 8 additions & 0 deletions src/lzip.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
//! Logic for handling the `.lz` file format.
//!
//! Format specifications are at <https://www.nongnu.org/lzip/manual/lzip_manual.html#File-format> or [draft-diaz-lzip-09].
//!
//! [draft-diaz-lzip-09]: https://datatracker.ietf.org/doc/html/draft-diaz-lzip-09#section-2

pub(crate) mod crc;
pub(crate) mod header;
3 changes: 3 additions & 0 deletions src/lzip/crc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
use crc::{Crc, CRC_32_ISO_HDLC};

/// CRC-32 calculator built from the `crc` crate's `CRC_32_ISO_HDLC`
/// algorithm parameters.
pub const CRC32: Crc<u32> = Crc::<u32>::new(&CRC_32_ISO_HDLC);
60 changes: 60 additions & 0 deletions src/lzip/header.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
//! lzip header.

use crate::error;

/// File format magic header signature: the four ASCII bytes `LZIP`.
pub(crate) const LZIP_MAGIC: &[u8] = b"LZIP";

/// File format version number stored in the header byte that follows the
/// magic; 1 for now.
pub(crate) const LZIP_VERSION_NUMBER: u8 = 1;

/// Parsed lzip header.
#[derive(Clone, Copy, Debug)]
pub(crate) struct Header {
    /// Dictionary size in bytes, decoded from the coded dictionary size byte.
    pub(crate) dict_size: u32,
}

impl Header {
    /// Parses the 6-byte lzip header: magic (bytes 0..4), version number
    /// (byte 4) and coded dictionary size (byte 5).
    ///
    /// # Errors
    ///
    /// Returns `Error::LzipError` if the magic does not match `LZIP_MAGIC`,
    /// the version is not `LZIP_VERSION_NUMBER`, or the decoded dictionary
    /// size is outside the range 4 KiB to 512 MiB (both inclusive).
    pub(crate) fn parse(input: [u8; 6]) -> error::Result<Self> {
        // Inclusive bounds accepted for the decoded dictionary size.
        const MIN_DICT_SIZE: u32 = 1 << 12;
        const MAX_DICT_SIZE: u32 = 1 << 29;

        if &input[..4] != LZIP_MAGIC {
            return Err(error::Error::LzipError(format!(
                "Invalid lzip magic, expected {:?}",
                LZIP_MAGIC
            )));
        }

        match input[4] {
            LZIP_VERSION_NUMBER => {}
            0 => {
                return Err(error::Error::LzipError(String::from(
                    "lzip version 0 is not supported",
                )));
            }
            _ => {
                return Err(error::Error::LzipError(format!(
                    "Unknown lzip version number, expected {:?}",
                    LZIP_VERSION_NUMBER
                )));
            }
        }

        let coded_dict_size = input[5];
        // Bits 4-0 hold the base-2 logarithm of the base size. The mask keeps
        // the shift amount at most 31, so it cannot overflow a `u32`.
        let base_size: u32 = 1 << (coded_dict_size & 0x1f);
        // Bits 7-5 hold how many sixteenths of the base size to subtract.
        let sixteenths = u32::from((coded_dict_size >> 5) & 0x07);
        let dict_size = base_size - (base_size / 16) * sixteenths;

        if dict_size < MIN_DICT_SIZE {
            return Err(error::Error::LzipError(String::from(
                "dictionary size too small, must be at least 4 KiB",
            )));
        }
        // Exactly 512 MiB is accepted, so the message says "at most".
        if dict_size > MAX_DICT_SIZE {
            return Err(error::Error::LzipError(String::from(
                "dictionary size too large, must be at most 512 MiB",
            )));
        }

        Ok(Self { dict_size })
    }
}
Binary file added tests/files/block-check-crc32.txt.lz
Binary file not shown.
Binary file added tests/files/empty.txt.lz
Binary file not shown.
Binary file added tests/files/foo.txt.lz
Binary file not shown.
Binary file added tests/files/good-1-lzma2-1.lz
Binary file not shown.
Binary file added tests/files/good-1-lzma2-2.lz
Binary file not shown.
Binary file added tests/files/good-1-lzma2-3.lz
Binary file not shown.
Binary file added tests/files/good-1-lzma2-4.lz
Binary file not shown.
Binary file added tests/files/hello.txt.lz
Binary file not shown.
Binary file added tests/files/small.txt.lz
Binary file not shown.
Loading