ruzstd/encoding/mod.rs
1//! Structures and utilities used for compressing/encoding data into the Zstd format.
2
3pub(crate) mod bit_writer;
4pub(crate) mod block_header;
5pub(crate) mod blocks;
6pub(crate) mod frame_header;
7pub(crate) mod match_generator;
8pub(crate) mod util;
9
10mod frame_compressor;
11pub use frame_compressor::FrameCompressor;
12
13use crate::io::{Read, Write};
14use alloc::vec::Vec;
15
16/// Convenience function to compress some source into a target without reusing any resources of the compressor
17/// ```rust
18/// use ruzstd::encoding::{compress, CompressionLevel};
19/// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0];
20/// let mut target = Vec::new();
21/// compress(data, &mut target, CompressionLevel::Fastest);
22/// ```
23pub fn compress<R: Read, W: Write>(source: R, target: W, level: CompressionLevel) {
24 let mut frame_enc = FrameCompressor::new(level);
25 frame_enc.set_source(source);
26 frame_enc.set_drain(target);
27 frame_enc.compress();
28}
29
30/// Convenience function to compress some source into a Vec without reusing any resources of the compressor
31/// ```rust
32/// use ruzstd::encoding::{compress_to_vec, CompressionLevel};
33/// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0];
34/// let compressed = compress_to_vec(data, CompressionLevel::Fastest);
35/// ```
36pub fn compress_to_vec<R: Read>(source: R, level: CompressionLevel) -> Vec<u8> {
37 let mut vec = Vec::new();
38 compress(source, &mut vec, level);
39 vec
40}
41
/// The compression mode used impacts the speed of compression,
/// and resulting compression ratios. Faster compression will result
/// in worse compression ratios, and vice versa.
///
/// The type is a plain, field-less enum: it is cheap to copy, can be compared
/// for equality and can be printed with `{:?}` for diagnostics.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum CompressionLevel {
    /// This level does not compress the data at all, and simply wraps
    /// it in a Zstandard frame.
    Uncompressed,
    /// This level is roughly equivalent to Zstd compression level 1
    Fastest,
    /// This level is roughly equivalent to Zstd level 3,
    /// or the one used by the official compressor when no level
    /// is specified.
    ///
    /// UNIMPLEMENTED
    Default,
    /// This level is roughly equivalent to Zstd level 7.
    ///
    /// UNIMPLEMENTED
    Better,
    /// This level is roughly equivalent to Zstd level 11.
    ///
    /// UNIMPLEMENTED
    Best,
}
67
68/// Trait used by the encoder that users can use to extend the matching facilities with their own algorithm
69/// making their own tradeoffs between runtime, memory usage and compression ratio
70///
71/// This trait operates on buffers that represent the chunks of data the matching algorithm wants to work on.
72/// One or more of these buffers represent the window the decoder will need to decode the data again.
73///
74/// This library asks the Matcher for a new buffer using `get_next_space` to allow reusing of allocated buffers when they are no longer part of the
75/// window of data that is being used for matching.
76///
77/// The library fills the buffer with data that is to be compressed and commits them back to the matcher using `commit_space`.
78///
79/// Then it will either call `start_matching` or, if the space is deemed not worth compressing, `skip_matching` is called.
80///
81/// This is repeated until no more data is left to be compressed.
82pub trait Matcher {
83 /// Get a space where we can put data to be matched on. Will be encoded as one block. The maximum allowed size is 128 kB.
84 fn get_next_space(&mut self) -> alloc::vec::Vec<u8>;
85 /// Get a reference to the last commited space
86 fn get_last_space(&mut self) -> &[u8];
87 /// Commit a space to the matcher so it can be matched against
88 fn commit_space(&mut self, space: alloc::vec::Vec<u8>);
89 /// Just process the data in the last commited space for future matching
90 fn skip_matching(&mut self);
91 /// Process the data in the last commited space for future matching AND generate matches for the data
92 fn start_matching(&mut self, handle_sequence: impl for<'a> FnMut(Sequence<'a>));
93 /// Reset this matcher so it can be used for the next new frame
94 fn reset(&mut self, level: CompressionLevel);
95 /// The size of the window the decoder will need to execute all sequences produced by this matcher
96 ///
97 /// May change after a call to reset with a different compression level
98 fn window_size(&self) -> u64;
99}
100
/// The units of output a [`Matcher`] can produce while matching.
#[derive(Debug, PartialEq, Eq)]
pub enum Sequence<'data> {
    /// A literals-plus-match triple, encoded as a sequence for the decoder's sequence execution.
    ///
    /// The decoder first copies `literals` to the decoded data and then copies
    /// `match_len` bytes starting `offset` bytes back in the buffer.
    Triple {
        literals: &'data [u8],
        offset: usize,
        match_len: usize,
    },
    /// Trailing literals, returned as the last sequence in a block.
    ///
    /// The decoder simply copies these literals once sequence execution has finished.
    Literals { literals: &'data [u8] },
}