ruzstd/decoding/
dictionary.rs

1use alloc::vec::Vec;
2use core::convert::TryInto;
3
4use crate::decoding::scratch::FSEScratch;
5use crate::decoding::scratch::HuffmanScratch;
6use crate::fse::FSETableError;
7use crate::huff0::HuffmanTableError;
8
9/// Zstandard includes support for "raw content" dictionaries, that store bytes optionally used
10/// during sequence execution.
11///
12/// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format>
13pub struct Dictionary {
14    /// A 4 byte value used by decoders to check if they can use
15    /// the correct dictionary. This value must not be zero.
16    pub id: u32,
17    /// A dictionary can contain an entropy table, either FSE or
18    /// Huffman.
19    pub fse: FSEScratch,
20    /// A dictionary can contain an entropy table, either FSE or
21    /// Huffman.
22    pub huf: HuffmanScratch,
23    /// The content of a dictionary acts as a "past" in front of data
24    /// to compress or decompress,
25    /// so it can be referenced in sequence commands.
26    /// As long as the amount of data decoded from this frame is less than or
27    /// equal to Window_Size, sequence commands may specify offsets longer than
28    /// the total length of decoded output so far to reference back to the
29    /// dictionary, even parts of the dictionary with offsets larger than Window_Size.
30    /// After the total output has surpassed Window_Size however,
31    /// this is no longer allowed and the dictionary is no longer accessible
32    pub dict_content: Vec<u8>,
33    /// The 3 most recent offsets are stored so that they can be used
34    /// during sequence execution, see
35    /// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#repeat-offsets>
36    /// for more.
37    pub offset_hist: [u32; 3],
38}
39
40#[derive(Debug)]
41#[non_exhaustive]
42pub enum DictionaryDecodeError {
43    BadMagicNum { got: [u8; 4] },
44    FSETableError(FSETableError),
45    HuffmanTableError(HuffmanTableError),
46}
47
48#[cfg(feature = "std")]
49impl std::error::Error for DictionaryDecodeError {
50    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
51        match self {
52            DictionaryDecodeError::FSETableError(source) => Some(source),
53            DictionaryDecodeError::HuffmanTableError(source) => Some(source),
54            _ => None,
55        }
56    }
57}
58
59impl core::fmt::Display for DictionaryDecodeError {
60    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
61        match self {
62            DictionaryDecodeError::BadMagicNum { got } => {
63                write!(
64                    f,
65                    "Bad magic_num at start of the dictionary; Got: {:#04X?}, Expected: {:#04x?}",
66                    got, MAGIC_NUM,
67                )
68            }
69            DictionaryDecodeError::FSETableError(e) => write!(f, "{:?}", e),
70            DictionaryDecodeError::HuffmanTableError(e) => write!(f, "{:?}", e),
71        }
72    }
73}
74
75impl From<FSETableError> for DictionaryDecodeError {
76    fn from(val: FSETableError) -> Self {
77        Self::FSETableError(val)
78    }
79}
80
81impl From<HuffmanTableError> for DictionaryDecodeError {
82    fn from(val: HuffmanTableError) -> Self {
83        Self::HuffmanTableError(val)
84    }
85}
86
87/// This 4 byte (little endian) magic number refers to the start of a dictionary
88pub const MAGIC_NUM: [u8; 4] = [0x37, 0xA4, 0x30, 0xEC];
89
90impl Dictionary {
91    /// Parses the dictionary from `raw` and set the tables
92    /// it returns the dict_id for checking with the frame's `dict_id``
93    pub fn decode_dict(raw: &[u8]) -> Result<Dictionary, DictionaryDecodeError> {
94        let mut new_dict = Dictionary {
95            id: 0,
96            fse: FSEScratch::new(),
97            huf: HuffmanScratch::new(),
98            dict_content: Vec::new(),
99            offset_hist: [2, 4, 8],
100        };
101
102        let magic_num: [u8; 4] = raw[..4].try_into().expect("optimized away");
103        if magic_num != MAGIC_NUM {
104            return Err(DictionaryDecodeError::BadMagicNum { got: magic_num });
105        }
106
107        let dict_id = raw[4..8].try_into().expect("optimized away");
108        let dict_id = u32::from_le_bytes(dict_id);
109        new_dict.id = dict_id;
110
111        let raw_tables = &raw[8..];
112
113        let huf_size = new_dict.huf.table.build_decoder(raw_tables)?;
114        let raw_tables = &raw_tables[huf_size as usize..];
115
116        let of_size = new_dict.fse.offsets.build_decoder(
117            raw_tables,
118            crate::decoding::sequence_section_decoder::OF_MAX_LOG,
119        )?;
120        let raw_tables = &raw_tables[of_size..];
121
122        let ml_size = new_dict.fse.match_lengths.build_decoder(
123            raw_tables,
124            crate::decoding::sequence_section_decoder::ML_MAX_LOG,
125        )?;
126        let raw_tables = &raw_tables[ml_size..];
127
128        let ll_size = new_dict.fse.literal_lengths.build_decoder(
129            raw_tables,
130            crate::decoding::sequence_section_decoder::LL_MAX_LOG,
131        )?;
132        let raw_tables = &raw_tables[ll_size..];
133
134        let offset1 = raw_tables[0..4].try_into().expect("optimized away");
135        let offset1 = u32::from_le_bytes(offset1);
136
137        let offset2 = raw_tables[4..8].try_into().expect("optimized away");
138        let offset2 = u32::from_le_bytes(offset2);
139
140        let offset3 = raw_tables[8..12].try_into().expect("optimized away");
141        let offset3 = u32::from_le_bytes(offset3);
142
143        new_dict.offset_hist[0] = offset1;
144        new_dict.offset_hist[1] = offset2;
145        new_dict.offset_hist[2] = offset3;
146
147        let raw_content = &raw_tables[12..];
148        new_dict.dict_content.extend(raw_content);
149
150        Ok(new_dict)
151    }
152}