ruzstd/blocks/
literals_section.rs

1//! Utilities and representations for the first half of a block, the literals section.
2//! It contains data that is then copied from by the sequences section.
3use super::super::decoding::bit_reader::{BitReader, GetBitsError};
4
/// A compressed block consists of two sections, a literals section, and a sequences section.
///
/// This is the first of those two sections. A literal is just any arbitrary data, and it is
/// copied by the sequences section.
///
/// The fields hold the values described by the literals section header.
pub struct LiteralsSection {
    /// - If this block is of type [LiteralsSectionType::Raw], then the data is `regenerated_size`
    ///     bytes long, and it contains the raw literals data to be used during the second section,
    ///     the sequences section.
    /// - If this block is of type [LiteralsSectionType::RLE],
    ///     then the literal consists of a single byte repeated `regenerated_size` times.
    /// - For types [LiteralsSectionType::Compressed] or [LiteralsSectionType::Treeless],
    ///     then this is the size of the decompressed data.
    pub regenerated_size: u32,
    /// - For types [LiteralsSectionType::Raw] and [LiteralsSectionType::RLE], this value is not present.
    /// - For types [LiteralsSectionType::Compressed] and [LiteralsSectionType::Treeless], this value will
    ///     be set to the size of the compressed data.
    pub compressed_size: Option<u32>,
    /// This value will be either 1 stream or 4 streams if the literal is of type
    /// [LiteralsSectionType::Compressed] or [LiteralsSectionType::Treeless], and it
    /// is not used for RLE or uncompressed literals.
    pub num_streams: Option<u8>,
    /// The type of the literal section.
    pub ls_type: LiteralsSectionType,
}
28
/// The way in which a literals section is encoded.
///
/// The type is a plain tag without payload, so it is cheap to copy and can be
/// compared and debug-printed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LiteralsSectionType {
    /// Literals are stored uncompressed.
    Raw,
    /// Literals consist of a single byte value repeated [LiteralsSection::regenerated_size] times.
    RLE,
    /// This is a standard Huffman-compressed block, starting with a Huffman tree description.
    /// In this mode, there are at least *2* different literals represented in the Huffman tree
    /// description.
    Compressed,
    /// This is a Huffman-compressed block,
    /// using the Huffman tree from the previous [LiteralsSectionType::Compressed] block
    /// in the sequence. If this mode is triggered without any previous Huffman-tables in the
    /// frame (or dictionary), it should be treated as data corruption.
    Treeless,
}
45
/// Errors that can occur while parsing the header of a literals section.
#[derive(Debug)]
#[non_exhaustive]
pub enum LiteralsSectionParseError {
    /// The section type value `got` is not one of the known types (0, 1, 2 or 3).
    IllegalLiteralSectionType { got: u8 },
    /// Reading bits from the header failed; wraps the underlying bit reader error.
    GetBitsError(GetBitsError),
    /// The provided slice is shorter than the header: `have` bytes were supplied
    /// while the header requires `need` bytes.
    NotEnoughBytes { have: usize, need: u8 },
}
53
#[cfg(feature = "std")]
impl std::error::Error for LiteralsSectionParseError {
    /// Returns the wrapped bit reader error for the `GetBitsError` variant;
    /// every other variant has no underlying cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        if let Self::GetBitsError(inner) = self {
            Some(inner)
        } else {
            None
        }
    }
}
63impl core::fmt::Display for LiteralsSectionParseError {
64    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
65        match self {
66            LiteralsSectionParseError::IllegalLiteralSectionType { got } => {
67                write!(
68                    f,
69                    "Illegal literalssectiontype. Is: {}, must be in: 0, 1, 2, 3",
70                    got
71                )
72            }
73            LiteralsSectionParseError::GetBitsError(e) => write!(f, "{:?}", e),
74            LiteralsSectionParseError::NotEnoughBytes { have, need } => {
75                write!(
76                    f,
77                    "Not enough byte to parse the literals section header. Have: {}, Need: {}",
78                    have, need,
79                )
80            }
81        }
82    }
83}
84
85impl From<GetBitsError> for LiteralsSectionParseError {
86    fn from(val: GetBitsError) -> Self {
87        Self::GetBitsError(val)
88    }
89}
90
91impl core::fmt::Display for LiteralsSectionType {
92    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> {
93        match self {
94            LiteralsSectionType::Compressed => write!(f, "Compressed"),
95            LiteralsSectionType::Raw => write!(f, "Raw"),
96            LiteralsSectionType::RLE => write!(f, "RLE"),
97            LiteralsSectionType::Treeless => write!(f, "Treeless"),
98        }
99    }
100}
101
102impl Default for LiteralsSection {
103    fn default() -> Self {
104        Self::new()
105    }
106}
107
108impl LiteralsSection {
109    /// Create a new [LiteralsSection].
110    pub fn new() -> LiteralsSection {
111        LiteralsSection {
112            regenerated_size: 0,
113            compressed_size: None,
114            num_streams: None,
115            ls_type: LiteralsSectionType::Raw,
116        }
117    }
118
119    /// Given the first byte of a header, determine the size of the whole header, from 1 to 5 bytes.
120    pub fn header_bytes_needed(&self, first_byte: u8) -> Result<u8, LiteralsSectionParseError> {
121        let ls_type: LiteralsSectionType = Self::section_type(first_byte)?;
122        let size_format = (first_byte >> 2) & 0x3;
123        match ls_type {
124            LiteralsSectionType::RLE | LiteralsSectionType::Raw => {
125                match size_format {
126                    0 | 2 => {
127                        // size_format actually only uses one bit
128                        // regenerated_size uses 5 bits
129                        Ok(1)
130                    }
131                    1 => {
132                        // size_format uses 2 bit
133                        // regenerated_size uses 12 bits
134                        Ok(2)
135                    }
136                    3 => {
137                        // size_format uses 2 bit
138                        // regenerated_size uses 20 bits
139                        Ok(3)
140                    }
141                    _ => panic!(
142                        "This is a bug in the program. There should only be values between 0..3"
143                    ),
144                }
145            }
146            LiteralsSectionType::Compressed | LiteralsSectionType::Treeless => {
147                match size_format {
148                    0 | 1 => {
149                        // Only differ in num_streams
150                        // both regenerated and compressed sizes use 10 bit
151                        Ok(3)
152                    }
153                    2 => {
154                        // both regenerated and compressed sizes use 14 bit
155                        Ok(4)
156                    }
157                    3 => {
158                        // both regenerated and compressed sizes use 18 bit
159                        Ok(5)
160                    }
161
162                    _ => panic!(
163                        "This is a bug in the program. There should only be values between 0..3"
164                    ),
165                }
166            }
167        }
168    }
169
170    /// Parse the header into `self`, and returns the number of bytes read.
171    pub fn parse_from_header(&mut self, raw: &[u8]) -> Result<u8, LiteralsSectionParseError> {
172        let mut br: BitReader<'_> = BitReader::new(raw);
173        let block_type = br.get_bits(2)? as u8;
174        self.ls_type = Self::section_type(block_type)?;
175        let size_format = br.get_bits(2)? as u8;
176
177        let byte_needed = self.header_bytes_needed(raw[0])?;
178        if raw.len() < byte_needed as usize {
179            return Err(LiteralsSectionParseError::NotEnoughBytes {
180                have: raw.len(),
181                need: byte_needed,
182            });
183        }
184
185        match self.ls_type {
186            LiteralsSectionType::RLE | LiteralsSectionType::Raw => {
187                self.compressed_size = None;
188                match size_format {
189                    0 | 2 => {
190                        // size_format actually only uses one bit
191                        // regenerated_size uses 5 bits
192                        self.regenerated_size = u32::from(raw[0]) >> 3;
193                        Ok(1)
194                    }
195                    1 => {
196                        // size_format uses 2 bit
197                        // regenerated_size uses 12 bits
198                        self.regenerated_size = (u32::from(raw[0]) >> 4) + (u32::from(raw[1]) << 4);
199                        Ok(2)
200                    }
201                    3 => {
202                        // size_format uses 2 bit
203                        // regenerated_size uses 20 bits
204                        self.regenerated_size = (u32::from(raw[0]) >> 4)
205                            + (u32::from(raw[1]) << 4)
206                            + (u32::from(raw[2]) << 12);
207                        Ok(3)
208                    }
209                    _ => panic!(
210                        "This is a bug in the program. There should only be values between 0..3"
211                    ),
212                }
213            }
214            LiteralsSectionType::Compressed | LiteralsSectionType::Treeless => {
215                match size_format {
216                    0 => {
217                        self.num_streams = Some(1);
218                    }
219                    1..=3 => {
220                        self.num_streams = Some(4);
221                    }
222                    _ => panic!(
223                        "This is a bug in the program. There should only be values between 0..3"
224                    ),
225                };
226
227                match size_format {
228                    0 | 1 => {
229                        // Differ in num_streams see above
230                        // both regenerated and compressed sizes use 10 bit
231
232                        // 4 from the first, six from the second byte
233                        self.regenerated_size =
234                            (u32::from(raw[0]) >> 4) + ((u32::from(raw[1]) & 0x3f) << 4);
235
236                        // 2 from the second, full last byte
237                        self.compressed_size =
238                            Some(u32::from(raw[1] >> 6) + (u32::from(raw[2]) << 2));
239                        Ok(3)
240                    }
241                    2 => {
242                        // both regenerated and compressed sizes use 14 bit
243
244                        // 4 from first, full second, 2 from the third byte
245                        self.regenerated_size = (u32::from(raw[0]) >> 4)
246                            + (u32::from(raw[1]) << 4)
247                            + ((u32::from(raw[2]) & 0x3) << 12);
248
249                        // 6 from the third, full last byte
250                        self.compressed_size =
251                            Some((u32::from(raw[2]) >> 2) + (u32::from(raw[3]) << 6));
252                        Ok(4)
253                    }
254                    3 => {
255                        // both regenerated and compressed sizes use 18 bit
256
257                        // 4 from first, full second, six from third byte
258                        self.regenerated_size = (u32::from(raw[0]) >> 4)
259                            + (u32::from(raw[1]) << 4)
260                            + ((u32::from(raw[2]) & 0x3F) << 12);
261
262                        // 2 from third, full fourth, full fifth byte
263                        self.compressed_size = Some(
264                            (u32::from(raw[2]) >> 6)
265                                + (u32::from(raw[3]) << 2)
266                                + (u32::from(raw[4]) << 10),
267                        );
268                        Ok(5)
269                    }
270
271                    _ => panic!(
272                        "This is a bug in the program. There should only be values between 0..3"
273                    ),
274                }
275            }
276        }
277    }
278
279    /// Given the first two bits of a header, determine the type of a header.
280    fn section_type(raw: u8) -> Result<LiteralsSectionType, LiteralsSectionParseError> {
281        let t = raw & 0x3;
282        match t {
283            0 => Ok(LiteralsSectionType::Raw),
284            1 => Ok(LiteralsSectionType::RLE),
285            2 => Ok(LiteralsSectionType::Compressed),
286            3 => Ok(LiteralsSectionType::Treeless),
287            other => Err(LiteralsSectionParseError::IllegalLiteralSectionType { got: other }),
288        }
289    }
290}