ruzstd/blocks/literals_section.rs
1//! Utilities and representations for the first half of a block, the literals section.
2//! It contains data that is then copied from by the sequences section.
3use crate::bit_io::BitReader;
4use crate::decoding::errors::LiteralsSectionParseError;
5
/// A compressed block consists of two sections, a literals section, and a sequences section.
///
/// This is the first of those two sections. A literal is just any arbitrary data, and it is
/// copied by the sequences section.
pub struct LiteralsSection {
    /// The number of literal bytes this section produces.
    ///
    /// - If this block is of type [LiteralsSectionType::Raw], then the data is `regenerated_size`
    ///   bytes long, and it contains the raw literals data to be used during the second section,
    ///   the sequences section.
    /// - If this block is of type [LiteralsSectionType::RLE],
    ///   then the literal consists of a single byte repeated `regenerated_size` times.
    /// - For types [LiteralsSectionType::Compressed] or [LiteralsSectionType::Treeless],
    ///   then this is the size of the decompressed data.
    pub regenerated_size: u32,
    /// The size of the compressed literals data, if the section is compressed.
    ///
    /// - For types [LiteralsSectionType::Raw] and [LiteralsSectionType::RLE], this value is not
    ///   present (`None`).
    /// - For types [LiteralsSectionType::Compressed] and [LiteralsSectionType::Treeless], this
    ///   value will be set to the size of the compressed data.
    pub compressed_size: Option<u32>,
    /// This value will be either 1 stream or 4 streams if the literal is of type
    /// [LiteralsSectionType::Compressed] or [LiteralsSectionType::Treeless], and it
    /// is not used for RLE or uncompressed literals.
    pub num_streams: Option<u8>,
    /// The type of the literal section.
    pub ls_type: LiteralsSectionType,
}
29
/// The way in which a literals section is encoded.
///
/// The type is a plain tag without payload, so it is cheap to copy and can be compared
/// and printed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LiteralsSectionType {
    /// Literals are stored uncompressed.
    Raw,
    /// Literals consist of a single byte value repeated [LiteralsSection::regenerated_size] times.
    // `RLE` is the established name of this mode, so the acronym is kept in upper case.
    #[allow(clippy::upper_case_acronyms)]
    RLE,
    /// This is a standard Huffman-compressed block, starting with a Huffman tree description.
    /// In this mode, there are at least *2* different literals represented in the Huffman tree
    /// description.
    Compressed,
    /// This is a Huffman-compressed block,
    /// using the Huffman tree from the previous [LiteralsSectionType::Compressed] block
    /// in the sequence. If this mode is triggered without any previous Huffman-tables in the
    /// frame (or dictionary), it should be treated as data corruption.
    Treeless,
}
47
48impl Default for LiteralsSection {
49 fn default() -> Self {
50 Self::new()
51 }
52}
53
54impl LiteralsSection {
55 /// Create a new [LiteralsSection].
56 pub fn new() -> LiteralsSection {
57 LiteralsSection {
58 regenerated_size: 0,
59 compressed_size: None,
60 num_streams: None,
61 ls_type: LiteralsSectionType::Raw,
62 }
63 }
64
65 /// Given the first byte of a header, determine the size of the whole header, from 1 to 5 bytes.
66 pub fn header_bytes_needed(&self, first_byte: u8) -> Result<u8, LiteralsSectionParseError> {
67 let ls_type: LiteralsSectionType = Self::section_type(first_byte)?;
68 let size_format = (first_byte >> 2) & 0x3;
69 match ls_type {
70 LiteralsSectionType::RLE | LiteralsSectionType::Raw => {
71 match size_format {
72 0 | 2 => {
73 // size_format actually only uses one bit
74 // regenerated_size uses 5 bits
75 Ok(1)
76 }
77 1 => {
78 // size_format uses 2 bit
79 // regenerated_size uses 12 bits
80 Ok(2)
81 }
82 3 => {
83 // size_format uses 2 bit
84 // regenerated_size uses 20 bits
85 Ok(3)
86 }
87 _ => panic!(
88 "This is a bug in the program. There should only be values between 0..3"
89 ),
90 }
91 }
92 LiteralsSectionType::Compressed | LiteralsSectionType::Treeless => {
93 match size_format {
94 0 | 1 => {
95 // Only differ in num_streams
96 // both regenerated and compressed sizes use 10 bit
97 Ok(3)
98 }
99 2 => {
100 // both regenerated and compressed sizes use 14 bit
101 Ok(4)
102 }
103 3 => {
104 // both regenerated and compressed sizes use 18 bit
105 Ok(5)
106 }
107
108 _ => panic!(
109 "This is a bug in the program. There should only be values between 0..3"
110 ),
111 }
112 }
113 }
114 }
115
116 /// Parse the header into `self`, and returns the number of bytes read.
117 pub fn parse_from_header(&mut self, raw: &[u8]) -> Result<u8, LiteralsSectionParseError> {
118 let mut br: BitReader<'_> = BitReader::new(raw);
119 let block_type = br.get_bits(2)? as u8;
120 self.ls_type = Self::section_type(block_type)?;
121 let size_format = br.get_bits(2)? as u8;
122
123 let byte_needed = self.header_bytes_needed(raw[0])?;
124 if raw.len() < byte_needed as usize {
125 return Err(LiteralsSectionParseError::NotEnoughBytes {
126 have: raw.len(),
127 need: byte_needed,
128 });
129 }
130
131 match self.ls_type {
132 LiteralsSectionType::RLE | LiteralsSectionType::Raw => {
133 self.compressed_size = None;
134 match size_format {
135 0 | 2 => {
136 // size_format actually only uses one bit
137 // regenerated_size uses 5 bits
138 self.regenerated_size = u32::from(raw[0]) >> 3;
139 Ok(1)
140 }
141 1 => {
142 // size_format uses 2 bit
143 // regenerated_size uses 12 bits
144 self.regenerated_size = (u32::from(raw[0]) >> 4) + (u32::from(raw[1]) << 4);
145 Ok(2)
146 }
147 3 => {
148 // size_format uses 2 bit
149 // regenerated_size uses 20 bits
150 self.regenerated_size = (u32::from(raw[0]) >> 4)
151 + (u32::from(raw[1]) << 4)
152 + (u32::from(raw[2]) << 12);
153 Ok(3)
154 }
155 _ => panic!(
156 "This is a bug in the program. There should only be values between 0..3"
157 ),
158 }
159 }
160 LiteralsSectionType::Compressed | LiteralsSectionType::Treeless => {
161 match size_format {
162 0 => {
163 self.num_streams = Some(1);
164 }
165 1..=3 => {
166 self.num_streams = Some(4);
167 }
168 _ => panic!(
169 "This is a bug in the program. There should only be values between 0..3"
170 ),
171 };
172
173 match size_format {
174 0 | 1 => {
175 // Differ in num_streams see above
176 // both regenerated and compressed sizes use 10 bit
177
178 // 4 from the first, six from the second byte
179 self.regenerated_size =
180 (u32::from(raw[0]) >> 4) + ((u32::from(raw[1]) & 0x3f) << 4);
181
182 // 2 from the second, full last byte
183 self.compressed_size =
184 Some(u32::from(raw[1] >> 6) + (u32::from(raw[2]) << 2));
185 Ok(3)
186 }
187 2 => {
188 // both regenerated and compressed sizes use 14 bit
189
190 // 4 from first, full second, 2 from the third byte
191 self.regenerated_size = (u32::from(raw[0]) >> 4)
192 + (u32::from(raw[1]) << 4)
193 + ((u32::from(raw[2]) & 0x3) << 12);
194
195 // 6 from the third, full last byte
196 self.compressed_size =
197 Some((u32::from(raw[2]) >> 2) + (u32::from(raw[3]) << 6));
198 Ok(4)
199 }
200 3 => {
201 // both regenerated and compressed sizes use 18 bit
202
203 // 4 from first, full second, six from third byte
204 self.regenerated_size = (u32::from(raw[0]) >> 4)
205 + (u32::from(raw[1]) << 4)
206 + ((u32::from(raw[2]) & 0x3F) << 12);
207
208 // 2 from third, full fourth, full fifth byte
209 self.compressed_size = Some(
210 (u32::from(raw[2]) >> 6)
211 + (u32::from(raw[3]) << 2)
212 + (u32::from(raw[4]) << 10),
213 );
214 Ok(5)
215 }
216
217 _ => panic!(
218 "This is a bug in the program. There should only be values between 0..3"
219 ),
220 }
221 }
222 }
223 }
224
225 /// Given the first two bits of a header, determine the type of a header.
226 fn section_type(raw: u8) -> Result<LiteralsSectionType, LiteralsSectionParseError> {
227 let t = raw & 0x3;
228 match t {
229 0 => Ok(LiteralsSectionType::Raw),
230 1 => Ok(LiteralsSectionType::RLE),
231 2 => Ok(LiteralsSectionType::Compressed),
232 3 => Ok(LiteralsSectionType::Treeless),
233 other => Err(LiteralsSectionParseError::IllegalLiteralSectionType { got: other }),
234 }
235 }
236}