ruzstd/blocks/literals_section.rs
//! Utilities and representations for the first half of a block, the literals section.
//! It holds the data that the sequences section later copies from.
3use super::super::decoding::bit_reader::{BitReader, GetBitsError};
4
5/// A compressed block consists of two sections, a literals section, and a sequences section.
6///
7/// This is the first of those two sections. A literal is just any arbitrary data, and it is copied by the sequences section
8pub struct LiteralsSection {
9 /// - If this block is of type [LiteralsSectionType::Raw], then the data is `regenerated_bytes`
10 /// bytes long, and it contains the raw literals data to be used during the second section,
11 /// the sequences section.
12 /// - If this block is of type [LiteralsSectionType::RLE],
13 /// then the literal consists of a single byte repeated `regenerated_size` times.
14 /// - For types [LiteralsSectionType::Compressed] or [LiteralsSectionType::Treeless],
15 /// then this is the size of the decompressed data.
16 pub regenerated_size: u32,
17 /// - For types [LiteralsSectionType::Raw] and [LiteralsSectionType::RLE], this value is not present.
18 /// - For types [LiteralsSectionType::Compressed] and [LiteralsSectionType::Treeless], this value will
19 /// be set to the size of the compressed data.
20 pub compressed_size: Option<u32>,
21 /// This value will be either 1 stream or 4 streams if the literal is of type
22 /// [LiteralsSectionType::Compressed] or [LiteralsSectionType::Treeless], and it
23 /// is not used for RLE or uncompressed literals.
24 pub num_streams: Option<u8>,
25 /// The type of the literal section.
26 pub ls_type: LiteralsSectionType,
27}
28
/// The way in which a literals section is encoded.
///
/// The type is a plain tag without payload, so it is cheap to copy and can be
/// compared and debug-printed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LiteralsSectionType {
    /// Literals are stored uncompressed.
    Raw,
    /// Literals consist of a single byte value repeated [LiteralsSection::regenerated_size] times.
    RLE,
    /// This is a standard Huffman-compressed block, starting with a Huffman tree description.
    /// In this mode, there are at least *2* different literals represented in the Huffman tree
    /// description.
    Compressed,
    /// This is a Huffman-compressed block,
    /// using the Huffman tree from the previous [LiteralsSectionType::Compressed] block
    /// in the sequence. If this mode is triggered without any previous Huffman tables in the
    /// frame (or dictionary), it should be treated as data corruption.
    Treeless,
}
45
46#[derive(Debug)]
47#[non_exhaustive]
48pub enum LiteralsSectionParseError {
49 IllegalLiteralSectionType { got: u8 },
50 GetBitsError(GetBitsError),
51 NotEnoughBytes { have: usize, need: u8 },
52}
53
#[cfg(feature = "std")]
impl std::error::Error for LiteralsSectionParseError {
    /// Returns the wrapped bit-reader error for the `GetBitsError` variant;
    /// every other variant has no underlying source.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        if let LiteralsSectionParseError::GetBitsError(inner) = self {
            Some(inner)
        } else {
            None
        }
    }
}
63impl core::fmt::Display for LiteralsSectionParseError {
64 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
65 match self {
66 LiteralsSectionParseError::IllegalLiteralSectionType { got } => {
67 write!(
68 f,
69 "Illegal literalssectiontype. Is: {}, must be in: 0, 1, 2, 3",
70 got
71 )
72 }
73 LiteralsSectionParseError::GetBitsError(e) => write!(f, "{:?}", e),
74 LiteralsSectionParseError::NotEnoughBytes { have, need } => {
75 write!(
76 f,
77 "Not enough byte to parse the literals section header. Have: {}, Need: {}",
78 have, need,
79 )
80 }
81 }
82 }
83}
84
85impl From<GetBitsError> for LiteralsSectionParseError {
86 fn from(val: GetBitsError) -> Self {
87 Self::GetBitsError(val)
88 }
89}
90
91impl core::fmt::Display for LiteralsSectionType {
92 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> {
93 match self {
94 LiteralsSectionType::Compressed => write!(f, "Compressed"),
95 LiteralsSectionType::Raw => write!(f, "Raw"),
96 LiteralsSectionType::RLE => write!(f, "RLE"),
97 LiteralsSectionType::Treeless => write!(f, "Treeless"),
98 }
99 }
100}
101
102impl Default for LiteralsSection {
103 fn default() -> Self {
104 Self::new()
105 }
106}
107
108impl LiteralsSection {
109 /// Create a new [LiteralsSection].
110 pub fn new() -> LiteralsSection {
111 LiteralsSection {
112 regenerated_size: 0,
113 compressed_size: None,
114 num_streams: None,
115 ls_type: LiteralsSectionType::Raw,
116 }
117 }
118
119 /// Given the first byte of a header, determine the size of the whole header, from 1 to 5 bytes.
120 pub fn header_bytes_needed(&self, first_byte: u8) -> Result<u8, LiteralsSectionParseError> {
121 let ls_type: LiteralsSectionType = Self::section_type(first_byte)?;
122 let size_format = (first_byte >> 2) & 0x3;
123 match ls_type {
124 LiteralsSectionType::RLE | LiteralsSectionType::Raw => {
125 match size_format {
126 0 | 2 => {
127 // size_format actually only uses one bit
128 // regenerated_size uses 5 bits
129 Ok(1)
130 }
131 1 => {
132 // size_format uses 2 bit
133 // regenerated_size uses 12 bits
134 Ok(2)
135 }
136 3 => {
137 // size_format uses 2 bit
138 // regenerated_size uses 20 bits
139 Ok(3)
140 }
141 _ => panic!(
142 "This is a bug in the program. There should only be values between 0..3"
143 ),
144 }
145 }
146 LiteralsSectionType::Compressed | LiteralsSectionType::Treeless => {
147 match size_format {
148 0 | 1 => {
149 // Only differ in num_streams
150 // both regenerated and compressed sizes use 10 bit
151 Ok(3)
152 }
153 2 => {
154 // both regenerated and compressed sizes use 14 bit
155 Ok(4)
156 }
157 3 => {
158 // both regenerated and compressed sizes use 18 bit
159 Ok(5)
160 }
161
162 _ => panic!(
163 "This is a bug in the program. There should only be values between 0..3"
164 ),
165 }
166 }
167 }
168 }
169
170 /// Parse the header into `self`, and returns the number of bytes read.
171 pub fn parse_from_header(&mut self, raw: &[u8]) -> Result<u8, LiteralsSectionParseError> {
172 let mut br: BitReader<'_> = BitReader::new(raw);
173 let block_type = br.get_bits(2)? as u8;
174 self.ls_type = Self::section_type(block_type)?;
175 let size_format = br.get_bits(2)? as u8;
176
177 let byte_needed = self.header_bytes_needed(raw[0])?;
178 if raw.len() < byte_needed as usize {
179 return Err(LiteralsSectionParseError::NotEnoughBytes {
180 have: raw.len(),
181 need: byte_needed,
182 });
183 }
184
185 match self.ls_type {
186 LiteralsSectionType::RLE | LiteralsSectionType::Raw => {
187 self.compressed_size = None;
188 match size_format {
189 0 | 2 => {
190 // size_format actually only uses one bit
191 // regenerated_size uses 5 bits
192 self.regenerated_size = u32::from(raw[0]) >> 3;
193 Ok(1)
194 }
195 1 => {
196 // size_format uses 2 bit
197 // regenerated_size uses 12 bits
198 self.regenerated_size = (u32::from(raw[0]) >> 4) + (u32::from(raw[1]) << 4);
199 Ok(2)
200 }
201 3 => {
202 // size_format uses 2 bit
203 // regenerated_size uses 20 bits
204 self.regenerated_size = (u32::from(raw[0]) >> 4)
205 + (u32::from(raw[1]) << 4)
206 + (u32::from(raw[2]) << 12);
207 Ok(3)
208 }
209 _ => panic!(
210 "This is a bug in the program. There should only be values between 0..3"
211 ),
212 }
213 }
214 LiteralsSectionType::Compressed | LiteralsSectionType::Treeless => {
215 match size_format {
216 0 => {
217 self.num_streams = Some(1);
218 }
219 1..=3 => {
220 self.num_streams = Some(4);
221 }
222 _ => panic!(
223 "This is a bug in the program. There should only be values between 0..3"
224 ),
225 };
226
227 match size_format {
228 0 | 1 => {
229 // Differ in num_streams see above
230 // both regenerated and compressed sizes use 10 bit
231
232 // 4 from the first, six from the second byte
233 self.regenerated_size =
234 (u32::from(raw[0]) >> 4) + ((u32::from(raw[1]) & 0x3f) << 4);
235
236 // 2 from the second, full last byte
237 self.compressed_size =
238 Some(u32::from(raw[1] >> 6) + (u32::from(raw[2]) << 2));
239 Ok(3)
240 }
241 2 => {
242 // both regenerated and compressed sizes use 14 bit
243
244 // 4 from first, full second, 2 from the third byte
245 self.regenerated_size = (u32::from(raw[0]) >> 4)
246 + (u32::from(raw[1]) << 4)
247 + ((u32::from(raw[2]) & 0x3) << 12);
248
249 // 6 from the third, full last byte
250 self.compressed_size =
251 Some((u32::from(raw[2]) >> 2) + (u32::from(raw[3]) << 6));
252 Ok(4)
253 }
254 3 => {
255 // both regenerated and compressed sizes use 18 bit
256
257 // 4 from first, full second, six from third byte
258 self.regenerated_size = (u32::from(raw[0]) >> 4)
259 + (u32::from(raw[1]) << 4)
260 + ((u32::from(raw[2]) & 0x3F) << 12);
261
262 // 2 from third, full fourth, full fifth byte
263 self.compressed_size = Some(
264 (u32::from(raw[2]) >> 6)
265 + (u32::from(raw[3]) << 2)
266 + (u32::from(raw[4]) << 10),
267 );
268 Ok(5)
269 }
270
271 _ => panic!(
272 "This is a bug in the program. There should only be values between 0..3"
273 ),
274 }
275 }
276 }
277 }
278
279 /// Given the first two bits of a header, determine the type of a header.
280 fn section_type(raw: u8) -> Result<LiteralsSectionType, LiteralsSectionParseError> {
281 let t = raw & 0x3;
282 match t {
283 0 => Ok(LiteralsSectionType::Raw),
284 1 => Ok(LiteralsSectionType::RLE),
285 2 => Ok(LiteralsSectionType::Compressed),
286 3 => Ok(LiteralsSectionType::Treeless),
287 other => Err(LiteralsSectionParseError::IllegalLiteralSectionType { got: other }),
288 }
289 }
290}