ruzstd/decoding/
block_decoder.rs

1use super::super::blocks::block::BlockHeader;
2use super::super::blocks::block::BlockType;
3use super::super::blocks::literals_section::LiteralsSection;
4use super::super::blocks::literals_section::LiteralsSectionType;
5use super::super::blocks::sequence_section::SequencesHeader;
6use super::literals_section_decoder::{decode_literals, DecompressLiteralsError};
7use super::sequence_execution::ExecuteSequencesError;
8use super::sequence_section_decoder::decode_sequences;
9use super::sequence_section_decoder::DecodeSequenceError;
10use crate::blocks::literals_section::LiteralsSectionParseError;
11use crate::blocks::sequence_section::SequencesHeaderParseError;
12use crate::decoding::scratch::DecoderScratch;
13use crate::decoding::sequence_execution::execute_sequences;
14use crate::io::{self, Read};
15
/// Decodes blocks in two alternating steps: first a 3-byte block header is read,
/// then the body belonging to that header is decoded.
///
/// Which of the two steps is expected next is tracked in `internal_state`.
pub struct BlockDecoder {
    /// Holds the raw bytes of the block header while it is being parsed.
    header_buffer: [u8; 3],
    /// The step (header or body) the decoder expects to perform next.
    internal_state: DecoderState,
}
20
/// The step a [`BlockDecoder`] expects to perform next.
enum DecoderState {
    /// A block header has to be read next.
    ReadyToDecodeNextHeader,
    /// A header has been read; the body of that block has to be decoded next.
    ReadyToDecodeNextBody,
    // Not constructed anywhere in this file yet (see the TODO below), hence the lint allowance.
    #[allow(dead_code)]
    Failed, //TODO put "self.internal_state = DecoderState::Failed;" everywhere an unresolvable error occurs
}
27
/// Errors that can occur while reading and parsing a block header.
#[derive(Debug)]
#[non_exhaustive]
pub enum BlockHeaderReadError {
    /// Reading the header bytes from the source failed.
    ReadError(io::Error),
    /// The header declares the reserved block type.
    FoundReservedBlock,
    /// The block type could not be determined from the header.
    BlockTypeError(BlockTypeError),
    /// The block content size stored in the header was rejected.
    BlockSizeError(BlockSizeError),
}
36
#[cfg(feature = "std")]
impl std::error::Error for BlockHeaderReadError {
    /// Returns the lower-level error wrapped by this variant, or `None` for
    /// variants that do not wrap another error.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match self {
            Self::FoundReservedBlock => return None,
            Self::ReadError(io_err) => io_err,
            Self::BlockTypeError(type_err) => type_err,
            Self::BlockSizeError(size_err) => size_err,
        };
        Some(cause)
    }
}
48
49impl ::core::fmt::Display for BlockHeaderReadError {
50    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> ::core::fmt::Result {
51        match self {
52            BlockHeaderReadError::ReadError(_) => write!(f, "Error while reading the block header"),
53            BlockHeaderReadError::FoundReservedBlock => write!(
54                f,
55                "Reserved block occured. This is considered corruption by the documentation"
56            ),
57            BlockHeaderReadError::BlockTypeError(e) => write!(f, "Error getting block type: {}", e),
58            BlockHeaderReadError::BlockSizeError(e) => {
59                write!(f, "Error getting block content size: {}", e)
60            }
61        }
62    }
63}
64
65impl From<io::Error> for BlockHeaderReadError {
66    fn from(val: io::Error) -> Self {
67        Self::ReadError(val)
68    }
69}
70
71impl From<BlockTypeError> for BlockHeaderReadError {
72    fn from(val: BlockTypeError) -> Self {
73        Self::BlockTypeError(val)
74    }
75}
76
77impl From<BlockSizeError> for BlockHeaderReadError {
78    fn from(val: BlockSizeError) -> Self {
79        Self::BlockSizeError(val)
80    }
81}
82
/// Error produced when the block type field of a header cannot be mapped to a block type.
#[derive(Debug)]
#[non_exhaustive]
pub enum BlockTypeError {
    /// The block type field held `num`, which is not one of the known values 0 to 3.
    InvalidBlocktypeNumber { num: u8 },
}
88
// Empty impl: the trait's default methods are used, so no underlying source error is reported.
#[cfg(feature = "std")]
impl std::error::Error for BlockTypeError {}
91
92impl core::fmt::Display for BlockTypeError {
93    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
94        match self {
95            BlockTypeError::InvalidBlocktypeNumber { num } => {
96                write!(f,
97                    "Invalid Blocktype number. Is: {} Should be one of: 0, 1, 2, 3 (3 is reserved though",
98                    num,
99                )
100            }
101        }
102    }
103}
104
/// Error produced when the block content size stored in a header is rejected.
#[derive(Debug)]
#[non_exhaustive]
pub enum BlockSizeError {
    /// The header declared `size` bytes, which exceeds `ABSOLUTE_MAXIMUM_BLOCK_SIZE`.
    BlockSizeTooLarge { size: u32 },
}
110
// Empty impl: the trait's default methods are used, so no underlying source error is reported.
#[cfg(feature = "std")]
impl std::error::Error for BlockSizeError {}
113
114impl core::fmt::Display for BlockSizeError {
115    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
116        match self {
117            BlockSizeError::BlockSizeTooLarge { size } => {
118                write!(
119                    f,
120                    "Blocksize was bigger than the absolute maximum {} (128kb). Is: {}",
121                    ABSOLUTE_MAXIMUM_BLOCK_SIZE, size,
122                )
123            }
124        }
125    }
126}
127
/// Errors that can occur while decompressing the content of a compressed block.
#[derive(Debug)]
#[non_exhaustive]
pub enum DecompressBlockError {
    /// Reading the block content from the source failed.
    BlockContentReadError(io::Error),
    /// The literals section claims to need `expected_len` bytes, but only
    /// `remaining_bytes` bytes are left in the block content.
    MalformedSectionHeader {
        expected_len: usize,
        remaining_bytes: usize,
    },
    /// Decoding the literals failed.
    DecompressLiteralsError(DecompressLiteralsError),
    /// Parsing the literals section header failed.
    LiteralsSectionParseError(LiteralsSectionParseError),
    /// Parsing the sequences header failed.
    SequencesHeaderParseError(SequencesHeaderParseError),
    /// Decoding the sequences failed.
    DecodeSequenceError(DecodeSequenceError),
    /// Executing the decoded sequences failed.
    ExecuteSequencesError(ExecuteSequencesError),
}
142
#[cfg(feature = "std")]
impl std::error::Error for DecompressBlockError {
    /// Returns the lower-level error wrapped by this variant, or `None` for
    /// variants that do not wrap another error.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match self {
            Self::BlockContentReadError(io_err) => io_err,
            Self::DecompressLiteralsError(inner) => inner,
            Self::LiteralsSectionParseError(inner) => inner,
            Self::SequencesHeaderParseError(inner) => inner,
            Self::DecodeSequenceError(inner) => inner,
            Self::ExecuteSequencesError(inner) => inner,
            Self::MalformedSectionHeader { .. } => return None,
        };
        Some(cause)
    }
}
157
158impl core::fmt::Display for DecompressBlockError {
159    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
160        match self {
161            DecompressBlockError::BlockContentReadError(e) => {
162                write!(f, "Error while reading the block content: {}", e)
163            }
164            DecompressBlockError::MalformedSectionHeader {
165                expected_len,
166                remaining_bytes,
167            } => {
168                write!(f,
169                    "Malformed section header. Says literals would be this long: {} but there are only {} bytes left",
170                    expected_len,
171                    remaining_bytes,
172                )
173            }
174            DecompressBlockError::DecompressLiteralsError(e) => write!(f, "{:?}", e),
175            DecompressBlockError::LiteralsSectionParseError(e) => write!(f, "{:?}", e),
176            DecompressBlockError::SequencesHeaderParseError(e) => write!(f, "{:?}", e),
177            DecompressBlockError::DecodeSequenceError(e) => write!(f, "{:?}", e),
178            DecompressBlockError::ExecuteSequencesError(e) => write!(f, "{:?}", e),
179        }
180    }
181}
182
183impl From<io::Error> for DecompressBlockError {
184    fn from(val: io::Error) -> Self {
185        Self::BlockContentReadError(val)
186    }
187}
188
189impl From<DecompressLiteralsError> for DecompressBlockError {
190    fn from(val: DecompressLiteralsError) -> Self {
191        Self::DecompressLiteralsError(val)
192    }
193}
194
195impl From<LiteralsSectionParseError> for DecompressBlockError {
196    fn from(val: LiteralsSectionParseError) -> Self {
197        Self::LiteralsSectionParseError(val)
198    }
199}
200
201impl From<SequencesHeaderParseError> for DecompressBlockError {
202    fn from(val: SequencesHeaderParseError) -> Self {
203        Self::SequencesHeaderParseError(val)
204    }
205}
206
207impl From<DecodeSequenceError> for DecompressBlockError {
208    fn from(val: DecodeSequenceError) -> Self {
209        Self::DecodeSequenceError(val)
210    }
211}
212
213impl From<ExecuteSequencesError> for DecompressBlockError {
214    fn from(val: ExecuteSequencesError) -> Self {
215        Self::ExecuteSequencesError(val)
216    }
217}
218
/// Errors that can occur while decoding the body of a block.
#[derive(Debug)]
#[non_exhaustive]
pub enum DecodeBlockContentError {
    /// The decoder is in its failed state and refuses to decode further blocks.
    DecoderStateIsFailed,
    /// A block body was requested while the decoder expected to read a block header.
    ExpectedHeaderOfPreviousBlock,
    /// Reading bytes from the source failed; `step` is the type of the block being decoded.
    ReadError { step: BlockType, source: io::Error },
    /// Decompressing a compressed block failed.
    DecompressBlockError(DecompressBlockError),
}
227
#[cfg(feature = "std")]
impl std::error::Error for DecodeBlockContentError {
    /// Returns the lower-level error wrapped by this variant, or `None` for
    /// the variants that only describe a wrong decoder state.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match self {
            Self::ReadError { source: io_err, .. } => io_err,
            Self::DecompressBlockError(inner) => inner,
            Self::DecoderStateIsFailed | Self::ExpectedHeaderOfPreviousBlock => return None,
        };
        Some(cause)
    }
}
238
239impl core::fmt::Display for DecodeBlockContentError {
240    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
241        match self {
242            DecodeBlockContentError::DecoderStateIsFailed => {
243                write!(
244                    f,
245                    "Can't decode next block if failed along the way. Results will be nonsense",
246                )
247            }
248            DecodeBlockContentError::ExpectedHeaderOfPreviousBlock => {
249                write!(f,
250                            "Can't decode next block body, while expecting to decode the header of the previous block. Results will be nonsense",
251                        )
252            }
253            DecodeBlockContentError::ReadError { step, source } => {
254                write!(f, "Error while reading bytes for {}: {}", step, source,)
255            }
256            DecodeBlockContentError::DecompressBlockError(e) => write!(f, "{:?}", e),
257        }
258    }
259}
260
261impl From<DecompressBlockError> for DecodeBlockContentError {
262    fn from(val: DecompressBlockError) -> Self {
263        Self::DecompressBlockError(val)
264    }
265}
266
267/// Create a new [BlockDecoder].
268pub fn new() -> BlockDecoder {
269    BlockDecoder {
270        internal_state: DecoderState::ReadyToDecodeNextHeader,
271        header_buffer: [0u8; 3],
272    }
273}
274
/// Largest block content size (in bytes) a block header may declare: 128 * 1024.
/// Headers declaring more are rejected with `BlockSizeError::BlockSizeTooLarge`.
const ABSOLUTE_MAXIMUM_BLOCK_SIZE: u32 = 128 * 1024;
276
277impl BlockDecoder {
278    pub fn decode_block_content(
279        &mut self,
280        header: &BlockHeader,
281        workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
282        mut source: impl Read,
283    ) -> Result<u64, DecodeBlockContentError> {
284        match self.internal_state {
285            DecoderState::ReadyToDecodeNextBody => { /* Happy :) */ }
286            DecoderState::Failed => return Err(DecodeBlockContentError::DecoderStateIsFailed),
287            DecoderState::ReadyToDecodeNextHeader => {
288                return Err(DecodeBlockContentError::ExpectedHeaderOfPreviousBlock)
289            }
290        }
291
292        let block_type = header.block_type;
293        match block_type {
294            BlockType::RLE => {
295                const BATCH_SIZE: usize = 512;
296                let mut buf = [0u8; BATCH_SIZE];
297                let full_reads = header.decompressed_size / BATCH_SIZE as u32;
298                let single_read_size = header.decompressed_size % BATCH_SIZE as u32;
299
300                source.read_exact(&mut buf[0..1]).map_err(|err| {
301                    DecodeBlockContentError::ReadError {
302                        step: block_type,
303                        source: err,
304                    }
305                })?;
306                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
307
308                for i in 1..BATCH_SIZE {
309                    buf[i] = buf[0];
310                }
311
312                for _ in 0..full_reads {
313                    workspace.buffer.push(&buf[..]);
314                }
315                let smaller = &mut buf[..single_read_size as usize];
316                workspace.buffer.push(smaller);
317
318                Ok(1)
319            }
320            BlockType::Raw => {
321                const BATCH_SIZE: usize = 128 * 1024;
322                let mut buf = [0u8; BATCH_SIZE];
323                let full_reads = header.decompressed_size / BATCH_SIZE as u32;
324                let single_read_size = header.decompressed_size % BATCH_SIZE as u32;
325
326                for _ in 0..full_reads {
327                    source.read_exact(&mut buf[..]).map_err(|err| {
328                        DecodeBlockContentError::ReadError {
329                            step: block_type,
330                            source: err,
331                        }
332                    })?;
333                    workspace.buffer.push(&buf[..]);
334                }
335
336                let smaller = &mut buf[..single_read_size as usize];
337                source
338                    .read_exact(smaller)
339                    .map_err(|err| DecodeBlockContentError::ReadError {
340                        step: block_type,
341                        source: err,
342                    })?;
343                workspace.buffer.push(smaller);
344
345                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
346                Ok(u64::from(header.decompressed_size))
347            }
348
349            BlockType::Reserved => {
350                panic!("How did you even get this. The decoder should error out if it detects a reserved-type block");
351            }
352
353            BlockType::Compressed => {
354                self.decompress_block(header, workspace, source)?;
355
356                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
357                Ok(u64::from(header.content_size))
358            }
359        }
360    }
361
    /// Decompresses one compressed block and appends the result to `workspace.buffer`.
    ///
    /// Reads exactly `header.content_size` bytes from `source` into
    /// `workspace.block_content_buffer`, then works through that buffer in order:
    /// literals section header, literals, sequences header, sequences.
    ///
    /// # Errors
    /// Returns a `DecompressBlockError` when reading from `source` fails, when the
    /// literals section claims more bytes than remain in the block, or when any of
    /// the parsing/decoding/execution steps reports an error.
    ///
    /// # Panics
    /// Panics if a literals section without a compressed size is neither RLE nor Raw,
    /// or if one of the consistency `assert!`s below fails (literal count, bytes used
    /// by the literals section, or the section sizes not adding up to
    /// `header.content_size`).
    fn decompress_block(
        &mut self,
        header: &BlockHeader,
        workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
        mut source: impl Read,
    ) -> Result<(), DecompressBlockError> {
        // Size the reusable buffer to the block content and fill it from the source.
        workspace
            .block_content_buffer
            .resize(header.content_size as usize, 0);

        source.read_exact(workspace.block_content_buffer.as_mut_slice())?;
        // `raw` is re-sliced (shadowed) after every step so it always starts at the unread part.
        let raw = workspace.block_content_buffer.as_slice();

        let mut section = LiteralsSection::new();
        // The returned value is used as the number of bytes to skip past the literals header.
        let bytes_in_literals_header = section.parse_from_header(raw)?;
        let raw = &raw[bytes_in_literals_header as usize..];
        vprintln!(
            "Found {} literalssection with regenerated size: {}, and compressed size: {:?}",
            section.ls_type,
            section.regenerated_size,
            section.compressed_size
        );

        // Number of input bytes belonging to the literals: the compressed size if the
        // header carries one, otherwise 1 byte for RLE or the regenerated size for Raw.
        let upper_limit_for_literals = match section.compressed_size {
            Some(x) => x as usize,
            None => match section.ls_type {
                LiteralsSectionType::RLE => 1,
                LiteralsSectionType::Raw => section.regenerated_size as usize,
                _ => panic!("Bug in this library"),
            },
        };

        // Guard the slicing below: the header must not claim more bytes than are left.
        if raw.len() < upper_limit_for_literals {
            return Err(DecompressBlockError::MalformedSectionHeader {
                expected_len: upper_limit_for_literals,
                remaining_bytes: raw.len(),
            });
        }

        let raw_literals = &raw[..upper_limit_for_literals];
        vprintln!("Slice for literals: {}", raw_literals.len());

        workspace.literals_buffer.clear(); //all literals of the previous block must have been used in the sequence execution anyways. just be defensive here
        let bytes_used_in_literals_section = decode_literals(
            &section,
            &mut workspace.huf,
            raw_literals,
            &mut workspace.literals_buffer,
        )?;
        // The decoded literal count must match what the section header announced.
        assert!(
            section.regenerated_size == workspace.literals_buffer.len() as u32,
            "Wrong number of literals: {}, Should have been: {}",
            workspace.literals_buffer.len(),
            section.regenerated_size
        );
        // The literals decoder must have consumed exactly the slice it was given.
        assert!(bytes_used_in_literals_section == upper_limit_for_literals as u32);

        let raw = &raw[upper_limit_for_literals..];
        vprintln!("Slice for sequences with headers: {}", raw.len());

        let mut seq_section = SequencesHeader::new();
        // The returned value is used as the number of bytes to skip past the sequences header.
        let bytes_in_sequence_header = seq_section.parse_from_header(raw)?;
        let raw = &raw[bytes_in_sequence_header as usize..];
        vprintln!(
            "Found sequencessection with sequences: {} and size: {}",
            seq_section.num_sequences,
            raw.len()
        );

        // Sanity check: headers, literals and the remaining sequence bytes must add up
        // to the block content size.
        assert!(
            u32::from(bytes_in_literals_header)
                + bytes_used_in_literals_section
                + u32::from(bytes_in_sequence_header)
                + raw.len() as u32
                == header.content_size
        );
        vprintln!("Slice for sequences: {}", raw.len());

        if seq_section.num_sequences != 0 {
            decode_sequences(
                &seq_section,
                raw,
                &mut workspace.fse,
                &mut workspace.sequences,
            )?;
            vprintln!("Executing sequences");
            execute_sequences(workspace)?;
        } else {
            // No sequences: the literals are the block's whole output.
            workspace.buffer.push(&workspace.literals_buffer);
            workspace.sequences.clear();
        }

        Ok(())
    }
456
457    pub fn read_block_header(
458        &mut self,
459        mut r: impl Read,
460    ) -> Result<(BlockHeader, u8), BlockHeaderReadError> {
461        //match self.internal_state {
462        //    DecoderState::ReadyToDecodeNextHeader => {/* Happy :) */},
463        //    DecoderState::Failed => return Err(format!("Cant decode next block if failed along the way. Results will be nonsense")),
464        //    DecoderState::ReadyToDecodeNextBody => return Err(format!("Cant decode next block header, while expecting to decode the body of the previous block. Results will be nonsense")),
465        //}
466
467        r.read_exact(&mut self.header_buffer[0..3])?;
468
469        let btype = self.block_type()?;
470        if let BlockType::Reserved = btype {
471            return Err(BlockHeaderReadError::FoundReservedBlock);
472        }
473
474        let block_size = self.block_content_size()?;
475        let decompressed_size = match btype {
476            BlockType::Raw => block_size,
477            BlockType::RLE => block_size,
478            BlockType::Reserved => 0, //should be caught above, this is an error state
479            BlockType::Compressed => 0, //unknown but will be smaller than 128kb (or window_size if that is smaller than 128kb)
480        };
481        let content_size = match btype {
482            BlockType::Raw => block_size,
483            BlockType::Compressed => block_size,
484            BlockType::RLE => 1,
485            BlockType::Reserved => 0, //should be caught above, this is an error state
486        };
487
488        let last_block = self.is_last();
489
490        self.reset_buffer();
491        self.internal_state = DecoderState::ReadyToDecodeNextBody;
492
493        //just return 3. Blockheaders always take 3 bytes
494        Ok((
495            BlockHeader {
496                last_block,
497                block_type: btype,
498                decompressed_size,
499                content_size,
500            },
501            3,
502        ))
503    }
504
505    fn reset_buffer(&mut self) {
506        self.header_buffer[0] = 0;
507        self.header_buffer[1] = 0;
508        self.header_buffer[2] = 0;
509    }
510
511    fn is_last(&self) -> bool {
512        self.header_buffer[0] & 0x1 == 1
513    }
514
515    fn block_type(&self) -> Result<BlockType, BlockTypeError> {
516        let t = (self.header_buffer[0] >> 1) & 0x3;
517        match t {
518            0 => Ok(BlockType::Raw),
519            1 => Ok(BlockType::RLE),
520            2 => Ok(BlockType::Compressed),
521            3 => Ok(BlockType::Reserved),
522            other => Err(BlockTypeError::InvalidBlocktypeNumber { num: other }),
523        }
524    }
525
526    fn block_content_size(&self) -> Result<u32, BlockSizeError> {
527        let val = self.block_content_size_unchecked();
528        if val > ABSOLUTE_MAXIMUM_BLOCK_SIZE {
529            Err(BlockSizeError::BlockSizeTooLarge { size: val })
530        } else {
531            Ok(val)
532        }
533    }
534
535    fn block_content_size_unchecked(&self) -> u32 {
536        u32::from(self.header_buffer[0] >> 3) //push out type and last_block flags. Retain 5 bit
537            | (u32::from(self.header_buffer[1]) << 5)
538            | (u32::from(self.header_buffer[2]) << 13)
539    }
540}