// ruzstd/decoding/block_decoder.rs

1use super::super::blocks::block::BlockHeader;
2use super::super::blocks::block::BlockType;
3use super::super::blocks::literals_section::LiteralsSection;
4use super::super::blocks::literals_section::LiteralsSectionType;
5use super::super::blocks::sequence_section::SequencesHeader;
6use super::literals_section_decoder::decode_literals;
7use super::sequence_section_decoder::decode_sequences;
8use crate::common::MAX_BLOCK_SIZE;
9use crate::decoding::errors::DecodeSequenceError;
10use crate::decoding::errors::{
11    BlockHeaderReadError, BlockSizeError, BlockTypeError, DecodeBlockContentError,
12    DecompressBlockError,
13};
14use crate::decoding::scratch::DecoderScratch;
15use crate::decoding::sequence_execution::execute_sequences;
16use crate::io::Read;
17
18pub struct BlockDecoder {
19    header_buffer: [u8; 3],
20    internal_state: DecoderState,
21}
22
/// Position of a [`BlockDecoder`] within the header/body cycle.
enum DecoderState {
    /// The next operation must be reading a block header.
    ReadyToDecodeNextHeader,
    /// A header has been read; the next operation must be decoding that block's body.
    ReadyToDecodeNextBody,
    /// The decoder hit an unrecoverable error. Nothing assigns this state yet.
    // TODO: put "self.internal_state = DecoderState::Failed;" everywhere an
    // unresolvable error occurs.
    #[allow(dead_code)]
    Failed,
}
29
30/// Create a new [BlockDecoder].
31pub fn new() -> BlockDecoder {
32    BlockDecoder {
33        internal_state: DecoderState::ReadyToDecodeNextHeader,
34        header_buffer: [0u8; 3],
35    }
36}
37
38impl BlockDecoder {
39    pub fn decode_block_content(
40        &mut self,
41        header: &BlockHeader,
42        workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
43        mut source: impl Read,
44    ) -> Result<u64, DecodeBlockContentError> {
45        match self.internal_state {
46            DecoderState::ReadyToDecodeNextBody => { /* Happy :) */ }
47            DecoderState::Failed => return Err(DecodeBlockContentError::DecoderStateIsFailed),
48            DecoderState::ReadyToDecodeNextHeader => {
49                return Err(DecodeBlockContentError::ExpectedHeaderOfPreviousBlock)
50            }
51        }
52
53        let block_type = header.block_type;
54        match block_type {
55            BlockType::RLE => {
56                const BATCH_SIZE: usize = 512;
57                let mut buf = [0u8; BATCH_SIZE];
58                let full_reads = header.decompressed_size / BATCH_SIZE as u32;
59                let single_read_size = header.decompressed_size % BATCH_SIZE as u32;
60
61                source.read_exact(&mut buf[0..1]).map_err(|err| {
62                    DecodeBlockContentError::ReadError {
63                        step: block_type,
64                        source: err,
65                    }
66                })?;
67                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
68
69                for i in 1..BATCH_SIZE {
70                    buf[i] = buf[0];
71                }
72
73                for _ in 0..full_reads {
74                    workspace.buffer.push(&buf[..]);
75                }
76                let smaller = &mut buf[..single_read_size as usize];
77                workspace.buffer.push(smaller);
78
79                Ok(1)
80            }
81            BlockType::Raw => {
82                const BATCH_SIZE: usize = 128 * 1024;
83                let mut buf = [0u8; BATCH_SIZE];
84                let full_reads = header.decompressed_size / BATCH_SIZE as u32;
85                let single_read_size = header.decompressed_size % BATCH_SIZE as u32;
86
87                for _ in 0..full_reads {
88                    source.read_exact(&mut buf[..]).map_err(|err| {
89                        DecodeBlockContentError::ReadError {
90                            step: block_type,
91                            source: err,
92                        }
93                    })?;
94                    workspace.buffer.push(&buf[..]);
95                }
96
97                let smaller = &mut buf[..single_read_size as usize];
98                source
99                    .read_exact(smaller)
100                    .map_err(|err| DecodeBlockContentError::ReadError {
101                        step: block_type,
102                        source: err,
103                    })?;
104                workspace.buffer.push(smaller);
105
106                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
107                Ok(u64::from(header.decompressed_size))
108            }
109
110            BlockType::Reserved => {
111                panic!("How did you even get this. The decoder should error out if it detects a reserved-type block");
112            }
113
114            BlockType::Compressed => {
115                self.decompress_block(header, workspace, source)?;
116
117                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
118                Ok(u64::from(header.content_size))
119            }
120        }
121    }
122
123    fn decompress_block(
124        &mut self,
125        header: &BlockHeader,
126        workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
127        mut source: impl Read,
128    ) -> Result<(), DecompressBlockError> {
129        workspace
130            .block_content_buffer
131            .resize(header.content_size as usize, 0);
132
133        source.read_exact(workspace.block_content_buffer.as_mut_slice())?;
134        let raw = workspace.block_content_buffer.as_slice();
135
136        let mut section = LiteralsSection::new();
137        let bytes_in_literals_header = section.parse_from_header(raw)?;
138        let raw = &raw[bytes_in_literals_header as usize..];
139        vprintln!(
140            "Found {} literalssection with regenerated size: {}, and compressed size: {:?}",
141            section.ls_type,
142            section.regenerated_size,
143            section.compressed_size
144        );
145
146        let upper_limit_for_literals = match section.compressed_size {
147            Some(x) => x as usize,
148            None => match section.ls_type {
149                LiteralsSectionType::RLE => 1,
150                LiteralsSectionType::Raw => section.regenerated_size as usize,
151                _ => panic!("Bug in this library"),
152            },
153        };
154
155        if raw.len() < upper_limit_for_literals {
156            return Err(DecompressBlockError::MalformedSectionHeader {
157                expected_len: upper_limit_for_literals,
158                remaining_bytes: raw.len(),
159            });
160        }
161
162        let raw_literals = &raw[..upper_limit_for_literals];
163        vprintln!("Slice for literals: {}", raw_literals.len());
164
165        workspace.literals_buffer.clear(); //all literals of the previous block must have been used in the sequence execution anyways. just be defensive here
166        let bytes_used_in_literals_section = decode_literals(
167            &section,
168            &mut workspace.huf,
169            raw_literals,
170            &mut workspace.literals_buffer,
171        )?;
172        assert!(
173            section.regenerated_size == workspace.literals_buffer.len() as u32,
174            "Wrong number of literals: {}, Should have been: {}",
175            workspace.literals_buffer.len(),
176            section.regenerated_size
177        );
178        assert!(bytes_used_in_literals_section == upper_limit_for_literals as u32);
179
180        let raw = &raw[upper_limit_for_literals..];
181        vprintln!("Slice for sequences with headers: {}", raw.len());
182
183        let mut seq_section = SequencesHeader::new();
184        let bytes_in_sequence_header = seq_section.parse_from_header(raw)?;
185        let raw = &raw[bytes_in_sequence_header as usize..];
186        vprintln!(
187            "Found sequencessection with sequences: {} and size: {}",
188            seq_section.num_sequences,
189            raw.len()
190        );
191
192        assert!(
193            u32::from(bytes_in_literals_header)
194                + bytes_used_in_literals_section
195                + u32::from(bytes_in_sequence_header)
196                + raw.len() as u32
197                == header.content_size
198        );
199        vprintln!("Slice for sequences: {}", raw.len());
200
201        if seq_section.num_sequences != 0 {
202            decode_sequences(
203                &seq_section,
204                raw,
205                &mut workspace.fse,
206                &mut workspace.sequences,
207            )?;
208            vprintln!("Executing sequences");
209            execute_sequences(workspace)?;
210        } else {
211            if !raw.is_empty() {
212                return Err(DecompressBlockError::DecodeSequenceError(
213                    DecodeSequenceError::ExtraBits {
214                        bits_remaining: raw.len() as isize * 8,
215                    },
216                ));
217            }
218            workspace.buffer.push(&workspace.literals_buffer);
219            workspace.sequences.clear();
220        }
221
222        Ok(())
223    }
224
225    /// Reads 3 bytes from the provided reader and returns
226    /// the deserialized header and the number of bytes read.
227    pub fn read_block_header(
228        &mut self,
229        mut r: impl Read,
230    ) -> Result<(BlockHeader, u8), BlockHeaderReadError> {
231        //match self.internal_state {
232        //    DecoderState::ReadyToDecodeNextHeader => {/* Happy :) */},
233        //    DecoderState::Failed => return Err(format!("Cant decode next block if failed along the way. Results will be nonsense")),
234        //    DecoderState::ReadyToDecodeNextBody => return Err(format!("Cant decode next block header, while expecting to decode the body of the previous block. Results will be nonsense")),
235        //}
236
237        r.read_exact(&mut self.header_buffer[0..3])?;
238
239        let btype = self.block_type()?;
240        if let BlockType::Reserved = btype {
241            return Err(BlockHeaderReadError::FoundReservedBlock);
242        }
243
244        let block_size = self.block_content_size()?;
245        let decompressed_size = match btype {
246            BlockType::Raw => block_size,
247            BlockType::RLE => block_size,
248            BlockType::Reserved => 0, //should be caught above, this is an error state
249            BlockType::Compressed => 0, //unknown but will be smaller than 128kb (or window_size if that is smaller than 128kb)
250        };
251        let content_size = match btype {
252            BlockType::Raw => block_size,
253            BlockType::Compressed => block_size,
254            BlockType::RLE => 1,
255            BlockType::Reserved => 0, //should be caught above, this is an error state
256        };
257
258        let last_block = self.is_last();
259
260        self.reset_buffer();
261        self.internal_state = DecoderState::ReadyToDecodeNextBody;
262
263        //just return 3. Blockheaders always take 3 bytes
264        Ok((
265            BlockHeader {
266                last_block,
267                block_type: btype,
268                decompressed_size,
269                content_size,
270            },
271            3,
272        ))
273    }
274
275    fn reset_buffer(&mut self) {
276        self.header_buffer[0] = 0;
277        self.header_buffer[1] = 0;
278        self.header_buffer[2] = 0;
279    }
280
281    fn is_last(&self) -> bool {
282        self.header_buffer[0] & 0x1 == 1
283    }
284
285    fn block_type(&self) -> Result<BlockType, BlockTypeError> {
286        let t = (self.header_buffer[0] >> 1) & 0x3;
287        match t {
288            0 => Ok(BlockType::Raw),
289            1 => Ok(BlockType::RLE),
290            2 => Ok(BlockType::Compressed),
291            3 => Ok(BlockType::Reserved),
292            other => Err(BlockTypeError::InvalidBlocktypeNumber { num: other }),
293        }
294    }
295
296    fn block_content_size(&self) -> Result<u32, BlockSizeError> {
297        let val = self.block_content_size_unchecked();
298        if val > MAX_BLOCK_SIZE {
299            Err(BlockSizeError::BlockSizeTooLarge { size: val })
300        } else {
301            Ok(val)
302        }
303    }
304
305    fn block_content_size_unchecked(&self) -> u32 {
306        u32::from(self.header_buffer[0] >> 3) //push out type and last_block flags. Retain 5 bit
307            | (u32::from(self.header_buffer[1]) << 5)
308            | (u32::from(self.header_buffer[2]) << 13)
309    }
310}