ruzstd/
frame.rs

1use crate::io::{Error, Read};
2use core::fmt;
3#[cfg(feature = "std")]
4use std::error::Error as StdError;
5
/// Magic number that opens every Zstandard frame.
pub const MAGIC_NUM: u32 = 0xFD2F_B528;
/// Smallest window size a frame may declare: 1 KB.
pub const MIN_WINDOW_SIZE: u64 = 1 << 10;
/// Largest window size a frame may declare: 3.75 TB, i.e. `(1 << 41) + 7 * (1 << 38)`,
/// which is what a `Window_Descriptor` with `Exponent` 31 and `Mantissa` 7 decodes to.
pub const MAX_WINDOW_SIZE: u64 = 15 << 38;
12
13/// Zstandard compressed data is made of one or more [Frame]s. Each frame is independent and can be
14/// decompressed independently of other frames.
15///
16/// There are two frame formats defined by Zstandard: Zstandard frames and Skippable frames.
17/// Zstandard frames contain compressed data, while skippable frames contain custom user metadata.
18///
19/// This structure contains the header of the frame.
20///
21/// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frames>
22pub struct Frame {
23    pub header: FrameHeader,
24}
25
26/// A frame header has a variable size, with a minimum of 2 bytes, and a maximum of 14 bytes.
27pub struct FrameHeader {
28    pub descriptor: FrameDescriptor,
29    /// The `Window_Descriptor` field contains the minimum size of a memory buffer needed to
30    /// decompress the entire frame.
31    ///
32    /// This byte is not included in the frame header when the `Single_Segment_flag` is set.
33    ///
34    /// Bits 7-3 refer to the `Exponent`, where bits 2-0 refer to the `Mantissa`.
35    ///
36    /// To determine the size of a window, the following formula can be used:
37    /// ```text
38    /// windowLog = 10 + Exponent;
39    /// windowBase = 1 << windowLog;
40    /// windowAdd = (windowBase / 8) * Mantissa;
41    /// Window_Size = windowBase + windowAdd;
42    /// ```
43    /// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor>
44    window_descriptor: u8,
45    /// The `Dictionary_ID` field contains the ID of the dictionary to be used to decode the frame.
46    /// When this value is not present, it's up to the decoder to know which dictionary to use.
47    dict_id: Option<u32>,
48    /// The size of the original/uncompressed content.
49    frame_content_size: u64,
50}
51
/// The `Frame Header Descriptor`: the first byte of a frame header, describing which of the
/// other header fields are present.
///
/// Bit layout as decoded by the accessor methods:
/// bits 7-6 `Frame_Content_Size_flag`, bit 5 `Single_Segment_flag`, bit 3 reserved,
/// bit 2 `Content_Checksum_flag`, bits 1-0 `Dictionary_ID_flag`.
pub struct FrameDescriptor(u8);
55
/// Errors produced while interpreting a [`FrameDescriptor`].
#[derive(Debug)]
#[non_exhaustive]
pub enum FrameDescriptorError {
    /// A two-bit size flag held a value outside of `0..=3`; `got` is the offending value.
    InvalidFrameContentSizeFlag {
        /// The flag value that was found.
        got: u8,
    },
}
61
62impl fmt::Display for FrameDescriptorError {
63    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64        match self {
65            Self::InvalidFrameContentSizeFlag { got } => write!(
66                f,
67                "Invalid Frame_Content_Size_Flag; Is: {}, Should be one of: 0, 1, 2, 3",
68                got
69            ),
70        }
71    }
72}
73
// Only available with the `std` feature. The error wraps no other error, so the default
// `source()` (returning `None`) is kept.
#[cfg(feature = "std")]
impl StdError for FrameDescriptorError {}
76
77impl FrameDescriptor {
78    /// Read the `Frame_Content_Size_flag` from the frame header descriptor.
79    ///
80    /// This is a 2 bit flag, specifying if the `Frame_Content_Size` field is present
81    /// within the header. It notates the number of bytes used by `Frame_Content_size`
82    ///
83    /// When this value is is 0, `FCS_Field_Size` depends on Single_Segment_flag.
84    /// If the `Single_Segment_flag` field is set in the frame header descriptor,
85    /// the size of the `Frame_Content_Size` field of the header is 1 byte.
86    /// Otherwise, `FCS_Field_Size` is 0, and the `Frame_Content_Size` is not provided.
87    ///
88    /// | Flag Value (decimal) | Size of the `Frame_Content_Size` field in bytes |
89    /// | -- | -- |
90    /// | 0 | 0 or 1 (see above) |
91    /// | 1 | 2 |
92    /// | 2 | 4 |
93    /// | 3 | 8 |
94    pub fn frame_content_size_flag(&self) -> u8 {
95        self.0 >> 6
96    }
97
98    /// This bit is reserved for some future feature, a compliant decoder **must ensure**
99    /// that this value is set to zero.
100    pub fn reserved_flag(&self) -> bool {
101        ((self.0 >> 3) & 0x1) == 1
102    }
103
104    /// If this flag is set, data must be regenerated within a single continuous memory segment.
105    ///
106    /// In this case, the `Window_Descriptor` byte is skipped, but `Frame_Content_Size` is present.
107    /// The decoder must allocate a memory segment equal to or larger than `Frame_Content_Size`.
108    pub fn single_segment_flag(&self) -> bool {
109        ((self.0 >> 5) & 0x1) == 1
110    }
111
112    /// If this flag is set, a 32 bit `Content_Checksum` will be present at the end of the frame.
113    pub fn content_checksum_flag(&self) -> bool {
114        ((self.0 >> 2) & 0x1) == 1
115    }
116
117    /// This is a two bit flag telling if a dictionary ID is provided within the header. It also
118    /// specifies the size of this field
119    ///
120    /// | Value (Decimal) | `DID_Field_Size` (bytes) |
121    /// | -- | -- |
122    /// | 0 | 0 |
123    /// | 1 | 1 |
124    /// | 2 | 2 |
125    /// | 3 | 4 |
126    pub fn dict_id_flag(&self) -> u8 {
127        self.0 & 0x3
128    }
129
130    /// Read the size of the `Frame_Content_size` field from the frame header descriptor, returning
131    /// the size in bytes.
132    /// If this value is zero, then the `Frame_Content_Size` field is not present within the header.
133    pub fn frame_content_size_bytes(&self) -> Result<u8, FrameDescriptorError> {
134        match self.frame_content_size_flag() {
135            0 => {
136                if self.single_segment_flag() {
137                    Ok(1)
138                } else {
139                    Ok(0)
140                }
141            }
142            1 => Ok(2),
143            2 => Ok(4),
144            3 => Ok(8),
145            other => Err(FrameDescriptorError::InvalidFrameContentSizeFlag { got: other }),
146        }
147    }
148
149    /// Read the size of the `Dictionary_ID` field from the frame header descriptor, returning the size in bytes.
150    /// If this value is zero, then the dictionary id is not present within the header,
151    /// and "It's up to the decoder to know which dictionary to use."
152    pub fn dictionary_id_bytes(&self) -> Result<u8, FrameDescriptorError> {
153        match self.dict_id_flag() {
154            0 => Ok(0),
155            1 => Ok(1),
156            2 => Ok(2),
157            3 => Ok(4),
158            other => Err(FrameDescriptorError::InvalidFrameContentSizeFlag { got: other }),
159        }
160    }
161}
162
163#[derive(Debug)]
164#[non_exhaustive]
165pub enum FrameHeaderError {
166    WindowTooBig { got: u64 },
167    WindowTooSmall { got: u64 },
168    FrameDescriptorError(FrameDescriptorError),
169    DictIdTooSmall { got: usize, expected: usize },
170    MismatchedFrameSize { got: usize, expected: u8 },
171    FrameSizeIsZero,
172    InvalidFrameSize { got: u8 },
173}
174
175impl fmt::Display for FrameHeaderError {
176    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
177        match self {
178            Self::WindowTooBig { got } => write!(
179                f,
180                "window_size bigger than allowed maximum. Is: {}, Should be lower than: {}",
181                got, MAX_WINDOW_SIZE
182            ),
183            Self::WindowTooSmall { got } => write!(
184                f,
185                "window_size smaller than allowed minimum. Is: {}, Should be greater than: {}",
186                got, MIN_WINDOW_SIZE
187            ),
188            Self::FrameDescriptorError(e) => write!(f, "{:?}", e),
189            Self::DictIdTooSmall { got, expected } => write!(
190                f,
191                "Not enough bytes in dict_id. Is: {}, Should be: {}",
192                got, expected
193            ),
194            Self::MismatchedFrameSize { got, expected } => write!(
195                f,
196                "frame_content_size does not have the right length. Is: {}, Should be: {}",
197                got, expected
198            ),
199            Self::FrameSizeIsZero => write!(f, "frame_content_size was zero"),
200            Self::InvalidFrameSize { got } => write!(
201                f,
202                "Invalid frame_content_size. Is: {}, Should be one of 1, 2, 4, 8 bytes",
203                got
204            ),
205        }
206    }
207}
208
// Only available with the `std` feature.
#[cfg(feature = "std")]
impl StdError for FrameHeaderError {
    /// Returns the wrapped descriptor error when there is one; every other variant has no
    /// underlying cause.
    fn source(&self) -> Option<&(dyn StdError + 'static)> {
        if let Self::FrameDescriptorError(cause) = self {
            Some(cause)
        } else {
            None
        }
    }
}
218
219impl From<FrameDescriptorError> for FrameHeaderError {
220    fn from(error: FrameDescriptorError) -> Self {
221        Self::FrameDescriptorError(error)
222    }
223}
224
225impl FrameHeader {
226    /// Read the size of the window from the header, returning the size in bytes.
227    pub fn window_size(&self) -> Result<u64, FrameHeaderError> {
228        if self.descriptor.single_segment_flag() {
229            Ok(self.frame_content_size())
230        } else {
231            let exp = self.window_descriptor >> 3;
232            let mantissa = self.window_descriptor & 0x7;
233
234            let window_log = 10 + u64::from(exp);
235            let window_base = 1 << window_log;
236            let window_add = (window_base / 8) * u64::from(mantissa);
237
238            let window_size = window_base + window_add;
239
240            if window_size >= MIN_WINDOW_SIZE {
241                if window_size < MAX_WINDOW_SIZE {
242                    Ok(window_size)
243                } else {
244                    Err(FrameHeaderError::WindowTooBig { got: window_size })
245                }
246            } else {
247                Err(FrameHeaderError::WindowTooSmall { got: window_size })
248            }
249        }
250    }
251
252    /// The ID (if provided) of the dictionary required to decode this frame.
253    pub fn dictionary_id(&self) -> Option<u32> {
254        self.dict_id
255    }
256
257    /// Obtain the uncompressed size (in bytes) of the frame contents.
258    pub fn frame_content_size(&self) -> u64 {
259        self.frame_content_size
260    }
261}
262
263#[derive(Debug)]
264#[non_exhaustive]
265pub enum ReadFrameHeaderError {
266    MagicNumberReadError(Error),
267    BadMagicNumber(u32),
268    FrameDescriptorReadError(Error),
269    InvalidFrameDescriptor(FrameDescriptorError),
270    WindowDescriptorReadError(Error),
271    DictionaryIdReadError(Error),
272    FrameContentSizeReadError(Error),
273    SkipFrame { magic_number: u32, length: u32 },
274}
275
276impl fmt::Display for ReadFrameHeaderError {
277    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
278        match self {
279            Self::MagicNumberReadError(e) => write!(f, "Error while reading magic number: {}", e),
280            Self::BadMagicNumber(e) => write!(f, "Read wrong magic number: 0x{:X}", e),
281            Self::FrameDescriptorReadError(e) => {
282                write!(f, "Error while reading frame descriptor: {}", e)
283            }
284            Self::InvalidFrameDescriptor(e) => write!(f, "{:?}", e),
285            Self::WindowDescriptorReadError(e) => {
286                write!(f, "Error while reading window descriptor: {}", e)
287            }
288            Self::DictionaryIdReadError(e) => write!(f, "Error while reading dictionary id: {}", e),
289            Self::FrameContentSizeReadError(e) => {
290                write!(f, "Error while reading frame content size: {}", e)
291            }
292            Self::SkipFrame {
293                magic_number,
294                length,
295            } => write!(
296                f,
297                "SkippableFrame encountered with MagicNumber 0x{:X} and length {} bytes",
298                magic_number, length
299            ),
300        }
301    }
302}
303
// Only available with the `std` feature.
#[cfg(feature = "std")]
impl StdError for ReadFrameHeaderError {
    /// Returns the wrapped I/O or descriptor error; the variants that wrap nothing have no
    /// underlying cause.
    fn source(&self) -> Option<&(dyn StdError + 'static)> {
        match self {
            Self::MagicNumberReadError(io_error)
            | Self::FrameDescriptorReadError(io_error)
            | Self::WindowDescriptorReadError(io_error)
            | Self::DictionaryIdReadError(io_error)
            | Self::FrameContentSizeReadError(io_error) => Some(io_error),
            Self::InvalidFrameDescriptor(descriptor_error) => Some(descriptor_error),
            _ => None,
        }
    }
}
318
319impl From<FrameDescriptorError> for ReadFrameHeaderError {
320    fn from(error: FrameDescriptorError) -> Self {
321        Self::InvalidFrameDescriptor(error)
322    }
323}
324
325/// Read a single serialized frame from the reader and return a tuple containing the parsed frame and the number of bytes read.
326pub fn read_frame_header(mut r: impl Read) -> Result<(Frame, u8), ReadFrameHeaderError> {
327    use ReadFrameHeaderError as err;
328    let mut buf = [0u8; 4];
329
330    r.read_exact(&mut buf).map_err(err::MagicNumberReadError)?;
331    let mut bytes_read = 4;
332    let magic_num = u32::from_le_bytes(buf);
333
334    // Skippable frames have a magic number in this interval
335    if (0x184D2A50..=0x184D2A5F).contains(&magic_num) {
336        r.read_exact(&mut buf)
337            .map_err(err::FrameDescriptorReadError)?;
338        let skip_size = u32::from_le_bytes(buf);
339        return Err(ReadFrameHeaderError::SkipFrame {
340            magic_number: magic_num,
341            length: skip_size,
342        });
343    }
344
345    if magic_num != MAGIC_NUM {
346        return Err(ReadFrameHeaderError::BadMagicNumber(magic_num));
347    }
348
349    r.read_exact(&mut buf[0..1])
350        .map_err(err::FrameDescriptorReadError)?;
351    let desc = FrameDescriptor(buf[0]);
352
353    bytes_read += 1;
354
355    let mut frame_header = FrameHeader {
356        descriptor: FrameDescriptor(desc.0),
357        dict_id: None,
358        frame_content_size: 0,
359        window_descriptor: 0,
360    };
361
362    if !desc.single_segment_flag() {
363        r.read_exact(&mut buf[0..1])
364            .map_err(err::WindowDescriptorReadError)?;
365        frame_header.window_descriptor = buf[0];
366        bytes_read += 1;
367    }
368
369    let dict_id_len = desc.dictionary_id_bytes()? as usize;
370    if dict_id_len != 0 {
371        let buf = &mut buf[..dict_id_len];
372        r.read_exact(buf).map_err(err::DictionaryIdReadError)?;
373        bytes_read += dict_id_len;
374        let mut dict_id = 0u32;
375
376        #[allow(clippy::needless_range_loop)]
377        for i in 0..dict_id_len {
378            dict_id += (buf[i] as u32) << (8 * i);
379        }
380        if dict_id != 0 {
381            frame_header.dict_id = Some(dict_id);
382        }
383    }
384
385    let fcs_len = desc.frame_content_size_bytes()? as usize;
386    if fcs_len != 0 {
387        let mut fcs_buf = [0u8; 8];
388        let fcs_buf = &mut fcs_buf[..fcs_len];
389        r.read_exact(fcs_buf)
390            .map_err(err::FrameContentSizeReadError)?;
391        bytes_read += fcs_len;
392        let mut fcs = 0u64;
393
394        #[allow(clippy::needless_range_loop)]
395        for i in 0..fcs_len {
396            fcs += (fcs_buf[i] as u64) << (8 * i);
397        }
398        if fcs_len == 2 {
399            fcs += 256;
400        }
401        frame_header.frame_content_size = fcs;
402    }
403
404    let frame: Frame = Frame {
405        header: frame_header,
406    };
407
408    Ok((frame, bytes_read as u8))
409}