rkyv/string/
repr.rs

1//! An archived string representation that supports inlining short strings.
2
3use crate::{Archived, FixedIsize, FixedUsize};
4use core::{marker::PhantomPinned, mem, ptr, slice, str};
5
6const OFFSET_BYTES: usize = mem::size_of::<FixedIsize>();
7
8#[derive(Clone, Copy)]
9#[repr(C)]
10struct OutOfLineRepr {
11    len: Archived<usize>,
12    // Offset is always stored in little-endian format to put the sign bit at the end.
13    // This representation is optimized for little-endian architectures.
14    offset: [u8; OFFSET_BYTES],
15    _phantom: PhantomPinned,
16}
17
18/// The maximum number of bytes that can be inlined.
19pub const INLINE_CAPACITY: usize = mem::size_of::<OutOfLineRepr>() - 1;
20
21#[derive(Clone, Copy)]
22#[repr(C)]
23struct InlineRepr {
24    bytes: [u8; INLINE_CAPACITY],
25    len: u8,
26}
27
28/// An archived string representation that can inline short strings.
29pub union ArchivedStringRepr {
30    out_of_line: OutOfLineRepr,
31    inline: InlineRepr,
32}
33
34impl ArchivedStringRepr {
35    /// Returns whether the representation is inline.
36    #[inline]
37    pub fn is_inline(&self) -> bool {
38        unsafe { self.inline.len & 0x80 == 0 }
39    }
40
41    /// Returns the offset of the representation.
42    ///
43    /// # Safety
44    ///
45    /// The internal representation must be out-of-line.
46    #[inline]
47    pub unsafe fn out_of_line_offset(&self) -> isize {
48        FixedIsize::from_le_bytes(self.out_of_line.offset) as isize
49    }
50
51    /// Returns a pointer to the bytes of the string.
52    #[inline]
53    pub fn as_ptr(&self) -> *const u8 {
54        unsafe {
55            if self.is_inline() {
56                self.inline.bytes.as_ptr()
57            } else {
58                (self as *const Self)
59                    .cast::<u8>()
60                    .offset(self.out_of_line_offset())
61            }
62        }
63    }
64
65    /// Returns a mutable pointer to the bytes of the string.
66    #[inline]
67    pub fn as_mut_ptr(&mut self) -> *mut u8 {
68        unsafe {
69            if self.is_inline() {
70                self.inline.bytes.as_mut_ptr()
71            } else {
72                (self as *mut Self)
73                    .cast::<u8>()
74                    .offset(self.out_of_line_offset())
75            }
76        }
77    }
78
79    /// Returns the length of the string.
80    #[inline]
81    pub fn len(&self) -> usize {
82        unsafe {
83            if self.is_inline() {
84                self.inline.len as usize
85            } else {
86                from_archived!(self.out_of_line.len) as usize
87            }
88        }
89    }
90
91    /// Returns whether the string is empty.
92    #[inline]
93    pub fn is_empty(&self) -> bool {
94        self.len() == 0
95    }
96
97    /// Returns a pointer to the string as a `str`.
98    #[cfg(feature = "validation")]
99    #[inline]
100    pub fn as_str_ptr(&self) -> *const str {
101        ptr_meta::from_raw_parts(self.as_ptr().cast(), self.len())
102    }
103
104    /// Returns a slice of the bytes of the string.
105    #[inline]
106    pub fn bytes(&self) -> &[u8] {
107        unsafe { slice::from_raw_parts(self.as_ptr(), self.len()) }
108    }
109
110    /// Returns a mutable slice of the bytes of the string.
111    #[inline]
112    pub fn bytes_mut(&mut self) -> &mut [u8] {
113        unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), self.len()) }
114    }
115
116    /// Returns a reference to the string as a `str`.
117    #[inline]
118    pub fn as_str(&self) -> &str {
119        unsafe { str::from_utf8_unchecked(self.bytes()) }
120    }
121
122    /// Returns a mutable reference to the string as a `str`.
123    #[inline]
124    pub fn as_mut_str(&mut self) -> &mut str {
125        unsafe { str::from_utf8_unchecked_mut(self.bytes_mut()) }
126    }
127
128    /// Emplaces a new inline representation for the given `str`.
129    ///
130    /// # Safety
131    ///
132    /// - The length of `str` must be less than or equal to [`INLINE_CAPACITY`].
133    /// - `out` must point to a valid location to write the inline representation.
134    #[inline]
135    pub unsafe fn emplace_inline(value: &str, out: *mut Self) {
136        let out_bytes = ptr::addr_of_mut!((*out).inline.bytes);
137        ptr::copy_nonoverlapping(value.as_bytes().as_ptr(), out_bytes.cast(), value.len());
138
139        let out_len = ptr::addr_of_mut!((*out).inline.len);
140        *out_len = value.len() as u8;
141    }
142
143    /// Emplaces a new out-of-line representation for the given `str`.
144    ///
145    /// # Safety
146    ///
147    /// - The length of `str` must be greater than [`INLINE_CAPACITY`].
148    /// - `pos` must be the location of the representation within the archive.
149    /// - `target` must be the location of the serialized bytes of the string.
150    /// - `out` must point to a valid location to write the out-of-line representation.
151    #[inline]
152    pub unsafe fn emplace_out_of_line(value: &str, pos: usize, target: usize, out: *mut Self) {
153        let out_len = ptr::addr_of_mut!((*out).out_of_line.len);
154        out_len.write(to_archived!(value.len() as FixedUsize));
155
156        let out_offset = ptr::addr_of_mut!((*out).out_of_line.offset);
157        let offset = crate::rel_ptr::signed_offset(pos, target).unwrap();
158        *out_offset = (offset as FixedIsize).to_le_bytes();
159    }
160}
161
#[cfg(feature = "validation")]
const _: () = {
    use crate::Fallible;
    use bytecheck::CheckBytes;
    use core::fmt;

    /// The error produced when a string representation fails validation.
    ///
    /// An inline string may hold at most [`INLINE_CAPACITY`] bytes; this error reports an inline
    /// representation whose stored length is larger than that.
    #[derive(Debug)]
    pub struct CheckStringReprError;

    impl fmt::Display for CheckStringReprError {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str("String representation was inline but the length was too large")
        }
    }

    #[cfg(feature = "std")]
    impl std::error::Error for CheckStringReprError {}

    impl<C: Fallible + ?Sized> CheckBytes<C> for ArchivedStringRepr {
        type Error = CheckStringReprError;

        /// Checks that an inline representation does not claim more than [`INLINE_CAPACITY`]
        /// bytes. Out-of-line representations are accepted without further checks here.
        #[inline]
        unsafe fn check_bytes<'a>(value: *const Self, _: &mut C) -> Result<&'a Self, Self::Error> {
            // Every bit pattern is a valid `ArchivedStringRepr`, so it can be borrowed up front.
            let repr = &*value;

            // The length is only inspected for inline representations.
            let length_is_acceptable = !repr.is_inline() || repr.len() <= INLINE_CAPACITY;
            if length_is_acceptable {
                Ok(repr)
            } else {
                Err(CheckStringReprError)
            }
        }
    }
};