rkyv/collections/hash_index/
mod.rs

1//! A helper type that archives index data for hashed collections using
2//! [compress, hash and displace](http://cmph.sourceforge.net/papers/esa09.pdf).
3
4use crate::{Archive, Archived, RelPtr};
5use core::{
6    fmt,
7    hash::{Hash, Hasher},
8    slice,
9};
10
11/// The hash builder for archived hash indexes.
12pub use seahash::SeaHasher as HashBuilder;
13
14#[cfg(feature = "validation")]
15pub mod validation;
16
17/// An archived hash index.
18#[cfg_attr(feature = "strict", repr(C))]
19pub struct ArchivedHashIndex {
20    len: Archived<usize>,
21    displace: RelPtr<Archived<u32>>,
22}
23
24impl ArchivedHashIndex {
25    /// Gets the number of items in the hash index.
26    #[inline]
27    pub const fn len(&self) -> usize {
28        from_archived!(self.len) as usize
29    }
30
31    #[inline]
32    fn make_hasher() -> HashBuilder {
33        HashBuilder::with_seeds(
34            0x08576fb6170b5f5f,
35            0x587775eeb84a7e46,
36            0xac701115428ee569,
37            0x910feb91b92bb1cd,
38        )
39    }
40
41    /// Gets the hasher for this hash index. The hasher for all archived hash indexes is the same
42    /// for reproducibility.
43    #[inline]
44    pub fn hasher(&self) -> HashBuilder {
45        Self::make_hasher()
46    }
47
48    #[inline]
49    fn displace_slice(&self) -> &[Archived<u32>] {
50        unsafe { slice::from_raw_parts(self.displace.as_ptr(), self.len()) }
51    }
52
53    #[inline]
54    fn displace(&self, index: usize) -> u32 {
55        from_archived!(self.displace_slice()[index])
56    }
57
58    /// Returns the index where a key may be located in the hash index.
59    ///
60    /// The hash index does not have access to the keys used to build it, so the key at the returned
61    /// index must be checked for equality.
62    #[inline]
63    pub fn index<K: Hash + ?Sized>(&self, k: &K) -> Option<usize> {
64        if self.is_empty() {
65            return None;
66        }
67        let mut hasher = self.hasher();
68        k.hash(&mut hasher);
69        let displace_index = hasher.finish() % self.len() as u64;
70        let displace = self.displace(displace_index as usize);
71
72        if displace == u32::MAX {
73            None
74        } else if displace & 0x80_00_00_00 == 0 {
75            Some(displace as usize)
76        } else {
77            let mut hasher = self.hasher();
78            displace.hash(&mut hasher);
79            k.hash(&mut hasher);
80            let index = hasher.finish() % self.len() as u64;
81            Some(index as usize)
82        }
83    }
84
85    /// Returns whether there are no items in the hash index.
86    #[inline]
87    pub const fn is_empty(&self) -> bool {
88        self.len() == 0
89    }
90
91    /// Resolves an archived hash index from a given length and parameters.
92    ///
93    /// # Safety
94    ///
95    /// - `len` must be the number of elements in the hash index
96    /// - `pos` must be the position of `out` within the archive
97    /// - `resolver` must be the result of building and serializing a hash index
98    #[inline]
99    pub unsafe fn resolve_from_len(
100        len: usize,
101        pos: usize,
102        resolver: HashIndexResolver,
103        out: *mut Self,
104    ) {
105        let (fp, fo) = out_field!(out.len);
106        len.resolve(pos + fp, (), fo);
107
108        let (fp, fo) = out_field!(out.displace);
109        RelPtr::emplace(pos + fp, resolver.displace_pos, fo);
110    }
111}
112
113#[cfg(feature = "alloc")]
114const _: () = {
115    use crate::{
116        ser::{ScratchSpace, Serializer},
117        ScratchVec,
118    };
119    #[cfg(not(feature = "std"))]
120    use alloc::vec::Vec;
121    use core::{
122        cmp::Reverse,
123        mem::{size_of, MaybeUninit},
124    };
125
126    impl ArchivedHashIndex {
127        /// Builds and serializes a hash index from an iterator of key-value pairs.
128        ///
129        /// # Safety
130        ///
131        /// - The keys returned by the iterator must be unique.
132        /// - `entries` must have a capacity of `iter.len()` entries.
133        #[allow(clippy::type_complexity)]
134        pub unsafe fn build_and_serialize<'a, K, V, S, I>(
135            iter: I,
136            serializer: &mut S,
137            entries: &mut ScratchVec<MaybeUninit<(&'a K, &'a V)>>,
138        ) -> Result<HashIndexResolver, S::Error>
139        where
140            K: 'a + Hash,
141            V: 'a,
142            S: Serializer + ScratchSpace + ?Sized,
143            I: ExactSizeIterator<Item = (&'a K, &'a V)>,
144        {
145            let len = iter.len();
146
147            let mut bucket_size = ScratchVec::new(serializer, len)?;
148            for _ in 0..len {
149                bucket_size.push(0u32);
150            }
151
152            let mut displaces = ScratchVec::new(serializer, len)?;
153
154            for (key, value) in iter {
155                let mut hasher = Self::make_hasher();
156                key.hash(&mut hasher);
157                let displace = (hasher.finish() % len as u64) as u32;
158                displaces.push((displace, (key, value)));
159                bucket_size[displace as usize] += 1;
160            }
161
162            displaces
163                .sort_by_key(|&(displace, _)| (Reverse(bucket_size[displace as usize]), displace));
164
165            let mut occupied = ScratchVec::new(serializer, len)?;
166            for _ in 0..len {
167                occupied.push(false);
168            }
169
170            let mut displacements = ScratchVec::new(serializer, len)?;
171            for _ in 0..len {
172                displacements.push(to_archived!(u32::MAX));
173            }
174
175            let mut first_empty = 0;
176            let mut assignments = Vec::with_capacity(8);
177
178            let mut start = 0;
179            while start < displaces.len() {
180                let displace = displaces[start].0;
181                let bucket_size = bucket_size[displace as usize] as usize;
182                let end = start + bucket_size;
183                let bucket = &displaces[start..end];
184                start = end;
185
186                if bucket_size > 1 {
187                    'find_seed: for seed in 0x80_00_00_00u32..=0xFF_FF_FF_FFu32 {
188                        let mut base_hasher = Self::make_hasher();
189                        seed.hash(&mut base_hasher);
190
191                        assignments.clear();
192
193                        for &(_, (key, _)) in bucket.iter() {
194                            let mut hasher = base_hasher;
195                            key.hash(&mut hasher);
196                            let index = (hasher.finish() % len as u64) as u32;
197                            if occupied[index as usize] || assignments.contains(&index) {
198                                continue 'find_seed;
199                            } else {
200                                assignments.push(index);
201                            }
202                        }
203
204                        for i in 0..bucket_size {
205                            occupied[assignments[i] as usize] = true;
206                            entries[assignments[i] as usize]
207                                .as_mut_ptr()
208                                .write(bucket[i].1);
209                        }
210                        displacements[displace as usize] = to_archived!(seed);
211                        break;
212                    }
213                } else {
214                    let offset = occupied[first_empty..]
215                        .iter()
216                        .position(|value| !value)
217                        .unwrap();
218                    first_empty += offset;
219                    occupied[first_empty] = true;
220                    entries[first_empty].as_mut_ptr().write(bucket[0].1);
221                    displacements[displace as usize] = to_archived!(first_empty as u32);
222                    first_empty += 1;
223                }
224            }
225
226            // Write displacements
227            let displace_pos = serializer.align_for::<Archived<u32>>()?;
228            let displacements_slice = slice::from_raw_parts(
229                displacements.as_ptr().cast::<u8>(),
230                len * size_of::<Archived<u32>>(),
231            );
232            serializer.write(displacements_slice)?;
233
234            // Free scratch vecs
235            displacements.free(serializer)?;
236            occupied.free(serializer)?;
237            displaces.free(serializer)?;
238            bucket_size.free(serializer)?;
239
240            Ok(HashIndexResolver { displace_pos })
241        }
242    }
243};
244
245impl fmt::Debug for ArchivedHashIndex {
246    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
247        f.debug_list().entries(self.displace_slice()).finish()
248    }
249}
250
251/// The resolver for an archived hash index.
252pub struct HashIndexResolver {
253    displace_pos: usize,
254}