foldhash/
fast.rs

//! The foldhash implementation optimized for speed.

use core::hash::{BuildHasher, Hasher};

use crate::seed::{gen_per_hasher_seed, GlobalSeed, SharedSeed};
use crate::{folded_multiply, hash_bytes_long, hash_bytes_short, rotate_right, ARBITRARY3};

/// A [`Hasher`] instance implementing foldhash, optimized for speed.
///
/// While you can create one directly with [`FoldHasher::with_seed`], you
/// most likely want to use [`RandomState`], [`SeedableRandomState`] or
/// [`FixedState`] to create [`FoldHasher`]s.
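///
/// # Example
///
/// A minimal usage sketch. It assumes this module is exposed as `foldhash::fast`
/// (inferred from this file's path) and that [`SharedSeed`] is re-exported at the
/// crate root; the fixed shared seed comes from [`SharedSeed::global_fixed`],
/// which this module itself uses in [`FixedState`].
///
/// ```
/// use core::hash::Hasher;
/// use foldhash::fast::FoldHasher;
/// use foldhash::SharedSeed;
///
/// // Build a hasher with an explicit per-hasher seed and the fixed shared seed.
/// let mut hasher = FoldHasher::with_seed(0x1234, SharedSeed::global_fixed());
/// hasher.write(b"hello world");
/// println!("{:016x}", hasher.finish());
/// ```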
#[derive(Clone)]
pub struct FoldHasher<'a> {
    accumulator: u64,
    sponge: u128,
    sponge_len: u8,
    seeds: &'a [u64; 6],
}

impl<'a> FoldHasher<'a> {
    /// Initializes this [`FoldHasher`] with the given per-hasher seed and
    /// [`SharedSeed`].
    #[inline]
    pub const fn with_seed(per_hasher_seed: u64, shared_seed: &'a SharedSeed) -> FoldHasher<'a> {
        FoldHasher {
            accumulator: per_hasher_seed,
            sponge: 0,
            sponge_len: 0,
            seeds: &shared_seed.seeds,
        }
    }

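    /// Buffers a small integer write into the 128-bit sponge. When the new
    /// value no longer fits, the sponge is folded into the accumulator and
    /// restarted; any leftover sponge contents are flushed in `finish`.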
    #[inline(always)]
    fn write_num<T: Into<u128>>(&mut self, x: T) {
        let bits: usize = 8 * core::mem::size_of::<T>();
        if self.sponge_len as usize + bits > 128 {
            let lo = self.sponge as u64;
            let hi = (self.sponge >> 64) as u64;
            self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0]);
            self.sponge = x.into();
            self.sponge_len = bits as u8;
        } else {
            self.sponge |= x.into() << self.sponge_len;
            self.sponge_len += bits as u8;
        }
    }
}

impl<'a> Hasher for FoldHasher<'a> {
    #[inline(always)]
    fn write(&mut self, bytes: &[u8]) {
        // We perform overlapping reads in the byte hash which could lead to
        // trivial length-extension attacks. These should be defeated by
        // adding a length-dependent rotation on our unpredictable seed
        // which costs only a single cycle (or none if executed with
        // instruction-level parallelism).
        let len = bytes.len();
        self.accumulator = rotate_right(self.accumulator, len as u32);
        if len <= 16 {
            self.accumulator = hash_bytes_short(bytes, self.accumulator, self.seeds);
        } else {
            unsafe {
                // SAFETY: we checked that the length is > 16 bytes.
                self.accumulator = hash_bytes_long(bytes, self.accumulator, self.seeds);
            }
        }
    }

    #[inline(always)]
    fn write_u8(&mut self, i: u8) {
        self.write_num(i);
    }

    #[inline(always)]
    fn write_u16(&mut self, i: u16) {
        self.write_num(i);
    }

    #[inline(always)]
    fn write_u32(&mut self, i: u32) {
        self.write_num(i);
    }

    #[inline(always)]
    fn write_u64(&mut self, i: u64) {
        self.write_num(i);
    }

    #[inline(always)]
    fn write_u128(&mut self, i: u128) {
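        // Fold the full 128-bit value directly into the accumulator instead of
        // going through the sponge.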
        let lo = i as u64;
        let hi = (i >> 64) as u64;
        self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0]);
    }

    #[inline(always)]
    fn write_usize(&mut self, i: usize) {
        // u128 doesn't implement From<usize>.
        #[cfg(target_pointer_width = "32")]
        self.write_num(i as u32);
        #[cfg(target_pointer_width = "64")]
        self.write_num(i as u64);
    }

    #[cfg(feature = "nightly")]
    #[inline(always)]
    fn write_str(&mut self, s: &str) {
        // Our write function already handles length differences.
        self.write(s.as_bytes())
    }

    #[inline(always)]
    fn finish(&self) -> u64 {
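        // Flush any integer writes still buffered in the sponge; otherwise the
        // accumulator already holds the final state.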
        if self.sponge_len > 0 {
            let lo = self.sponge as u64;
            let hi = (self.sponge >> 64) as u64;
            folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0])
        } else {
            self.accumulator
        }
    }
}

/// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly initialized.
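///
/// # Example
///
/// A minimal sketch of using it as the hasher of a `HashMap`; the
/// `foldhash::fast` path is assumed from this file's location.
///
/// ```
/// use std::collections::HashMap;
/// use foldhash::fast::RandomState;
///
/// // A HashMap hashing its keys with randomly seeded foldhash.
/// let mut map = HashMap::with_hasher(RandomState::default());
/// map.insert(1, "one");
/// assert_eq!(map.get(&1), Some(&"one"));
/// ```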
#[derive(Clone, Debug)]
pub struct RandomState {
    per_hasher_seed: u64,
    global_seed: GlobalSeed,
}

impl Default for RandomState {
    #[inline(always)]
    fn default() -> Self {
        Self {
            per_hasher_seed: gen_per_hasher_seed(),
            global_seed: GlobalSeed::new(),
        }
    }
}

impl BuildHasher for RandomState {
    type Hasher = FoldHasher<'static>;

    #[inline(always)]
    fn build_hasher(&self) -> FoldHasher<'static> {
        FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get())
    }
}

/// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly
/// initialized by default, but can also be initialized with a specific seed.
///
/// This can be useful e.g. for testing, but the downside is that this type
/// has a size of 16 bytes rather than the 8 bytes of [`RandomState`].
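///
/// # Example
///
/// A minimal sketch of building reproducible hashers for a test; the
/// `foldhash::fast` and `foldhash::SharedSeed` paths are assumed re-exports.
///
/// ```
/// use core::hash::BuildHasher;
/// use foldhash::fast::SeedableRandomState;
/// use foldhash::SharedSeed;
///
/// // Two states built from the same seeds hash identically.
/// let a = SeedableRandomState::with_seed(42, SharedSeed::global_fixed());
/// let b = SeedableRandomState::with_seed(42, SharedSeed::global_fixed());
/// assert_eq!(a.hash_one("hello"), b.hash_one("hello"));
/// ```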
#[derive(Clone, Debug)]
pub struct SeedableRandomState {
    per_hasher_seed: u64,
    shared_seed: &'static SharedSeed,
}

impl Default for SeedableRandomState {
    #[inline(always)]
    fn default() -> Self {
        Self::random()
    }
}

impl SeedableRandomState {
    /// Generates a random [`SeedableRandomState`], similar to [`RandomState`].
    #[inline(always)]
    pub fn random() -> Self {
        Self {
            per_hasher_seed: gen_per_hasher_seed(),
            shared_seed: SharedSeed::global_random(),
        }
    }

    /// Generates a fixed [`SeedableRandomState`], similar to [`FixedState`].
    #[inline(always)]
    pub fn fixed() -> Self {
        Self {
            per_hasher_seed: ARBITRARY3,
            shared_seed: SharedSeed::global_fixed(),
        }
    }

    /// Generates a [`SeedableRandomState`] with the given per-hasher seed
    /// and [`SharedSeed`].
    #[inline(always)]
    pub fn with_seed(per_hasher_seed: u64, shared_seed: &'static SharedSeed) -> Self {
        // XOR with ARBITRARY3 such that with_seed(0) matches fixed().
        Self {
            per_hasher_seed: per_hasher_seed ^ ARBITRARY3,
            shared_seed,
        }
    }
}

impl BuildHasher for SeedableRandomState {
    type Hasher = FoldHasher<'static>;

    #[inline(always)]
    fn build_hasher(&self) -> FoldHasher<'static> {
        FoldHasher::with_seed(self.per_hasher_seed, self.shared_seed)
    }
}

/// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that always has the same fixed seed.
///
/// Not recommended unless you absolutely need determinism.
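///
/// # Example
///
/// A minimal sketch of fully deterministic hashing; the `foldhash::fast`
/// path is assumed from this file's location.
///
/// ```
/// use core::hash::BuildHasher;
/// use foldhash::fast::FixedState;
///
/// // The same seed and input always produce the same hash, across runs and processes.
/// let state = FixedState::with_seed(123);
/// assert_eq!(state.hash_one(b"stable"), FixedState::with_seed(123).hash_one(b"stable"));
/// ```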
#[derive(Clone, Debug)]
pub struct FixedState {
    per_hasher_seed: u64,
}

impl FixedState {
    /// Creates a [`FixedState`] with the given per-hasher seed.
    #[inline(always)]
    pub const fn with_seed(per_hasher_seed: u64) -> Self {
        // XOR with ARBITRARY3 such that with_seed(0) matches default.
        Self {
            per_hasher_seed: per_hasher_seed ^ ARBITRARY3,
        }
    }
}

impl Default for FixedState {
    #[inline(always)]
    fn default() -> Self {
        Self {
            per_hasher_seed: ARBITRARY3,
        }
    }
}

impl BuildHasher for FixedState {
    type Hasher = FoldHasher<'static>;

    #[inline(always)]
    fn build_hasher(&self) -> FoldHasher<'static> {
        FoldHasher::with_seed(self.per_hasher_seed, SharedSeed::global_fixed())
    }
}