foldhash/seed.rs

use core::hash::BuildHasher;

// These constants may end up unused depending on platform support.
#[allow(unused)]
use crate::{ARBITRARY1, ARBITRARY9};

use super::{
    folded_multiply, ARBITRARY2, ARBITRARY3, ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7,
    ARBITRARY8,
};
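
// `folded_multiply` (imported from the parent module) is the core mixing
// primitive used throughout this file; in essence it widens both arguments to
// 128 bits, multiplies them, and XORs ("folds") the high and low 64-bit halves
// of the product together.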

/// Used for FixedState, and RandomState if atomics for dynamic init are unavailable.
const FIXED_GLOBAL_SEED: [u64; 4] = [ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7];

pub mod fast {
    use super::*;
    use crate::fast::FoldHasher;

    /// A [`BuildHasher`] for [`fast::FoldHasher`]s that are randomly initialized.
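    ///
    /// A minimal usage sketch, assuming the usual re-export of this type as
    /// `foldhash::fast::RandomState`:
    ///
    /// ```
    /// use std::collections::HashMap;
    /// use foldhash::fast::RandomState;
    ///
    /// // Every map gets its own randomly seeded hasher state.
    /// let mut map: HashMap<&str, u32, RandomState> = HashMap::default();
    /// map.insert("hello", 1);
    /// assert_eq!(map.get("hello"), Some(&1));
    /// ```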
    #[derive(Copy, Clone, Debug)]
    pub struct RandomState {
        per_hasher_seed: u64,
        global_seed: global::GlobalSeed,
    }

    impl Default for RandomState {
        fn default() -> Self {
            // We initialize the per-hasher seed with the stack pointer to ensure
            // different threads have different seeds, with the side benefit that
            // stack address randomization gives us further non-determinism.
            let mut per_hasher_seed = 0;
            let stack_ptr = core::ptr::addr_of!(per_hasher_seed) as u64;
            per_hasher_seed = stack_ptr;

            // If we have the standard library available we use a thread-local
            // state to ensure RandomStates are different with high probability,
            // even if the call stack is the same.
            #[cfg(feature = "std")]
            {
                use std::cell::Cell;
                thread_local! {
                    static PER_HASHER_NONDETERMINISM: Cell<u64> = const { Cell::new(0) };
                }

                let nondeterminism = PER_HASHER_NONDETERMINISM.get();
                per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism);
                PER_HASHER_NONDETERMINISM.set(per_hasher_seed);
            };

            // If we don't have the standard library we use a global atomic
            // instead of a thread-local state.
            //
            // PER_HASHER_NONDETERMINISM is loaded and updated in a racy manner,
            // but this doesn't matter in practice - it is impossible that two
            // different threads have the same stack location, so they'll almost
            // surely generate different seeds, and provide a different possible
            // update for PER_HASHER_NONDETERMINISM. If we were to use a proper
            // fetch_add atomic update there would be a larger chance of
            // problematic contention.
            //
            // We use usize instead of 64-bit atomics for best platform support.
            #[cfg(not(feature = "std"))]
            {
                use core::sync::atomic::{AtomicUsize, Ordering};
                static PER_HASHER_NONDETERMINISM: AtomicUsize = AtomicUsize::new(0);

                let nondeterminism = PER_HASHER_NONDETERMINISM.load(Ordering::Relaxed) as u64;
                per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism);
                PER_HASHER_NONDETERMINISM.store(per_hasher_seed as usize, Ordering::Relaxed);
            }

            // One extra mixing step to ensure good random bits.
            per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY2);

            Self {
                per_hasher_seed,
                global_seed: global::GlobalSeed::new(),
            }
        }
    }

    impl BuildHasher for RandomState {
        type Hasher = FoldHasher;

        #[inline(always)]
        fn build_hasher(&self) -> FoldHasher {
            FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get())
        }
    }

    /// A [`BuildHasher`] for [`fast::FoldHasher`]s that all have the same fixed seed.
    ///
    /// Not recommended unless you absolutely need determinism.
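    ///
    /// A minimal determinism sketch, assuming the usual re-export of this type
    /// as `foldhash::fast::FixedState` and a Rust version with
    /// `BuildHasher::hash_one` (1.71+):
    ///
    /// ```
    /// use core::hash::BuildHasher;
    /// use foldhash::fast::FixedState;
    ///
    /// let a = FixedState::with_seed(42);
    /// let b = FixedState::with_seed(42);
    /// // Equal seeds always produce equal hashes, across runs and processes.
    /// assert_eq!(a.hash_one("foldhash"), b.hash_one("foldhash"));
    /// ```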
    #[derive(Copy, Clone, Debug)]
    pub struct FixedState {
        per_hasher_seed: u64,
    }

    impl FixedState {
        /// Creates a [`FixedState`] with the given seed.
        #[inline(always)]
        pub const fn with_seed(seed: u64) -> Self {
            // XOR with ARBITRARY3 such that with_seed(0) matches default.
            Self {
                per_hasher_seed: seed ^ ARBITRARY3,
            }
        }
    }

    impl Default for FixedState {
        #[inline(always)]
        fn default() -> Self {
            Self {
                per_hasher_seed: ARBITRARY3,
            }
        }
    }

    impl BuildHasher for FixedState {
        type Hasher = FoldHasher;

        #[inline(always)]
        fn build_hasher(&self) -> FoldHasher {
            FoldHasher::with_seed(self.per_hasher_seed, &FIXED_GLOBAL_SEED)
        }
    }
}

pub mod quality {
    use super::*;
    use crate::quality::FoldHasher;

    /// A [`BuildHasher`] for [`quality::FoldHasher`]s that are randomly initialized.
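    ///
    /// Usage mirrors the fast variant; a minimal sketch, assuming the usual
    /// re-export of this type as `foldhash::quality::RandomState`:
    ///
    /// ```
    /// use std::collections::HashSet;
    /// use foldhash::quality::RandomState;
    ///
    /// let mut set: HashSet<u64, RandomState> = HashSet::default();
    /// set.insert(0xfeed);
    /// assert!(set.contains(&0xfeed));
    /// ```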
    #[derive(Copy, Clone, Default, Debug)]
    pub struct RandomState {
        inner: fast::RandomState,
    }

    impl BuildHasher for RandomState {
        type Hasher = FoldHasher;

        #[inline(always)]
        fn build_hasher(&self) -> FoldHasher {
            FoldHasher {
                inner: self.inner.build_hasher(),
            }
        }
    }

    /// A [`BuildHasher`] for [`quality::FoldHasher`]s that all have the same fixed seed.
    ///
    /// Not recommended unless you absolutely need determinism.
    #[derive(Copy, Clone, Default, Debug)]
    pub struct FixedState {
        inner: fast::FixedState,
    }

    impl FixedState {
        /// Creates a [`FixedState`] with the given seed.
        #[inline(always)]
        pub const fn with_seed(seed: u64) -> Self {
            Self {
                // We do an additional folded multiply with the seed here for
                // the quality hash to ensure better independence between seed
                // and hash. If the seed is zero the folded multiply is zero,
                // preserving with_seed(0) == default().
                inner: fast::FixedState::with_seed(folded_multiply(seed, ARBITRARY8)),
            }
        }
    }

    impl BuildHasher for FixedState {
        type Hasher = FoldHasher;

        #[inline(always)]
        fn build_hasher(&self) -> FoldHasher {
            FoldHasher {
                inner: self.inner.build_hasher(),
            }
        }
    }
}

#[cfg(target_has_atomic = "8")]
mod global {
    use super::*;
    use core::cell::UnsafeCell;
    use core::sync::atomic::{AtomicU8, Ordering};

    fn generate_global_seed() -> [u64; 4] {
        let mix = |seed: u64, x: u64| folded_multiply(seed ^ x, ARBITRARY9);

        // Use address space layout randomization as our main randomness source.
        // This isn't great, but we don't advertise HashDoS resistance in the first
        // place. This is a whole lot better than nothing, at near zero cost with
        // no dependencies.
        let mut seed = 0;
        let stack_ptr = &seed as *const _;
        let func_ptr = generate_global_seed;
        let static_ptr = &GLOBAL_SEED_STORAGE as *const _;
        seed = mix(seed, stack_ptr as usize as u64);
        seed = mix(seed, func_ptr as usize as u64);
        seed = mix(seed, static_ptr as usize as u64);

        // If we have the standard library available, augment entropy with the
        // current time and an address from the allocator.
        #[cfg(feature = "std")]
        {
            #[cfg(not(any(
                miri,
                all(target_family = "wasm", target_os = "unknown"),
                target_os = "zkvm"
            )))]
            if let Ok(duration) = std::time::UNIX_EPOCH.elapsed() {
                seed = mix(seed, duration.subsec_nanos() as u64);
                seed = mix(seed, duration.as_secs());
            }

            let box_ptr = &*Box::new(0u8) as *const _;
            seed = mix(seed, box_ptr as usize as u64);
        }

        let seed_a = mix(seed, 0);
        let seed_b = mix(mix(mix(seed_a, 0), 0), 0);
        let seed_c = mix(mix(mix(seed_b, 0), 0), 0);
        let seed_d = mix(mix(mix(seed_c, 0), 0), 0);

        // Zeroes form a weak point for the multiply-mix, and zeroes tend to be
        // a common input. So we want our global seeds that are XOR'ed with the
        // input to always be non-zero. To also ensure there is always a good spread
        // of bits, we give up 3 bits of entropy and simply force some bits on.
        const FORCED_ONES: u64 = (1 << 63) | (1 << 31) | 1;
        [
            seed_a | FORCED_ONES,
            seed_b | FORCED_ONES,
            seed_c | FORCED_ONES,
            seed_d | FORCED_ONES,
        ]
    }

    // All the code below exists purely to cache the above seed as efficiently
    // as possible. Even if we weren't a no_std crate and had access to OnceLock,
    // we wouldn't want to check whether the global is set each time we hash an
    // object, so we hand-roll global storage where type safety allows us to
    // assume the storage is initialized after construction.
    struct GlobalSeedStorage {
        state: AtomicU8,
        seed: UnsafeCell<[u64; 4]>,
    }

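    // State machine for the storage above: the state starts as UNINIT; the first
    // GlobalSeed::new() that finds it uninitialized moves it UNINIT -> LOCKED ->
    // INIT exactly once (see init_slow below), and it never leaves INIT afterwards.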
    const UNINIT: u8 = 0;
    const LOCKED: u8 = 1;
    const INIT: u8 = 2;

    // SAFETY: we only mutate the UnsafeCells when state is in the thread-exclusive
    // LOCKED state, and only read the UnsafeCells when state is in the
    // once-achieved-eternally-preserved state INIT.
    unsafe impl Sync for GlobalSeedStorage {}

    static GLOBAL_SEED_STORAGE: GlobalSeedStorage = GlobalSeedStorage {
        state: AtomicU8::new(UNINIT),
        seed: UnsafeCell::new([0; 4]),
    };

    /// An object representing an initialized global seed.
    ///
    /// Does not actually store the seed inside itself; it is a zero-sized type.
    /// This prevents inflating the RandomState size and in turn HashMap's size.
    #[derive(Copy, Clone, Debug)]
    pub struct GlobalSeed {
        // So we can't accidentally type GlobalSeed { } within this crate.
        _no_accidental_unsafe_init: (),
    }

    impl GlobalSeed {
        #[inline(always)]
        pub fn new() -> Self {
            if GLOBAL_SEED_STORAGE.state.load(Ordering::Acquire) != INIT {
                Self::init_slow()
            }
            Self {
                _no_accidental_unsafe_init: (),
            }
        }

        #[cold]
        #[inline(never)]
        fn init_slow() {
            // Generate seed outside of critical section.
            let seed = generate_global_seed();

            loop {
                match GLOBAL_SEED_STORAGE.state.compare_exchange_weak(
                    UNINIT,
                    LOCKED,
                    Ordering::Relaxed,
                    Ordering::Acquire,
                ) {
                    Ok(_) => unsafe {
                        // SAFETY: we just acquired an exclusive lock.
                        *GLOBAL_SEED_STORAGE.seed.get() = seed;
                        GLOBAL_SEED_STORAGE.state.store(INIT, Ordering::Release);
                        return;
                    },

                    Err(INIT) => return,

                    // Yes, it's a spin loop. We need to support no_std (so no easy
                    // access to proper locks), this is a one-time-per-program
                    // initialization, and the critical section is only a few
                    // store instructions, so it'll be fine.
                    _ => core::hint::spin_loop(),
                }
            }
        }

        #[inline(always)]
        pub fn get(self) -> &'static [u64; 4] {
            // SAFETY: our constructor ensured we are in the INIT state and thus
            // this raw read does not race with any write.
            unsafe { &*GLOBAL_SEED_STORAGE.seed.get() }
        }
    }
}

#[cfg(not(target_has_atomic = "8"))]
mod global {
    #[derive(Copy, Clone, Debug)]
    pub struct GlobalSeed {}

    impl GlobalSeed {
        #[inline(always)]
        pub fn new() -> Self {
            Self {}
        }

        #[inline(always)]
        pub fn get(self) -> &'static [u64; 4] {
            &super::FIXED_GLOBAL_SEED
        }
    }
}
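
// A small sanity-check sketch of the invariants documented above: `with_seed(0)`
// matches `default()` for both fixed states, and the lazily initialized global
// seed is generated once and then reused. The `hash_bytes` helper is local to
// these tests; it simply hashes a byte slice with one freshly built hasher.
#[cfg(test)]
mod tests {
    use super::*;
    use core::hash::Hasher;

    // Hash `bytes` with a single hasher produced by `build`.
    fn hash_bytes<B: BuildHasher>(build: &B, bytes: &[u8]) -> u64 {
        let mut hasher = build.build_hasher();
        hasher.write(bytes);
        hasher.finish()
    }

    #[test]
    fn with_seed_zero_matches_default() {
        let input = b"foldhash";
        assert_eq!(
            hash_bytes(&fast::FixedState::with_seed(0), input),
            hash_bytes(&fast::FixedState::default(), input)
        );
        assert_eq!(
            hash_bytes(&quality::FixedState::with_seed(0), input),
            hash_bytes(&quality::FixedState::default(), input)
        );
    }

    #[test]
    fn global_seed_is_cached() {
        let a = *global::GlobalSeed::new().get();
        let b = *global::GlobalSeed::new().get();
        // The global seed is generated at most once and then reused for every hasher.
        assert_eq!(a, b);

        // When generated dynamically, every word has a few bits forced on and
        // therefore can never be zero.
        #[cfg(target_has_atomic = "8")]
        assert!(a.iter().all(|&word| word != 0));
    }
}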