foldhash/seed.rs
use core::hash::BuildHasher;

// These constants may end up unused depending on platform support.
#[allow(unused)]
use crate::{ARBITRARY1, ARBITRARY9};

use super::{
    folded_multiply, ARBITRARY2, ARBITRARY3, ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7,
    ARBITRARY8,
};

/// Used for FixedState, and RandomState if atomics for dynamic init are unavailable.
const FIXED_GLOBAL_SEED: [u64; 4] = [ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7];

pub mod fast {
    use super::*;
    use crate::fast::FoldHasher;

    /// A [`BuildHasher`] for [`fast::FoldHasher`]s that are randomly initialized.
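    ///
    /// A minimal usage sketch (assuming this builder is re-exported at the crate
    /// root as `foldhash::fast::RandomState`, as the module layout suggests):
    ///
    /// ```
    /// use std::collections::HashMap;
    /// use foldhash::fast::RandomState;
    ///
    /// // Each RandomState is seeded non-deterministically, so hash values differ
    /// // between runs, but a map built from it behaves like any other HashMap.
    /// let mut map: HashMap<&str, u32, RandomState> = HashMap::with_hasher(RandomState::default());
    /// map.insert("hello", 1);
    /// assert_eq!(map.get("hello"), Some(&1));
    /// ```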
    #[derive(Copy, Clone, Debug)]
    pub struct RandomState {
        per_hasher_seed: u64,
        global_seed: global::GlobalSeed,
    }

    impl Default for RandomState {
        fn default() -> Self {
            // We initialize the per-hasher seed with the stack pointer to ensure
            // different threads have different seeds, with the side benefit that
            // stack address randomization gives us further non-determinism.
            let mut per_hasher_seed = 0;
            let stack_ptr = core::ptr::addr_of!(per_hasher_seed) as u64;
            per_hasher_seed = stack_ptr;

            // If we have the standard library available we use a thread-local
            // state to ensure RandomStates are different with high probability,
            // even if the call stack is the same.
            #[cfg(feature = "std")]
            {
                use std::cell::Cell;
                thread_local! {
                    static PER_HASHER_NONDETERMINISM: Cell<u64> = const { Cell::new(0) };
                }

                let nondeterminism = PER_HASHER_NONDETERMINISM.get();
                per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism);
                PER_HASHER_NONDETERMINISM.set(per_hasher_seed);
            };

            // If we don't have the standard library we use a global atomic
            // instead of a thread-local state.
            //
            // PER_HASHER_NONDETERMINISM is loaded and updated in a racy manner,
            // but this doesn't matter in practice - it is impossible that two
            // different threads have the same stack location, so they'll almost
            // surely generate different seeds, and provide a different possible
            // update for PER_HASHER_NONDETERMINISM. If we would use a proper
            // fetch_add atomic update then there is a larger chance of
            // problematic contention.
            //
            // We use usize instead of 64-bit atomics for best platform support.
            #[cfg(not(feature = "std"))]
            {
                use core::sync::atomic::{AtomicUsize, Ordering};
                static PER_HASHER_NONDETERMINISM: AtomicUsize = AtomicUsize::new(0);

                let nondeterminism = PER_HASHER_NONDETERMINISM.load(Ordering::Relaxed) as u64;
                per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism);
                PER_HASHER_NONDETERMINISM.store(per_hasher_seed as usize, Ordering::Relaxed);
            }

            // One extra mixing step to ensure good random bits.
            per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY2);

            Self {
                per_hasher_seed,
                global_seed: global::GlobalSeed::new(),
            }
        }
    }

    impl BuildHasher for RandomState {
        type Hasher = FoldHasher;

        #[inline(always)]
        fn build_hasher(&self) -> FoldHasher {
            FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get())
        }
    }

    /// A [`BuildHasher`] for [`fast::FoldHasher`]s that all have the same fixed seed.
    ///
    /// Not recommended unless you absolutely need determinism.
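    ///
    /// A minimal sketch of deterministic hashing (assuming the re-export
    /// `foldhash::fast::FixedState`; `BuildHasher::hash_one` requires Rust 1.71+):
    ///
    /// ```
    /// use std::hash::BuildHasher;
    /// use foldhash::fast::FixedState;
    ///
    /// // The same seed yields the same hash, across runs and across processes.
    /// let a = FixedState::with_seed(42).hash_one("hello");
    /// let b = FixedState::with_seed(42).hash_one("hello");
    /// assert_eq!(a, b);
    /// ```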
    #[derive(Copy, Clone, Debug)]
    pub struct FixedState {
        per_hasher_seed: u64,
    }

    impl FixedState {
        /// Creates a [`FixedState`] with the given seed.
        #[inline(always)]
        pub const fn with_seed(seed: u64) -> Self {
            // XOR with ARBITRARY3 such that with_seed(0) matches default.
            Self {
                per_hasher_seed: seed ^ ARBITRARY3,
            }
        }
    }

    impl Default for FixedState {
        #[inline(always)]
        fn default() -> Self {
            Self {
                per_hasher_seed: ARBITRARY3,
            }
        }
    }

    impl BuildHasher for FixedState {
        type Hasher = FoldHasher;

        #[inline(always)]
        fn build_hasher(&self) -> FoldHasher {
            FoldHasher::with_seed(self.per_hasher_seed, &FIXED_GLOBAL_SEED)
        }
    }
}

pub mod quality {
    use super::*;
    use crate::quality::FoldHasher;

    /// A [`BuildHasher`] for [`quality::FoldHasher`]s that are randomly initialized.
    #[derive(Copy, Clone, Default, Debug)]
    pub struct RandomState {
        inner: fast::RandomState,
    }

    impl BuildHasher for RandomState {
        type Hasher = FoldHasher;

        #[inline(always)]
        fn build_hasher(&self) -> FoldHasher {
            FoldHasher {
                inner: self.inner.build_hasher(),
            }
        }
    }

    /// A [`BuildHasher`] for [`quality::FoldHasher`]s that all have the same fixed seed.
    ///
    /// Not recommended unless you absolutely need determinism.
    #[derive(Copy, Clone, Default, Debug)]
    pub struct FixedState {
        inner: fast::FixedState,
    }

    impl FixedState {
        /// Creates a [`FixedState`] with the given seed.
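        ///
        /// The seed is pre-mixed with a folded multiply (see the comment in the
        /// body below), so `with_seed(0)` behaves exactly like `default()`. A small
        /// sketch, assuming the re-export `foldhash::quality::FixedState`:
        ///
        /// ```
        /// use std::hash::BuildHasher;
        /// use foldhash::quality::FixedState;
        ///
        /// // with_seed(0) and default() build identical hashers.
        /// assert_eq!(
        ///     FixedState::with_seed(0).hash_one("hello"),
        ///     FixedState::default().hash_one("hello"),
        /// );
        /// ```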
        #[inline(always)]
        pub const fn with_seed(seed: u64) -> Self {
            Self {
                // We do an additional folded multiply with the seed here for
                // the quality hash to ensure better independence between seed
                // and hash. If the seed is zero the folded multiply is zero,
                // preserving with_seed(0) == default().
                inner: fast::FixedState::with_seed(folded_multiply(seed, ARBITRARY8)),
            }
        }
    }

    impl BuildHasher for FixedState {
        type Hasher = FoldHasher;

        #[inline(always)]
        fn build_hasher(&self) -> FoldHasher {
            FoldHasher {
                inner: self.inner.build_hasher(),
            }
        }
    }
}

#[cfg(target_has_atomic = "8")]
mod global {
    use super::*;
    use core::cell::UnsafeCell;
    use core::sync::atomic::{AtomicU8, Ordering};

    fn generate_global_seed() -> [u64; 4] {
        let mix = |seed: u64, x: u64| folded_multiply(seed ^ x, ARBITRARY9);

        // Use address space layout randomization as our main randomness source.
        // This isn't great, but we don't advertise HashDoS resistance in the first
        // place. This is a whole lot better than nothing, at near zero cost with
        // no dependencies.
        let mut seed = 0;
        let stack_ptr = &seed as *const _;
        let func_ptr = generate_global_seed;
        let static_ptr = &GLOBAL_SEED_STORAGE as *const _;
        seed = mix(seed, stack_ptr as usize as u64);
        seed = mix(seed, func_ptr as usize as u64);
        seed = mix(seed, static_ptr as usize as u64);

        // If we have the standard library available, augment entropy with the
        // current time and an address from the allocator.
        #[cfg(feature = "std")]
        {
            #[cfg(not(any(
                miri,
                all(target_family = "wasm", target_os = "unknown"),
                target_os = "zkvm"
            )))]
            if let Ok(duration) = std::time::UNIX_EPOCH.elapsed() {
                seed = mix(seed, duration.subsec_nanos() as u64);
                seed = mix(seed, duration.as_secs());
            }

            let box_ptr = &*Box::new(0u8) as *const _;
            seed = mix(seed, box_ptr as usize as u64);
        }

        let seed_a = mix(seed, 0);
        let seed_b = mix(mix(mix(seed_a, 0), 0), 0);
        let seed_c = mix(mix(mix(seed_b, 0), 0), 0);
        let seed_d = mix(mix(mix(seed_c, 0), 0), 0);

        // Zeroes form a weak-point for the multiply-mix, and zeroes tend to be
        // a common input. So we want our global seeds that are XOR'ed with the
        // input to always be non-zero. To also ensure there is always a good spread
        // of bits, we give up 3 bits of entropy and simply force some bits on.
        const FORCED_ONES: u64 = (1 << 63) | (1 << 31) | 1;
        [
            seed_a | FORCED_ONES,
            seed_b | FORCED_ONES,
            seed_c | FORCED_ONES,
            seed_d | FORCED_ONES,
        ]
    }

    // Now all the below code purely exists to cache the above seed as
    // efficiently as possible. Even if we weren't a no_std crate and had access to
    // OnceLock, we don't want to check whether the global is set each time we
    // hash an object, so we hand-roll a global storage where type safety allows us
    // to assume the storage is initialized after construction.
    struct GlobalSeedStorage {
        state: AtomicU8,
        seed: UnsafeCell<[u64; 4]>,
    }

    const UNINIT: u8 = 0;
    const LOCKED: u8 = 1;
    const INIT: u8 = 2;

    // SAFETY: we only mutate the UnsafeCells when state is in the thread-exclusive
    // LOCKED state, and only read the UnsafeCells when state is in the
    // once-achieved-eternally-preserved state INIT.
    unsafe impl Sync for GlobalSeedStorage {}

    static GLOBAL_SEED_STORAGE: GlobalSeedStorage = GlobalSeedStorage {
        state: AtomicU8::new(UNINIT),
        seed: UnsafeCell::new([0; 4]),
    };

    /// An object representing an initialized global seed.
    ///
    /// Does not actually store the seed inside itself, it is a zero-sized type.
    /// This prevents inflating the RandomState size and in turn HashMap's size.
    #[derive(Copy, Clone, Debug)]
    pub struct GlobalSeed {
        // So we can't accidentally type GlobalSeed { } within this crate.
        _no_accidental_unsafe_init: (),
    }

    impl GlobalSeed {
        #[inline(always)]
        pub fn new() -> Self {
            if GLOBAL_SEED_STORAGE.state.load(Ordering::Acquire) != INIT {
                Self::init_slow()
            }
            Self {
                _no_accidental_unsafe_init: (),
            }
        }

        #[cold]
        #[inline(never)]
        fn init_slow() {
            // Generate seed outside of critical section.
            let seed = generate_global_seed();

            loop {
                match GLOBAL_SEED_STORAGE.state.compare_exchange_weak(
                    UNINIT,
                    LOCKED,
                    Ordering::Relaxed,
                    Ordering::Acquire,
                ) {
                    Ok(_) => unsafe {
                        // SAFETY: we just acquired an exclusive lock.
                        *GLOBAL_SEED_STORAGE.seed.get() = seed;
                        GLOBAL_SEED_STORAGE.state.store(INIT, Ordering::Release);
                        return;
                    },

                    Err(INIT) => return,

                    // Yes, it's a spin loop. We need to support no_std (so no easy
                    // access to proper locks), this is a one-time-per-program
                    // initialization, and the critical section is only a few
                    // store instructions, so it'll be fine.
                    _ => core::hint::spin_loop(),
                }
            }
        }

        #[inline(always)]
        pub fn get(self) -> &'static [u64; 4] {
            // SAFETY: our constructor ensured we are in the INIT state and thus
            // this raw read does not race with any write.
            unsafe { &*GLOBAL_SEED_STORAGE.seed.get() }
        }
    }
}

#[cfg(not(target_has_atomic = "8"))]
mod global {
    #[derive(Copy, Clone, Debug)]
    pub struct GlobalSeed {}

    impl GlobalSeed {
        #[inline(always)]
        pub fn new() -> Self {
            Self {}
        }

        #[inline(always)]
        pub fn get(self) -> &'static [u64; 4] {
            &super::FIXED_GLOBAL_SEED
        }
    }
}