1use alloc::vec::Vec;
9use core::num::NonZeroUsize;
10
11use super::CompressionLevel;
12use super::Matcher;
13use super::Sequence;
14
15const MIN_MATCH_LEN: usize = 5;
16
17pub struct MatchGeneratorDriver {
19 vec_pool: Vec<Vec<u8>>,
20 suffix_pool: Vec<SuffixStore>,
21 match_generator: MatchGenerator,
22 slice_size: usize,
23}
24
25impl MatchGeneratorDriver {
26 pub(crate) fn new(slice_size: usize, max_slices_in_window: usize) -> Self {
29 Self {
30 vec_pool: Vec::new(),
31 suffix_pool: Vec::new(),
32 match_generator: MatchGenerator::new(max_slices_in_window * slice_size),
33 slice_size,
34 }
35 }
36}
37
38impl Matcher for MatchGeneratorDriver {
39 fn reset(&mut self, _level: CompressionLevel) {
40 let vec_pool = &mut self.vec_pool;
41 let suffix_pool = &mut self.suffix_pool;
42
43 self.match_generator.reset(|mut data, mut suffixes| {
44 data.resize(data.capacity(), 0);
45 vec_pool.push(data);
46 suffixes.slots.clear();
47 suffixes.slots.resize(suffixes.slots.capacity(), None);
48 suffix_pool.push(suffixes);
49 });
50 }
51
52 fn window_size(&self) -> u64 {
53 self.match_generator.max_window_size as u64
54 }
55
56 fn get_next_space(&mut self) -> Vec<u8> {
57 self.vec_pool.pop().unwrap_or_else(|| {
58 let mut space = alloc::vec![0; self.slice_size];
59 space.resize(space.capacity(), 0);
60 space
61 })
62 }
63
64 fn get_last_space(&mut self) -> &[u8] {
65 self.match_generator.window.last().unwrap().data.as_slice()
66 }
67
68 fn commit_space(&mut self, space: Vec<u8>) {
69 let vec_pool = &mut self.vec_pool;
70 let suffixes = self
71 .suffix_pool
72 .pop()
73 .unwrap_or_else(|| SuffixStore::with_capacity(space.len()));
74 let suffix_pool = &mut self.suffix_pool;
75 self.match_generator
76 .add_data(space, suffixes, |mut data, mut suffixes| {
77 data.resize(data.capacity(), 0);
78 vec_pool.push(data);
79 suffixes.slots.clear();
80 suffixes.slots.resize(suffixes.slots.capacity(), None);
81 suffix_pool.push(suffixes);
82 });
83 }
84
85 fn start_matching(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) {
86 while self.match_generator.next_sequence(&mut handle_sequence) {}
87 }
88 fn skip_matching(&mut self) {
89 self.match_generator.skip_matching();
90 }
91}
92
93struct SuffixStore {
96 slots: Vec<Option<NonZeroUsize>>,
100 len_log: u32,
101}
102
103impl SuffixStore {
104 fn with_capacity(capacity: usize) -> Self {
105 Self {
106 slots: alloc::vec![None; capacity],
107 len_log: capacity.ilog2(),
108 }
109 }
110
111 #[inline(always)]
112 fn insert(&mut self, suffix: &[u8], idx: usize) {
113 let key = self.key(suffix);
114 self.slots[key] = Some(NonZeroUsize::new(idx + 1).unwrap());
115 }
116
117 #[inline(always)]
118 fn contains_key(&self, suffix: &[u8]) -> bool {
119 let key = self.key(suffix);
120 self.slots[key].is_some()
121 }
122
123 #[inline(always)]
124 fn get(&self, suffix: &[u8]) -> Option<usize> {
125 let key = self.key(suffix);
126 self.slots[key].map(|x| <NonZeroUsize as Into<usize>>::into(x) - 1)
127 }
128
129 #[inline(always)]
130 fn key(&self, suffix: &[u8]) -> usize {
131 let s0 = suffix[0] as u64;
132 let s1 = suffix[1] as u64;
133 let s2 = suffix[2] as u64;
134 let s3 = suffix[3] as u64;
135 let s4 = suffix[4] as u64;
136
137 const POLY: u64 = 0xCF3BCCDCABu64;
138
139 let s0 = (s0 << 24).wrapping_mul(POLY);
140 let s1 = (s1 << 32).wrapping_mul(POLY);
141 let s2 = (s2 << 40).wrapping_mul(POLY);
142 let s3 = (s3 << 48).wrapping_mul(POLY);
143 let s4 = (s4 << 56).wrapping_mul(POLY);
144
145 let index = s0 ^ s1 ^ s2 ^ s3 ^ s4;
146 let index = index >> (64 - self.len_log);
147 index as usize % self.slots.len()
148 }
149}
150
151struct WindowEntry {
154 data: Vec<u8>,
155 suffixes: SuffixStore,
157 base_offset: usize,
159}
160
161pub(crate) struct MatchGenerator {
162 max_window_size: usize,
163 window: Vec<WindowEntry>,
166 window_size: usize,
167 #[cfg(debug_assertions)]
168 concat_window: Vec<u8>,
169 suffix_idx: usize,
171 last_idx_in_sequence: usize,
173}
174
175impl MatchGenerator {
176 fn new(max_size: usize) -> Self {
178 Self {
179 max_window_size: max_size,
180 window: Vec::new(),
181 window_size: 0,
182 #[cfg(debug_assertions)]
183 concat_window: Vec::new(),
184 suffix_idx: 0,
185 last_idx_in_sequence: 0,
186 }
187 }
188
189 fn reset(&mut self, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
190 self.window_size = 0;
191 #[cfg(debug_assertions)]
192 self.concat_window.clear();
193 self.suffix_idx = 0;
194 self.last_idx_in_sequence = 0;
195 self.window.drain(..).for_each(|entry| {
196 reuse_space(entry.data, entry.suffixes);
197 });
198 }
199
200 fn next_sequence(&mut self, mut handle_sequence: impl for<'a> FnMut(Sequence<'a>)) -> bool {
205 loop {
206 let last_entry = self.window.last().unwrap();
207 let data_slice = &last_entry.data;
208
209 if self.suffix_idx >= data_slice.len() {
211 if self.last_idx_in_sequence != self.suffix_idx {
212 let literals = &data_slice[self.last_idx_in_sequence..];
213 self.last_idx_in_sequence = self.suffix_idx;
214 handle_sequence(Sequence::Literals { literals });
215 return true;
216 } else {
217 return false;
218 }
219 }
220
221 let data_slice = &data_slice[self.suffix_idx..];
223 if data_slice.len() < MIN_MATCH_LEN {
224 let last_idx_in_sequence = self.last_idx_in_sequence;
225 self.last_idx_in_sequence = last_entry.data.len();
226 self.suffix_idx = last_entry.data.len();
227 handle_sequence(Sequence::Literals {
228 literals: &last_entry.data[last_idx_in_sequence..],
229 });
230 return true;
231 }
232
233 let key = &data_slice[..MIN_MATCH_LEN];
235
236 let mut candidate = None;
238 for (match_entry_idx, match_entry) in self.window.iter().enumerate() {
239 let is_last = match_entry_idx == self.window.len() - 1;
240 if let Some(match_index) = match_entry.suffixes.get(key) {
241 let match_slice = if is_last {
242 &match_entry.data[match_index..self.suffix_idx]
243 } else {
244 &match_entry.data[match_index..]
245 };
246
247 let match_len = Self::common_prefix_len(match_slice, data_slice);
249
250 if match_len >= MIN_MATCH_LEN {
252 let offset = match_entry.base_offset + self.suffix_idx - match_index;
253
254 #[cfg(debug_assertions)]
256 {
257 let unprocessed = last_entry.data.len() - self.suffix_idx;
258 let start = self.concat_window.len() - unprocessed - offset;
259 let end = start + match_len;
260 let check_slice = &self.concat_window[start..end];
261 debug_assert_eq!(check_slice, &match_slice[..match_len]);
262 }
263
264 if let Some((old_offset, old_match_len)) = candidate {
265 if match_len > old_match_len
266 || (match_len == old_match_len && offset < old_offset)
267 {
268 candidate = Some((offset, match_len));
269 }
270 } else {
271 candidate = Some((offset, match_len));
272 }
273 }
274 }
275 }
276
277 if let Some((offset, match_len)) = candidate {
278 self.add_suffixes_till(self.suffix_idx + match_len);
281
282 let last_entry = self.window.last().unwrap();
284 let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];
285
286 self.suffix_idx += match_len;
288 self.last_idx_in_sequence = self.suffix_idx;
289 handle_sequence(Sequence::Triple {
290 literals,
291 offset,
292 match_len,
293 });
294
295 return true;
296 }
297
298 let last_entry = self.window.last_mut().unwrap();
299 let key = &last_entry.data[self.suffix_idx..self.suffix_idx + MIN_MATCH_LEN];
300 if !last_entry.suffixes.contains_key(key) {
301 last_entry.suffixes.insert(key, self.suffix_idx);
302 }
303 self.suffix_idx += 1;
304 }
305 }
306
307 #[inline(always)]
309 fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
310 Self::mismatch_chunks::<8>(a, b)
311 }
312
313 fn mismatch_chunks<const N: usize>(xs: &[u8], ys: &[u8]) -> usize {
316 let off = core::iter::zip(xs.chunks_exact(N), ys.chunks_exact(N))
317 .take_while(|(x, y)| x == y)
318 .count()
319 * N;
320 off + core::iter::zip(&xs[off..], &ys[off..])
321 .take_while(|(x, y)| x == y)
322 .count()
323 }
324
325 #[inline(always)]
327 fn add_suffixes_till(&mut self, idx: usize) {
328 let last_entry = self.window.last_mut().unwrap();
329 if last_entry.data.len() < MIN_MATCH_LEN {
330 return;
331 }
332 let slice = &last_entry.data[self.suffix_idx..idx];
333 for (key_index, key) in slice.windows(MIN_MATCH_LEN).enumerate() {
334 if !last_entry.suffixes.contains_key(key) {
335 last_entry.suffixes.insert(key, self.suffix_idx + key_index);
336 }
337 }
338 }
339
340 fn skip_matching(&mut self) {
342 let len = self.window.last().unwrap().data.len();
343 self.add_suffixes_till(len);
344 self.suffix_idx = len;
345 self.last_idx_in_sequence = len;
346 }
347
348 fn add_data(
351 &mut self,
352 data: Vec<u8>,
353 suffixes: SuffixStore,
354 reuse_space: impl FnMut(Vec<u8>, SuffixStore),
355 ) {
356 assert!(
357 self.window.is_empty() || self.suffix_idx == self.window.last().unwrap().data.len()
358 );
359 self.reserve(data.len(), reuse_space);
360 #[cfg(debug_assertions)]
361 self.concat_window.extend_from_slice(&data);
362
363 if let Some(last_len) = self.window.last().map(|last| last.data.len()) {
364 for entry in self.window.iter_mut() {
365 entry.base_offset += last_len;
366 }
367 }
368
369 let len = data.len();
370 self.window.push(WindowEntry {
371 data,
372 suffixes,
373 base_offset: 0,
374 });
375 self.window_size += len;
376 self.suffix_idx = 0;
377 self.last_idx_in_sequence = 0;
378 }
379
380 fn reserve(&mut self, amount: usize, mut reuse_space: impl FnMut(Vec<u8>, SuffixStore)) {
383 assert!(self.max_window_size >= amount);
384 while self.window_size + amount > self.max_window_size {
385 let removed = self.window.remove(0);
386 self.window_size -= removed.data.len();
387 #[cfg(debug_assertions)]
388 self.concat_window.drain(0..removed.data.len());
389
390 let WindowEntry {
391 suffixes,
392 data: leaked_vec,
393 base_offset: _,
394 } = removed;
395 reuse_space(leaked_vec, suffixes);
396 }
397 }
398}
399
400#[test]
401fn matches() {
402 let mut matcher = MatchGenerator::new(1000);
403 let mut original_data = Vec::new();
404 let mut reconstructed = Vec::new();
405
406 let assert_seq_equal = |seq1: Sequence<'_>, seq2: Sequence<'_>, reconstructed: &mut Vec<u8>| {
407 assert_eq!(seq1, seq2);
408 match seq2 {
409 Sequence::Literals { literals } => reconstructed.extend_from_slice(literals),
410 Sequence::Triple {
411 literals,
412 offset,
413 match_len,
414 } => {
415 reconstructed.extend_from_slice(literals);
416 let start = reconstructed.len() - offset;
417 let end = start + match_len;
418 reconstructed.extend_from_within(start..end);
419 }
420 }
421 };
422
423 matcher.add_data(
424 alloc::vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
425 SuffixStore::with_capacity(100),
426 |_, _| {},
427 );
428 original_data.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
429
430 matcher.next_sequence(|seq| {
431 assert_seq_equal(
432 seq,
433 Sequence::Triple {
434 literals: &[0, 0, 0, 0, 0],
435 offset: 5,
436 match_len: 5,
437 },
438 &mut reconstructed,
439 )
440 });
441
442 assert!(!matcher.next_sequence(|_| {}));
443
444 matcher.add_data(
445 alloc::vec![1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,],
446 SuffixStore::with_capacity(100),
447 |_, _| {},
448 );
449 original_data.extend_from_slice(&[
450 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0,
451 ]);
452
453 matcher.next_sequence(|seq| {
454 assert_seq_equal(
455 seq,
456 Sequence::Triple {
457 literals: &[1, 2, 3, 4, 5, 6],
458 offset: 6,
459 match_len: 6,
460 },
461 &mut reconstructed,
462 )
463 });
464 matcher.next_sequence(|seq| {
465 assert_seq_equal(
466 seq,
467 Sequence::Triple {
468 literals: &[],
469 offset: 12,
470 match_len: 6,
471 },
472 &mut reconstructed,
473 )
474 });
475 matcher.next_sequence(|seq| {
476 assert_seq_equal(
477 seq,
478 Sequence::Triple {
479 literals: &[],
480 offset: 28,
481 match_len: 5,
482 },
483 &mut reconstructed,
484 )
485 });
486 assert!(!matcher.next_sequence(|_| {}));
487
488 matcher.add_data(
489 alloc::vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0],
490 SuffixStore::with_capacity(100),
491 |_, _| {},
492 );
493 original_data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0]);
494
495 matcher.next_sequence(|seq| {
496 assert_seq_equal(
497 seq,
498 Sequence::Triple {
499 literals: &[],
500 offset: 23,
501 match_len: 6,
502 },
503 &mut reconstructed,
504 )
505 });
506 matcher.next_sequence(|seq| {
507 assert_seq_equal(
508 seq,
509 Sequence::Triple {
510 literals: &[7, 8, 9, 10, 11],
511 offset: 16,
512 match_len: 5,
513 },
514 &mut reconstructed,
515 )
516 });
517 assert!(!matcher.next_sequence(|_| {}));
518
519 matcher.add_data(
520 alloc::vec![0, 0, 0, 0, 0],
521 SuffixStore::with_capacity(100),
522 |_, _| {},
523 );
524 original_data.extend_from_slice(&[0, 0, 0, 0, 0]);
525
526 matcher.next_sequence(|seq| {
527 assert_seq_equal(
528 seq,
529 Sequence::Triple {
530 literals: &[],
531 offset: 5,
532 match_len: 5,
533 },
534 &mut reconstructed,
535 )
536 });
537 assert!(!matcher.next_sequence(|_| {}));
538
539 matcher.add_data(
540 alloc::vec![7, 8, 9, 10, 11],
541 SuffixStore::with_capacity(100),
542 |_, _| {},
543 );
544 original_data.extend_from_slice(&[7, 8, 9, 10, 11]);
545
546 matcher.next_sequence(|seq| {
547 assert_seq_equal(
548 seq,
549 Sequence::Triple {
550 literals: &[],
551 offset: 15,
552 match_len: 5,
553 },
554 &mut reconstructed,
555 )
556 });
557 assert!(!matcher.next_sequence(|_| {}));
558
559 matcher.add_data(
560 alloc::vec![1, 3, 5, 7, 9],
561 SuffixStore::with_capacity(100),
562 |_, _| {},
563 );
564 matcher.skip_matching();
565 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
566 reconstructed.extend_from_slice(&[1, 3, 5, 7, 9]);
567 assert!(!matcher.next_sequence(|_| {}));
568
569 matcher.add_data(
570 alloc::vec![1, 3, 5, 7, 9],
571 SuffixStore::with_capacity(100),
572 |_, _| {},
573 );
574 original_data.extend_from_slice(&[1, 3, 5, 7, 9]);
575
576 matcher.next_sequence(|seq| {
577 assert_seq_equal(
578 seq,
579 Sequence::Triple {
580 literals: &[],
581 offset: 5,
582 match_len: 5,
583 },
584 &mut reconstructed,
585 )
586 });
587 assert!(!matcher.next_sequence(|_| {}));
588
589 matcher.add_data(
590 alloc::vec![0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23],
591 SuffixStore::with_capacity(100),
592 |_, _| {},
593 );
594 original_data.extend_from_slice(&[0, 0, 11, 13, 15, 17, 20, 11, 13, 15, 17, 20, 21, 23]);
595
596 matcher.next_sequence(|seq| {
597 assert_seq_equal(
598 seq,
599 Sequence::Triple {
600 literals: &[0, 0, 11, 13, 15, 17, 20],
601 offset: 5,
602 match_len: 5,
603 },
604 &mut reconstructed,
605 )
606 });
607 matcher.next_sequence(|seq| {
608 assert_seq_equal(
609 seq,
610 Sequence::Literals {
611 literals: &[21, 23],
612 },
613 &mut reconstructed,
614 )
615 });
616 assert!(!matcher.next_sequence(|_| {}));
617
618 assert_eq!(reconstructed, original_data);
619}