1
//! Helper type: a frozen RangeMap where most ranges do not have gaps between them.
2
//!
3
//! Ordinary RangeMaps store a start and end for each range.  But if the
4
//! there are not gaps between a pair of ranges, then the range end is redundant.
5
//!
6
//! This trick lets us save about 40%-50% of the total database size, for
7
//! a savings of around 6 MiB.  (Data checked as of April 2026)
8

            
9
use std::borrow::Cow;
10
use std::ops::RangeInclusive;
11

            
12
/// An object that has a single next element.
13
pub(crate) trait Successor: Sized {
14
    /// Return the next element after this one.
15
    ///
16
    /// Returns None if this is the maximum value
17
    fn next(&self) -> Option<Self>;
18
}
19

            
20
impl Successor for u32 {
21
5050
    fn next(&self) -> Option<Self> {
22
5050
        self.checked_add(1)
23
5050
    }
24
}
25

            
26
impl Successor for u128 {
27
152
    fn next(&self) -> Option<Self> {
28
152
        self.checked_add(1)
29
152
    }
30
}
31

            
32
/// An immutable map from ranges to `Option<V1>`, `Option<V2>`-pairs.
33
///
34
/// This type is optimized for reasonable O(lg N) performance,
35
/// and for space efficiency in the case where:
36
/// - `Option<V>` is the same size as V, or at least not much larger.
37
/// - most or all ranges have no gaps between them.
38
/// - we might not want to record any values for V2 at all.
39
/// - we are willing to treat "no entry" and "maps to None" as equivalent.
40
///
41
/// That is to say, we get our space efficiency wins in the case where,
42
/// if some range (K..=V) in the map,
43
/// (V+1..=K2) is also likely to be in the map.
44
///
45
/// (This is true for around like 99% of our IPv4 ranges and around 91% of our
46
/// IPv6 ranges.)
47
///
48
/// This type is crate-internal because its functionality is limited,
49
/// and because we may want to switch to some other approach to geoip in the future.
50
///
51
/// ## Overview
52
///
53
/// We consider a map of disjoint ranges `(S..=E) => V`
54
/// as a _dense_ map from `(S'..=E') => Option<V1, V2>`,
55
/// such that there is is exactly one `(S'..E')` range covering every value from
56
/// `min(S)` through `K::MAX` inclusive.
57
///
58
/// Because this map is dense, we can encode these ranges as a sorted list of S',
59
/// and then use a binary search to find which `Option<V1, V2>` corresponds
60
/// to any given value.
61
///
62
/// ## Invariants
63
///
64
/// - `starts` is sorted and contains no duplicates.
65
/// - `values1.len() == starts.len()`
66
/// - If `values2` is present, `values2.len() == starts.len()`
67
///
68
/// ## Semantics:
69
///
70
/// This table maps keys to value as follows:
71
///
72
/// If `starts` is empty, then every key maps to None.
73
///
74
/// Otherwise:
75
///    - Every key such that `K::min <= key < starts[0]` maps to None.
76
///    - Every key such that `starts[idx] <= key < starts[idx+1]` maps to
77
///      `values[idx]`, which may be Some or None.
78
///    - Every key such that `key >= starts.last()` maps to values.last(),
79
///      which may be Some or None.
80
///
81
/// If `values2` is present, then keys map to the same indices in `values2`
82
/// as they do in `values1`.
83
#[derive(Clone, Debug, Eq, PartialEq)]
84
pub(crate) struct DenseRangeMap<K, V1, V2>
85
where
86
    K: Clone + 'static,
87
    V1: Clone + 'static,
88
    V2: Clone + 'static,
89
{
90
    /// A list of the starting points of each range or gap.
91
    starts: Cow<'static, [K]>,
92
    /// A list of values.
93
    ///
94
    /// If `starts[i]` is the start of a range, then `values[i]` is Some(v)
95
    /// where v is the value of that range for every
96
    values1: Cow<'static, [Option<V1>]>,
97

            
98
    /// An optional list of secondary values.
99
    ///
100
    values2: Option<Cow<'static, [Option<V2>]>>,
101
}
102

            
103
impl<K, V1, V2> Default for DenseRangeMap<K, V1, V2>
104
where
105
    K: Clone + 'static,
106
    V1: Clone + 'static,
107
    V2: Clone + 'static,
108
{
109
    fn default() -> Self {
110
        Self {
111
            starts: Default::default(),
112
            values1: Default::default(),
113
            values2: None,
114
        }
115
    }
116
}
117

            
118
/// A helper type to create a [`DenseRangeMap`] from a sorted list of disjoint ranges.
119
///
120
/// ## Invariants
121
///
122
/// - `starts.len() == values.len()`
123
/// - `starts` is sorted and contains no duplicates.
124
/// - `starts` is empty if and only if `prev_end` is None.
125
///
126
/// ## Semantics
127
///
128
/// If `starts` is empty, nothing has been added to this Builder.
129
///
130
/// Otherwise:
131
///    - Every key such that `K::min <= key < starts[0]` maps to None.
132
///    - Every key such that `starts[idx] <= key < starts[idx+1]` maps to
133
///      `values[idx]`, which may be Some or None.
134
///    - Every key such that `key <= starts.last() <= prev_end` maps to
135
///      `values.last()`.
136
///    - No mappings have been added for any range S..=E such that
137
///      'S > prev_end`.
138
struct DenseRangeMapBuilder<K, V1, V2> {
139
    /// A list of range starts so far.
140
    starts: Vec<K>,
141
    /// A list of values so far.
142
    values1: Vec<Option<V1>>,
143

            
144
    /// A list of secondary values so far.
145
    ///
146
    /// None if we're ignoring secondary values.
147
    values2: Option<Vec<Option<V2>>>,
148

            
149
    /// The last element of the most recently added range.
150
    prev_end: Option<K>,
151
}
152

            
153
/// An error that occurred while building a [`DenseRangeMap`]
154
#[derive(Debug, Clone, thiserror::Error)]
155
pub(crate) enum Error {
156
    /// Some range was invalid
157
    #[error("Found an entry with an invalid range")]
158
    BadEntry,
159

            
160
    /// The entries in the database were not sorted.
161
    #[error("Entries were not sorted")]
162
    Unsorted,
163
}
164

            
165
impl<K: Eq + Ord + Successor, V1, V2> DenseRangeMapBuilder<K, V1, V2>
166
where
167
    K: Clone + 'static,
168
    V1: Clone + 'static,
169
    V2: Clone + 'static,
170
{
171
    /// Construct a new empty builder.
172
1134
    fn new() -> Self {
173
1134
        Self {
174
1134
            starts: Vec::new(),
175
1134
            values1: Vec::new(),
176
1134
            values2: Some(Vec::new()),
177
1134
            prev_end: None,
178
1134
        }
179
1134
    }
180

            
181
    /// Add a single entry to this builder.
182
6144
    fn push(&mut self, start: K, v1: Option<V1>, v2: Option<V2>) {
183
6144
        self.starts.push(start);
184
6144
        self.values1.push(v1);
185
6144
        if let Some(values2) = self.values2.as_mut() {
186
258
            values2.push(v2);
187
6136
        }
188
6144
    }
189

            
190
    /// Consume this builder and return a DenseRangeMap with the same values.
191
1134
    fn build(mut self) -> DenseRangeMap<K, V1, V2> {
192
1134
        if let Some(prev_end) = self.prev_end.take()
193
1078
            && let Some(next_range_start) = prev_end.next()
194
758
        {
195
758
            // There is empty space after the last range, so we need to
196
758
            // represent that with a gap entry.
197
758
            self.push(next_range_start, None, None);
198
758
        }
199
        // See if we can reclaim any space.
200
1134
        self.starts.shrink_to_fit();
201
1134
        self.values1.shrink_to_fit();
202
1134
        if let Some(values2) = self.values2.as_mut() {
203
104
            values2.shrink_to_fit();
204
1130
        }
205

            
206
1134
        let map = DenseRangeMap {
207
1134
            starts: self.starts.into(),
208
1134
            values1: self.values1.into(),
209
1134
            values2: self.values2.map(Into::into),
210
1134
        };
211

            
212
        #[cfg(test)]
213
1034
        map.assert_valid();
214

            
215
1134
        map
216
1134
    }
217

            
218
    /// Add an entry to this [`DenseRangeMapBuilder`].
219
    ///
220
    /// Returns an error if `range` is not in strictly ascending order with respect
221
    /// to all previous ranges.
222
5204
    fn add_entry(
223
5204
        &mut self,
224
5204
        range: RangeInclusive<K>,
225
5204
        value1: Option<V1>,
226
5204
        value2: Option<V2>,
227
5204
    ) -> Result<(), Error> {
228
5204
        if value1.is_none() && value2.is_none() {
229
2
            return Ok(());
230
5202
        }
231

            
232
        // NOTE: We _could_ coalesce our ranges if two abutting ranges have the
233
        // same value.  But our geoip data processing tools already do that.
234
        use std::cmp::Ordering::*;
235

            
236
5202
        let (start, end) = range.into_inner();
237
5202
        if start > end {
238
            return Err(Error::BadEntry);
239
5202
        }
240

            
241
        // Set "next_range_start" to the place that this range would start if
242
        // there is no gap after the last range.
243
5202
        if let Some(prev_end) = self.prev_end.take() {
244
            // This is not the first entry, so we might need to add a gap entry.
245

            
246
            // Find the start of a possible gap entry.
247
            // (If there is not a successor to the end of the last range, then
248
            // any entry we are trying to push is unsorted!)
249
4124
            let gap_start = prev_end.next().ok_or(Error::Unsorted)?;
250

            
251
            // Compare the start of the possible gap to the start of this range.
252
4124
            match gap_start.cmp(&start) {
253
184
                Less => {
254
184
                    // There is a gap between the end of the last entry and the
255
184
                    // start of this one.  Add a representation of that gap.
256
184
                    self.push(gap_start, None, None);
257
184
                }
258
3940
                Equal => {
259
3940
                    // There is no gap, so we don't have to represent it. Cool!
260
3940
                }
261
                Greater => {
262
                    // We aren't sorted; give up.
263
                    return Err(Error::Unsorted);
264
                }
265
            }
266
1078
        }
267
        // Add this entry.
268
5202
        self.push(start, value1, value2);
269
5202
        self.prev_end = Some(end);
270

            
271
5202
        Ok(())
272
5204
    }
273
}
274

            
275
impl<K: Eq + Ord + Successor, V1, V2> DenseRangeMap<K, V1, V2>
276
where
277
    K: Clone + 'static,
278
    V1: Clone + 'static,
279
    V2: Clone + 'static,
280
{
281
    /// Create a new map from its raw parts.
282
    ///
283
    /// Requires that the slices are actually the results of calling
284
    /// `export()` on a map of this type.
285
104
    pub(crate) fn from_static_parts(
286
104
        starts: &'static [K],
287
104
        values1: &'static [Option<V1>],
288
104
        values2: Option<&'static [Option<V2>]>,
289
104
    ) -> Self {
290
104
        let map = Self {
291
104
            starts: starts.into(),
292
104
            values1: values1.into(),
293
104
            values2: values2.map(Into::into),
294
104
        };
295
        #[cfg(test)]
296
4
        map.assert_valid();
297

            
298
104
        map
299
104
    }
300

            
301
    /// Construct a [`DenseRangeMap`] from an iterator of `(range,v1)` tuples.
302
    ///
303
    /// The ranges must be disjoint and sorted in ascending order by their start.
304
    #[cfg(test)]
305
1030
    pub(crate) fn from_sorted_inclusive_ranges<S>(iter: S) -> Result<Self, Error>
306
1030
    where
307
1030
        S: Iterator<Item = (RangeInclusive<K>, V1)>,
308
    {
309
1030
        let discard_v2 = true;
310
1030
        Self::try_from_sorted_inclusive_ranges(
311
5048
            iter.map(|(r, v1)| Ok((r, Some(v1), None))),
312
1030
            discard_v2,
313
        )
314
1030
    }
315

            
316
    /// Construct a [`DenseRangeMap`] from an iterator of
317
    /// `Result<(range,optvalue1,optvalue2)>` tuples.
318
    ///
319
    /// The ranges must be disjoint and sorted in ascending order by their
320
    /// start.
321
1134
    pub(crate) fn try_from_sorted_inclusive_ranges<S, E>(
322
1134
        iter: S,
323
1134
        discard_v2: bool,
324
1134
    ) -> Result<Self, E>
325
1134
    where
326
1134
        S: Iterator<Item = Result<(RangeInclusive<K>, Option<V1>, Option<V2>), E>>,
327
1134
        E: From<Error>,
328
    {
329
1134
        let mut b = DenseRangeMapBuilder::new();
330
1134
        if discard_v2 {
331
1030
            b.values2 = None;
332
1130
        }
333
5204
        for entry in iter {
334
5204
            let (range, value1, mut value2) = entry?;
335
5204
            if discard_v2 {
336
5048
                value2 = None;
337
5198
            }
338
5204
            b.add_entry(range, value1, value2)?;
339
        }
340

            
341
1134
        Ok(b.build())
342
1134
    }
343

            
344
    /// Return the index for the values corresponding to `key`.
345
51594
    pub(crate) fn index_for_key(&self, key: &K) -> Option<usize> {
346
51594
        match self.starts.binary_search(key) {
347
118
            Ok(v) => Some(v),
348
14136
            Err(0) => None,
349
37340
            Err(v) => Some(v - 1),
350
        }
351
51594
    }
352

            
353
    /// Return the value, if any, associated with the given `key`.
354
51594
    pub(crate) fn get1(&self, key: &K) -> Option<&V1> {
355
51594
        self.index_for_key(key)
356
51594
            .and_then(|index| self.values1[index].as_ref())
357
51594
    }
358

            
359
    /// Return the secondary value, if any, associated with the given `key`.
360
    pub(crate) fn get2(&self, key: &K) -> Option<&V2> {
361
        if let Some(values2) = self.values2.as_ref() {
362
            self.index_for_key(key)
363
                .and_then(|index| values2[index].as_ref())
364
        } else {
365
            None
366
        }
367
    }
368

            
369
    /// Testing only: Assert that this object obeys its invariants.
370
    #[cfg(test)]
371
1040
    fn assert_valid(&self) {
372
        // We don't use `is_sorted` here since it allows duplicates.
373
1358990
        for pair in self.starts.windows(2) {
374
1358990
            assert!(pair[0] < pair[1]);
375
        }
376
1040
        assert_eq!(self.values1.len(), self.starts.len());
377
1040
        if let Some(values2) = &self.values2 {
378
4
            assert_eq!(values2.len(), self.starts.len());
379
1036
        }
380
1040
    }
381

            
382
    /// Testing only: return a `Vec` of `(start, Option<V1, V2>)` to represent
383
    /// this entries table.
384
    ///
385
    /// This is more convenient to inspect than looking at `starts` and `values`
386
    /// manually.
387
    ///
388
    /// (We store `starts` and `values` in separate lists to avoid padding issues.)
389
    #[cfg(test)]
390
4
    fn rep(&self) -> Vec<(K, Option<V1>)>
391
4
    where
392
4
        K: Clone + 'static,
393
4
        V1: Clone + 'static,
394
    {
395
4
        self.starts
396
4
            .iter()
397
4
            .cloned()
398
4
            .zip(self.values1.iter().cloned())
399
4
            .collect()
400
4
    }
401

            
402
    /// Return a triple of slices that encode this map.
403
    #[cfg(feature = "export")]
404
    #[allow(clippy::type_complexity)]
405
    pub(crate) fn export(&self) -> (&[K], &[Option<V1>], Option<&[Option<V2>]>) {
406
        (
407
            self.starts.as_ref(),
408
            self.values1.as_ref(),
409
            self.values2.as_ref().map(|v2| v2.as_ref()),
410
        )
411
    }
412
}
413

            
414
#[cfg(test)]
415
mod test {
416
    // @@ begin test lint list maintained by maint/add_warning @@
417
    #![allow(clippy::bool_assert_comparison)]
418
    #![allow(clippy::clone_on_copy)]
419
    #![allow(clippy::dbg_macro)]
420
    #![allow(clippy::mixed_attributes_style)]
421
    #![allow(clippy::print_stderr)]
422
    #![allow(clippy::print_stdout)]
423
    #![allow(clippy::single_char_pattern)]
424
    #![allow(clippy::unwrap_used)]
425
    #![allow(clippy::unchecked_time_subtraction)]
426
    #![allow(clippy::useless_vec)]
427
    #![allow(clippy::needless_pass_by_value)]
428
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
429

            
430
    use super::*;
431
    use proptest::prelude::*;
432

            
433
    type M = DenseRangeMap<u32, &'static str, ()>;
434

            
435
    #[test]
436
    fn empty() {
437
        let map = M::from_sorted_inclusive_ranges(std::iter::empty()).unwrap();
438
        assert_eq!(map.get1(&0), None);
439
        assert_eq!(map.get1(&1), None);
440
        assert_eq!(map.get1(&50), None);
441
        assert_eq!(map.get1(&(u32::MAX - 1)), None);
442
        assert_eq!(map.get1(&(u32::MAX)), None);
443
    }
444

            
445
    #[test]
446
    fn both_ends_open() {
447
        // construct a map that has gaps at both ends.
448
        let map = M::from_sorted_inclusive_ranges(
449
            [
450
                //
451
                (5..=10, "small"),
452
                (11..=90, "medium"),
453
                (100..=1000, "big"),
454
            ]
455
            .into_iter(),
456
        )
457
        .unwrap();
458
        map.assert_valid();
459

            
460
        assert_eq!(
461
            map.rep()[..],
462
            [
463
                (5, Some("small")),
464
                (11, Some("medium")),
465
                (91, None),
466
                (100, Some("big")),
467
                (1001, None),
468
            ]
469
        );
470

            
471
        assert_eq!(map.get1(&0), None);
472
        assert_eq!(map.get1(&1), None);
473
        assert_eq!(map.get1(&5), Some(&"small"));
474
        assert_eq!(map.get1(&10), Some(&"small"));
475
        assert_eq!(map.get1(&11), Some(&"medium"));
476
        assert_eq!(map.get1(&85), Some(&"medium"));
477
        assert_eq!(map.get1(&90), Some(&"medium"));
478
        assert_eq!(map.get1(&91), None);
479
        assert_eq!(map.get1(&99), None);
480
        assert_eq!(map.get1(&100), Some(&"big"));
481
        assert_eq!(map.get1(&500), Some(&"big"));
482
        assert_eq!(map.get1(&1000), Some(&"big"));
483
        assert_eq!(map.get1(&1001), None);
484
        assert_eq!(map.get1(&(u32::MAX - 1)), None);
485
        assert_eq!(map.get1(&u32::MAX), None);
486
    }
487

            
488
    #[test]
489
    fn both_ends_filled_map() {
490
        // construct a map that has no gap at either end.
491
        let map = M::from_sorted_inclusive_ranges(
492
            [
493
                //
494
                (0..=10, "small"),
495
                (11..=90, "medium"),
496
                (100..=u32::MAX, "big"),
497
            ]
498
            .into_iter(),
499
        )
500
        .unwrap();
501

            
502
        assert_eq!(
503
            map.rep()[..],
504
            [
505
                (0, Some("small")),
506
                (11, Some("medium")),
507
                (91, None),
508
                (100, Some("big")),
509
            ]
510
        );
511

            
512
        assert_eq!(map.get1(&0), Some(&"small"));
513
        assert_eq!(map.get1(&1), Some(&"small"));
514
        assert_eq!(map.get1(&5), Some(&"small"));
515
        assert_eq!(map.get1(&10), Some(&"small"));
516
        assert_eq!(map.get1(&11), Some(&"medium"));
517
        assert_eq!(map.get1(&85), Some(&"medium"));
518
        assert_eq!(map.get1(&90), Some(&"medium"));
519
        assert_eq!(map.get1(&91), None);
520
        assert_eq!(map.get1(&99), None);
521
        assert_eq!(map.get1(&100), Some(&"big"));
522
        assert_eq!(map.get1(&500), Some(&"big"));
523
        assert_eq!(map.get1(&1000), Some(&"big"));
524
        assert_eq!(map.get1(&1001), Some(&"big"));
525
        assert_eq!(map.get1(&(u32::MAX - 1)), Some(&"big"));
526
        assert_eq!(map.get1(&u32::MAX), Some(&"big"));
527
    }
528

            
529
    proptest! {
530
        // Property test: build a RangeIncluseiveMap at random, then construct a new
531
        // DenseRangeMap from that map, and make sure they give the same outputs.
532
        #[test]
533
        fn matches_rangemap(ranges: Vec<RangeInclusive<u32>>, probes: Vec<u32>) {
534
            let mut rangemap: rangemap::RangeInclusiveMap<u32, usize> = Default::default();
535
            for (n, range) in ranges.into_iter().enumerate() {
536
                rangemap.insert(range, n);
537
            }
538
            let dense_map = DenseRangeMap::<u32, usize, ()>::from_sorted_inclusive_ranges(
539
                rangemap.iter().map(|(k,v)| (k.clone(), *v))
540
            ).unwrap();
541

            
542
            for probe in probes.iter() {
543
                assert_eq!(rangemap.get(probe), dense_map.get1(probe));
544
            }
545
        }
546

            
547
        // Property test: construct a disjoint list of ranges in ascending
548
        // order, use that list to construct a RangeInclusiveMap and a
549
        // DenseRangeMap and make sure they give the same outputs.
550
        #[test]
551
        fn matches_rangemap2(r: Vec<(u32,u32)>, probes: Vec<u32>) {
552
            let mut ranges = vec![];
553
            let mut next = 0_u32;
554
            for (gap,len) in r {
555
                let Some(start) = next.checked_add(gap) else {break;};
556
                let end = start.saturating_add(len);
557
                ranges.push(start..=end);
558
                if let Some(n) = end.checked_add(1) {
559
                    next = n;
560
                } else {
561
                    break;
562
                }
563
            }
564

            
565
            let mut rangemap: rangemap::RangeInclusiveMap<u32, usize> = Default::default();
566
            for (n, range) in ranges.iter().enumerate() {
567
                rangemap.insert(range.clone(), n);
568
            }
569

            
570
            let dense_map = DenseRangeMap::<u32, usize, ()>::from_sorted_inclusive_ranges(
571
                ranges.into_iter().enumerate().map(|(n, r)| (r, n))
572
            ).unwrap();
573

            
574
            for probe in probes.iter() {
575
                assert_eq!(rangemap.get(probe), dense_map.get1(probe));
576
            }
577
        }
578
    }
579
}