1
//! Declare types for interning various objects.
2

            
3
use std::fmt::Debug;
4
use std::hash::Hash;
5
use std::sync::{Arc, Mutex, MutexGuard, OnceLock, Weak};
6

            
7
use derive_deftly::define_derive_deftly;
8
use derive_more::{Deref, Display, Into};
9

            
10
/// Alias to force use of RandomState, regardless of features enabled in `weak_tables`.
11
///
12
/// See <https://github.com/tov/weak-table-rs/issues/23> for discussion.
13
type WeakHashSet<T> = weak_table::WeakHashSet<T, std::hash::RandomState>;
14

            
15
/// A wrapper around [`Arc`] representing owned [`InternCache`] entries.
16
///
17
/// The wrapper type serves the purpose of semantic meaning only, implying that
18
/// this value is cached in some way or another by this module.
19
///
20
/// We only conveniently allow obtaining the underlying [`Arc`] with a [`From`] but not the
21
/// other way around.  This means that interfacing code can make the type to
22
/// "forget" it originated from an [`InternCache`] but not the other way around,
23
/// i.e. cannot accidentally create fake entries that look like they came from an
24
/// [`InternCache`].  If one really has to circumvent this, then the
25
/// [`Intern::new_uncached_uninterned()`] method exists.
26
///
27
/// This ensures that interning is done everywhere that it's expected,
28
/// avoiding excess memory usage.
29
//
30
// Right now, this is the bare minimum of derives; it may need more in the
31
// future.  If so, just add them.
32
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Display, Into, Deref)]
33
pub struct Intern<T: ?Sized>(Arc<T>);
34

            
35
impl<T: ?Sized> Intern<T> {
36
    /// Creates an [`Intern`] from an arbitrary [`Arc`].
37
    ///
38
    /// The use of this is generally discouraged, as it effectively destroys
39
    /// the boundary of implying that certain cache entries come from the
40
    /// [`InternCache`].
41
    pub fn new_uncached_uninterned(value: Arc<T>) -> Intern<T> {
42
        Intern(value)
43
    }
44
}
45

            
46
// Some Arti code is pretty keen on using &Arc<T>.
47
impl<'a, T: ?Sized> From<&'a Intern<T>> for &'a Arc<T> {
48
18338790
    fn from(value: &'a Intern<T>) -> Self {
49
18338790
        &value.0
50
18338790
    }
51
}
52

            
53
/// Offers access to globally available cache for [`InternCache`].
54
///
55
/// Typically derived using [`crate::derive_deftly_template_GloballyInternable`].
56
pub trait GloballyInternable: Sized {
57
    /// Returns a reference to the global cache instance of this type.
58
    ///
59
    /// Implemented by implementors of this trait.
60
    /// Users of the trait should usually use [`GloballyInternable::into_intern()`].
61
    fn intern_cache() -> &'static InternCache<Self>;
62

            
63
    /// Places `self` into the global cache.
64
    ///
65
    /// Please use this instead of `T::intern_cache().intern(value)`.
66
1425314
    fn into_intern(self) -> Intern<Self>
67
1425314
    where
68
1425314
        Self: Eq + Hash + 'static,
69
    {
70
1425314
        Self::intern_cache().intern(self)
71
1425314
    }
72
}
73

            
74
define_derive_deftly! {
75
    /// Implement the [`GloballyInternable`] trait for a specific type.
76
    ///
77
    /// The implementation in itself is trivial and straightforward with this
78
    /// macro primarily serving as a convenience method.
79
    export GloballyInternable for struct:
80

            
81
    impl $crate::intern::GloballyInternable for $ttype {
82
1425314
        fn intern_cache() -> &'static $crate::intern::InternCache<Self> {
83
            static S: $crate::intern::InternCache::<$ttype> = $crate::intern::InternCache::new();
84
            &S
85
        }
86
    }
87
}
88

            
89
/// An InternCache is a lazily-constructed weak set of objects.
90
///
91
/// Let's break that down!  It's "lazily constructed" because it
92
/// doesn't actually allocate anything until you use it for the first
93
/// time.  That allows it to have a const [`new`](InternCache::new)
94
/// method, so you can make these static.
95
///
96
/// It's "weak" because it only holds weak references to its objects;
97
/// once every strong reference is gone, the object is unallocated.
98
/// Later, the hash entry is (lazily) removed.
99
pub struct InternCache<T: ?Sized> {
100
    /// Underlying hashset for interned objects
101
    //
102
    // TODO: If WeakHashSet::new is someday const, we can do away with OnceLock here.
103
    cache: OnceLock<Mutex<WeakHashSet<Weak<T>>>>,
104
}
105

            
106
impl<T: ?Sized> InternCache<T> {
107
    /// Create a new, empty, InternCache.
108
4
    pub const fn new() -> Self {
109
4
        InternCache {
110
4
            cache: OnceLock::new(),
111
4
        }
112
4
    }
113
}
114

            
115
impl<T: ?Sized> Default for InternCache<T> {
116
    fn default() -> Self {
117
        Self::new()
118
    }
119
}
120

            
121
impl<T: Eq + Hash + ?Sized> InternCache<T> {
122
    /// Helper: initialize the cache if needed, then lock it.
123
2058818
    fn cache(&self) -> MutexGuard<'_, WeakHashSet<Weak<T>>> {
124
2058818
        let cache = self.cache.get_or_init(|| Mutex::new(WeakHashSet::new()));
125
2058818
        cache.lock().expect("Poisoned lock lock for cache")
126
2058818
    }
127
}
128

            
129
impl<T: Eq + Hash> InternCache<T> {
130
    /// Intern a given value into this cache.
131
    ///
132
    /// If `value` is already stored in this cache, we return a
133
    /// reference to the stored value.  Otherwise, we insert `value`
134
    /// into the cache, and return that.
135
2058808
    pub fn intern(&self, value: T) -> Intern<T> {
136
2058808
        let mut cache = self.cache();
137
2058808
        if let Some(pp) = cache.get(&value) {
138
1978416
            Intern(pp)
139
        } else {
140
80392
            let arc = Arc::new(value);
141
80392
            cache.insert(Arc::clone(&arc));
142
80392
            Intern(arc)
143
        }
144
2058808
    }
145
}
146

            
147
impl<T: Hash + Eq + ?Sized> InternCache<T> {
148
    /// Intern an object by reference.
149
    ///
150
    /// Works with unsized types, but requires that the reference implements
151
    /// `Into<Arc<T>>`.
152
10
    pub fn intern_ref<'a, V>(&self, value: &'a V) -> Intern<T>
153
10
    where
154
10
        V: Hash + Eq + ?Sized,
155
10
        &'a V: Into<Arc<T>>,
156
10
        T: std::borrow::Borrow<V>,
157
    {
158
10
        let mut cache = self.cache();
159
10
        if let Some(arc) = cache.get(value) {
160
2
            Intern(arc)
161
        } else {
162
8
            let arc = value.into();
163
8
            cache.insert(Arc::clone(&arc));
164
8
            Intern(arc)
165
        }
166
10
    }
167
}
168

            
169
#[cfg(test)]
170
mod test {
171
    // @@ begin test lint list maintained by maint/add_warning @@
172
    #![allow(clippy::bool_assert_comparison)]
173
    #![allow(clippy::clone_on_copy)]
174
    #![allow(clippy::dbg_macro)]
175
    #![allow(clippy::mixed_attributes_style)]
176
    #![allow(clippy::print_stderr)]
177
    #![allow(clippy::print_stdout)]
178
    #![allow(clippy::single_char_pattern)]
179
    #![allow(clippy::unwrap_used)]
180
    #![allow(clippy::unchecked_time_subtraction)]
181
    #![allow(clippy::useless_vec)]
182
    #![allow(clippy::needless_pass_by_value)]
183
    #![allow(clippy::string_slice)] // See arti#2571
184
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
185
    use super::*;
186

            
187
    #[test]
188
    fn interning_by_value() {
189
        // "intern" case.
190
        let c: InternCache<String> = InternCache::new();
191

            
192
        let s1: Arc<String> = c.intern("abc".to_string()).into();
193
        let s2 = c.intern("def".to_string()).into();
194
        let s3 = c.intern("abc".to_string()).into();
195
        assert!(Arc::ptr_eq(&s1, &s3));
196
        assert!(!Arc::ptr_eq(&s1, &s2));
197
        assert_eq!(s2.as_ref(), "def");
198
        assert_eq!(s3.as_ref(), "abc");
199
    }
200

            
201
    #[test]
202
    fn interning_by_ref() {
203
        // "intern" case.
204
        let c: InternCache<str> = InternCache::new();
205

            
206
        let s1: Arc<str> = c.intern_ref("abc").into();
207
        let s2 = c.intern_ref("def").into();
208
        let s3 = c.intern_ref("abc").into();
209
        assert!(Arc::ptr_eq(&s1, &s3));
210
        assert!(!Arc::ptr_eq(&s1, &s2));
211
        assert_eq!(&*s2, "def");
212
        assert_eq!(&*s3, "abc");
213
    }
214
}