1
//! Methods for storing and loading directory information from disk.
2
//!
3
//! We have code implemented for a flexible storage format based on sqlite.
4

            
5
// (There was once a read-only format based on the C tor implementation's
6
// storage: Search the git history for tor-dirmgr/src/storage/legacy.rs
7
// if you ever need to reinstate it.)
8

            
9
use tor_netdoc::doc::authcert::AuthCertKeyIds;
10
use tor_netdoc::doc::microdesc::MdDigest;
11
use tor_netdoc::doc::netstatus::{ConsensusFlavor, ProtoStatuses};
12

            
13
#[cfg(feature = "routerdesc")]
14
use tor_netdoc::doc::routerdesc::RdDigest;
15

            
16
#[cfg(feature = "bridge-client")]
17
pub(crate) use tor_guardmgr::bridge::BridgeConfig;
18

            
19
use crate::docmeta::{AuthCertMeta, ConsensusMeta};
20
use crate::{Error, Result};
21
use std::cell::RefCell;
22
use std::collections::HashMap;
23
use std::fs::File;
24
use std::io::Result as IoResult;
25
use std::str::Utf8Error;
26
use std::time::SystemTime;
27
use time::Duration;
28

            
29
pub(crate) mod sqlite;
30

            
31
pub(crate) use sqlite::SqliteStore;
32

            
33
/// Convenient Sized & dynamic [`Store`]
34
pub(crate) type DynStore = Box<dyn Store>;
35

            
36
/// A document returned by a directory manager.
37
///
38
/// This document may be in memory, or may be mapped from a cache.  It is
39
/// not necessarily valid UTF-8.
40
pub struct DocumentText {
41
    /// The underlying InputString.  We only wrap this type to make it
42
    /// opaque to other crates, so they don't have to worry about the
43
    /// implementation details.
44
    s: InputString,
45
}
46

            
47
impl From<InputString> for DocumentText {
    /// Wrap an `InputString` as a `DocumentText`, taking ownership of it.
    fn from(s: InputString) -> DocumentText {
        Self { s }
    }
}
52

            
53
impl AsRef<[u8]> for DocumentText {
54
4
    fn as_ref(&self) -> &[u8] {
55
4
        self.s.as_ref()
56
4
    }
57
}
58

            
59
impl DocumentText {
60
    /// Try to return a view of this document as a string.
61
24
    pub(crate) fn as_str(&self) -> std::result::Result<&str, Utf8Error> {
62
24
        self.s.as_str_impl()
63
24
    }
64

            
65
    /// Create a new DocumentText holding the provided string.
66
36
    pub(crate) fn from_string(s: String) -> Self {
67
36
        DocumentText {
68
36
            s: InputString::Utf8(s),
69
36
        }
70
36
    }
71
}
72

            
73
/// Text-like data that we have loaded, or mapped, from a cache.
///
/// Only the `Utf8` variant is known up front to be valid UTF-8; the other
/// variants carry raw bytes along with a flag recording whether a UTF-8
/// check has already succeeded on them.
#[derive(Debug)]
pub(crate) enum InputString {
    /// A string already known to be valid UTF-8.
    Utf8(String),
    /// Bytes held in memory that were not validated on construction.
    UncheckedBytes {
        /// The raw contents.
        bytes: Vec<u8>,
        /// True once these bytes have passed a UTF-8 check.
        validated: RefCell<bool>,
    },
    /// Memory-mapped bytes that were not validated on construction.
    #[cfg(feature = "mmap")]
    MappedBytes {
        /// The raw mapped contents.
        bytes: memmap2::Mmap,
        /// True once these bytes have passed a UTF-8 check.
        validated: RefCell<bool>,
    },
}
95

            
96
impl InputString {
97
    /// Return a view of this InputString as a &str, if it is valid UTF-8.
98
32
    pub(crate) fn as_str(&self) -> Result<&str> {
99
32
        self.as_str_impl().map_err(Error::BadUtf8InCache)
100
32
    }
101

            
102
    /// Helper for [`Self::as_str()`], with unwrapped error type.
103
    #[allow(clippy::string_slice)] // TODO
104
56
    fn as_str_impl(&self) -> std::result::Result<&str, Utf8Error> {
105
        // It is not necessary to re-check the UTF8 every time
106
        // this function is called so remember the result
107
        // we got with `validated`
108

            
109
56
        match self {
110
24
            InputString::Utf8(s) => Ok(&s[..]),
111
8
            InputString::UncheckedBytes { bytes, validated } => {
112
8
                if *validated.borrow() {
113
2
                    unsafe { Ok(std::str::from_utf8_unchecked(&bytes[..])) }
114
                } else {
115
6
                    let result = std::str::from_utf8(&bytes[..])?;
116
2
                    validated.replace(true);
117
2
                    Ok(result)
118
                }
119
            }
120
            #[cfg(feature = "mmap")]
121
24
            InputString::MappedBytes { bytes, validated } => {
122
24
                if *validated.borrow() {
123
2
                    unsafe { Ok(std::str::from_utf8_unchecked(&bytes[..])) }
124
                } else {
125
22
                    let result = std::str::from_utf8(&bytes[..])?;
126
20
                    validated.replace(true);
127
20
                    Ok(result)
128
                }
129
            }
130
        }
131
56
    }
132
    /// Try to create an [`InputString`] from an open [`File`].
133
    ///
134
    /// We'll try to memory-map the file if we can.  If that fails, or if we
135
    /// were built without the `mmap` feature, we'll fall back to reading the
136
    /// file into memory.
137
26
    pub(crate) fn load(file: File) -> IoResult<Self> {
138
        #[cfg(feature = "mmap")]
139
        {
140
26
            let mapping = unsafe {
141
                // I'd rather have a safe option, but that's not possible
142
                // with mmap, since other processes could in theory replace
143
                // the contents of the file while we're using it.
144
26
                memmap2::Mmap::map(&file)
145
            };
146
26
            if let Ok(bytes) = mapping {
147
26
                return Ok(InputString::MappedBytes {
148
26
                    bytes,
149
26
                    validated: RefCell::new(false),
150
26
                });
151
            }
152
        }
153
        use std::io::{BufReader, Read};
154
        let mut f = BufReader::new(file);
155
        let mut result = String::new();
156
        f.read_to_string(&mut result)?;
157
        Ok(InputString::Utf8(result))
158
26
    }
159
}
160

            
161
impl AsRef<[u8]> for InputString {
162
12
    fn as_ref(&self) -> &[u8] {
163
12
        match self {
164
4
            InputString::Utf8(s) => s.as_ref(),
165
6
            InputString::UncheckedBytes { bytes, .. } => &bytes[..],
166
            #[cfg(feature = "mmap")]
167
2
            InputString::MappedBytes { bytes, .. } => &bytes[..],
168
        }
169
12
    }
170
}
171

            
172
impl From<String> for InputString {
    /// Wrap an owned `String` as the `Utf8` variant.
    fn from(s: String) -> InputString {
        Self::Utf8(s)
    }
}
177

            
178
impl From<Vec<u8>> for InputString {
179
6
    fn from(bytes: Vec<u8>) -> InputString {
180
6
        InputString::UncheckedBytes {
181
6
            bytes,
182
6
            validated: RefCell::new(false),
183
6
        }
184
6
    }
185
}
186

            
187
/// Configuration of expiration of each element of a [`Store`].
188
pub(crate) struct ExpirationConfig {
189
    /// How long to keep router descriptors.
190
    ///
191
    /// This timeout is measured since the publication date of the router
192
    /// descriptor.
193
    ///
194
    /// TODO(nickm): We may want a better approach in the future; see notes in
195
    /// `EXPIRATION_DEFAULTS`.
196
    pub(super) router_descs: Duration,
197
    /// How long to keep unlisted microdescriptors.
198
    ///
199
    /// This timeout counts the amount of time since a microdescriptor is no
200
    /// longer listed in a live consensus. Shorter values save storage at the
201
    /// expense of extra bandwidth spent re-downloading microdescriptors; higher
202
    /// values save bandwidth at the expense of storage used to store old
203
    /// microdescriptors that might become listed again.
204
    pub(super) microdescs: Duration,
205
    /// How long to keep expired authority certificate.
206
    pub(super) authcerts: Duration,
207
    /// How long to keep expired consensus.
208
    pub(super) consensuses: Duration,
209
}
210

            
211
/// Configuration of expiration shared between [`Store`] implementations.
212
pub(crate) const EXPIRATION_DEFAULTS: ExpirationConfig = {
213
    ExpirationConfig {
214
        // TODO: This is the value that C Tor uses here, but it may be desirable
215
        // to adjust it depending on what we find in practice.  For relays,
216
        // instead of looking at publication date, we might want to use an
217
        // approach more similar to the "last-listed" approach taken by
218
        // microdescriptors.  For bridges, we can keep descriptors for a longer
219
        // time.  In either case, we may be able to discard all but the most
220
        // recent descriptor from each identity.
221
        router_descs: Duration::days(5),
222
        // This value is a compromise between saving bandwidth (by not having to
223
        // re-download microdescs) and saving space (by not having to store too
224
        // many microdescs).  It's the same one that C tor uses; experiments on
225
        // 2022 data suggest that it winds up using only 1% more microdesc dl
226
        // bandwidth than strictly necessary, at the cost of storing 40% more
227
        // microdescriptors than will be immediately useful at any given time.
228
        microdescs: Duration::days(7),
229
        authcerts: Duration::ZERO,
230
        consensuses: Duration::days(2),
231
    }
232
};
233

            
234
/// Representation of a storage.
235
///
236
/// When creating an instance of this [`Store`], it should try to grab the lock during
237
/// initialization (`is_readonly() iff some other implementation grabbed it`).
238
pub(crate) trait Store: Send + 'static {
239
    /// Return true if this [`Store`] is opened in read-only mode.
240
    fn is_readonly(&self) -> bool;
241
    /// Try to upgrade from a read-only connection to a read-write connection.
242
    ///
243
    /// Return true on success; false if another process had the lock.
244
    fn upgrade_to_readwrite(&mut self) -> Result<bool>;
245

            
246
    /// Delete all completely-expired objects from the database.
247
    ///
248
    /// This is pretty conservative, and only removes things that are
249
    /// definitely past their good-by date.
250
    fn expire_all(&mut self, expiration: &ExpirationConfig) -> Result<()>;
251

            
252
    /// Load the latest consensus from disk.
253
    ///
254
    /// If `pending` is given, we will only return a consensus with
255
    /// the given "pending" status.  (A pending consensus doesn't have
256
    /// enough descriptors yet.)  If `pending_ok` is None, we'll
257
    /// return a consensus with any pending status.
258
    fn latest_consensus(
259
        &self,
260
        flavor: ConsensusFlavor,
261
        pending: Option<bool>,
262
    ) -> Result<Option<InputString>>;
263
    /// Return the information about the latest non-pending consensus,
264
    /// including its valid-after time and digest.
265
    fn latest_consensus_meta(&self, flavor: ConsensusFlavor) -> Result<Option<ConsensusMeta>>;
266
    /// Try to read the consensus corresponding to the provided metadata object.
267
    #[cfg(test)]
268
    fn consensus_by_meta(&self, cmeta: &ConsensusMeta) -> Result<InputString>;
269
    /// Try to read the consensus whose SHA3-256 digests is the provided
270
    /// value, and its metadata.
271
    fn consensus_by_sha3_digest_of_signed_part(
272
        &self,
273
        d: &[u8; 32],
274
    ) -> Result<Option<(InputString, ConsensusMeta)>>;
275
    /// Write a consensus to disk.
276
    fn store_consensus(
277
        &mut self,
278
        cmeta: &ConsensusMeta,
279
        flavor: ConsensusFlavor,
280
        pending: bool,
281
        contents: &str,
282
    ) -> Result<()>;
283
    /// Mark the consensus generated from `cmeta` as no longer pending.
284
    fn mark_consensus_usable(&mut self, cmeta: &ConsensusMeta) -> Result<()>;
285
    /// Remove the consensus generated from `cmeta`.
286
    //
287
    // Nothing uses this yet; removal is handled from `expire_all`.
288
    #[allow(dead_code)] // see also allow on REMOVE_CONSENSUS
289
    fn delete_consensus(&mut self, cmeta: &ConsensusMeta) -> Result<()>;
290

            
291
    /// Read all of the specified authority certs from the cache.
292
    fn authcerts(&self, certs: &[AuthCertKeyIds]) -> Result<HashMap<AuthCertKeyIds, String>>;
293
    /// Save a list of authority certificates to the cache.
294
    fn store_authcerts(&mut self, certs: &[(AuthCertMeta, &str)]) -> Result<()>;
295

            
296
    /// Read all the microdescriptors listed in `input` from the cache.
297
    fn microdescs(&self, digests: &[MdDigest]) -> Result<HashMap<MdDigest, String>>;
298
    /// Store every microdescriptor in `input` into the cache, and say that
299
    /// it was last listed at `when`.
300
    fn store_microdescs(&mut self, digests: &[(&str, &MdDigest)], when: SystemTime) -> Result<()>;
301
    /// Update the `last-listed` time of every microdescriptor in
302
    /// `input` to `when` or later.
303
    fn update_microdescs_listed(&mut self, digests: &[MdDigest], when: SystemTime) -> Result<()>;
304

            
305
    /// Read all the microdescriptors listed in `input` from the cache.
306
    ///
307
    /// Only available when the `routerdesc` feature is present.
308
    #[cfg(feature = "routerdesc")]
309
    fn routerdescs(&self, digests: &[RdDigest]) -> Result<HashMap<RdDigest, String>>;
310
    /// Store every router descriptors in `input` into the cache.
311
    #[cfg(feature = "routerdesc")]
312
    #[allow(unused)]
313
    fn store_routerdescs(&mut self, digests: &[(&str, SystemTime, &RdDigest)]) -> Result<()>;
314

            
315
    /// Look up a cached bridge descriptor.
316
    #[cfg(feature = "bridge-client")]
317
    fn lookup_bridgedesc(&self, bridge: &BridgeConfig) -> Result<Option<CachedBridgeDescriptor>>;
318

            
319
    /// Store a cached bridge descriptor.
320
    ///
321
    /// This entry will be deleted some time after `until`
322
    /// (but the caller is not allowed to rely on either timely deletion,
323
    /// or retention until that time).
324
    #[cfg(feature = "bridge-client")]
325
    fn store_bridgedesc(
326
        &mut self,
327
        bridge: &BridgeConfig,
328
        entry: CachedBridgeDescriptor,
329
        until: SystemTime,
330
    ) -> Result<()>;
331

            
332
    /// Delete a cached bridge descriptor for this bridge.
333
    ///
334
    /// It's not an error if it's not present.
335
    #[cfg(feature = "bridge-client")]
336
    // Nothing uses this yet; removal is handled from `expire_all`.
337
    #[allow(dead_code)] // see also allow on DELETE_BRIDGEDESC
338
    fn delete_bridgedesc(&mut self, bridge: &BridgeConfig) -> Result<()>;
339

            
340
    /// Try to update our cached protocol recommendations to those listed in `protocols`.
341
    fn update_protocol_recommendations(
342
        &mut self,
343
        valid_after: SystemTime,
344
        protocols: &ProtoStatuses,
345
    ) -> Result<()>;
346

            
347
    /// Return our most recent cached protocol recommendations.
348
    fn cached_protocol_recommendations(&self) -> Result<Option<(SystemTime, ProtoStatuses)>>;
349
}
350

            
351
/// An entry in the bridge descriptor cache.
#[derive(Clone, Debug)]
#[cfg_attr(not(feature = "bridge-client"), allow(dead_code))]
pub(crate) struct CachedBridgeDescriptor {
    /// The time at which we fetched this descriptor.
    pub(crate) fetched: SystemTime,

    /// The text of the document, as we fetched it.
    pub(crate) document: String,
}
361

            
362
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_time_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    #![allow(clippy::string_slice)] // See arti#2571
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
    use super::*;
    use tempfile::tempdir;

    /// In-memory `InputString`s built from a `String` and from raw bytes.
    #[test]
    fn strings() {
        let from_text = InputString::from("Hello world".to_string());
        assert_eq!(from_text.as_ref(), b"Hello world");
        // Ask twice: the answer must not change between calls.
        for _ in 0..2 {
            assert_eq!(from_text.as_str().unwrap(), "Hello world");
        }

        let from_bytes = InputString::from(b"Hello world".to_vec());
        assert_eq!(from_bytes.as_ref(), b"Hello world");
        // Ask twice: the second call finds the `validated` flag already set.
        for _ in 0..2 {
            assert_eq!(from_bytes.as_str().unwrap(), "Hello world");
        }

        // bad utf-8
        let invalid = InputString::from(b"Hello \xff world".to_vec());
        assert_eq!(invalid.as_ref(), b"Hello \xff world");
        assert!(invalid.as_str().is_err());
    }

    /// `InputString`s loaded from files on disk.
    #[test]
    fn files() {
        let td = tempdir().unwrap();

        let good_path = td.path().join("goodstr");
        std::fs::write(&good_path, "This is a reasonable file.\n").unwrap();
        let loaded = InputString::load(File::open(good_path).unwrap()).unwrap();
        // Ask twice: the answer must not change between calls.
        for _ in 0..2 {
            assert_eq!(loaded.as_str().unwrap(), "This is a reasonable file.\n");
        }
        assert_eq!(loaded.as_ref(), b"This is a reasonable file.\n");

        let bad_path = td.path().join("badutf8");
        std::fs::write(&bad_path, b"Not good \xff UTF-8.\n").unwrap();
        let loaded = InputString::load(File::open(bad_path).unwrap());
        // Either loading itself fails, or the later string conversion does.
        assert!(loaded.is_err() || loaded.unwrap().as_str().is_err());
    }

    /// `DocumentText` wrapping both valid and invalid UTF-8.
    #[test]
    fn doctext() {
        let dt = DocumentText::from(InputString::from("Hello universe".to_string()));
        assert_eq!(dt.as_ref(), b"Hello universe");
        // Ask twice: the answer must not change between calls.
        for _ in 0..2 {
            assert_eq!(dt.as_str(), Ok("Hello universe"));
        }

        let dt = DocumentText::from(InputString::from(b"Hello \xff universe".to_vec()));
        assert_eq!(dt.as_ref(), b"Hello \xff universe");
        assert!(dt.as_str().is_err());
    }
}