1
//! Key rotation tasks of the relay.
2

            
3
use anyhow::Context;
4
use std::{
5
    sync::Arc,
6
    time::{Duration, SystemTime},
7
};
8
use tor_basic_utils::rand_hostname;
9
use tor_cert::x509::TlsKeyAndCert;
10
use tor_chanmgr::ChanMgr;
11
use tor_error::internal;
12
use tor_key_forge::ToEncodableCert;
13
use tor_keymgr::{
14
    CertSpecifierPattern, KeyCertificateSpecifier, KeyMgr, KeyPath, KeySpecifier,
15
    KeySpecifierPattern, Keygen, KeystoreEntry, KeystoreSelector, ToEncodableKey,
16
};
17
use tor_proto::RelayChannelAuthMaterial;
18
use tor_proto::relay::CreateRequestHandler;
19
use tor_relay_crypto::{RelaySigningKeyCert, gen_link_cert, gen_signing_cert, gen_tls_cert};
20

            
21
use crate::keys::{
22
    RelayIdentityKeypairSpecifier, RelayIdentityRsaKeypairSpecifier,
23
    RelayLinkSigningKeypairSpecifier, RelayLinkSigningKeypairSpecifierPattern,
24
    RelayNtorKeypairSpecifier, RelayNtorKeypairSpecifierPattern, RelaySigningKeyCertSpecifier,
25
    RelaySigningKeyCertSpecifierPattern, RelaySigningKeypairSpecifier,
26
    RelaySigningKeypairSpecifierPattern, RelaySigningPublicKeySpecifier, Timestamp,
27
};
28
use tor_relay_crypto::pk::{
29
    RelayIdentityKeypair, RelayIdentityRsaKeypair, RelayLinkSigningKeypair, RelayNtorKeypair,
30
    RelaySigningKeypair,
31
};
32
use tor_rtcompat::{Runtime, SleepProviderExt};
33

            
34
use smallvec::SmallVec;
35

            
36
/// Number of seconds in one hour.
const SECS_PER_HOUR: u64 = 60 * 60;
/// Number of seconds in one day.
const SECS_PER_DAY: u64 = 24 * SECS_PER_HOUR;

/// Buffer time before key expiry to trigger rotation. This ensures we rotate slightly before the
/// key actually expires rather than right at or after expiry.
///
/// C-tor uses 3 hours for the link/auth key and 1 day for the signing key. Let's use 3 hours here,
/// it should be plenty to make it happen even if hiccups happen.
const KEY_ROTATION_EXPIRE_BUFFER: Duration = Duration::from_secs(3 * SECS_PER_HOUR);

// The following expiry durations have been taken from C-tor.

/// Lifetime of the link authentication key (KP_link_ed) certificate.
const LINK_CERT_LIFETIME: Duration = Duration::from_secs(2 * SECS_PER_DAY);
/// Lifetime of the relay signing key (KP_relaysign_ed) certificate.
const SIGNING_KEY_CERT_LIFETIME: Duration = Duration::from_secs(30 * SECS_PER_DAY);
/// Lifetime of the RSA->Ed25519 identity cross-certificate (six 30-day months).
const RSA_CROSSCERT_LIFETIME: Duration = Duration::from_secs(6 * 30 * SECS_PER_DAY);
/// Lifetime of the ntor circuit extension key (KP_ntor).
//
// TODO(relay): we should be using the "onion-key-rotation-days" consensus param
// instead of this hard-coded value.
const NTOR_KEY_LIFETIME: Duration = Duration::from_secs(28 * SECS_PER_DAY);

/// Default grace period for acceptance of an onion key (KP_ntor).
///
/// This represents the amount of time we are still willing to use this key
/// after it expires.
//
// TODO(relay): we should be using the "onion-key-grace-period-days" consensus param
// instead of this hard-coded value.
const NTOR_KEY_GRACE_PERIOD: Duration = Duration::from_secs(7 * SECS_PER_DAY);
65

            
66
/// The result of an action that affects the relay keys in the keystore.
///
/// Each flag tracks one group of key material independently, so that callers
/// can react only to the group that actually changed.
/// The [`Default`] value has both flags cleared (nothing changed).
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
struct KeyChange {
    /// Whether the chan auth material has changed.
    chan_auth: bool,
    /// Whether the ntor keys have changed.
    ntor: bool,
}

impl KeyChange {
    /// The combined result of two [`KeyChange`]s.
    ///
    /// A flag is set in the returned value if it is set in `self`, in `other`, or in both.
    /// Neither input is modified.
    #[must_use]
    fn or(&self, other: &KeyChange) -> KeyChange {
        KeyChange {
            chan_auth: self.chan_auth || other.chan_auth,
            ntor: self.ntor || other.ntor,
        }
    }
}
84

            
85
/// Build a fresh [`RelayChannelAuthMaterial`] object using a [`KeyMgr`].
86
///
87
/// The link cert and TLS certs are created in this function.
88
/// The signing key certificate is retrieved from the keymgr.
89
///
90
/// This function assumes that all required keys,
91
/// as well as the signing key certificate,
92
/// are already in the keystore.
93
4
fn build_proto_relay_auth_material(
94
4
    now: SystemTime,
95
4
    keymgr: &KeyMgr,
96
4
) -> anyhow::Result<RelayChannelAuthMaterial> {
97
4
    let mut rng = tor_llcrypto::rng::CautiousRng;
98

            
99
    // Get the identity keypairs.
100
4
    let rsa_id_kp: RelayIdentityRsaKeypair = keymgr
101
4
        .get(&RelayIdentityRsaKeypairSpecifier::new())
102
4
        .context("Failed to get RSA identity from key manager")?
103
4
        .context("Missing RSA identity")?;
104
4
    let ed_id_kp: RelayIdentityKeypair = keymgr
105
4
        .get(&RelayIdentityKeypairSpecifier::new())
106
4
        .context("Failed to get Ed25519 identity from key manager")?
107
4
        .context("Missing Ed25519 identity")?;
108
    // We have to list match here because the key specifier here uses a valid_until. We don't know
109
    // what it is so we list and take the first one.
110
4
    let link_sign_kp: RelayLinkSigningKeypair = keymgr
111
4
        .get_entry(
112
4
            keymgr
113
4
                .list_matching(&RelayLinkSigningKeypairSpecifierPattern::new_any().arti_pattern()?)?
114
4
                .first()
115
4
                .context("No store entry for link authentication key")?,
116
        )
117
4
        .context("Failed to get link authentication key from key manager")?
118
4
        .context("Missing link authentication key")?;
119
4
    let kp_relaysign_id: RelaySigningKeypair = keymgr
120
4
        .get_entry(
121
4
            keymgr
122
4
                .list_matching(&RelaySigningKeypairSpecifierPattern::new_any().arti_pattern()?)?
123
4
                .first()
124
4
                .context("No store entry for signing key")?,
125
        )
126
4
        .context("Failed to get signing key from key manager")?
127
4
        .context("Missing signing key")?;
128
4
    let cert_id_sign_ed: RelaySigningKeyCert = keymgr
129
4
        .get_cert_entry::<RelaySigningKeyCertSpecifier, _, _>(
130
4
            keymgr
131
4
                .list_matching(&RelaySigningKeyCertSpecifierPattern::new_any().arti_pattern()?)?
132
4
                .first()
133
4
                .context("No store entry for signing key cert")?,
134
4
            &RelayIdentityKeypairSpecifier::new(),
135
        )
136
4
        .context("Failed to get signing key cert from key manager")?
137
4
        .context("Missing signing key cert")?;
138

            
139
    // TLS key and cert. Random hostname like C-tor. We re-use the issuer_hostname for the RSA
140
    // legacy cert.
141
4
    let issuer_hostname = rand_hostname::random_hostname(&mut rng);
142
4
    let subject_hostname = rand_hostname::random_hostname(&mut rng);
143
4
    let tls_key_and_cert =
144
4
        TlsKeyAndCert::create(&mut rng, now, &issuer_hostname, &subject_hostname)
145
4
            .context("Failed to create TLS keys and certificates")?;
146

            
147
    // Create the RSA X509 certificate.
148
4
    let cert_id_x509_rsa = tor_cert::x509::create_legacy_rsa_id_cert(
149
4
        &mut rng,
150
4
        now,
151
4
        &issuer_hostname,
152
4
        rsa_id_kp.keypair(),
153
    )
154
4
    .context("Failed to create legacy RSA identity certificate")?;
155

            
156
4
    let cert_id_rsa = tor_cert::rsa::EncodedRsaCrosscert::encode_and_sign(
157
4
        rsa_id_kp.keypair(),
158
4
        &ed_id_kp.to_ed25519_id(),
159
4
        now + RSA_CROSSCERT_LIFETIME,
160
    )?;
161

            
162
    // Create the link cert and tls cert.
163
4
    let cert_sign_link_auth_ed =
164
4
        gen_link_cert(&kp_relaysign_id, &link_sign_kp, now + LINK_CERT_LIFETIME)?;
165
4
    let cert_sign_tls_ed = gen_tls_cert(
166
4
        &kp_relaysign_id,
167
4
        *tls_key_and_cert.link_cert_sha256(),
168
4
        now + LINK_CERT_LIFETIME,
169
    )?;
170

            
171
4
    Ok(RelayChannelAuthMaterial::new(
172
4
        &rsa_id_kp.public().into(),
173
4
        ed_id_kp.to_ed25519_id(),
174
4
        link_sign_kp,
175
4
        cert_id_sign_ed.to_encodable_cert(),
176
4
        cert_sign_tls_ed,
177
4
        cert_sign_link_auth_ed.to_encodable_cert(),
178
4
        cert_id_x509_rsa,
179
4
        cert_id_rsa,
180
4
        tls_key_and_cert,
181
4
    ))
182
4
}
183

            
184
/// Generate a key `K` directly into the key manager.
185
///
186
/// If the key already exists, the error is ignored as this could happen if the system time drifts
187
/// between the get and the generate.
188
48
fn generate_key<K>(keymgr: &KeyMgr, spec: &dyn KeySpecifier) -> Result<(), tor_keymgr::Error>
189
48
where
190
48
    K: ToEncodableKey,
191
48
    K::Key: Keygen,
192
{
193
48
    let mut rng = tor_llcrypto::rng::CautiousRng;
194

            
195
48
    match keymgr.generate::<K>(spec, KeystoreSelector::default(), &mut rng, false) {
196
48
        Ok(_) => {}
197
        // Key already existing can happen due to wall clock strangeness,
198
        // so simply ignore it.
199
        Err(tor_keymgr::Error::KeyAlreadyExists) => (),
200
        Err(e) => return Err(e),
201
    };
202
48
    Ok(())
203
48
}
204

            
205
/// Go through keystore entries matching `pattern` and remove any that are
206
/// expired according to `is_expired`.
207
///
208
/// Returns `(removed, min_remaining)` where `removed` indicates whether any entry was deleted and
209
/// `min_remaining` is the minimum `valid_until` of the entries that were kept (if any).
210
224
fn remove_expired<F, E>(
211
224
    now: SystemTime,
212
224
    keymgr: &KeyMgr,
213
224
    pattern: &tor_keymgr::KeyPathPattern,
214
224
    label: &'static str,
215
224
    expiry_from_keypath: F,
216
224
    is_expired: E,
217
224
) -> anyhow::Result<(bool, Option<SystemTime>)>
218
224
where
219
224
    F: Fn(&KeyPath) -> anyhow::Result<Timestamp>,
220
224
    E: Fn(&Timestamp, SystemTime) -> bool,
221
{
222
224
    let entries = keymgr.list_matching(pattern)?;
223
224
    let mut removed = false;
224
224
    let mut min_valid_until: Option<Timestamp> = None;
225

            
226
224
    for entry in entries {
227
136
        let valid_until = expiry_from_keypath(entry.key_path())?;
228
136
        if is_expired(&valid_until, now) {
229
36
            tracing::debug!("Expired {} in keymgr. Removing it.", label);
230
36
            keymgr.remove_entry(&entry)?;
231
36
            removed = true;
232
        } else {
233
            min_valid_until =
234
100
                Some(min_valid_until.map_or(valid_until, |current| current.min(valid_until)));
235
        }
236
    }
237

            
238
224
    Ok((removed, min_valid_until.map(SystemTime::from)))
239
224
}
240

            
241
/// Attempt to generate a key using the given [`KeySpecifier`].
242
///
243
/// Return true if generated else false.
244
112
fn try_generate_key<K, P, F>(
245
112
    keymgr: &KeyMgr,
246
112
    spec: &dyn KeySpecifier,
247
112
    should_generate: F,
248
112
) -> anyhow::Result<bool>
249
112
where
250
112
    K: ToEncodableKey,
251
112
    K::Key: Keygen,
252
112
    P: KeySpecifierPattern,
253
112
    F: Fn(&[KeystoreEntry]) -> anyhow::Result<bool>,
254
{
255
112
    let mut generated = false;
256
112
    let mut rng = tor_llcrypto::rng::CautiousRng;
257
112
    let entries = keymgr.list_matching(&P::new_any().arti_pattern()?)?;
258
112
    if should_generate(&entries)? {
259
72
        let _ = keymgr.get_or_generate::<K>(spec, KeystoreSelector::default(), &mut rng)?;
260
72
        generated = true;
261
40
    }
262

            
263
112
    Ok(generated)
264
112
}
265

            
266
/// Attempt to generate a key and cert using the given [`KeyCertificateSpecifier`] which is signed
267
/// by the given [`KeySpecifier]` in `signing_key_spec`.
268
///
269
/// The `make_certificate` is used to generate the certificate stored in the [`KeyMgr`].
270
///
271
/// Return true if generated else false.
272
56
fn try_generate_key_cert<K, C, P>(
273
56
    keymgr: &KeyMgr,
274
56
    cert_spec: &dyn KeyCertificateSpecifier,
275
56
    signing_key_spec: &dyn KeySpecifier,
276
56
    make_certificate: impl FnOnce(&K, &<C as ToEncodableCert<K>>::SigningKey) -> C,
277
56
) -> anyhow::Result<bool>
278
56
where
279
56
    K: ToEncodableKey,
280
56
    K::Key: Keygen,
281
56
    C: ToEncodableCert<K>,
282
56
    P: CertSpecifierPattern,
283
{
284
56
    let mut generated = false;
285
56
    let mut rng = tor_llcrypto::rng::CautiousRng;
286
56
    let entries = keymgr.list_matching(&P::new_any().arti_pattern()?)?;
287
56
    if entries.is_empty() {
288
32
        let _ = keymgr.get_or_generate_key_and_cert::<K, C>(
289
32
            cert_spec,
290
32
            signing_key_spec,
291
32
            make_certificate,
292
32
            KeystoreSelector::default(),
293
32
            &mut rng,
294
        )?;
295
32
        generated = true;
296
24
    }
297

            
298
56
    Ok(generated)
299
56
}
300

            
301
/// Try to generate all keys and certs needed for a relay.
302
///
303
/// This tries to generate the [`RelayLinkSigningKeypair`] and the [`RelaySigningKeypair`] +
304
/// [`RelaySigningKeyCert`]. Note that identity keys are NOT generated within this function, it is
305
/// only attempted once at boot time. This is so we avoid retrying to generate them at each key
306
/// rotation as those identity keys never rotate.
307
///
308
/// Returns the minimum valid until value if a key was generated. Else, a None value indicates that
309
/// no key was generated.
310
56
fn try_generate_all(
311
56
    now: SystemTime,
312
56
    keymgr: &KeyMgr,
313
56
) -> anyhow::Result<(KeyChange, Option<SystemTime>)> {
314
56
    let link_expiry = now + LINK_CERT_LIFETIME;
315
56
    let link_spec = RelayLinkSigningKeypairSpecifier::new(Timestamp::from(link_expiry));
316
56
    let link_generated =
317
56
        try_generate_key::<RelayLinkSigningKeypair, RelayLinkSigningKeypairSpecifierPattern, _>(
318
56
            keymgr,
319
56
            &link_spec,
320
56
            |entries: &[KeystoreEntry<'_>]| Ok(entries.is_empty()),
321
        )?;
322

            
323
56
    let cert_expiry = now + SIGNING_KEY_CERT_LIFETIME;
324

            
325
    // The make certificate function needed for the get_or_generate_key_and_cert(). It is a closure
326
    // so we can capture the runtime wallclock.
327
56
    let make_signing_cert = |subject_key: &RelaySigningKeypair,
328
32
                             signing_key: &RelayIdentityKeypair| {
329
32
        gen_signing_cert(signing_key, subject_key, cert_expiry)
330
32
            .expect("failed to generate relay signing cert")
331
32
    };
332

            
333
    // We either get the existing one or generate this new one.
334
56
    let cert_spec = RelaySigningKeyCertSpecifier::new(RelaySigningPublicKeySpecifier::new(
335
56
        Timestamp::from(cert_expiry),
336
    ));
337
56
    let cert_generated = try_generate_key_cert::<
338
56
        RelaySigningKeypair,
339
56
        RelaySigningKeyCert,
340
56
        RelaySigningKeyCertSpecifierPattern,
341
56
    >(
342
56
        keymgr,
343
56
        &cert_spec,
344
56
        &RelayIdentityKeypairSpecifier::new(),
345
56
        make_signing_cert,
346
    )?;
347

            
348
56
    let ntor_expiry = now + NTOR_KEY_LIFETIME;
349
56
    let ntor_spec = RelayNtorKeypairSpecifier::new(Timestamp::from(ntor_expiry));
350

            
351
    // We generate a new ntor key if all existing keys are expired `now`
352
    // (without taking into account the grace period)
353
84
    let should_generate_ntor = |entries: &[KeystoreEntry<'_>]| {
354
56
        let mut all_expired = true;
355
56
        for entry in entries {
356
34
            let key_path = entry.key_path();
357
34
            let valid_until =
358
34
                SystemTime::from(RelayNtorKeypairSpecifier::try_from(key_path)?.valid_until);
359

            
360
            // If *all* the ntor keys are expired (but still within the grace period),
361
            // we want to generate a new ntor key.
362
            //
363
            // Note: this needs to take the KEY_ROTATION_EXPIRE_BUFFER into account
364
            // because the main loop will wake us KEY_ROTATION_EXPIRE_BUFFER
365
            // *before* the valid_until elapses
366
34
            if valid_until > now + KEY_ROTATION_EXPIRE_BUFFER {
367
24
                all_expired = false;
368
24
                break;
369
10
            }
370
        }
371

            
372
56
        Ok(all_expired)
373
56
    };
374

            
375
56
    let ntor_generated = try_generate_key::<RelayNtorKeypair, RelayNtorKeypairSpecifierPattern, _>(
376
56
        keymgr,
377
56
        &ntor_spec,
378
56
        should_generate_ntor,
379
    )?;
380

            
381
56
    let change = KeyChange {
382
56
        chan_auth: link_generated || cert_generated,
383
56
        ntor: ntor_generated,
384
    };
385

            
386
56
    Ok((
387
56
        change,
388
56
        [
389
56
            link_generated.then_some(link_expiry),
390
56
            cert_generated.then_some(cert_expiry),
391
56
            ntor_generated.then_some(ntor_expiry),
392
56
        ]
393
56
        .into_iter()
394
56
        .flatten()
395
56
        .min(),
396
56
    ))
397
56
}
398

            
399
/// Remove any expired keys (and certs) that are expired.
400
///
401
/// Return (`removed`, `next_expiry`) where the `removed` indicates if at least one key has been
402
/// removed because it was expired. The `next_expiry` is the minimum value of all valid_until which
403
/// indicates the next closest expiry time.
404
56
fn remove_expired_keys(
405
56
    now: SystemTime,
406
56
    keymgr: &KeyMgr,
407
56
) -> anyhow::Result<(KeyChange, Option<SystemTime>)> {
408
124
    let is_expired_with_buffer = |valid_until: &Timestamp, now| {
409
96
        *valid_until <= Timestamp::from(now + KEY_ROTATION_EXPIRE_BUFFER)
410
96
    };
411
56
    let (relaysign_removed, relaysign_expiry) = remove_expired(
412
56
        now,
413
56
        keymgr,
414
56
        &RelaySigningKeypairSpecifierPattern::new_any().arti_pattern()?,
415
        "key KP_relaysign_ed",
416
32
        |key_path| Ok(RelaySigningKeypairSpecifier::try_from(key_path)?.valid_until),
417
56
        is_expired_with_buffer,
418
    )?;
419
56
    let (link_removed, link_expiry) = remove_expired(
420
56
        now,
421
56
        keymgr,
422
56
        &RelayLinkSigningKeypairSpecifierPattern::new_any().arti_pattern()?,
423
        "key KP_link_ed",
424
32
        |key_path| Ok(RelayLinkSigningKeypairSpecifier::try_from(key_path)?.valid_until),
425
56
        is_expired_with_buffer,
426
    )?;
427

            
428
    // This should always be removed if the signing key above has been removed. However, we still
429
    // do a pass at the keystore considering the upcoming offline key feature that might have more
430
    // than one expired cert in the keystore.
431
56
    let (sign_cert_removed, sign_cert_expiry) = remove_expired(
432
56
        now,
433
56
        keymgr,
434
56
        &RelaySigningKeyCertSpecifierPattern::new_any().arti_pattern()?,
435
        "signing key cert",
436
32
        |key_path| {
437
32
            let spec: RelaySigningKeyCertSpecifier = key_path.try_into()?;
438
32
            let subject_key_path = KeyPath::Arti(spec.subject_key_specifier().arti_path()?);
439
32
            let subject_key_spec: RelaySigningPublicKeySpecifier =
440
32
                (&subject_key_path).try_into()?;
441
32
            Ok(subject_key_spec.valid_until)
442
32
        },
443
56
        is_expired_with_buffer,
444
    )?;
445

            
446
    // When deciding whether to remove the key,
447
    // we need to take into account the special grace period ntor keys have
448
    // (we need to keep the key around even if it's "expired",
449
    // because some clients might still be using an older consensus
450
    // and hence might not know about our new key yet).
451
76
    let is_expired_ntor = |valid_until: &Timestamp, now| {
452
        // Note: we need to take into account KEY_ROTATION_EXPIRE_BUFFER
453
        // because the main loop always subtracts KEY_ROTATION_EXPIRE_BUFFER
454
        // from the returned next_expiry, but ideally,
455
        // I don't think we should be using this buffer for the ntor keys,
456
        // because they have a grace period and don't get removed immediately
457
        // anyway
458
40
        *valid_until <= Timestamp::from(now - NTOR_KEY_GRACE_PERIOD + KEY_ROTATION_EXPIRE_BUFFER)
459
40
    };
460

            
461
56
    let (ntor_key_removed, ntor_key_expiry) = remove_expired(
462
56
        now,
463
56
        keymgr,
464
56
        &RelayNtorKeypairSpecifierPattern::new_any().arti_pattern()?,
465
        "key KP_ntor",
466
40
        |key_path| Ok(RelayNtorKeypairSpecifier::try_from(key_path)?.valid_until),
467
56
        is_expired_ntor,
468
    )?;
469

            
470
    // Have we at least removed one?
471
56
    let removed = KeyChange {
472
56
        chan_auth: relaysign_removed || link_removed || sign_cert_removed,
473
56
        ntor: ntor_key_removed,
474
    };
475

            
476
    // TODO: we could, in theory, return this from remove_expired(),
477
    // but I don't want to make it any more complicated than it already is,
478
    // especially for an operation that runs relatively infrequently.
479
56
    let ntor_key_count = keymgr
480
56
        .list_matching(&RelayNtorKeypairSpecifierPattern::new_any().arti_pattern()?)?
481
56
        .len();
482

            
483
    // This is a best effort check. There is no guarantee the
484
    // second key is the "successor" of this key,
485
    // but in general, it will be, unless an external process
486
    // is concurrently modifying the keystore
487
    // (which something we explicitly don't try to protect against).
488
    //
489
    // We could, in theory, check that the valid_until of the two
490
    // keys are adequately spaced, but in practice I don't think
491
    // it matters much.
492
56
    let next_key_exists = ntor_key_count >= 2;
493

            
494
    // Note: for each ntor key, we need to wake up twice
495
    //
496
    //   * at its expiry time, to generate the next ntor key
497
    //   * at its expiry time + GRACE_PERIOD, to remove the old ntor key
498
56
    let ntor_key_expiry = match ntor_key_expiry {
499
        None => {
500
            // We removed the last ntor key, the wakeup time will be
501
            // determined by try_generate_key() later
502
24
            None
503
        }
504
        // This special case may seem strange, but it's needed for
505
        // the specific scenario where there is only one ntor key
506
        // in the keystore with valid_until < now.
507
        //
508
        // Without it, there is no guarantee we will wake up at valid_until
509
        // to generate the new ntor key (when the key is generated,
510
        // we try to schedule a rotation task wakeup at valid_until,
511
        // but if the other keys have "sooner" `valid_until`s,
512
        // that wakeup will be lost.
513
28
        Some(valid_until) if !next_key_exists => {
514
            // The next key doesn't exist yet,
515
            // wake up at valid_until to generate it
516
28
            Some(valid_until)
517
        }
518
4
        Some(valid_until) => {
519
            // The next key exists, we only need to wake up
520
            // to garbage collect this one, after the grace period
521
            //
522
            // This avoids busy looping in the [valid_until, valid_until + grace_period]
523
            // time interval (if we don't add the grace period here, when
524
            // now = valid_until, we will keep waking up the main loop of the
525
            // key rotation task, and then not actually removing the key because
526
            // it's still within the grace period).
527
4
            Some(valid_until + NTOR_KEY_GRACE_PERIOD)
528
        }
529
    };
530

            
531
56
    let next_expiry = [
532
56
        relaysign_expiry,
533
56
        link_expiry,
534
56
        sign_cert_expiry,
535
56
        ntor_key_expiry,
536
56
    ]
537
56
    .into_iter()
538
56
    .flatten()
539
56
    .min();
540

            
541
56
    Ok((removed, next_expiry))
542
56
}
543

            
544
/// Attempt to rotate all keys except identity keys.
545
///
546
/// Returns (rotated, next_expiry) where `rotated` indicates if any key was rotated and
547
/// `next_expiry` is the earliest expiry time across all keys.
548
56
fn try_rotate_keys(now: SystemTime, keymgr: &KeyMgr) -> anyhow::Result<(KeyChange, SystemTime)> {
549
    // First do a pass to remove every expired key(s) or/and cert(s).
550
56
    let (have_removed, min_expiry) = remove_expired_keys(now, keymgr)?;
551

            
552
    // Then attempt to generate keys. If at least one was generated, we'll get the min expiry time
553
    // which we need to consider "rotated" so the caller can know that a new key appeared.
554
56
    let (generated, gen_min_expiry) = try_generate_all(now, keymgr)?;
555
56
    let have_rotated = have_removed.or(&generated);
556

            
557
    // We should never get no expiry time.
558
56
    let next_expiry = [min_expiry, gen_min_expiry]
559
56
        .into_iter()
560
56
        .flatten()
561
56
        .min()
562
56
        .ok_or(internal!("No relay keys after rotation task loop"))?;
563

            
564
56
    Ok((have_rotated, next_expiry))
565
56
}
566

            
567
/// Attempt to generate all keys. The list of keys is:
568
///
569
/// * Identity Ed25519 keypair [`RelayIdentityKeypair`].
570
/// * Identity RSA [`RelayIdentityRsaKeypair`].
571
/// * Relay signing keypair [`RelaySigningKeypair`].
572
/// * Relay link signing keypair [`RelayLinkSigningKeypair`].
573
/// * Relay ntor keypair [`RelayNtorKeypair`].
574
///
575
/// This function is only called when our relay bootstraps in order to attempt to generate any
576
/// missing keys or/and rotate expired keys.
577
4
pub(crate) fn try_generate_keys<R: Runtime>(
578
4
    runtime: &R,
579
4
    keymgr: &KeyMgr,
580
4
) -> anyhow::Result<RelayChannelAuthMaterial> {
581
4
    let now = runtime.wallclock();
582
    // Attempt to generate our identity keys (ed and RSA). Those keys DO NOT rotate. It won't be
583
    // replaced if they already exists.
584
4
    generate_key::<RelayIdentityKeypair>(keymgr, &RelayIdentityKeypairSpecifier::new())?;
585
4
    generate_key::<RelayIdentityRsaKeypair>(keymgr, &RelayIdentityRsaKeypairSpecifier::new())?;
586

            
587
    // Attempt to rotate the keys. Any missing keys (and cert) will be generated.
588
4
    let _ = try_rotate_keys(now, keymgr)?;
589

            
590
    // Now that we have our up-to-date keys, build the relay channel auth material object.
591
4
    build_proto_relay_auth_material(now, keymgr)
592
4
}
593
/// Task to rotate keys when they need to be rotated.
594
pub(crate) async fn rotate_keys_task<R: Runtime>(
595
    runtime: R,
596
    keymgr: Arc<KeyMgr>,
597
    chanmgr: Arc<ChanMgr<R>>,
598
    create_request_handler: Arc<CreateRequestHandler>,
599
) -> anyhow::Result<void::Void> {
600
    loop {
601
        let now = runtime.wallclock();
602
        // Attempt a rotation of all keys.
603
        let (have_rotated, next_expiry) = try_rotate_keys(now, &keymgr)?;
604
        if have_rotated.chan_auth {
605
            let auth_material = build_proto_relay_auth_material(now, &keymgr)?;
606
            chanmgr
607
                .set_relay_auth_material(Arc::new(auth_material))
608
                .context("Failed to set relay auth material on ChanMgr")?;
609
        }
610

            
611
        if have_rotated.ntor {
612
            // Any keys left in the keystore at this point are considered to be usable
613
            // (either because they are newly generated, or because they are still
614
            // within the grace period).
615
            let ntor_keys = keymgr
616
                .list_matching(&RelayNtorKeypairSpecifierPattern::new_any().arti_pattern()?)?
617
                .into_iter()
618
                .map(|entry| {
619
                    keymgr
620
                        .get_entry::<RelayNtorKeypair>(&entry)
621
                        .context("failed to retrieve ntor key")?
622
                        .context("ntor key disappeared?!")
623
                })
624
                .collect::<anyhow::Result<SmallVec<_>>>()?;
625

            
626
            create_request_handler.update_ntor_keys(ntor_keys);
627
        }
628

            
629
        // Sleep until the earliest key expiry minus buffer so we rotate before it expires.
630
        // If the subtraction would underflow, wake up immediately to rotate the expired key.
631
        let next_wake = next_expiry
632
            .checked_sub(KEY_ROTATION_EXPIRE_BUFFER)
633
            .unwrap_or(now);
634
        runtime.sleep_until_wallclock(next_wake).await;
635
    }
636
}
637

            
638
#[cfg(test)]
639
mod test {
640
    // @@ begin test lint list maintained by maint/add_warning @@
641
    #![allow(clippy::bool_assert_comparison)]
642
    #![allow(clippy::clone_on_copy)]
643
    #![allow(clippy::dbg_macro)]
644
    #![allow(clippy::mixed_attributes_style)]
645
    #![allow(clippy::print_stderr)]
646
    #![allow(clippy::print_stdout)]
647
    #![allow(clippy::single_char_pattern)]
648
    #![allow(clippy::unwrap_used)]
649
    #![allow(clippy::unchecked_time_subtraction)]
650
    #![allow(clippy::useless_vec)]
651
    #![allow(clippy::needless_pass_by_value)]
652
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
653

            
654
    use super::*;
655

            
656
    use crate::keys::{
657
        RelayLinkSigningKeypairSpecifierPattern, RelaySigningKeypairSpecifierPattern,
658
    };
659
    use tor_keymgr::{ArtiEphemeralKeystore, KeyMgrBuilder, KeySpecifierPattern};
660
    use tor_rtcompat::SleepProvider;
661
    use tor_rtmock::MockRuntime;
662

            
663
    /// Generate the non-rotating identity keys so the rest of the key machinery can run.
664
    fn setup_identity_keys(keymgr: &KeyMgr) {
665
        use crate::keys::{RelayIdentityKeypairSpecifier, RelayIdentityRsaKeypairSpecifier};
666
        use tor_relay_crypto::pk::{RelayIdentityKeypair, RelayIdentityRsaKeypair};
667
        generate_key::<RelayIdentityKeypair>(keymgr, &RelayIdentityKeypairSpecifier::new())
668
            .unwrap();
669
        generate_key::<RelayIdentityRsaKeypair>(keymgr, &RelayIdentityRsaKeypairSpecifier::new())
670
            .unwrap();
671
    }
672

            
673
    /// Initialize test basics that is runtime and a KeyMgr.
674
    fn new_keymgr() -> KeyMgr {
675
        let store = Box::new(ArtiEphemeralKeystore::new("test".to_string()));
676
        KeyMgrBuilder::default()
677
            .primary_store(store)
678
            .build()
679
            .unwrap()
680
    }
681

            
682
    /// Initial setup of a test. Build a mock runtime, key manager and setup identity keys.
683
    fn setup() -> KeyMgr {
684
        let keymgr = new_keymgr();
685
        setup_identity_keys(&keymgr);
686
        keymgr
687
    }
688

            
689
    /// Return a [`Timestamp`] given a [`SystemTime`] rounded down to its nearest second.
690
    ///
691
    /// In other words, the `tv_nsec` of a [`SystemTime`] is dropped.
692
    fn to_timestamp_in_secs(valid_until: SystemTime) -> Timestamp {
693
        use std::time::UNIX_EPOCH;
694
        let seconds = valid_until.duration_since(UNIX_EPOCH).unwrap().as_secs();
695
        Timestamp::from(UNIX_EPOCH + Duration::from_secs(seconds))
696
    }
697

            
698
    /// Return the number of keys matching the specified pattern
699
    fn count_keys(keymgr: &KeyMgr, pat: &dyn KeySpecifierPattern) -> usize {
700
        keymgr
701
            .list_matching(&pat.arti_pattern().unwrap())
702
            .unwrap()
703
            .len()
704
    }
705

            
706
    /// Return the number of link keys in the given KeyMgr.
707
    fn count_link_keys(keymgr: &KeyMgr) -> usize {
708
        count_keys(keymgr, &RelayLinkSigningKeypairSpecifierPattern::new_any())
709
    }
710

            
711
    /// Return the number of signing keys in the given KeyMgr.
712
    fn count_signing_keys(keymgr: &KeyMgr) -> usize {
713
        count_keys(keymgr, &RelaySigningKeypairSpecifierPattern::new_any())
714
    }
715

            
716
    /// Return the number of ntor keys in the given KeyMgr.
717
    fn count_ntor_keys(keymgr: &KeyMgr) -> usize {
718
        count_keys(keymgr, &RelayNtorKeypairSpecifierPattern::new_any())
719
    }
720

            
721
    /// Exercise the real bootstrap entry point, `try_generate_keys()`, starting from an empty
    /// keystore: it must succeed and hand back the channel auth material.
    #[test]
    fn test_bootstrap() {
        MockRuntime::test_with_various(|runtime| async move {
            let keymgr = new_keymgr();

            let _auth_material = try_generate_keys(&runtime, &keymgr).unwrap_or_else(|e| {
                panic!("Unable to bootstrap keys and generate RelayChannelAuthMaterial: {e}")
            });
        });
    }
736

            
737
    /// First rotation pass on a keystore holding only the identity keys: exactly one link
    /// key, one signing key and one ntor key must come out of it, and all of them must be
    /// reported as rotated.
    #[test]
    fn test_initial_key_generation() {
        MockRuntime::test_with_various(|runtime| async move {
            let keymgr = setup();
            let started_at = runtime.wallclock();

            let (outcome, next_expiry) = try_rotate_keys(started_at, &keymgr).unwrap();

            assert!(
                outcome.chan_auth && outcome.ntor,
                "keys should be reported as generated on first rotation"
            );

            let link_keys = count_link_keys(&keymgr);
            let signing_keys = count_signing_keys(&keymgr);
            let ntor_keys = count_ntor_keys(&keymgr);
            assert_eq!(link_keys, 1, "expected one link key");
            assert_eq!(signing_keys, 1, "expected one signing key");
            assert_eq!(ntor_keys, 1, "expected one ntor key");

            // Among the generated keys, the link key is the one expiring first (~2 days
            // out), so that is what the reported next expiry must be.
            let link_key_expiry = runtime.wallclock() + LINK_CERT_LIFETIME;
            assert_eq!(
                next_expiry, link_key_expiry,
                "next expiry should be ~{LINK_CERT_LIFETIME:?} from now, got {next_expiry:?}"
            );
        });
    }
763

            
764
    /// Calling `try_rotate_keys` a second time, shortly after the keys were generated, must
    /// report no rotation and leave the keystore content unchanged.
    #[test]
    fn test_rotation_on_fresh_keys() {
        MockRuntime::test_with_various(|runtime| async move {
            let keymgr = setup();
            // First pass generates the link, signing and ntor keys.
            try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();

            // Advance by 1 hour (inside 2 days of link key).
            runtime.advance_by(Duration::from_secs(60 * 60)).await;

            // Read the clock again: reusing a timestamp taken before `advance_by` would run
            // this second pass at generation time and the advance above would not be
            // exercised at all.
            let (rotated, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();

            assert!(
                !rotated.chan_auth && !rotated.ntor,
                "fresh keys must not trigger a rotation"
            );
            assert_eq!(count_link_keys(&keymgr), 1, "expected one link key");
            assert_eq!(count_signing_keys(&keymgr), 1, "expected one signing key");
            assert_eq!(count_ntor_keys(&keymgr), 1, "expected one ntor key");
        });
    }
786

            
787
    /// Link key rotation boundary: nothing rotates one second before the expiry buffer
    /// starts, and the link key rotates once the buffer is reached.
    #[test]
    fn test_rotation_link_key() {
        MockRuntime::test_with_various(|runtime| async move {
            let keymgr = setup();
            // Initial pass: this is what creates the keys.
            try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();

            // Stop 1 second short of the point where the link key enters the rotation
            // buffer. No rotation is expected at that time.
            let one_second = Duration::from_secs(1);
            let until_just_before = LINK_CERT_LIFETIME - KEY_ROTATION_EXPIRE_BUFFER - one_second;
            runtime.advance_by(until_just_before).await;

            let (before, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();

            assert!(
                !before.chan_auth,
                "link key MUST NOT rotate before the expiry buffer threshold"
            );
            assert!(
                !before.ntor,
                "ntor key MUST NOT rotate before the expiry buffer threshold"
            );
            assert_eq!(count_link_keys(&keymgr), 1, "expected one link key");
            assert_eq!(count_signing_keys(&keymgr), 1, "expected one signing key");

            // One more second puts the link key inside the buffer: now it must rotate.
            runtime.advance_by(one_second).await;

            let (after, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();
            assert!(
                after.chan_auth,
                "link key should rotate inside the expiry buffer threshold"
            );
        });
    }
824

            
825
    /// Signing key rotation boundary: the signing key keeps its original expiry one second
    /// before the expiry buffer starts, and gets a fresh expiry once the buffer is reached.
    #[test]
    fn test_rotation_signing_key() {
        MockRuntime::test_with_various(|runtime| async move {
            let keymgr = setup();
            // Initial pass: this is what creates the keys.
            try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();

            // Look up the specifier of the first relay signing key found in the keystore.
            let signing_key_spec = || -> RelaySigningKeypairSpecifier {
                let pattern = RelaySigningKeypairSpecifierPattern::new_any()
                    .arti_pattern()
                    .unwrap();
                let entries = keymgr.list_matching(&pattern).unwrap();
                entries.first().unwrap().key_path().try_into().unwrap()
            };

            // Stop 1 second short of the point where the signing key enters the rotation
            // buffer. The signing key itself must not rotate at that time.
            let one_second = Duration::from_secs(1);
            let until_just_before =
                SIGNING_KEY_CERT_LIFETIME - KEY_ROTATION_EXPIRE_BUFFER - one_second;
            runtime.advance_by(until_just_before).await;

            // `chan_auth` is nevertheless expected to be set: that much time is well past
            // the lifetime of the link key, so presumably the link key is what rotated here.
            let (rotated, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();
            assert!(rotated.chan_auth, "Rotation must happen after 30 days");

            // The signing key still carries the expiry it was generated with, which from
            // the current time is the buffer plus the one second we stopped short of.
            let untouched_expiry = runtime.wallclock() + KEY_ROTATION_EXPIRE_BUFFER + one_second;
            let spec = signing_key_spec();
            assert_eq!(
                spec.valid_until,
                to_timestamp_in_secs(untouched_expiry),
                "RelaySigningKeypairSpecifier should not have rotated"
            );

            assert_eq!(count_link_keys(&keymgr), 1, "expected one link key");
            assert_eq!(count_signing_keys(&keymgr), 1, "expected one signing key");

            // One more second puts the signing key inside the buffer: now it must rotate.
            runtime.advance_by(one_second).await;

            let (rotated, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();
            assert!(rotated.chan_auth, "Rotation must happen after 30 days");

            // The new signing key is valid for a full lifetime counted from now.
            let renewed_expiry = runtime.wallclock() + SIGNING_KEY_CERT_LIFETIME;
            let spec = signing_key_spec();
            assert_eq!(
                spec.valid_until,
                to_timestamp_in_secs(renewed_expiry),
                "RelaySigningKeypairSpecifier should have rotated"
            );
        });
    }
882

            
883
    /// Ntor key rotation boundary and grace period: no rotation one second before the expiry
    /// buffer starts, a second key appears once the buffer is reached, and the old key is
    /// gone once the grace period has elapsed.
    #[test]
    fn test_rotation_ntor_key() {
        MockRuntime::test_with_various(|runtime| async move {
            let keymgr = setup();
            // Initial pass: this is what creates the keys.
            try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();

            // Stop 1 second short of the point where the ntor key enters the rotation
            // buffer. No ntor rotation is expected at that time.
            let one_second = Duration::from_secs(1);
            let until_just_before = NTOR_KEY_LIFETIME - KEY_ROTATION_EXPIRE_BUFFER - one_second;
            runtime.advance_by(until_just_before).await;

            let (before, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();

            assert!(
                !before.ntor,
                "Ntor key MUST NOT rotate before the expiry buffer threshold"
            );
            assert_eq!(count_ntor_keys(&keymgr), 1, "expected one ntor key");

            // One more second puts the ntor key inside the buffer: now it must rotate.
            runtime.advance_by(one_second).await;

            let (in_buffer, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();
            assert!(
                in_buffer.ntor,
                "ntor key should rotate inside the expiry buffer threshold"
            );

            // The previous key is kept next to the new one during the grace period.
            let keys_during_grace = count_ntor_keys(&keymgr);
            assert_eq!(
                keys_during_grace,
                2,
                "there should be 2 ntor keys in the grace period"
            );

            // Let the whole grace period go by and run another pass.
            runtime.advance_by(NTOR_KEY_GRACE_PERIOD).await;

            let (after_grace, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();
            assert!(
                after_grace.ntor,
                "ntor key should rotate after the grace period"
            );

            let keys_after_grace = count_ntor_keys(&keymgr);
            assert_eq!(
                keys_after_grace,
                1,
                "the old ntor key should have been removed after the grace period"
            );
        });
    }
935
}