1
//! Key rotation tasks of the relay.
2

            
3
use anyhow::Context;
4
use std::{
5
    sync::Arc,
6
    time::{Duration, SystemTime},
7
};
8
use tor_basic_utils::rand_hostname;
9
use tor_cert::x509::TlsKeyAndCert;
10
use tor_chanmgr::ChanMgr;
11
use tor_error::internal;
12
use tor_key_forge::ToEncodableCert;
13
use tor_keymgr::{
14
    CertSpecifierPattern, KeyCertificateSpecifier, KeyMgr, KeyPath, KeySpecifier,
15
    KeySpecifierPattern, Keygen, KeystoreEntry, KeystoreSelector, ToEncodableKey,
16
};
17
use tor_proto::RelayChannelAuthMaterial;
18
use tor_proto::relay::CreateRequestHandler;
19
use tor_relay_crypto::{RelaySigningKeyCert, gen_link_cert, gen_signing_cert, gen_tls_cert};
20

            
21
use crate::keys::{
22
    RelayIdentityKeypairSpecifier, RelayIdentityRsaKeypairSpecifier,
23
    RelayLinkSigningKeypairSpecifier, RelayLinkSigningKeypairSpecifierPattern,
24
    RelayNtorKeypairSpecifier, RelayNtorKeypairSpecifierPattern, RelaySigningKeyCertSpecifier,
25
    RelaySigningKeyCertSpecifierPattern, RelaySigningKeypairSpecifier,
26
    RelaySigningKeypairSpecifierPattern, RelaySigningPublicKeySpecifier, Timestamp,
27
};
28
use tor_relay_crypto::pk::{
29
    RelayIdentityKeypair, RelayIdentityRsaKeypair, RelayLinkSigningKeypair, RelayNtorKeypair,
30
    RelayNtorKeys, RelaySigningKeypair,
31
};
32
use tor_rtcompat::{Runtime, SleepProviderExt};
33

            
34
/// Number of seconds in one hour.
const SECS_PER_HOUR: u64 = 60 * 60;
/// Number of seconds in one day.
const SECS_PER_DAY: u64 = 24 * SECS_PER_HOUR;

/// How long before a key's expiry we start rotating it, so that rotation happens slightly
/// ahead of the expiry rather than at or after it.
///
/// C-tor uses 3 hours for the link/auth key and 1 day for the signing key. We use 3 hours for
/// everything, which should leave plenty of room even if hiccups happen.
const KEY_ROTATION_EXPIRE_BUFFER: Duration = Duration::from_secs(3 * SECS_PER_HOUR);

// The following expiry durations have been taken from C-tor.

/// Lifetime of the link authentication key (KP_link_ed) certificate: 2 days.
const LINK_CERT_LIFETIME: Duration = Duration::from_secs(2 * SECS_PER_DAY);
/// Lifetime of the relay signing key (KP_relaysign_ed) certificate: 30 days.
const SIGNING_KEY_CERT_LIFETIME: Duration = Duration::from_secs(30 * SECS_PER_DAY);
/// Lifetime of the RSA identity key certificate: 6 periods of 30 days.
const RSA_CROSSCERT_LIFETIME: Duration = Duration::from_secs(6 * 30 * SECS_PER_DAY);
/// Lifetime of the ntor circuit extension key (KP_ntor): 28 days.
// TODO(relay): we should be using the "onion-key-rotation-days" consensus param
// instead of this hard-coded value.
const NTOR_KEY_LIFETIME: Duration = Duration::from_secs(28 * SECS_PER_DAY);

/// Default grace period for acceptance of an onion key (KP_ntor): 7 days.
///
/// This is the amount of time we are still willing to use the key after it expires.
// TODO(relay): we should be using the "onion-key-grace-period-days" consensus param
// instead of this hard-coded value.
const NTOR_KEY_GRACE_PERIOD: Duration = Duration::from_secs(7 * SECS_PER_DAY);
63

            
64
/// Records which kinds of relay key material were altered by a keystore operation.
#[derive(Copy, Clone, Debug)]
struct KeyChange {
    /// Set when the channel authentication material has changed.
    chan_auth: bool,
    /// Set when the ntor keys have changed.
    ntor: bool,
}

impl KeyChange {
    /// Merge two [`KeyChange`]s field by field.
    ///
    /// A flag is set in the result when it is set in `self`, in `other`, or in both.
    fn or(&self, other: &KeyChange) -> KeyChange {
        let KeyChange { chan_auth, ntor } = *self;

        Self {
            chan_auth: chan_auth || other.chan_auth,
            ntor: ntor || other.ntor,
        }
    }
}
82

            
83
/// Build a fresh [`RelayChannelAuthMaterial`] object using a [`KeyMgr`].
84
///
85
/// The link cert and TLS certs are created in this function.
86
/// The signing key certificate is retrieved from the keymgr.
87
///
88
/// This function assumes that all required keys,
89
/// as well as the signing key certificate,
90
/// are already in the keystore.
91
4
fn build_proto_relay_auth_material(
92
4
    now: SystemTime,
93
4
    keymgr: &KeyMgr,
94
4
) -> anyhow::Result<RelayChannelAuthMaterial> {
95
4
    let mut rng = tor_llcrypto::rng::CautiousRng;
96

            
97
    // Get the identity keypairs.
98
4
    let rsa_id_kp: RelayIdentityRsaKeypair = keymgr
99
4
        .get(&RelayIdentityRsaKeypairSpecifier::new())
100
4
        .context("Failed to get RSA identity from key manager")?
101
4
        .context("Missing RSA identity")?;
102
4
    let ed_id_kp: RelayIdentityKeypair = keymgr
103
4
        .get(&RelayIdentityKeypairSpecifier::new())
104
4
        .context("Failed to get Ed25519 identity from key manager")?
105
4
        .context("Missing Ed25519 identity")?;
106
    // We have to list match here because the key specifier here uses a valid_until. We don't know
107
    // what it is so we list and take the first one.
108
4
    let link_sign_kp: RelayLinkSigningKeypair = keymgr
109
4
        .get_entry(
110
4
            keymgr
111
4
                .list_matching(&RelayLinkSigningKeypairSpecifierPattern::new_any().arti_pattern()?)?
112
4
                .first()
113
4
                .context("No store entry for link authentication key")?,
114
        )
115
4
        .context("Failed to get link authentication key from key manager")?
116
4
        .context("Missing link authentication key")?;
117
4
    let kp_relaysign_id: RelaySigningKeypair = keymgr
118
4
        .get_entry(
119
4
            keymgr
120
4
                .list_matching(&RelaySigningKeypairSpecifierPattern::new_any().arti_pattern()?)?
121
4
                .first()
122
4
                .context("No store entry for signing key")?,
123
        )
124
4
        .context("Failed to get signing key from key manager")?
125
4
        .context("Missing signing key")?;
126
4
    let cert_id_sign_ed: RelaySigningKeyCert = keymgr
127
4
        .get_cert_entry::<RelaySigningKeyCertSpecifier, _, _>(
128
4
            keymgr
129
4
                .list_matching(&RelaySigningKeyCertSpecifierPattern::new_any().arti_pattern()?)?
130
4
                .first()
131
4
                .context("No store entry for signing key cert")?,
132
4
            &RelayIdentityKeypairSpecifier::new(),
133
        )
134
4
        .context("Failed to get signing key cert from key manager")?
135
4
        .context("Missing signing key cert")?;
136

            
137
    // TLS key and cert. Random hostname like C-tor. We re-use the issuer_hostname for the RSA
138
    // legacy cert.
139
4
    let issuer_hostname = rand_hostname::random_hostname(&mut rng);
140
4
    let subject_hostname = rand_hostname::random_hostname(&mut rng);
141
4
    let tls_key_and_cert =
142
4
        TlsKeyAndCert::create(&mut rng, now, &issuer_hostname, &subject_hostname)
143
4
            .context("Failed to create TLS keys and certificates")?;
144

            
145
    // Create the RSA X509 certificate.
146
4
    let cert_id_x509_rsa = tor_cert::x509::create_legacy_rsa_id_cert(
147
4
        &mut rng,
148
4
        now,
149
4
        &issuer_hostname,
150
4
        rsa_id_kp.keypair(),
151
    )
152
4
    .context("Failed to create legacy RSA identity certificate")?;
153

            
154
4
    let cert_id_rsa = tor_cert::rsa::EncodedRsaCrosscert::encode_and_sign(
155
4
        rsa_id_kp.keypair(),
156
4
        &ed_id_kp.to_ed25519_id(),
157
4
        now + RSA_CROSSCERT_LIFETIME,
158
    )?;
159

            
160
    // Create the link cert and tls cert.
161
4
    let cert_sign_link_auth_ed =
162
4
        gen_link_cert(&kp_relaysign_id, &link_sign_kp, now + LINK_CERT_LIFETIME)?;
163
4
    let cert_sign_tls_ed = gen_tls_cert(
164
4
        &kp_relaysign_id,
165
4
        *tls_key_and_cert.link_cert_sha256(),
166
4
        now + LINK_CERT_LIFETIME,
167
    )?;
168

            
169
4
    Ok(RelayChannelAuthMaterial::new(
170
4
        &rsa_id_kp.public().into(),
171
4
        ed_id_kp.to_ed25519_id(),
172
4
        link_sign_kp,
173
4
        cert_id_sign_ed.to_encodable_cert(),
174
4
        cert_sign_tls_ed,
175
4
        cert_sign_link_auth_ed.to_encodable_cert(),
176
4
        cert_id_x509_rsa,
177
4
        cert_id_rsa,
178
4
        tls_key_and_cert,
179
4
    ))
180
4
}
181

            
182
/// Generate a key `K` directly into the key manager.
183
///
184
/// If the key already exists, the error is ignored as this could happen if the system time drifts
185
/// between the get and the generate.
186
48
fn generate_key<K>(keymgr: &KeyMgr, spec: &dyn KeySpecifier) -> Result<(), tor_keymgr::Error>
187
48
where
188
48
    K: ToEncodableKey,
189
48
    K::Key: Keygen,
190
{
191
48
    let mut rng = tor_llcrypto::rng::CautiousRng;
192

            
193
48
    match keymgr.generate::<K>(spec, KeystoreSelector::default(), &mut rng, false) {
194
48
        Ok(_) => {}
195
        // Key already existing can happen due to wall clock strangeness,
196
        // so simply ignore it.
197
        Err(tor_keymgr::Error::KeyAlreadyExists) => (),
198
        Err(e) => return Err(e),
199
    };
200
48
    Ok(())
201
48
}
202

            
203
/// Go through keystore entries matching `pattern` and remove any that are
204
/// expired according to `is_expired`.
205
///
206
/// Returns `(removed, min_remaining)` where `removed` indicates whether any entry was deleted and
207
/// `min_remaining` is the minimum `valid_until` of the entries that were kept (if any).
208
224
fn remove_expired<F, E>(
209
224
    now: SystemTime,
210
224
    keymgr: &KeyMgr,
211
224
    pattern: &tor_keymgr::KeyPathPattern,
212
224
    label: &'static str,
213
224
    expiry_from_keypath: F,
214
224
    is_expired: E,
215
224
) -> anyhow::Result<(bool, Option<SystemTime>)>
216
224
where
217
224
    F: Fn(&KeyPath) -> anyhow::Result<Timestamp>,
218
224
    E: Fn(&Timestamp, SystemTime) -> bool,
219
{
220
224
    let entries = keymgr.list_matching(pattern)?;
221
224
    let mut removed = false;
222
224
    let mut min_valid_until: Option<Timestamp> = None;
223

            
224
224
    for entry in entries {
225
136
        let valid_until = expiry_from_keypath(entry.key_path())?;
226
136
        if is_expired(&valid_until, now) {
227
36
            tracing::debug!("Expired {} in keymgr. Removing it.", label);
228
36
            keymgr.remove_entry(&entry)?;
229
36
            removed = true;
230
        } else {
231
            min_valid_until =
232
100
                Some(min_valid_until.map_or(valid_until, |current| current.min(valid_until)));
233
        }
234
    }
235

            
236
224
    Ok((removed, min_valid_until.map(SystemTime::from)))
237
224
}
238

            
239
/// Attempt to generate a key using the given [`KeySpecifier`].
240
///
241
/// Return true if generated else false.
242
112
fn try_generate_key<K, P, F>(
243
112
    keymgr: &KeyMgr,
244
112
    spec: &dyn KeySpecifier,
245
112
    should_generate: F,
246
112
) -> anyhow::Result<bool>
247
112
where
248
112
    K: ToEncodableKey,
249
112
    K::Key: Keygen,
250
112
    P: KeySpecifierPattern,
251
112
    F: Fn(&[KeystoreEntry]) -> anyhow::Result<bool>,
252
{
253
112
    let mut generated = false;
254
112
    let mut rng = tor_llcrypto::rng::CautiousRng;
255
112
    let entries = keymgr.list_matching(&P::new_any().arti_pattern()?)?;
256
112
    if should_generate(&entries)? {
257
72
        let _ = keymgr.get_or_generate::<K>(spec, KeystoreSelector::default(), &mut rng)?;
258
72
        generated = true;
259
40
    }
260

            
261
112
    Ok(generated)
262
112
}
263

            
264
/// Attempt to generate a key and cert using the given [`KeyCertificateSpecifier`] which is signed
265
/// by the given [`KeySpecifier]` in `signing_key_spec`.
266
///
267
/// The `make_certificate` is used to generate the certificate stored in the [`KeyMgr`].
268
///
269
/// Return true if generated else false.
270
56
fn try_generate_key_cert<K, C, P>(
271
56
    keymgr: &KeyMgr,
272
56
    cert_spec: &dyn KeyCertificateSpecifier,
273
56
    signing_key_spec: &dyn KeySpecifier,
274
56
    make_certificate: impl FnOnce(&K, &<C as ToEncodableCert<K>>::SigningKey) -> C,
275
56
) -> anyhow::Result<bool>
276
56
where
277
56
    K: ToEncodableKey,
278
56
    K::Key: Keygen,
279
56
    C: ToEncodableCert<K>,
280
56
    P: CertSpecifierPattern,
281
{
282
56
    let mut generated = false;
283
56
    let mut rng = tor_llcrypto::rng::CautiousRng;
284
56
    let entries = keymgr.list_matching(&P::new_any().arti_pattern()?)?;
285
56
    if entries.is_empty() {
286
32
        let _ = keymgr.get_or_generate_key_and_cert::<K, C>(
287
32
            cert_spec,
288
32
            signing_key_spec,
289
32
            make_certificate,
290
32
            KeystoreSelector::default(),
291
32
            &mut rng,
292
        )?;
293
32
        generated = true;
294
24
    }
295

            
296
56
    Ok(generated)
297
56
}
298

            
299
/// Try to generate all keys and certs needed for a relay.
300
///
301
/// This tries to generate the [`RelayLinkSigningKeypair`] and the [`RelaySigningKeypair`] +
302
/// [`RelaySigningKeyCert`]. Note that identity keys are NOT generated within this function, it is
303
/// only attempted once at boot time. This is so we avoid retrying to generate them at each key
304
/// rotation as those identity keys never rotate.
305
///
306
/// Returns the minimum valid until value if a key was generated. Else, a None value indicates that
307
/// no key was generated.
308
56
fn try_generate_all(
309
56
    now: SystemTime,
310
56
    keymgr: &KeyMgr,
311
56
) -> anyhow::Result<(KeyChange, Option<SystemTime>)> {
312
56
    let link_expiry = now + LINK_CERT_LIFETIME;
313
56
    let link_spec = RelayLinkSigningKeypairSpecifier::new(Timestamp::from(link_expiry));
314
56
    let link_generated =
315
56
        try_generate_key::<RelayLinkSigningKeypair, RelayLinkSigningKeypairSpecifierPattern, _>(
316
56
            keymgr,
317
56
            &link_spec,
318
56
            |entries: &[KeystoreEntry<'_>]| Ok(entries.is_empty()),
319
        )?;
320

            
321
56
    let cert_expiry = now + SIGNING_KEY_CERT_LIFETIME;
322

            
323
    // The make certificate function needed for the get_or_generate_key_and_cert(). It is a closure
324
    // so we can capture the runtime wallclock.
325
56
    let make_signing_cert = |subject_key: &RelaySigningKeypair,
326
32
                             signing_key: &RelayIdentityKeypair| {
327
32
        gen_signing_cert(signing_key, subject_key, cert_expiry)
328
32
            .expect("failed to generate relay signing cert")
329
32
    };
330

            
331
    // We either get the existing one or generate this new one.
332
56
    let cert_spec = RelaySigningKeyCertSpecifier::new(RelaySigningPublicKeySpecifier::new(
333
56
        Timestamp::from(cert_expiry),
334
    ));
335
56
    let cert_generated = try_generate_key_cert::<
336
56
        RelaySigningKeypair,
337
56
        RelaySigningKeyCert,
338
56
        RelaySigningKeyCertSpecifierPattern,
339
56
    >(
340
56
        keymgr,
341
56
        &cert_spec,
342
56
        &RelayIdentityKeypairSpecifier::new(),
343
56
        make_signing_cert,
344
    )?;
345

            
346
56
    let ntor_expiry = now + NTOR_KEY_LIFETIME;
347
56
    let ntor_spec = RelayNtorKeypairSpecifier::new(Timestamp::from(ntor_expiry));
348

            
349
    // We generate a new ntor key if all existing keys are expired `now`
350
    // (without taking into account the grace period)
351
84
    let should_generate_ntor = |entries: &[KeystoreEntry<'_>]| {
352
56
        let mut all_expired = true;
353
56
        for entry in entries {
354
32
            let key_path = entry.key_path();
355
32
            let valid_until =
356
32
                SystemTime::from(RelayNtorKeypairSpecifier::try_from(key_path)?.valid_until);
357

            
358
            // If *all* the ntor keys are expired (but still within the grace period),
359
            // we want to generate a new ntor key.
360
            //
361
            // Note: this needs to take the KEY_ROTATION_EXPIRE_BUFFER into account
362
            // because the main loop will wake us KEY_ROTATION_EXPIRE_BUFFER
363
            // *before* the valid_until elapses
364
32
            if valid_until > now + KEY_ROTATION_EXPIRE_BUFFER {
365
24
                all_expired = false;
366
24
                break;
367
8
            }
368
        }
369

            
370
56
        Ok(all_expired)
371
56
    };
372

            
373
56
    let ntor_generated = try_generate_key::<RelayNtorKeypair, RelayNtorKeypairSpecifierPattern, _>(
374
56
        keymgr,
375
56
        &ntor_spec,
376
56
        should_generate_ntor,
377
    )?;
378

            
379
56
    let change = KeyChange {
380
56
        chan_auth: link_generated || cert_generated,
381
56
        ntor: ntor_generated,
382
    };
383

            
384
56
    Ok((
385
56
        change,
386
56
        [
387
56
            link_generated.then_some(link_expiry),
388
56
            cert_generated.then_some(cert_expiry),
389
56
            ntor_generated.then_some(ntor_expiry),
390
56
        ]
391
56
        .into_iter()
392
56
        .flatten()
393
56
        .min(),
394
56
    ))
395
56
}
396

            
397
/// Remove any expired keys (and certs) that are expired.
398
///
399
/// Return (`removed`, `next_expiry`) where the `removed` indicates if at least one key has been
400
/// removed because it was expired. The `next_expiry` is the minimum value of all valid_until which
401
/// indicates the next closest expiry time.
402
56
fn remove_expired_keys(
403
56
    now: SystemTime,
404
56
    keymgr: &KeyMgr,
405
56
) -> anyhow::Result<(KeyChange, Option<SystemTime>)> {
406
124
    let is_expired_with_buffer = |valid_until: &Timestamp, now| {
407
96
        *valid_until <= Timestamp::from(now + KEY_ROTATION_EXPIRE_BUFFER)
408
96
    };
409
56
    let (relaysign_removed, relaysign_expiry) = remove_expired(
410
56
        now,
411
56
        keymgr,
412
56
        &RelaySigningKeypairSpecifierPattern::new_any().arti_pattern()?,
413
        "key KP_relaysign_ed",
414
32
        |key_path| Ok(RelaySigningKeypairSpecifier::try_from(key_path)?.valid_until),
415
56
        is_expired_with_buffer,
416
    )?;
417
56
    let (link_removed, link_expiry) = remove_expired(
418
56
        now,
419
56
        keymgr,
420
56
        &RelayLinkSigningKeypairSpecifierPattern::new_any().arti_pattern()?,
421
        "key KP_link_ed",
422
32
        |key_path| Ok(RelayLinkSigningKeypairSpecifier::try_from(key_path)?.valid_until),
423
56
        is_expired_with_buffer,
424
    )?;
425

            
426
    // This should always be removed if the signing key above has been removed. However, we still
427
    // do a pass at the keystore considering the upcoming offline key feature that might have more
428
    // than one expired cert in the keystore.
429
56
    let (sign_cert_removed, sign_cert_expiry) = remove_expired(
430
56
        now,
431
56
        keymgr,
432
56
        &RelaySigningKeyCertSpecifierPattern::new_any().arti_pattern()?,
433
        "signing key cert",
434
32
        |key_path| {
435
32
            let spec: RelaySigningKeyCertSpecifier = key_path.try_into()?;
436
32
            let subject_key_path = KeyPath::Arti(spec.subject_key_specifier().arti_path()?);
437
32
            let subject_key_spec: RelaySigningPublicKeySpecifier =
438
32
                (&subject_key_path).try_into()?;
439
32
            Ok(subject_key_spec.valid_until)
440
32
        },
441
56
        is_expired_with_buffer,
442
    )?;
443

            
444
    // When deciding whether to remove the key,
445
    // we need to take into account the special grace period ntor keys have
446
    // (we need to keep the key around even if it's "expired",
447
    // because some clients might still be using an older consensus
448
    // and hence might not know about our new key yet).
449
76
    let is_expired_ntor = |valid_until: &Timestamp, now| {
450
        // Note: we need to take into account KEY_ROTATION_EXPIRE_BUFFER
451
        // because the main loop always subtracts KEY_ROTATION_EXPIRE_BUFFER
452
        // from the returned next_expiry, but ideally,
453
        // I don't think we should be using this buffer for the ntor keys,
454
        // because they have a grace period and don't get removed immediately
455
        // anyway
456
40
        *valid_until <= Timestamp::from(now - NTOR_KEY_GRACE_PERIOD + KEY_ROTATION_EXPIRE_BUFFER)
457
40
    };
458

            
459
56
    let (ntor_key_removed, ntor_key_expiry) = remove_expired(
460
56
        now,
461
56
        keymgr,
462
56
        &RelayNtorKeypairSpecifierPattern::new_any().arti_pattern()?,
463
        "key KP_ntor",
464
40
        |key_path| Ok(RelayNtorKeypairSpecifier::try_from(key_path)?.valid_until),
465
56
        is_expired_ntor,
466
    )?;
467

            
468
    // Have we at least removed one?
469
56
    let removed = KeyChange {
470
56
        chan_auth: relaysign_removed || link_removed || sign_cert_removed,
471
56
        ntor: ntor_key_removed,
472
    };
473

            
474
    // TODO: we could, in theory, return this from remove_expired(),
475
    // but I don't want to make it any more complicated than it already is,
476
    // especially for an operation that runs relatively infrequently.
477
56
    let ntor_key_count = keymgr
478
56
        .list_matching(&RelayNtorKeypairSpecifierPattern::new_any().arti_pattern()?)?
479
56
        .len();
480

            
481
    // This is a best effort check. There is no guarantee the
482
    // second key is the "successor" of this key,
483
    // but in general, it will be, unless an external process
484
    // is concurrently modifying the keystore
485
    // (which something we explicitly don't try to protect against).
486
    //
487
    // We could, in theory, check that the valid_until of the two
488
    // keys are adequately spaced, but in practice I don't think
489
    // it matters much.
490
56
    let next_key_exists = ntor_key_count >= 2;
491

            
492
    // Note: for each ntor key, we need to wake up twice
493
    //
494
    //   * at its expiry time, to generate the next ntor key
495
    //   * at its expiry time + GRACE_PERIOD, to remove the old ntor key
496
56
    let ntor_key_expiry = match ntor_key_expiry {
497
        None => {
498
            // We removed the last ntor key, the wakeup time will be
499
            // determined by try_generate_key() later
500
24
            None
501
        }
502
        // This special case may seem strange, but it's needed for
503
        // the specific scenario where there is only one ntor key
504
        // in the keystore with valid_until < now.
505
        //
506
        // Without it, there is no guarantee we will wake up at valid_until
507
        // to generate the new ntor key (when the key is generated,
508
        // we try to schedule a rotation task wakeup at valid_until,
509
        // but if the other keys have "sooner" `valid_until`s,
510
        // that wakeup will be lost.
511
28
        Some(valid_until) if !next_key_exists => {
512
            // The next key doesn't exist yet,
513
            // wake up at valid_until to generate it
514
28
            Some(valid_until)
515
        }
516
4
        Some(valid_until) => {
517
            // The next key exists, we only need to wake up
518
            // to garbage collect this one, after the grace period
519
            //
520
            // This avoids busy looping in the [valid_until, valid_until + grace_period]
521
            // time interval (if we don't add the grace period here, when
522
            // now = valid_until, we will keep waking up the main loop of the
523
            // key rotation task, and then not actually removing the key because
524
            // it's still within the grace period).
525
4
            Some(valid_until + NTOR_KEY_GRACE_PERIOD)
526
        }
527
    };
528

            
529
56
    let next_expiry = [
530
56
        relaysign_expiry,
531
56
        link_expiry,
532
56
        sign_cert_expiry,
533
56
        ntor_key_expiry,
534
56
    ]
535
56
    .into_iter()
536
56
    .flatten()
537
56
    .min();
538

            
539
56
    Ok((removed, next_expiry))
540
56
}
541

            
542
/// Attempt to rotate all keys except identity keys.
543
///
544
/// Returns (rotated, next_expiry) where `rotated` indicates if any key was rotated and
545
/// `next_expiry` is the earliest expiry time across all keys.
546
56
fn try_rotate_keys(now: SystemTime, keymgr: &KeyMgr) -> anyhow::Result<(KeyChange, SystemTime)> {
547
    // First do a pass to remove every expired key(s) or/and cert(s).
548
56
    let (have_removed, min_expiry) = remove_expired_keys(now, keymgr)?;
549

            
550
    // Then attempt to generate keys. If at least one was generated, we'll get the min expiry time
551
    // which we need to consider "rotated" so the caller can know that a new key appeared.
552
56
    let (generated, gen_min_expiry) = try_generate_all(now, keymgr)?;
553
56
    let have_rotated = have_removed.or(&generated);
554

            
555
    // We should never get no expiry time.
556
56
    let next_expiry = [min_expiry, gen_min_expiry]
557
56
        .into_iter()
558
56
        .flatten()
559
56
        .min()
560
56
        .ok_or(internal!("No relay keys after rotation task loop"))?;
561

            
562
56
    Ok((have_rotated, next_expiry))
563
56
}
564

            
565
/// Attempt to generate all keys. The list of keys is:
566
///
567
/// * Identity Ed25519 keypair [`RelayIdentityKeypair`].
568
/// * Identity RSA [`RelayIdentityRsaKeypair`].
569
/// * Relay signing keypair [`RelaySigningKeypair`].
570
/// * Relay link signing keypair [`RelayLinkSigningKeypair`].
571
/// * Relay ntor keypair [`RelayNtorKeypair`].
572
///
573
/// This function is only called when our relay bootstraps in order to attempt to generate any
574
/// missing keys or/and rotate expired keys.
575
4
pub(crate) fn try_generate_keys<R: Runtime>(
576
4
    runtime: &R,
577
4
    keymgr: &KeyMgr,
578
4
) -> anyhow::Result<RelayChannelAuthMaterial> {
579
4
    let now = runtime.wallclock();
580
    // Attempt to generate our identity keys (ed and RSA). Those keys DO NOT rotate. It won't be
581
    // replaced if they already exists.
582
4
    generate_key::<RelayIdentityKeypair>(keymgr, &RelayIdentityKeypairSpecifier::new())?;
583
4
    generate_key::<RelayIdentityRsaKeypair>(keymgr, &RelayIdentityRsaKeypairSpecifier::new())?;
584

            
585
    // Attempt to rotate the keys. Any missing keys (and cert) will be generated.
586
4
    let _ = try_rotate_keys(now, keymgr)?;
587

            
588
    // Now that we have our up-to-date keys, build the relay channel auth material object.
589
4
    build_proto_relay_auth_material(now, keymgr)
590
4
}
591
/// Return the current ntor keypairs from the keystore as [`RelayNtorKeys`].
592
pub(crate) fn get_ntor_keys(keymgr: &KeyMgr) -> anyhow::Result<RelayNtorKeys> {
593
    let mut entries = keymgr
594
        .list_matching(&RelayNtorKeypairSpecifierPattern::new_any().arti_pattern()?)?
595
        .into_iter()
596
        .map(|entry| {
597
            let valid_until = RelayNtorKeypairSpecifier::try_from(entry.key_path())?.valid_until;
598
            Ok((valid_until, entry))
599
        })
600
        .collect::<anyhow::Result<Vec<_>>>()?;
601
    // Sort in ascending order and then reverse so we get the descending order as in the newest
602
    // keys first.
603
    entries.sort_by_key(|(valid_until, _)| *valid_until);
604
    entries.reverse();
605

            
606
    let mut iter = entries.into_iter();
607
    // Get newest and if none, return an error.
608
    let (_, newest_entry) = iter
609
        .next()
610
        .ok_or_else(|| anyhow::anyhow!("no ntor keys found"))?;
611
    let latest = keymgr
612
        .get_entry::<RelayNtorKeypair>(&newest_entry)?
613
        .context("failed to retrieve newest ntor key")?;
614
    let previous: Option<RelayNtorKeypair> = iter
615
        .next()
616
        .map(|(_, entry)| -> anyhow::Result<RelayNtorKeypair> {
617
            keymgr
618
                .get_entry::<RelayNtorKeypair>(&entry)?
619
                .context("ntor key disappeared")
620
        })
621
        .transpose()?;
622
    let mut keys = RelayNtorKeys::new(latest);
623
    if let Some(prev) = previous {
624
        keys = keys.with_previous(prev);
625
    }
626
    Ok(keys)
627
}
628

            
629
/// Task to rotate keys when they need to be rotated.
630
pub(crate) async fn rotate_keys_task<R: Runtime>(
631
    runtime: R,
632
    keymgr: Arc<KeyMgr>,
633
    chanmgr: Arc<ChanMgr<R>>,
634
    create_request_handler: Arc<CreateRequestHandler>,
635
) -> anyhow::Result<void::Void> {
636
    loop {
637
        let now = runtime.wallclock();
638
        // Attempt a rotation of all keys.
639
        let (have_rotated, next_expiry) = try_rotate_keys(now, &keymgr)?;
640
        if have_rotated.chan_auth {
641
            let auth_material = build_proto_relay_auth_material(now, &keymgr)?;
642
            chanmgr
643
                .set_relay_auth_material(Arc::new(auth_material))
644
                .context("Failed to set relay auth material on ChanMgr")?;
645
        }
646

            
647
        if have_rotated.ntor {
648
            // Any keys left in the keystore at this point are considered to be usable
649
            // (either because they are newly generated, or because they are still
650
            // within the grace period).
651
            let ntor_keys = get_ntor_keys(&keymgr)?;
652
            create_request_handler.update_ntor_keys(ntor_keys);
653
        }
654

            
655
        // Sleep until the earliest key expiry minus buffer so we rotate before it expires.
656
        // If the subtraction would underflow, wake up immediately to rotate the expired key.
657
        let next_wake = next_expiry
658
            .checked_sub(KEY_ROTATION_EXPIRE_BUFFER)
659
            .unwrap_or(now);
660
        runtime.sleep_until_wallclock(next_wake).await;
661
    }
662
}
663

            
664
#[cfg(test)]
665
mod test {
666
    // @@ begin test lint list maintained by maint/add_warning @@
667
    #![allow(clippy::bool_assert_comparison)]
668
    #![allow(clippy::clone_on_copy)]
669
    #![allow(clippy::dbg_macro)]
670
    #![allow(clippy::mixed_attributes_style)]
671
    #![allow(clippy::print_stderr)]
672
    #![allow(clippy::print_stdout)]
673
    #![allow(clippy::single_char_pattern)]
674
    #![allow(clippy::unwrap_used)]
675
    #![allow(clippy::unchecked_time_subtraction)]
676
    #![allow(clippy::useless_vec)]
677
    #![allow(clippy::needless_pass_by_value)]
678
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
679

            
680
    use super::*;
681

            
682
    use crate::keys::{
683
        RelayLinkSigningKeypairSpecifierPattern, RelaySigningKeypairSpecifierPattern,
684
    };
685
    use tor_keymgr::{ArtiEphemeralKeystore, KeyMgrBuilder, KeySpecifierPattern};
686
    use tor_rtcompat::SleepProvider;
687
    use tor_rtmock::MockRuntime;
688

            
689
    /// Generate the non-rotating identity keys so the rest of the key machinery can run.
690
    fn setup_identity_keys(keymgr: &KeyMgr) {
691
        use crate::keys::{RelayIdentityKeypairSpecifier, RelayIdentityRsaKeypairSpecifier};
692
        use tor_relay_crypto::pk::{RelayIdentityKeypair, RelayIdentityRsaKeypair};
693
        generate_key::<RelayIdentityKeypair>(keymgr, &RelayIdentityKeypairSpecifier::new())
694
            .unwrap();
695
        generate_key::<RelayIdentityRsaKeypair>(keymgr, &RelayIdentityRsaKeypairSpecifier::new())
696
            .unwrap();
697
    }
698

            
699
    /// Initialize test basics that is runtime and a KeyMgr.
700
    fn new_keymgr() -> KeyMgr {
701
        let store = Box::new(ArtiEphemeralKeystore::new("test".to_string()));
702
        KeyMgrBuilder::default()
703
            .primary_store(store)
704
            .build()
705
            .unwrap()
706
    }
707

            
708
    /// Initial setup of a test. Build a mock runtime, key manager and setup identity keys.
709
    fn setup() -> KeyMgr {
710
        let keymgr = new_keymgr();
711
        setup_identity_keys(&keymgr);
712
        keymgr
713
    }
714

            
715
    /// Return a [`Timestamp`] given a [`SystemTime`] rounded down to its nearest second.
716
    ///
717
    /// In other words, the `tv_nsec` of a [`SystemTime`] is dropped.
718
    fn to_timestamp_in_secs(valid_until: SystemTime) -> Timestamp {
719
        use std::time::UNIX_EPOCH;
720
        let seconds = valid_until.duration_since(UNIX_EPOCH).unwrap().as_secs();
721
        Timestamp::from(UNIX_EPOCH + Duration::from_secs(seconds))
722
    }
723

            
724
    /// Return the number of keys matching the specified pattern
725
    fn count_keys(keymgr: &KeyMgr, pat: &dyn KeySpecifierPattern) -> usize {
726
        keymgr
727
            .list_matching(&pat.arti_pattern().unwrap())
728
            .unwrap()
729
            .len()
730
    }
731

            
732
    /// Return the number of link keys in the given KeyMgr.
733
    fn count_link_keys(keymgr: &KeyMgr) -> usize {
734
        count_keys(keymgr, &RelayLinkSigningKeypairSpecifierPattern::new_any())
735
    }
736

            
737
    /// Return the number of signing keys in the given KeyMgr.
738
    fn count_signing_keys(keymgr: &KeyMgr) -> usize {
739
        count_keys(keymgr, &RelaySigningKeypairSpecifierPattern::new_any())
740
    }
741

            
742
    /// Return the number of ntor keys in the given KeyMgr.
743
    fn count_ntor_keys(keymgr: &KeyMgr) -> usize {
744
        count_keys(keymgr, &RelayNtorKeypairSpecifierPattern::new_any())
745
    }
746

            
747
    /// Exercise the bootstrap entry point, `try_generate_keys()`, against a key manager that
    /// holds no keys at all. It must succeed and hand back the auth material.
    #[test]
    fn test_bootstrap() {
        MockRuntime::test_with_various(|runtime| async move {
            let keymgr = new_keymgr();

            let _auth_material = try_generate_keys(&runtime, &keymgr).unwrap_or_else(|e| {
                panic!("Unable to bootstrap keys and generate RelayChannelAuthMaterial: {e}")
            });
        });
    }
762

            
763
    /// First rotation on a keystore that only has identity keys: every rotating key must be
    /// generated exactly once, and the reported next expiry must be the link key's.
    #[test]
    fn test_initial_key_generation() {
        MockRuntime::test_with_various(|runtime| async move {
            let keymgr = setup();
            let now = runtime.wallclock();

            let (rotated, next_expiry) = try_rotate_keys(now, &keymgr).unwrap();

            let everything_generated = rotated.chan_auth && rotated.ntor;
            assert!(
                everything_generated,
                "keys should be reported as generated on first rotation"
            );

            let link_keys = count_link_keys(&keymgr);
            let signing_keys = count_signing_keys(&keymgr);
            let ntor_keys = count_ntor_keys(&keymgr);
            assert_eq!(link_keys, 1, "expected one link key");
            assert_eq!(signing_keys, 1, "expected one signing key");
            assert_eq!(ntor_keys, 1, "expected one ntor key");

            // The link key (~2 days out) is the first one to expire.
            let expected = runtime.wallclock() + LINK_CERT_LIFETIME;
            assert_eq!(
                next_expiry, expected,
                "next expiry should be ~{LINK_CERT_LIFETIME:?} from now, got {next_expiry:?}"
            );
        });
    }
789

            
790
    /// Calling `try_rotate_keys` a second time while all keys are still fresh must report no
    /// rotation and must leave the set of keys untouched.
    #[test]
    fn test_rotation_on_fresh_keys() {
        MockRuntime::test_with_various(|runtime| async move {
            let keymgr = setup();
            // First rotation creates the keys.
            try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();

            // Advance by 1 hour (inside 2 days of link key).
            runtime.advance_by(Duration::from_secs(60 * 60)).await;

            // Read the clock again: reusing the timestamp taken before the advance would make
            // the advance above invisible to the code under test.
            let (rotated, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();

            assert!(
                !rotated.chan_auth && !rotated.ntor,
                "fresh keys must not trigger a rotation"
            );
            assert_eq!(count_link_keys(&keymgr), 1, "expected one link key");
            assert_eq!(count_signing_keys(&keymgr), 1, "expected one signing key");
            assert_eq!(count_ntor_keys(&keymgr), 1, "expected one ntor key");
        });
    }
812

            
813
    /// Probe the link key on both sides of the rotation threshold: one second before the
    /// expiry buffer begins nothing may rotate, and one second later the link key must rotate.
    #[test]
    fn test_rotation_link_key() {
        MockRuntime::test_with_various(|runtime| async move {
            let keymgr = setup();
            // Bootstrap: the very first rotation generates the keys.
            try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();

            // Stop one second short of the point where the expiry buffer starts. No rotation
            // is expected yet.
            let one_second = Duration::from_secs(1);
            let until_just_before = LINK_CERT_LIFETIME - KEY_ROTATION_EXPIRE_BUFFER - one_second;
            runtime.advance_by(until_just_before).await;

            let (early, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();

            assert!(
                !early.chan_auth,
                "link key MUST NOT rotate before the expiry buffer threshold"
            );
            assert!(
                !early.ntor,
                "ntor key MUST NOT rotate before the expiry buffer threshold"
            );
            assert_eq!(count_link_keys(&keymgr), 1, "expected one link key");
            assert_eq!(count_signing_keys(&keymgr), 1, "expected one signing key");

            // Step over the threshold; now the link key has to be rotated.
            runtime.advance_by(one_second).await;

            let (late, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();
            assert!(
                late.chan_auth,
                "link key should rotate inside the expiry buffer threshold"
            );
        });
    }
850

            
851
    /// Probe the signing key on both sides of the rotation threshold.
    ///
    /// The signing key's specifier carries its `valid_until`, so the test reads that value
    /// back from the keystore to tell whether the key itself was replaced.
    #[test]
    fn test_rotation_signing_key() {
        MockRuntime::test_with_various(|runtime| async move {
            let keymgr = setup();
            // Bootstrap: the very first rotation generates the keys.
            try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();

            // Fetch the specifier of the first signing keypair found in the keystore.
            let get_key_spec = || {
                let pattern = RelaySigningKeypairSpecifierPattern::new_any()
                    .arti_pattern()
                    .unwrap();
                let entries = keymgr.list_matching(&pattern).unwrap();
                let first = entries.first().unwrap();
                RelaySigningKeypairSpecifier::try_from(first.key_path()).unwrap()
            };

            // Stop one second short of the point where the expiry buffer starts. The signing
            // key must not be rotated yet.
            let one_second = Duration::from_secs(1);
            let until_just_before =
                SIGNING_KEY_CERT_LIFETIME - KEY_ROTATION_EXPIRE_BUFFER - one_second;
            runtime.advance_by(until_just_before).await;

            let (rotated, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();
            // NOTE(review): `chan_auth` is presumably set here because the much shorter-lived
            // link key has rotated by now, not the signing key - confirm against
            // `try_rotate_keys`. The check right below pins that the signing key is unchanged.
            assert!(rotated.chan_auth, "Rotation must happen after 30 days");

            // The original key is still there: it expires buffer + 1 second from now.
            let still_valid_until =
                to_timestamp_in_secs(runtime.wallclock() + KEY_ROTATION_EXPIRE_BUFFER + one_second);
            assert_eq!(
                get_key_spec().valid_until,
                still_valid_until,
                "RelaySigningKeypairSpecifier should not have rotated"
            );

            assert_eq!(count_link_keys(&keymgr), 1, "expected one link key");
            assert_eq!(count_signing_keys(&keymgr), 1, "expected one signing key");

            // Step over the threshold; now the signing key has to be rotated.
            runtime.advance_by(one_second).await;

            let (rotated, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();
            assert!(rotated.chan_auth, "Rotation must happen after 30 days");

            // A brand-new key is valid for a full lifetime counted from the current time.
            let fresh_valid_until =
                to_timestamp_in_secs(runtime.wallclock() + SIGNING_KEY_CERT_LIFETIME);
            assert_eq!(
                get_key_spec().valid_until,
                fresh_valid_until,
                "RelaySigningKeypairSpecifier should have rotated"
            );
        });
    }
908

            
909
    /// Probe the ntor key on both sides of the rotation threshold, then check that the old
    /// key is kept alongside the new one until the grace period has elapsed.
    #[test]
    fn test_rotation_ntor_key() {
        MockRuntime::test_with_various(|runtime| async move {
            let keymgr = setup();
            // Bootstrap: the very first rotation generates the keys.
            try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();

            // Stop one second short of the point where the expiry buffer starts. No ntor
            // rotation is expected yet.
            let one_second = Duration::from_secs(1);
            let until_just_before = NTOR_KEY_LIFETIME - KEY_ROTATION_EXPIRE_BUFFER - one_second;
            runtime.advance_by(until_just_before).await;

            let (early, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();

            assert!(
                !early.ntor,
                "Ntor key MUST NOT rotate before the expiry buffer threshold"
            );
            assert_eq!(count_ntor_keys(&keymgr), 1, "expected one ntor key");

            // Step over the threshold; now the ntor key has to be rotated.
            runtime.advance_by(one_second).await;

            let (at_threshold, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();
            assert!(
                at_threshold.ntor,
                "ntor key should rotate inside the expiry buffer threshold"
            );

            // Old and new key coexist while the grace period lasts.
            assert_eq!(
                count_ntor_keys(&keymgr),
                2,
                "there should be 2 ntor keys in the grace period"
            );

            // Let the grace period run out.
            runtime.advance_by(NTOR_KEY_GRACE_PERIOD).await;

            let (after_grace, _) = try_rotate_keys(runtime.wallclock(), &keymgr).unwrap();
            assert!(
                after_grace.ntor,
                "ntor key should rotate after the grace period"
            );

            assert_eq!(
                count_ntor_keys(&keymgr),
                1,
                "the old ntor key should have been removed after the grace period"
            );
        });
    }
961
}