1
//! Declare an error type for tor-circmgr
2

            
3
use std::sync::Arc;
4

            
5
use futures::task::SpawnError;
6
use retry_error::RetryError;
7
use thiserror::Error;
8

            
9
use oneshot_fused_workaround as oneshot;
10
use tor_error::{Bug, ErrorKind, HasKind, HasRetryTime};
11
use tor_linkspec::{LoggedChanTarget, OwnedChanTarget};
12
use tor_proto::circuit::UniqId;
13
use web_time_compat::Instant;
14

            
15
use crate::mgr::RestrictionFailed;
16

            
17
/// An error returned while looking up or building a circuit
18
#[derive(Error, Debug, Clone)]
19
#[non_exhaustive]
20
pub enum Error {
21
    /// We started building a circuit on a guard, but later decided not
22
    /// to use that guard.
23
    #[error("Discarded circuit {} because of speculative guard selection", _0.display_chan_circ())]
24
    GuardNotUsable(UniqId),
25

            
26
    /// We were waiting on a pending circuit, but it failed to report
27
    #[error("Pending circuit(s) failed without reporting status")]
28
    PendingCanceled,
29

            
30
    /// We were waiting on a pending circuit, but it failed.
31
    #[error("Circuit we were waiting for failed to complete")]
32
    PendingFailed(#[source] Box<Error>),
33

            
34
    /// We were told that we could use a given circuit, but before we got a
35
    /// chance to try it, its usage changed so that we had no longer find
36
    /// it suitable.
37
    ///
38
    /// This is a version of `UsageMismatched` for when a race is the
39
    /// likeliest explanation for the mismatch.
40
    #[error("Circuit seemed suitable, but another request got it first")]
41
    LostUsabilityRace(#[source] RestrictionFailed),
42

            
43
    /// A circuit succeeded, but was cancelled before it could be used.
44
    ///
45
    /// Circuits can be cancelled either by a call to
46
    /// `retire_all_circuits()`, or by a configuration change that
47
    /// makes old paths unusable.
48
    //
49
    // TODO: ideally this would also include the circuit identifier (e.g. its UniqId).
50
    // However, this would mean making Error generic over Id,
51
    // (this variant is constructed in AbstractCircMgr::do_launch,
52
    // where the circuit ID is generic).
53
    #[error("Circuit canceled")]
54
    CircCanceled,
55

            
56
    /// We were told that we could use a circuit, but when we tried, we found
57
    /// that its usage did not support what we wanted.
58
    ///
59
    /// This can happen due to a race when a number of tasks all decide that
60
    /// they can use the same pending circuit at once: one of them will restrict
61
    /// the circuit, and the others will get this error.
62
    ///
63
    /// See `LostUsabilityRace`.
64
    #[error("Couldn't apply circuit restriction")]
65
    UsageMismatched(#[from] RestrictionFailed),
66

            
67
    /// A circuit build took too long to finish.
68
    #[error("Circuit{} took too long to build", OptUniqId(_0))]
69
    CircTimeout(Option<UniqId>),
70

            
71
    /// A request spent too long waiting for a circuit
72
    #[error("Spent too long trying to construct circuits for this request")]
73
    RequestTimeout,
74

            
75
    /// Unable to find a relay in order to build a given path type.
76
    #[error("Can't find {role} for {path_kind} circuit: {problem}")]
77
    NoRelay {
78
        /// The kind of path we were trying to build
79
        path_kind: &'static str,
80
        /// The kind of relay we were trying to pick
81
        role: &'static str,
82
        /// The problem we encountered
83
        problem: String,
84
    },
85

            
86
    /// Problem creating or updating a guard manager.
87
    #[error("Problem creating or updating guards list")]
88
    GuardMgr(#[source] tor_guardmgr::GuardMgrError),
89

            
90
    /// Problem selecting a guard relay.
91
    #[error("Unable to select a guard relay")]
92
    Guard(#[from] tor_guardmgr::PickGuardError),
93

            
94
    /// Problem creating a vanguard manager.
95
    #[cfg(all(feature = "vanguards", feature = "hs-common"))]
96
    #[error("Unable to create vanguard manager")]
97
    VanguardMgrInit(#[from] tor_guardmgr::vanguards::VanguardMgrError),
98

            
99
    /// Unable to get or build a circuit, despite retrying.
100
    #[error("{0}")]
101
    RequestFailed(RetryError<Box<Error>>),
102

            
103
    /// Problem with channel
104
    #[error("Problem opening a channel to {peer}")]
105
    Channel {
106
        /// Which relay we were trying to connect to
107
        peer: LoggedChanTarget,
108

            
109
        /// What went wrong
110
        #[source]
111
        cause: tor_chanmgr::Error,
112
    },
113

            
114
    /// Protocol issue while building a circuit.
115
    #[error(
116
        "Problem building circuit{}, while {}{}",
117
        OptUniqId(unique_id),
118
        action,
119
        WithOptPeer(peer)
120
    )]
121
    Protocol {
122
        /// The action that we were trying to take.
123
        action: &'static str,
124
        /// The peer that created the protocol error.
125
        ///
126
        /// This is set to None if we can't blame a single party.
127
        peer: Option<LoggedChanTarget>,
128
        /// The underlying error.
129
        #[source]
130
        error: tor_proto::Error,
131
        /// The UniqId of the circuit.
132
        unique_id: Option<UniqId>,
133
    },
134

            
135
    /// Unable to spawn task
136
    #[error("Unable to spawn {spawning}")]
137
    Spawn {
138
        /// What we were trying to spawn
139
        spawning: &'static str,
140
        /// What happened when we tried to spawn it.
141
        #[source]
142
        cause: Arc<SpawnError>,
143
    },
144

            
145
    /// Problem loading or storing persistent state.
146
    #[error("Problem loading or storing state")]
147
    State(#[from] tor_persist::Error),
148

            
149
    /// An error caused by a programming issue . or a failure in another
150
    /// library that we can't work around.
151
    #[error("Programming error")]
152
    Bug(#[from] Bug),
153
}
154

            
155
tor_error::define_asref_dyn_std_error!(Error);
156
tor_error::define_asref_dyn_std_error!(Box<Error>);
157

            
158
impl From<oneshot::Canceled> for Error {
    /// Map any oneshot cancellation onto [`Error::PendingCanceled`]; the
    /// cancellation value itself carries nothing we keep.
    fn from(_canceled: oneshot::Canceled) -> Self {
        Self::PendingCanceled
    }
}
163

            
164
impl From<tor_guardmgr::GuardMgrError> for Error {
165
    fn from(err: tor_guardmgr::GuardMgrError) -> Error {
166
        match err {
167
            tor_guardmgr::GuardMgrError::State(e) => Error::State(e),
168
            _ => Error::GuardMgr(err),
169
        }
170
    }
171
}
172

            
173
impl HasKind for Error {
174
8
    fn kind(&self) -> ErrorKind {
175
        use Error as E;
176
        use ErrorKind as EK;
177
8
        match self {
178
            E::Channel { cause, .. } => cause.kind(),
179
            E::Bug(e) => e.kind(),
180
8
            E::NoRelay { .. } => EK::NoPath,
181
            E::PendingCanceled => EK::ReactorShuttingDown,
182
            E::PendingFailed(e) => e.kind(),
183
            E::CircTimeout(_) => EK::TorNetworkTimeout,
184
            E::GuardNotUsable(_) => EK::TransientFailure,
185
            E::UsageMismatched(_) => EK::Internal,
186
            E::LostUsabilityRace(_) => EK::TransientFailure,
187
            E::RequestTimeout => EK::TorNetworkTimeout,
188
            E::RequestFailed(errs) => E::summarized_error_kind(errs.sources().map(AsRef::as_ref)),
189
            E::CircCanceled => EK::TransientFailure,
190
            E::Protocol { error, .. } => error.kind(),
191
            E::State(e) => e.kind(),
192
            E::GuardMgr(e) => e.kind(),
193
            E::Guard(e) => e.kind(),
194
            #[cfg(all(feature = "vanguards", feature = "hs-common"))]
195
            E::VanguardMgrInit(e) => e.kind(),
196
            E::Spawn { cause, .. } => cause.kind(),
197
        }
198
8
    }
199
}
200

            
201
impl HasRetryTime for Error {
202
96
    fn retry_time(&self) -> tor_error::RetryTime {
203
        use Error as E;
204
        use tor_error::RetryTime as RT;
205

            
206
        match self {
207
            // If we fail because of a timeout, there is no need to wait before trying again.
208
80
            E::CircTimeout(_) | E::RequestTimeout => RT::Immediate,
209

            
210
            // If a circuit that seemed usable was restricted before we got a
211
            // chance to try it, that's not our fault: we can try again
212
            // immediately.
213
            E::LostUsabilityRace(_) => RT::Immediate,
214

            
215
            // If we can't build a path for the usage at all, then retrying
216
            // won't help.
217
            //
218
            // TODO: In some rare cases, these errors can actually happen when
219
            // we have walked ourselves into a snag in our path selection.  See
220
            // additional "TODO" comments in exitpath.rs.
221
8
            E::NoRelay { .. } => RT::Never,
222

            
223
            // If we encounter UsageMismatched without first converting to
224
            // LostUsabilityRace, it reflects a real problem in our code.
225
            E::UsageMismatched(_) => RT::Never,
226

            
227
            // These don't reflect a real problem in the circuit building, but
228
            // rather mean that we were waiting for something that didn't pan out.
229
            // It's okay to try again after a short delay.
230
            E::GuardNotUsable(_) | E::PendingCanceled | E::CircCanceled | E::Protocol { .. } => {
231
                RT::AfterWaiting
232
            }
233

            
234
            // For Channel errors, we can mostly delegate the retry_time decision to
235
            // the inner error.
236
            //
237
            // (We have to handle UnusableTarget specially, since it just means
238
            // that we picked a guard or fallback we couldn't use.  A channel to
239
            // _that_ target will never succeed, but circuit operations using it
240
            // will do fine.)
241
            E::Channel {
242
                cause: tor_chanmgr::Error::UnusableTarget(_),
243
                ..
244
            } => RT::AfterWaiting,
245
            E::Channel { cause, .. } => cause.retry_time(),
246

            
247
            // These errors are safe to delegate.
248
            E::Guard(e) => e.retry_time(),
249
8
            E::PendingFailed(e) => e.retry_time(),
250

            
251
            // When we encounter a bunch of errors, choose the earliest.
252
            E::RequestFailed(errors) => {
253
                RT::earliest_approx(errors.sources().map(|err| err.retry_time()))
254
                    .unwrap_or(RT::Never)
255
            }
256

            
257
            #[cfg(all(feature = "vanguards", feature = "hs-common"))]
258
            E::VanguardMgrInit(_) => RT::Never,
259

            
260
            // These all indicate an internal error, or an error that shouldn't
261
            // be able to happen when we're building a circuit.
262
            E::Spawn { .. } | E::GuardMgr(_) | E::State(_) | E::Bug(_) => RT::Never,
263
        }
264
96
    }
265

            
266
88
    fn abs_retry_time<F>(&self, now: Instant, choose_delay: F) -> tor_error::AbsRetryTime
267
88
    where
268
88
        F: FnOnce() -> std::time::Duration,
269
    {
270
88
        match self {
271
            // We special-case this kind of problem, since we want to choose the
272
            // earliest valid retry time.
273
80
            Self::RequestFailed(errors) => tor_error::RetryTime::earliest_absolute(
274
80
                errors.sources().map(|err| err.retry_time()),
275
80
                now,
276
80
                choose_delay,
277
            )
278
80
            .unwrap_or(tor_error::AbsRetryTime::Never),
279

            
280
            // For everything else, we just delegate.
281
8
            _ => self.retry_time().absolute(now, choose_delay),
282
        }
283
88
    }
284
}
285

            
286
impl Error {
287
    /// Construct a new `Error` from a `SpawnError`.
288
    pub(crate) fn from_spawn(spawning: &'static str, err: SpawnError) -> Error {
289
        Error::Spawn {
290
            spawning,
291
            cause: Arc::new(err),
292
        }
293
    }
294

            
295
    /// Return an integer representing the relative severity of this error.
296
    ///
297
    /// Used to determine which error to use when determining the kind of a retry error.
298
    fn severity(&self) -> usize {
299
        use Error as E;
300
        match self {
301
            E::GuardNotUsable(_) | E::LostUsabilityRace(_) => 10,
302
            E::PendingCanceled => 20,
303
            E::CircCanceled => 20,
304
            E::CircTimeout(_) => 30,
305
            E::RequestTimeout => 30,
306
            E::NoRelay { .. } => 40,
307
            E::GuardMgr(_) => 40,
308
            E::Guard(_) => 40,
309
            #[cfg(all(feature = "vanguards", feature = "hs-common"))]
310
            E::VanguardMgrInit(_) => 40,
311
            E::RequestFailed(_) => 40,
312
            E::Channel { .. } => 40,
313
            E::Protocol { .. } => 45,
314
            E::Spawn { .. } => 90,
315
            E::State(_) => 90,
316
            E::UsageMismatched(_) => 90,
317
            E::Bug(_) => 100,
318
            E::PendingFailed(e) => e.severity(),
319
        }
320
    }
321

            
322
    /// Return true if this error should not count against our total number of
323
    /// failures.
324
    ///
325
    /// We count an error as an "internal reset" if it can happen in normal
326
    /// operation and doesn't indicate a real problem with building a circuit, so much as an externally generated "need to retry".
327
88
    pub(crate) fn is_internal_reset(&self) -> bool {
328
88
        match self {
329
            // This error is a reset because we expect it to happen while
330
            // we're picking guards; if it happens, it means that we now know a
331
            // good guard that we should have used instead.
332
            Error::GuardNotUsable(_) => true,
333
            // This error is a reset because it can only happen on the basis
334
            // of a caller action (for example, a decision to reconfigure the
335
            // `CircMgr`). If it happens, it just means that we should try again
336
            // with the new configuration.
337
            Error::CircCanceled => true,
338
            // This error is a reset because it doesn't indicate anything wrong
339
            // with the circuit: it just means that multiple requests all wanted
340
            // to use the circuit at once, and they turned out not to be
341
            // compatible with one another after the circuit was built.
342
            Error::LostUsabilityRace(_) => true,
343
            #[cfg(all(feature = "vanguards", feature = "hs-common"))]
344
            Error::VanguardMgrInit(_) => false,
345
            Error::PendingCanceled
346
            | Error::PendingFailed(_)
347
            | Error::UsageMismatched(_)
348
            | Error::CircTimeout(_)
349
            | Error::RequestTimeout
350
            | Error::NoRelay { .. }
351
            | Error::GuardMgr(_)
352
            | Error::Guard(_)
353
            | Error::RequestFailed(_)
354
            | Error::Channel { .. }
355
            | Error::Protocol { .. }
356
            | Error::Spawn { .. }
357
            | Error::State(_)
358
88
            | Error::Bug(_) => false,
359
        }
360
88
    }
361

            
362
    /// Return a list of the peers to "blame" for this error, if there are any.
363
    pub fn peers(&self) -> Vec<&OwnedChanTarget> {
364
        match self {
365
            Error::RequestFailed(errors) => errors.sources().flat_map(|e| e.peers()).collect(),
366
            Error::Channel { peer, .. } => vec![peer.as_inner()],
367
            Error::Protocol {
368
                peer: Some(peer), ..
369
            } => vec![peer.as_inner()],
370
            _ => vec![],
371
        }
372
    }
373

            
374
    /// Given an iterator of errors that have occurred while attempting a single
375
    /// failed operation, return the [`ErrorKind`] for the entire attempt.
376
    pub fn summarized_error_kind<'a, I>(errs: I) -> ErrorKind
377
    where
378
        I: Iterator<Item = &'a Error>,
379
    {
380
        errs.max_by_key(|e| e.severity())
381
            .map(|e| e.kind())
382
            .unwrap_or(ErrorKind::Internal)
383
    }
384
}
385

            
386
/// A failure to build any preemptive circuits, with at least one error
/// condition.
///
/// This is a separate type since we never report it outside the crate.
#[derive(Debug)]
pub(crate) struct PreemptiveCircError;
392

            
393
/// Helper to display an optional peer, prefixed with the string " with".
///
/// Formats as `" with {peer}"` when a peer is present and as the empty string
/// when it is absent, so it can be appended directly to a message.
struct WithOptPeer<'a, T>(&'a Option<T>);

impl<T> std::fmt::Display for WithOptPeer<'_, T>
where
    T: std::fmt::Display,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.0 {
            Some(peer) => write!(f, " with {peer}"),
            // Nothing to blame: contribute no text at all.
            None => Ok(()),
        }
    }
}
408

            
409
/// Helper to display an optional UniqId.
410
struct OptUniqId<'a>(&'a Option<UniqId>);
411

            
412
impl<'a> std::fmt::Display for OptUniqId<'a> {
413
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
414
        if let Some(unique_id) = self.0 {
415
            write!(f, " {}", unique_id.display_chan_circ())
416
        } else {
417
            write!(f, "")
418
        }
419
    }
420
}