1
//! "Slugs" used as part of on-disk filenames and other similar purposes
2
//!
3
//! Arti uses "slugs" as parts of filenames in many places.
4
//! Slugs are fixed or variable strings which either
5
//! designate the kind of a thing, or which of various things this is.
6
//!
7
//! Slugs have a restricted character set:
8
//! Lowercase ASCII alphanumerics, underscore, hyphen.
9
//! We may extend this to allow additional characters in the future,
10
//! but /, +, and . (the slug separators) will never be valid slug characters.
11
//! Additionally, : will never be a valid slug character,
12
//! because Windows does not allow colons in filenames[^1].
13
//!
14
//! Slugs may not be empty, and they may not start with a hyphen.
15
//!
16
//! Slugs can be concatenated to build file names.
17
//! When concatenating slugs to make filenames,
18
//! they should be separated using `/`, `+`, or `.`
19
//! ([`SLUG_SEPARATOR_CHARS`]).
20
//! Slugs should not be concatenated without separators (for security reasons).
21
//!
22
//! On Windows only, the following slugs are forbidden,
23
//! because of [absurd Windows filename behaviours](https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file):
24
//! `con` `prn` `aux` `nul`
25
//! `com1` `com2` `com3` `com4` `com5` `com6` `com7` `com8` `com9` `com0`
26
//! `lpt1` `lpt2` `lpt3` `lpt4` `lpt5` `lpt6` `lpt7` `lpt8` `lpt9` `lpt0`.
27
//!
28
//! [^1]: <https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions>
29

            
30
pub mod timestamp;
31

            
32
use std::borrow::Borrow;
33
use std::ffi::OsStr;
34
use std::fmt::{self, Display};
35
use std::mem;
36
use std::ops::Deref;
37
use std::path::Path;
38

            
39
use paste::paste;
40
use serde::{Deserialize, Serialize};
41
use thiserror::Error;
42

            
43
#[cfg(target_family = "windows")]
44
pub use os::ForbiddenOnWindows;
45

            
46
/// An owned slug, checked for syntax
47
///
48
/// The syntax check can be relied on for safety/soundness.
49
// We adopt this rule so that eventually we could have AsRef<[std::ascii::Char]>, etc.
50
#[derive(Debug, Clone, Serialize, Deserialize)] //
51
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash)] //
52
#[derive(derive_more::Display)]
53
#[serde(try_from = "String", into = "String")]
54
// Box<str> since we don't expect to change the size; that makes it 2 words rather than 3
55
// (But our public APIs are in terms of String.)
56
pub struct Slug(Box<str>);
57

            
58
/// A borrwed slug, checked for syntax
59
///
60
/// The syntax check can be relied on for safety/soundness.
61
#[derive(Debug, Serialize)] //
62
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash)] //
63
#[derive(derive_more::Display)]
64
#[serde(transparent)]
65
#[repr(transparent)] // SAFETY: this attribute is needed for unsafe in new_unchecked
66
pub struct SlugRef(str);
67

            
68
/// Characters which are good to use to separate slugs
///
/// Guaranteed to never overlap with the valid slug character set:
/// the module documentation promises that `/`, `+` and `.`
/// will never become valid slug characters.
///
/// We might expand this set, but not ever reduce it.
pub const SLUG_SEPARATOR_CHARS: &str = "/+.";
74

            
75
/// Error for an invalid slug
76
#[derive(Error, Debug, Clone, Eq, PartialEq, Hash)]
77
#[non_exhaustive]
78
pub enum BadSlug {
79
    /// Slug contains a forbidden character
80
    BadCharacter(char),
81
    /// Slug starts with a disallowed character
82
    BadFirstCharacter(char),
83
    /// An empty slug was supplied where a nonempty one is required
84
    EmptySlugNotAllowed,
85
    /// We are on Windows and the slug is one of the forbidden ones
86
    ///
87
    /// On platforms other than Windows, this variant is absent.
88
    #[cfg(target_family = "windows")]
89
    ForbiddenOnWindows(ForbiddenOnWindows),
90
}
91

            
92
/// Types which can perhaps be used as a slug
93
///
94
/// This is a trait implemented by `str`, `std::fmt::Arguments`,
95
/// and other implementors of `ToString`, for the convenience of call sites:
96
/// APIs can have functions taking an `&(impl TryIntoSlug + ?Sized)` or `&dyn TryIntoSlug`
97
/// and callers then don't need error-handling boilerplate.
98
///
99
/// Functions that take a `TryIntoSlug` will need to do a runtime syntax check.
100
pub trait TryIntoSlug {
101
    /// Convert `self` into a `Slug`, if it has the right syntax
102
    fn try_into_slug(&self) -> Result<Slug, BadSlug>;
103
}
104

            
105
impl<T: ToString + ?Sized> TryIntoSlug for T {
106
126
    fn try_into_slug(&self) -> Result<Slug, BadSlug> {
107
126
        self.to_string().try_into()
108
126
    }
109
}
110

            
111
impl Slug {
112
    /// Make a Slug out of an owned `String`, if it has the correct syntax
113
118064
    pub fn new(s: String) -> Result<Slug, BadSlug> {
114
        Ok(unsafe {
115
            // SAFETY: we check, and then call new_unchecked
116
118064
            check_syntax(&s)?;
117
117983
            Slug::new_unchecked(s)
118
        })
119
118064
    }
120

            
121
    /// Make a Slug out of an owned `String`, without checking the syntax
122
    ///
123
    /// # Safety
124
    ///
125
    /// It's the caller's responsibility to check the syntax of the input string.
126
118285
    pub unsafe fn new_unchecked(s: String) -> Slug {
127
118285
        Slug(s.into())
128
118285
    }
129
}
130

            
131
impl SlugRef {
132
    /// Make a SlugRef out of a `str`, if it has the correct syntax
133
2904
    pub fn new(s: &str) -> Result<&SlugRef, BadSlug> {
134
        Ok(unsafe {
135
            // SAFETY: we check, and then call new_unchecked
136
2904
            check_syntax(s)?;
137
2878
            SlugRef::new_unchecked(s)
138
        })
139
2904
    }
140

            
141
    /// Make a SlugRef out of a `str`, without checking the syntax
142
    ///
143
    /// # Safety
144
    ///
145
    /// It's the caller's responsibility to check the syntax of the input string.
146
103417
    pub unsafe fn new_unchecked<'s>(s: &'s str) -> &'s SlugRef {
147
        unsafe {
148
            // SAFETY
149
            // SlugRef is repr(transparent).  So the alignment and memory layout
150
            // are the same, and the pointer metadata is the same too.
151
            // The lifetimes is correct by construction.
152
            //
153
            // We do this, rather than `struct SlugRef<'r>(&'r str)`,
154
            // because that way we couldn't impl Deref.
155
103417
            mem::transmute::<&'s str, &'s SlugRef>(s)
156
        }
157
103417
    }
158

            
159
    /// Make an owned `Slug`
160
302
    fn to_slug(&self) -> Slug {
161
        unsafe {
162
            // SAFETY: self is a SlugRef so our syntax is right
163
302
            Slug::new_unchecked(self.0.into())
164
        }
165
302
    }
166
}
167

            
168
impl TryFrom<String> for Slug {
169
    type Error = BadSlug;
170
65751
    fn try_from(s: String) -> Result<Slug, BadSlug> {
171
65751
        Slug::new(s)
172
65751
    }
173
}
174

            
175
impl From<Slug> for String {
176
55
    fn from(s: Slug) -> String {
177
55
        s.0.into()
178
55
    }
179
}
180

            
181
impl<'s> TryFrom<&'s str> for &'s SlugRef {
182
    type Error = BadSlug;
183
    fn try_from(s: &'s str) -> Result<&'s SlugRef, BadSlug> {
184
        SlugRef::new(s)
185
    }
186
}
187

            
188
impl Deref for Slug {
189
    type Target = SlugRef;
190
100539
    fn deref(&self) -> &SlugRef {
191
        unsafe {
192
            // SAFETY: self is a Slug so our syntax is right
193
100539
            SlugRef::new_unchecked(&self.0)
194
        }
195
100539
    }
196
}
197

            
198
impl Borrow<SlugRef> for Slug {
199
480
    fn borrow(&self) -> &SlugRef {
200
480
        self
201
480
    }
202
}
203
impl Borrow<str> for Slug {
204
    fn borrow(&self) -> &str {
205
        self.as_ref()
206
    }
207
}
208

            
209
impl ToOwned for SlugRef {
210
    type Owned = Slug;
211
302
    fn to_owned(&self) -> Slug {
212
302
        self.to_slug()
213
302
    }
214
}
215

            
216
/// Implement an inherent `fn as_...(&self) -> &...` on `SlugRef`, plus `AsRef`
///
/// The inherent method is named after the snake-cased type name,
/// and simply forwards to the `AsRef` impl that `impl_as_ref!` generates.
macro_rules! impl_as_with_inherent {
    { $ty:ident } => {
        paste! {
            impl SlugRef {
                #[doc = concat!("Obtain this slug as a `", stringify!($ty), "`")]
                pub fn [<as_ $ty:snake>](&self) -> &$ty {
                    AsRef::<$ty>::as_ref(self)
                }
            }
            impl_as_ref!($ty);
        }
    };
}
226
/// Implement `AsRef<$ty>` for both `SlugRef` and `Slug`
///
/// The `SlugRef` impl converts the underlying `str`;
/// the `Slug` impl goes via the `SlugRef` one.
macro_rules! impl_as_ref {
    { $ty:ty } => {
        impl AsRef<$ty> for SlugRef {
            fn as_ref(&self) -> &$ty {
                AsRef::<$ty>::as_ref(&self.0)
            }
        }
        impl AsRef<$ty> for Slug {
            fn as_ref(&self) -> &$ty {
                <SlugRef as AsRef<$ty>>::as_ref(self)
            }
        }
    };
}
239

            
240
// `str` and `Path` get an inherent `SlugRef::as_str` / `SlugRef::as_path` accessor
// as well as the `AsRef` impls; `OsStr` and `[u8]` get the `AsRef` impls only.
impl_as_with_inherent!(str);
impl_as_with_inherent!(Path);
impl_as_ref!(OsStr);
impl_as_ref!([u8]);
244

            
245
/// Check the string `s` to see if it would be valid as a slug
246
///
247
/// This is a low-level method for special cases.
248
/// Usually, use [`Slug::new`] etc.
249
//
250
// SAFETY
251
// This function checks the syntax, and is relied on by unsafe code
252
#[allow(clippy::if_same_then_else)] // clippy objects to the repeated Ok(())
253
323799
pub fn check_syntax(s: &str) -> Result<(), BadSlug> {
254
323799
    if s.is_empty() {
255
328
        return Err(BadSlug::EmptySlugNotAllowed);
256
323471
    }
257

            
258
    // Slugs are not allowed to start with a hyphen.
259
323471
    if s.starts_with('-') {
260
481
        return Err(BadSlug::BadFirstCharacter('-'));
261
322990
    }
262

            
263
    // check legal character set
264
3883490
    for c in s.chars() {
265
3883490
        if c.is_ascii_lowercase() {
266
2255461
            Ok(())
267
1628029
        } else if c.is_ascii_digit() {
268
1372895
            Ok(())
269
255134
        } else if c == '_' || c == '-' {
270
254087
            Ok(())
271
        } else {
272
1047
            Err(BadSlug::BadCharacter(c))
273
1047
        }?;
274
    }
275

            
276
321943
    os::check_forbidden(s)?;
277

            
278
321943
    Ok(())
279
323799
}
280

            
281
impl Display for BadSlug {
282
65
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
283
65
        match self {
284
61
            BadSlug::BadCharacter(c) => {
285
61
                let num = u32::from(*c);
286
61
                write!(f, "character {c:?} (U+{num:04X}) is not allowed")
287
            }
288
2
            BadSlug::BadFirstCharacter(c) => {
289
2
                let num = u32::from(*c);
290
2
                write!(
291
2
                    f,
292
2
                    "character {c:?} (U+{num:04X}) is not allowed as the first character"
293
                )
294
            }
295
            BadSlug::EmptySlugNotAllowed => {
296
2
                write!(f, "empty identifier (empty slug) not allowed")
297
            }
298
            #[cfg(target_family = "windows")]
299
            BadSlug::ForbiddenOnWindows(e) => os::fmt_error(e, f),
300
        }
301
65
    }
302
}
303

            
304
/// Forbidden slug support for Windows
#[cfg(target_family = "windows")]
mod os {
    use super::*;

    /// The forbidden name carried by an invalid-slug error on Windows
    ///
    /// This type is available only on Windows platforms.
    //
    // A reference to a reference, so that BadSlug needs to hold only one word, not two
    pub type ForbiddenOnWindows = &'static &'static str;

    /// The forbidden slugs - windows thinks `C:\Program Files\lpt0.json` is a printer.
    const FORBIDDEN: &[&str] = &[
        "con", "prn", "aux", "nul", //
        "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9", "com0", //
        "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "lpt0",
    ];

    /// Reject `s` if it is exactly one of the [`FORBIDDEN`] names
    pub(super) fn check_forbidden(s: &str) -> Result<(), BadSlug> {
        match FORBIDDEN.iter().find(|bad| **bad == s) {
            Some(bad) => Err(BadSlug::ForbiddenOnWindows(bad)),
            None => Ok(()),
        }
    }

    /// Write the message for a forbidden-slug error
    pub(super) fn fmt_error(s: &ForbiddenOnWindows, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "slug (name) {s:?} is not allowed on Windows")
    }
}
338
/// Forbidden slug support for non-Windows
339
#[cfg(not(target_family = "windows"))]
340
mod os {
341
    use super::*;
342

            
343
    /// Check whether this slug is forbidden here
344
    #[allow(clippy::unnecessary_wraps)]
345
321943
    pub(super) fn check_forbidden(_s: &str) -> Result<(), BadSlug> {
346
321943
        Ok(())
347
321943
    }
348
}
349

            
350
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_time_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->

    use super::*;
    use itertools::chain;

    /// Separators and a few other illegal characters are rejected in mid-slug,
    /// with the same error from the borrowed and the owned constructor.
    #[test]
    fn bad() {
        let illegal = chain!(SLUG_SEPARATOR_CHARS.chars(), ['\\', ' ', '\n', '\0']);
        for c in illegal {
            let candidate = format!("x{c}y");
            let borrowed_err = SlugRef::new(&candidate).unwrap_err();
            assert_eq!(borrowed_err, BadSlug::BadCharacter(c));
            let owned_err = Slug::new(candidate).unwrap_err();
            assert_eq!(borrowed_err, owned_err);
        }
    }

    /// Every legal character is accepted, alone and all together,
    /// and accepted slugs display as their original text.
    #[test]
    fn good() {
        let legal = chain!(b'a'..=b'z', b'0'..=b'9', [b'_']).map(char::from);

        let assert_accepted = |text: String| {
            let borrowed = SlugRef::new(&text).unwrap();
            let owned = Slug::new(text.clone()).unwrap();
            assert_eq!(borrowed.to_string(), text);
            assert_eq!(owned.to_string(), text);
        };

        assert_accepted(legal.clone().collect());

        for ch in legal {
            assert_accepted(ch.to_string());
        }

        // Hyphens are allowed, but not as the first character
        assert_accepted("a-".into());
        assert_accepted("a-b".into());
    }

    /// The messages for bad characters show the character and its code point.
    #[test]
    fn badchar_msg() {
        let cases = [
            (".", "character '.' (U+002E) is not allowed"),
            ("\0", "character '\\0' (U+0000) is not allowed"),
            (
                "\u{12345}",
                "character '\u{12345}' (U+12345) is not allowed",
            ),
            (
                "-",
                "character '-' (U+002D) is not allowed as the first character",
            ),
            ("A", "character 'A' (U+0041) is not allowed"),
        ];

        for (input, expected) in cases {
            let message = SlugRef::new(input).unwrap_err().to_string();
            assert_eq!(message, expected);
        }
    }

    /// Device names are rejected on Windows and accepted everywhere else.
    #[test]
    fn windows_forbidden() {
        let on_windows = cfg!(target_family = "windows");
        for s in ["con", "prn", "lpt0"] {
            let outcome = SlugRef::new(s);
            if on_windows {
                assert_eq!(
                    outcome.unwrap_err().to_string(),
                    format!("slug (name) \"{s}\" is not allowed on Windows"),
                );
            } else {
                assert_eq!(outcome.unwrap().as_str(), s);
            }
        }
    }

    /// The empty string is rejected with its own message.
    #[test]
    fn empty_slug() {
        let message = SlugRef::new("").unwrap_err().to_string();
        assert_eq!(message, "empty identifier (empty slug) not allowed");
    }
}