1
//! Lexing of netdoc elements
2

            
3
use super::*;
4

            
5
/// Linear whitespace as defined by torspec
///
/// Space and horizontal tab.
/// In this file it is used directly as a `str` pattern,
/// with `split_once` and `trim_start_matches`.
6
// Only pub via internal_prelude, for benefit of macros
7
pub const WS: &[char] = &[' ', '\t'];
8

            
9
define_derive_deftly! {
10
    /// Define `parse_options` accessor
11
    ///
12
    /// The driver must have a lifetime named `'s`, which is suitable for the returned
13
    /// `&'s ParseOptions`.
14
    ///
15
    /// # Top-level attributes:
16
    ///
17
    ///  * **`#[deftly(parse_options(field = ".field.field"))]`**, default `.options`
    ///
    ///    The value is pasted, as tokens, directly after `&self`,
    ///    so it must start with a `.` and name the field holding the options.
18
    ParseOptions beta_deftly, expect items:
19

            
20
    impl<$tgens> $ttype {
21
        /// Examine the parsing options
        ///
        /// Returns the `ParseOptions` reference found at the configured field path.
22
298
        pub fn parse_options(&self) -> &'s ParseOptions {
23
            &self
24
                ${tmeta(parse_options(field))
25
                  as token_stream,
26
                  default { .options }}
27
        }
28
    }
29
}
30

            
31
/// Top-level reader: Netdoc text interpreted as a stream of items
///
/// Items are obtained with `next_item` (or via the `Iterator` impl);
/// `peek_keyword` looks at the next item's keyword without consuming the item.
32
#[derive(Debug, Clone, Deftly)]
33
#[derive_deftly(ParseOptions)]
34
pub struct ItemStream<'s> {
35
    /// The whole input document.
    ///
    /// `byte_position` is an offset into this string.
36
    whole_input: &'s str,
37
    /// Remaining document, as a stream of lines
38
    lines: Lines<'s>,
39
    /// If we have peeked ahead, what we discovered
40
    peeked: PeekState<'s>,
41
    /// Parsing options.
    ///
    /// Handed on to every `ArgumentStream` and `UnparsedObject` we produce.
42
    options: &'s ParseOptions,
43
}
44

            
45
/// Whether an `ItemStream` has peeked ahead, and if so what it discovered
46
#[derive(Debug, Clone)]
47
enum PeekState<'s> {
48
    /// We've peeked a line, and its first word was accepted as a keyword
49
    Some(ItemStreamPeeked<'s>),
50
    /// We've not peeked, or peeking gave `None` (there was no further line)
51
    None {
52
        /// Line number of the last item we yielded.
53
        ///
54
        /// `0` at the start.
        ///
        /// Reported by `lno_for_error` when nothing is currently peeked.
55
        yielded_item_lno: usize,
56
    },
57
}
58

            
59
/// If an `ItemStream` has peeked ahead, what it discovered
60
#[derive(Debug, Clone)]
61
struct ItemStreamPeeked<'s> {
62
    /// The next keyword
63
    keyword: KeywordRef<'s>,
64
    /// Token obtained from `Lines::peek` when we peeked this line
    ///
    /// Handed back to `Lines::peeked_line` to look at the line's text again,
    /// and to `Lines::consume_peeked` when the item is actually yielded.
65
    line: lines::Peeked,
66
    /// Length of the suffix of the line that is the arguments rather than the keyword
67
    ///
68
    /// Does not include the first whitespace, that terminated the keyword.
    ///
    /// `0` if the line consists of the keyword alone.
69
    args_len: usize,
70
}
71

            
72
/// An Item that has been lexed but not parsed
///
/// Consists of the keyword, the not-yet-parsed arguments from the keyword line,
/// and the Object (if any) that followed that line.
73
#[derive(Debug, Clone, amplify::Getters, Deftly)]
74
#[derive_deftly(ParseOptions)]
75
#[deftly(parse_options(field = ".args.options"))]
76
pub struct UnparsedItem<'s> {
77
    /// The item's Keyword
78
    #[getter(as_copy)]
79
    keyword: KeywordRef<'s>,
80
    /// The Item's Arguments
    // No derived getter: access is via the hand-written
    // `args`, `args_mut` and `args_copy` methods.
81
    #[getter(skip)]
82
    args: ArgumentStream<'s>,
83
    /// The Item's Object, if there was one
84
    #[getter(as_clone)]
85
    object: Option<UnparsedObject<'s>>,
86
}
87

            
88
/// Reader for arguments on an Item
89
///
90
/// Represents the (remaining) arguments.
///
/// Iterating yields one whitespace-separated argument at a time.
91
#[derive(Debug, Clone, Deftly)]
92
#[derive_deftly(ParseOptions)]
93
pub struct ArgumentStream<'s> {
94
    /// The remaining unparsed arguments
95
    ///
96
    /// Can start with WS, which is usually trimmed
97
    rest: &'s str,
98

            
99
    /// Original line length
100
    ///
101
    /// Used for reporting column of argument errors.
102
    whole_line_len: usize,
103

            
104
    /// Remaining length *before* we last yielded.
    ///
    /// Measured after leading whitespace was trimmed,
    /// so it corresponds to the first character of the argument that was then yielded.
    /// Starts out equal to `whole_line_len`.
105
    previous_rest_len: usize,
106

            
107
    /// Parsing options.
108
    options: &'s ParseOptions,
109
}
110

            
111
/// An Object that has been lexed but not parsed
///
/// The delimiter lines have been checked and the label extracted,
/// but the base64 data has not been decoded; see `decode_data`.
112
#[derive(Debug, Clone, amplify::Getters, Deftly)]
113
#[derive_deftly(ParseOptions)]
114
pub struct UnparsedObject<'s> {
115
    /// The Label
    ///
    /// The header and footer delimiter lines both carried this same label.
116
    #[getter(as_copy)]
117
    label: &'s str,
118

            
119
    /// The portion of the input document which is base64 data (and newlines)
    // No derived getter: callers obtain the decoded bytes via `decode_data`.
120
    #[getter(skip)]
121
    data_b64: &'s str,
122

            
123
    /// Parsing options.
124
    options: &'s ParseOptions,
125
}
126

            
127
impl<'s> ItemStream<'s> {
128
    /// Start reading a network document as a series of Items
129
329
    pub fn new(input: &'s ParseInput<'s>) -> Result<Self, ParseError> {
130
329
        Ok(ItemStream {
131
329
            whole_input: input.input,
132
329
            lines: Lines::new(input.input),
133
329
            peeked: PeekState::None {
134
329
                yielded_item_lno: 0,
135
329
            },
136
329
            options: &input.options,
137
329
        })
138
329
    }
139

            
140
    /// Line number for reporting an error we have just discovered
141
    ///
142
    /// If we have recent peeked, we report the line number of the peeked keyword line.
143
    ///
144
    /// Otherwise, we report the line number of the most-recently yielded item.
145
119
    pub fn lno_for_error(&self) -> usize {
146
119
        match self.peeked {
147
            PeekState::Some { .. } => {
148
                // The error was presumably caused by whatever was seen in the peek.
149
                // That's the current line number.
150
14
                self.lines.peek_lno()
151
            }
152
105
            PeekState::None { yielded_item_lno } => {
153
                // The error was presumably caused by the results of next_item().
154
105
                yielded_item_lno
155
            }
156
        }
157
119
    }
158

            
159
    /// Core of peeking.  Tries to make `.peeked` be `Some`.
160
5192
    fn peek_internal<'i>(&'i mut self) -> Result<(), EP> {
161
5192
        if matches!(self.peeked, PeekState::None { .. }) {
162
2693
            let Some(peeked) = self.lines.peek() else {
163
404
                return Ok(());
164
            };
165

            
166
2289
            let peeked_line = self.lines.peeked_line(&peeked);
167

            
168
2289
            let (keyword, args) = peeked_line.split_once(WS).unwrap_or((peeked_line, ""));
169
2289
            let keyword = KeywordRef::new(keyword)?;
170

            
171
2289
            self.peeked = PeekState::Some(ItemStreamPeeked {
172
2289
                keyword,
173
2289
                line: peeked,
174
2289
                args_len: args.len(),
175
2289
            });
176
2499
        }
177

            
178
4788
        Ok(())
179
5192
    }
180

            
181
    /// Peek the next keyword
182
2753
    pub fn peek_keyword(&mut self) -> Result<Option<KeywordRef<'s>>, EP> {
183
2753
        self.peek_internal()?;
184
2753
        let PeekState::Some(peeked) = &self.peeked else {
185
396
            return Ok(None);
186
        };
187
2357
        Ok(Some(peeked.keyword))
188
2753
    }
189

            
190
    /// Obtain the body so far, suitable for hashing for a Regular signature
191
128
    pub fn body_sofar_for_signature(&self) -> SignedDocumentBody<'s> {
192
128
        let body = &self.whole_input[0..self.byte_position()];
193
128
        SignedDocumentBody { body }
194
128
    }
195

            
196
    /// Byte position, pointing to the start of the next item to yield
197
    ///
198
    /// Offset in bytes from the start of the original input string
199
    /// to the "current" position,
200
    /// ie to just after the item we yielded and just before the next item (or EOF).
201
284
    pub fn byte_position(&self) -> usize {
202
284
        self.whole_input.len() - self.lines.remaining().len()
203
284
    }
204

            
205
    /// Access for the entire input string
206
    ///
207
    /// The original `input: &str` argument to [`ParseInput::new`].
208
    ///
209
    /// Includes both yielded and unyielded items.
210
6
    pub fn whole_input(&self) -> &'s str {
211
6
        self.whole_input
212
6
    }
213

            
214
    /// Parse a (sub-)document with its own signatures
215
171
    pub fn parse_signed<
216
171
        B: NetdocParseable,
217
171
        S: NetdocParseable,
218
171
        O: NetdocSigned<Body = B, Signatures = S>,
219
171
    >(
220
171
        &mut self,
221
171
        outer_stop: stop_at!(),
222
171
    ) -> Result<O, EP> {
223
171
        let mut input = ItemStream {
224
171
            whole_input: &self.whole_input[self.whole_input.len() - self.lines.remaining().len()..],
225
171
            ..self.clone()
226
171
        };
227
171
        let r = (|| {
228
171
            let inner_always_stop = outer_stop | StopAt::doc_intro::<B>();
229
171
            let body = B::from_items(&mut input, inner_always_stop | StopAt::doc_intro::<S>())?;
230
122
            let signatures = S::from_items(&mut input, inner_always_stop)?;
231
122
            let signed = O::from_parts(body, signatures);
232
122
            Ok(signed)
233
        })(); // don't exit here
234

            
235
171
        *self = ItemStream {
236
171
            whole_input: self.whole_input,
237
171
            ..input
238
171
        };
239

            
240
171
        r
241
171
    }
242

            
243
    /// Obtain the inputs that would be needed to hash any (even Irregular) signature
244
    ///
245
    /// These are the hash inputs which would be needed for the next item,
246
    /// assuming it's a signature keyword.
247
140
    pub fn peek_signature_hash_inputs(
248
140
        &mut self,
249
140
        body: SignedDocumentBody<'s>,
250
140
    ) -> Result<Option<SignatureHashInputs<'s>>, EP> {
251
140
        self.peek_internal()?;
252
140
        let PeekState::Some(peeked) = &self.peeked else {
253
            return Ok(None);
254
        };
255
140
        let signature_item_line = self.lines.peeked_line(&peeked.line);
256
140
        let signature_item_kw_spc = signature_item_line.strip_end_counted(peeked.args_len);
257
140
        Ok(Some(SignatureHashInputs {
258
140
            body,
259
140
            signature_item_kw_spc,
260
140
            signature_item_line,
261
140
        }))
262
140
    }
263

            
264
    /// Yield the next item.
265
2299
    pub fn next_item(&mut self) -> Result<Option<UnparsedItem<'s>>, EP> {
266
2299
        self.peek_internal()?;
267
2299
        let peeked = match self.peeked {
268
8
            PeekState::None { .. } => return Ok(None),
269
2291
            PeekState::Some { .. } => match mem::replace(
270
2291
                &mut self.peeked,
271
2291
                PeekState::None {
272
2291
                    yielded_item_lno: self.lines.peek_lno(),
273
2291
                },
274
2291
            ) {
275
2291
                PeekState::Some(peeked) => peeked,
276
                PeekState::None { .. } => panic!("it was Some just now"),
277
            },
278
        };
279

            
280
2291
        let keyword = peeked.keyword;
281
2291
        let line = self.lines.consume_peeked(peeked.line);
282
2291
        let args = &line[keyword.len()..];
283
2291
        let options = self.options;
284
2291
        let args = ArgumentStream::new(args, line.len(), options);
285

            
286
2291
        let object = if self.lines.remaining().starts_with('-') {
287
1108
            fn pem_delimiter<'s>(lines: &mut Lines<'s>, start: &str) -> Result<&'s str, EP> {
288
1108
                let line = lines.next().ok_or(
289
                    // If this is the *header*, we already know there's a line,
290
                    // so this error path is only for footers.
291
1108
                    EP::ObjectMissingFooter,
292
                )?;
293
1108
                let label = line
294
1108
                    .strip_prefix(start)
295
1108
                    .ok_or(EP::InvalidObjectDelimiters)?
296
1108
                    .strip_suffix(PEM_AFTER_LABEL)
297
1108
                    .ok_or(EP::InvalidObjectDelimiters)?;
298
1106
                Ok(label)
299
1108
            }
300

            
301
554
            let label1 = pem_delimiter(&mut self.lines, PEM_HEADER_START)?;
302
554
            let base64_start_remaining = self.lines.remaining();
303
4280
            while !self.lines.remaining().starts_with('-') {
304
3726
                let _: &str = self.lines.next().ok_or(EP::ObjectMissingFooter)?;
305
            }
306
554
            let data_b64 = base64_start_remaining.strip_end_counted(self.lines.remaining().len());
307
554
            let label2 = pem_delimiter(&mut self.lines, PEM_FOOTER_START)?;
308
552
            let label = [label1, label2]
309
552
                .into_iter()
310
552
                .all_equal_value()
311
552
                .map_err(|_| EP::ObjectMismatchedLabels)?;
312
550
            Some(UnparsedObject {
313
550
                label,
314
550
                data_b64,
315
550
                options,
316
550
            })
317
        } else {
318
1737
            None
319
        };
320

            
321
2287
        Ok(Some(UnparsedItem {
322
2287
            keyword,
323
2287
            args,
324
2287
            object,
325
2287
        }))
326
2299
    }
327
}
328

            
329
impl<'s> UnparsedItem<'s> {
330
    /// Access the arguments, mutably (for consuming and parsing them)
331
1608
    pub fn args_mut(&mut self) -> &mut ArgumentStream<'s> {
332
1608
        &mut self.args
333
1608
    }
334
    /// Access a copy of the arguments
335
    ///
336
    /// When using this, be careful not to process any arguments twice.
337
108
    pub fn args_copy(&self) -> ArgumentStream<'s> {
338
108
        self.args.clone()
339
108
    }
340

            
341
    /// Access the arguments (readonly)
342
    ///
343
    /// When using this, be careful not to process any arguments twice.
344
1298
    pub fn args(&self) -> &ArgumentStream<'s> {
345
1298
        &self.args
346
1298
    }
347

            
348
    /// Check that this item has no Object.
349
1056
    pub fn check_no_object(&self) -> Result<(), EP> {
350
1056
        if self.object.is_some() {
351
            return Err(EP::ObjectUnexpected);
352
1056
        }
353
1056
        Ok(())
354
1056
    }
355
    /// Convenience method for handling an error parsing an arguemnt
356
    ///
357
    /// Returns a closure that converts every error into [`ArgumentError::Invalid`]
358
    /// and then to an [`ErrorProblem`] using
359
    /// [`.args().handle_error()`](ArgumentStream::handle_error).
360
    ///
361
    /// Useful in manual `ItemValueParseable` impls, when parsing arguments ad-hoc.
362
426
    pub fn invalid_argument_handler<E>(
363
426
        &self,
364
426
        field: &'static str,
365
426
    ) -> impl FnOnce(E) -> ErrorProblem {
366
426
        let error = self.args().handle_error(field, AE::Invalid);
367
        move |_any_error| error
368
426
    }
369
}
370

            
371
/// End of an argument list that does not accept any further (unknown) arguments
372
///
373
/// Implements `ItemArgumentParseable`.  Parses successfully iff the argument list is empty.
///
/// Also returned by `ArgumentStream::reject_extra_args` on success.
374
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
375
#[allow(clippy::exhaustive_structs)]
376
pub struct NoFurtherArguments;
377

            
378
impl ItemArgumentParseable for NoFurtherArguments {
379
    fn from_args(args: &mut ArgumentStream) -> Result<Self, AE> {
380
        Ok(args.reject_extra_args()?)
381
    }
382
}
383

            
384
impl<'s> Iterator for ItemStream<'s> {
385
    type Item = Result<UnparsedItem<'s>, EP>;
386
32
    fn next(&mut self) -> Option<Result<UnparsedItem<'s>, EP>> {
387
32
        self.next_item().transpose()
388
32
    }
389
}
390

            
391
impl<'s> ArgumentStream<'s> {
392
    /// Make a new `ArgumentStream` from a string
393
    ///
394
    /// The string may start with whitespace (which will be ignored).
395
2553
    pub fn new(rest: &'s str, whole_line_len: usize, options: &'s ParseOptions) -> Self {
396
2553
        let previous_rest_len = whole_line_len;
397
2553
        ArgumentStream {
398
2553
            rest,
399
2553
            whole_line_len,
400
2553
            previous_rest_len,
401
2553
            options,
402
2553
        }
403
2553
    }
404

            
405
    /// Consume this whole `ArgumnetStream`, giving the remaining arguments as a string
406
    ///
407
    /// The returned string won't start with whitespace.
408
    //
409
    /// `self` will be empty on return.
410
    // (We don't take `self` by value because that makes use with `UnparsedItem` annoying.)
411
386
    pub fn into_remaining(&mut self) -> &'s str {
412
386
        self.prep_yield();
413
386
        mem::take(&mut self.rest)
414
386
    }
415

            
416
    /// Return the component parts of this `ArgumnetStream`
417
    ///
418
    /// The returned string might start with whitespace.
419
262
    pub fn whole_line_len(&self) -> usize {
420
262
        self.whole_line_len
421
262
    }
422

            
423
    /// Prepares to yield an argument (or the rest)
424
    ///
425
    ///  * Trims leading WS from `rest`.
426
    ///  * Records the `previous_rest_len`
427
2416
    fn prep_yield(&mut self) {
428
2416
        self.rest = self.rest.trim_start_matches(WS);
429
2416
        self.previous_rest_len = self.rest.len();
430
2416
    }
431

            
432
    /// Prepares to yield, and then determines if there *is* anything to yield.
433
    ///
434
    ///  * Trim leading whitespace
435
    ///  * Records the `previous_rest_len`
436
    ///  * See if we're now empty
437
2030
    pub fn something_to_yield(&mut self) -> bool {
438
2030
        self.prep_yield();
439
2030
        !self.rest.is_empty()
440
2030
    }
441

            
442
    /// Throw and error if there are further arguments
443
    //
444
    // (We don't take `self` by value because that makes use with `UnparsedItem` annoying.)
445
520
    pub fn reject_extra_args(&mut self) -> Result<NoFurtherArguments, UnexpectedArgument> {
446
520
        if self.something_to_yield() {
447
6
            let column = self.next_arg_column();
448
6
            Err(UnexpectedArgument { column })
449
        } else {
450
514
            Ok(NoFurtherArguments)
451
        }
452
520
    }
453

            
454
    /// Convert a "length of `rest`" into the corresponding column number.
455
1738
    fn arg_column_from_rest_len(&self, rest_len: usize) -> usize {
456
        // Can't underflow since rest is always part of the whole.
457
        // Can't overflow since that would mean the document was as big as the address space.
458
1738
        self.whole_line_len - rest_len + 1
459
1738
    }
460

            
461
    /// Obtain the column number of the previously yielded argument.
462
    ///
463
    /// (After `into_remaining`, gives the column number
464
    /// of the start of the returned remaining argument string.)
465
1732
    pub fn prev_arg_column(&self) -> usize {
466
1732
        self.arg_column_from_rest_len(self.previous_rest_len)
467
1732
    }
468

            
469
    /// Obtains the column number of the *next* argument.
470
    ///
471
    /// Should be called after `something_to_yield`; otherwise the returned value
472
    /// may point to whitespace which is going to be skipped.
473
    // ^ this possible misuse doesn't seem worth defending against with type-fu,
474
    //   for a private function with few call sites.
475
6
    fn next_arg_column(&self) -> usize {
476
6
        self.arg_column_from_rest_len(self.rest.len())
477
6
    }
478

            
479
    /// Convert an `ArgumentError` to an `ErrorProblem`.
480
    ///
481
    /// The caller must supply the field name.
482
426
    pub fn handle_error(&self, field: &'static str, ae: ArgumentError) -> ErrorProblem {
483
426
        self.error_handler(field)(ae)
484
426
    }
485

            
486
    /// Return a converter from `ArgumentError` to `ErrorProblem`.
487
    ///
488
    /// Useful in `.map_err`.
489
1732
    pub fn error_handler(
490
1732
        &self,
491
1732
        field: &'static str,
492
1732
    ) -> impl Fn(ArgumentError) -> ErrorProblem + 'static {
493
1732
        let column = self.prev_arg_column();
494
430
        move |ae| match ae {
495
2
            AE::Missing => EP::MissingArgument { field },
496
428
            AE::Invalid => EP::InvalidArgument { field, column },
497
            AE::Unexpected => EP::UnexpectedArgument { column },
498
430
        }
499
1732
    }
500
}
501

            
502
impl<'s> Iterator for ArgumentStream<'s> {
503
    type Item = &'s str;
504
1404
    fn next(&mut self) -> Option<&'s str> {
505
1404
        if !self.something_to_yield() {
506
38
            return None;
507
1366
        }
508
        let arg;
509
1366
        (arg, self.rest) = self.rest.split_once(WS).unwrap_or((self.rest, ""));
510
1366
        Some(arg)
511
1404
    }
512
}
513

            
514
impl<'s> UnparsedObject<'s> {
515
    /// Obtain the Object data, as decoded bytes
516
540
    pub fn decode_data(&self) -> Result<Vec<u8>, EP> {
517
540
        crate::parse::tokenize::base64_decode_multiline(self.data_b64)
518
540
            .map_err(|_e| EP::ObjectInvalidBase64)
519
540
    }
520
}