1
//! Lexing of netdoc elements
2

            
3
use super::*;
4

            
5
/// Linear whitespace as defined by torspec: space and horizontal tab
///
/// Usable directly as a `str` pattern, e.g. with `split_once` or `trim_start_matches`.
// Only pub via internal_prelude, for benefit of macros
pub const WS: &[char] = &[' ', '\t'];
8

            
9
define_derive_deftly! {
    /// Define a `parse_options` accessor method
    ///
    /// The driver must have a lifetime named `'s`, which is suitable for the returned
    /// `&'s ParseOptions`.
    ///
    /// # Top-level attributes:
    ///
    ///  * **`#[deftly(parse_options(field = ".field.field"))]`**, default `.options`:
    ///    the tokens appended to `&self` to reach the options.
    ParseOptions beta_deftly, expect items:

    impl<$tgens> $ttype {
        /// Examine the parsing options
        pub fn parse_options(&self) -> &'s ParseOptions {
            &self
                ${tmeta(parse_options(field))
                  as token_stream,
                  default { .options }}
        }
    }
}
30

            
31
/// Top-level reader: Netdoc text interpreted as a stream of items
32
#[derive(Debug, Clone, Deftly)]
33
#[derive_deftly(ParseOptions)]
34
pub struct ItemStream<'s> {
35
    /// The whole input document.
36
    whole_input: &'s str,
37
    /// Remaining document, as a stream of lines
38
    lines: Lines<'s>,
39
    /// If we have peeked ahead, what we discovered
40
    peeked: PeekState<'s>,
41
    /// Parsing options.
42
    options: &'s ParseOptions,
43
}
44

            
45
/// Whether an `ItemStream` has peeked ahead, and if so what it discovered
46
#[derive(Debug, Clone)]
47
enum PeekState<'s> {
48
    /// We've peeked a line
49
    Some(ItemStreamPeeked<'s>),
50
    /// We've not peeked, or peeking gave `None`
51
    None {
52
        /// Line number of the last item we yielded.
53
        ///
54
        /// `0` at the start.
55
        yielded_item_lno: usize,
56
    },
57
}
58

            
59
/// If an `ItemStream` has peeked ahead, what it discovered
60
#[derive(Debug, Clone)]
61
struct ItemStreamPeeked<'s> {
62
    /// The next keyword
63
    keyword: KeywordRef<'s>,
64
    /// Token proving that we
65
    line: lines::Peeked,
66
    /// Length of the suffix of the line that is the arguments rather than the keyword
67
    ///
68
    /// Does not include the first whitespace, that terminated the keyword.
69
    args_len: usize,
70
}
71

            
72
/// An Item that has been lexed but not parsed
73
#[derive(Debug, Clone, amplify::Getters, Deftly)]
74
#[derive_deftly(ParseOptions)]
75
#[deftly(parse_options(field = ".args.options"))]
76
pub struct UnparsedItem<'s> {
77
    /// The item's Keyword
78
    #[getter(as_copy)]
79
    keyword: KeywordRef<'s>,
80
    /// The Item's Arguments
81
    #[getter(skip)]
82
    args: ArgumentStream<'s>,
83
    /// The Item's Object, if there was one
84
    #[getter(as_clone)]
85
    object: Option<UnparsedObject<'s>>,
86
}
87

            
88
/// Reader for arguments on an Item
89
///
90
/// Represents the (remaining) arguments.
91
#[derive(Debug, Clone, Deftly)]
92
#[derive_deftly(ParseOptions)]
93
pub struct ArgumentStream<'s> {
94
    /// The remaining unparsed arguments
95
    ///
96
    /// Can start with WS, which is usually trimmed
97
    rest: &'s str,
98

            
99
    /// Original line length
100
    ///
101
    /// Used for reporting column of argument errors.
102
    whole_line_len: usize,
103

            
104
    /// Remaining length *before* we last yielded.
105
    previous_rest_len: usize,
106

            
107
    /// Parsing options.
108
    options: &'s ParseOptions,
109
}
110

            
111
/// An Object that has been lexed but not parsed
112
#[derive(Debug, Clone, amplify::Getters, Deftly)]
113
#[derive_deftly(ParseOptions)]
114
pub struct UnparsedObject<'s> {
115
    /// The Label
116
    #[getter(as_copy)]
117
    label: &'s str,
118

            
119
    /// The portion of the input document which is base64 data (and newlines)
120
    #[getter(skip)]
121
    data_b64: &'s str,
122

            
123
    /// Parsing options.
124
    options: &'s ParseOptions,
125
}
126

            
127
impl<'s> ItemStream<'s> {
128
    /// Start reading a network document as a series of Items
129
600
    pub fn new(input: &'s ParseInput<'s>) -> Result<Self, ParseError> {
130
600
        Ok(ItemStream {
131
600
            whole_input: input.input,
132
600
            lines: Lines::new(input.input),
133
600
            peeked: PeekState::None {
134
600
                yielded_item_lno: 0,
135
600
            },
136
600
            options: &input.options,
137
600
        })
138
600
    }
139

            
140
    /// Line number for reporting an error we have just discovered
141
    ///
142
    /// If we have recent peeked, we report the line number of the peeked keyword line.
143
    ///
144
    /// Otherwise, we report the line number of the most-recently yielded item.
145
66
    pub fn lno_for_error(&self) -> usize {
146
66
        match self.peeked {
147
            PeekState::Some { .. } => {
148
                // The error was presumably caused by whatever was seen in the peek.
149
                // That's the current line number.
150
14
                self.lines.peek_lno()
151
            }
152
52
            PeekState::None { yielded_item_lno } => {
153
                // The error was presumably caused by the results of next_item().
154
52
                yielded_item_lno
155
            }
156
        }
157
66
    }
158

            
159
    /// Core of peeking.  Tries to make `.peeked` be `Some`.
160
173132
    fn peek_internal<'i>(&'i mut self) -> Result<(), EP> {
161
173132
        if matches!(self.peeked, PeekState::None { .. }) {
162
82850
            let Some(peeked) = self.lines.peek() else {
163
734
                return Ok(());
164
            };
165

            
166
82116
            let peeked_line = self.lines.peeked_line(&peeked);
167

            
168
82116
            let (keyword, args) = peeked_line.split_once(WS).unwrap_or((peeked_line, ""));
169
82116
            let keyword = KeywordRef::new(keyword)?;
170

            
171
82116
            self.peeked = PeekState::Some(ItemStreamPeeked {
172
82116
                keyword,
173
82116
                line: peeked,
174
82116
                args_len: args.len(),
175
82116
            });
176
90282
        }
177

            
178
172398
        Ok(())
179
173132
    }
180

            
181
    /// Peek the next keyword
182
86410
    pub fn peek_keyword(&mut self) -> Result<Option<KeywordRef<'s>>, EP> {
183
86410
        self.peek_internal()?;
184
86410
        let PeekState::Some(peeked) = &self.peeked else {
185
726
            return Ok(None);
186
        };
187
85684
        Ok(Some(peeked.keyword))
188
86410
    }
189

            
190
    /// Obtain the body so far, suitable for hashing for an Orderly signature
191
3986
    pub fn body_sofar_for_signature(&self) -> SignedDocumentBody<'s> {
192
3986
        let body = &self.whole_input[0..self.byte_position()];
193
3986
        SignedDocumentBody { body }
194
3986
    }
195

            
196
    /// Byte position, pointing to the start of the next item to yield
197
    ///
198
    /// Offset in bytes from the start of the original input string
199
    /// to the "current" position,
200
    /// ie to just after the item we yielded and just before the next item (or EOF).
201
6111
    pub fn byte_position(&self) -> usize {
202
6111
        self.whole_input.len() - self.lines.remaining().len()
203
6111
    }
204

            
205
    /// Access for the entire input string
206
    ///
207
    /// The original `input: &str` argument to [`ParseInput::new`].
208
    ///
209
    /// Includes both yielded and unyielded items.
210
6
    pub fn whole_input(&self) -> &'s str {
211
6
        self.whole_input
212
6
    }
213

            
214
    /// Parse a (sub-)document with its own signatures
215
    ///
216
    /// Used (mostly) by the
217
    /// [`NetdocParseableUnverified`](derive_deftly_template_NetdocParseableUnverified)
218
    /// derive macro.
219
    ///
220
    /// Generic parameters:
221
    ///
222
    ///  * **`B`**: the body type: the type to which `NetdocParseableUnverified` is applied.
223
    ///  * **`S`**: the signatures section type.
224
    ///  * **`O`**: the `FooUnverified` type, which embodies the parsed body and signatures.
225
935
    pub fn parse_signed<
226
935
        B: HasUnverifiedParsedBody,
227
935
        S: NetdocParseableSignatures,
228
935
        O: NetdocUnverified<Body = B, Signatures = S>,
229
935
    >(
230
935
        &mut self,
231
935
        outer_stop: stop_at!(),
232
935
    ) -> Result<O, EP> {
233
935
        let mut input = ItemStream {
234
935
            whole_input: &self.whole_input[self.whole_input.len() - self.lines.remaining().len()..],
235
935
            ..self.clone()
236
935
        };
237
935
        let r = (|| {
238
935
            let inner_always_stop = outer_stop | StopAt::doc_intro::<B::UnverifiedParsedBody>();
239
935
            let body = B::UnverifiedParsedBody::from_items(
240
935
                &mut input,
241
935
                inner_always_stop | StopAt(S::is_item_keyword),
242
            )?;
243
935
            let signed_doc_body = input.body_sofar_for_signature();
244
935
            let unsigned_body_len = signed_doc_body.body().len();
245
935
            let mut hashes = S::HashesAccu::default();
246
935
            let sigs = S::from_items(&mut input, signed_doc_body, &mut hashes, inner_always_stop)?;
247
935
            let sigs = SignaturesData {
248
935
                sigs,
249
935
                unsigned_body_len,
250
935
                hashes,
251
935
            };
252
            // SECURITY
253
            // We unwrap the UnverifiedParsedBody and immediately wrap it up again
254
            // in FooUnverified, passing on the obligation to verify the signatures,
255
            // and still enforcing that with a newtype.
256
935
            let signed = O::from_parts(B::unverified_into_inner_unchecked(body), sigs);
257
935
            Ok(signed)
258
        })(); // don't exit here
259

            
260
935
        *self = ItemStream {
261
935
            whole_input: self.whole_input,
262
935
            ..input
263
935
        };
264

            
265
935
        r
266
935
    }
267

            
268
    /// Obtain the inputs that would be needed to hash any (even Disorderly) signature
269
    ///
270
    /// These are the hash inputs which would be needed for the next item,
271
    /// assuming it's a signature keyword.
272
2847
    pub fn peek_signature_hash_inputs(
273
2847
        &mut self,
274
2847
        body: SignedDocumentBody<'s>,
275
2847
    ) -> Result<Option<SignatureHashInputs<'s>>, EP> {
276
2847
        self.peek_internal()?;
277
2847
        let PeekState::Some(peeked) = &self.peeked else {
278
            return Ok(None);
279
        };
280
2847
        let document_sofar = self.body_sofar_for_signature().body();
281
2847
        let signature_item_line = self.lines.peeked_line(&peeked.line);
282
2847
        let signature_item_kw_spc = signature_item_line.strip_end_counted(peeked.args_len);
283
2847
        Ok(Some(SignatureHashInputs {
284
2847
            body,
285
2847
            document_sofar,
286
2847
            signature_item_kw_spc,
287
2847
            signature_item_line,
288
2847
        }))
289
2847
    }
290

            
291
    /// Yield the next item.
292
83875
    pub fn next_item(&mut self) -> Result<Option<UnparsedItem<'s>>, EP> {
293
83875
        self.peek_internal()?;
294
83875
        let peeked = match self.peeked {
295
8
            PeekState::None { .. } => return Ok(None),
296
83867
            PeekState::Some { .. } => match mem::replace(
297
83867
                &mut self.peeked,
298
83867
                PeekState::None {
299
83867
                    yielded_item_lno: self.lines.peek_lno(),
300
83867
                },
301
83867
            ) {
302
83867
                PeekState::Some(peeked) => peeked,
303
                PeekState::None { .. } => panic!("it was Some just now"),
304
            },
305
        };
306

            
307
83867
        let keyword = peeked.keyword;
308
83867
        let line = self.lines.consume_peeked(peeked.line);
309
83867
        let args = &line[keyword.len()..];
310
83867
        let options = self.options;
311
83867
        let args = ArgumentStream::new(args, line.len(), options);
312

            
313
83867
        let object = if self.lines.remaining().starts_with('-') {
314
11336
            fn pem_delimiter<'s>(lines: &mut Lines<'s>, start: &str) -> Result<&'s str, EP> {
315
11336
                let line = lines.next().ok_or(
316
                    // If this is the *header*, we already know there's a line,
317
                    // so this error path is only for footers.
318
11336
                    EP::ObjectMissingFooter,
319
                )?;
320
11336
                let label = line
321
11336
                    .strip_prefix(start)
322
11336
                    .ok_or(EP::InvalidObjectDelimiters)?
323
11336
                    .strip_suffix(PEM_AFTER_LABEL)
324
11336
                    .ok_or(EP::InvalidObjectDelimiters)?;
325
11334
                Ok(label)
326
11336
            }
327

            
328
5668
            let label1 = pem_delimiter(&mut self.lines, PEM_HEADER_START)?;
329
5668
            let base64_start_remaining = self.lines.remaining();
330
44081
            while !self.lines.remaining().starts_with('-') {
331
38413
                let _: &str = self.lines.next().ok_or(EP::ObjectMissingFooter)?;
332
            }
333
5668
            let data_b64 = base64_start_remaining.strip_end_counted(self.lines.remaining().len());
334
5668
            let label2 = pem_delimiter(&mut self.lines, PEM_FOOTER_START)?;
335
5666
            let label = [label1, label2]
336
5666
                .into_iter()
337
5666
                .all_equal_value()
338
5666
                .map_err(|_| EP::ObjectMismatchedLabels)?;
339
5664
            Some(UnparsedObject {
340
5664
                label,
341
5664
                data_b64,
342
5664
                options,
343
5664
            })
344
        } else {
345
78199
            None
346
        };
347

            
348
83863
        Ok(Some(UnparsedItem {
349
83863
            keyword,
350
83863
            args,
351
83863
            object,
352
83863
        }))
353
83875
    }
354
}
355

            
356
impl<'s> UnparsedItem<'s> {
357
    /// Access the arguments, mutably (for consuming and parsing them)
358
14388
    pub fn args_mut(&mut self) -> &mut ArgumentStream<'s> {
359
14388
        &mut self.args
360
14388
    }
361
    /// Access a copy of the arguments
362
    ///
363
    /// When using this, be careful not to process any arguments twice.
364
1380
    pub fn args_copy(&self) -> ArgumentStream<'s> {
365
1380
        self.args.clone()
366
1380
    }
367

            
368
    /// Access the arguments (readonly)
369
    ///
370
    /// When using this, be careful not to process any arguments twice.
371
9598
    pub fn args(&self) -> &ArgumentStream<'s> {
372
9598
        &self.args
373
9598
    }
374

            
375
    /// Check that this item has no Object.
376
7024
    pub fn check_no_object(&self) -> Result<(), EP> {
377
7024
        if self.object.is_some() {
378
            return Err(EP::ObjectUnexpected);
379
7024
        }
380
7024
        Ok(())
381
7024
    }
382
    /// Convenience method for handling an error parsing an argument
383
    ///
384
    /// Returns a closure that converts every error into [`ArgumentError::Invalid`]
385
    /// and then to an [`ErrorProblem`] using
386
    /// [`.args().handle_error()`](ArgumentStream::handle_error).
387
    ///
388
    /// Useful in manual `ItemValueParseable` impls, when parsing arguments ad-hoc.
389
1802
    pub fn invalid_argument_handler<E>(
390
1802
        &self,
391
1802
        field: &'static str,
392
1802
    ) -> impl FnOnce(E) -> ErrorProblem {
393
1802
        let error = self.args().handle_error(field, AE::Invalid);
394
        move |_any_error| error
395
1802
    }
396
}
397

            
398
/// End of an argument list that does not accept any further (unknown) arguments
///
/// Implements `ItemArgumentParseable`.  Parses successfully iff the argument list is empty.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
#[allow(clippy::exhaustive_structs)]
pub struct NoFurtherArguments;
404

            
405
impl ItemArgumentParseable for NoFurtherArguments {
406
    fn from_args(args: &mut ArgumentStream) -> Result<Self, AE> {
407
        Ok(args.reject_extra_args()?)
408
    }
409
}
410

            
411
impl<'s> Iterator for ItemStream<'s> {
412
    type Item = Result<UnparsedItem<'s>, EP>;
413
60823
    fn next(&mut self) -> Option<Result<UnparsedItem<'s>, EP>> {
414
60823
        self.next_item().transpose()
415
60823
    }
416
}
417

            
418
impl<'s> ArgumentStream<'s> {
419
    /// Make a new `ArgumentStream` from a string
420
    ///
421
    /// The string may start with whitespace (which will be ignored).
422
86159
    pub fn new(rest: &'s str, whole_line_len: usize, options: &'s ParseOptions) -> Self {
423
86159
        let previous_rest_len = whole_line_len;
424
86159
        ArgumentStream {
425
86159
            rest,
426
86159
            whole_line_len,
427
86159
            previous_rest_len,
428
86159
            options,
429
86159
        }
430
86159
    }
431

            
432
    /// Consume this whole `ArgumentStream`, giving the remaining arguments as a string
433
    ///
434
    /// The returned string won't start with whitespace.
435
    //
436
    /// `self` will be empty on return.
437
    // (We don't take `self` by value because that makes use with `UnparsedItem` annoying.)
438
3688
    pub fn into_remaining(&mut self) -> &'s str {
439
3688
        self.prep_yield();
440
3688
        mem::take(&mut self.rest)
441
3688
    }
442

            
443
    /// Return the component parts of this `ArgumentStream`
444
    ///
445
    /// The returned string might start with whitespace.
446
2292
    pub fn whole_line_len(&self) -> usize {
447
2292
        self.whole_line_len
448
2292
    }
449

            
450
    /// Prepares to yield an argument (or the rest)
451
    ///
452
    ///  * Trims leading WS from `rest`.
453
    ///  * Records the `previous_rest_len`
454
27494
    fn prep_yield(&mut self) {
455
27494
        self.rest = self.rest.trim_start_matches(WS);
456
27494
        self.previous_rest_len = self.rest.len();
457
27494
    }
458

            
459
    /// Prepares to yield, and then determines if there *is* anything to yield.
460
    ///
461
    ///  * Trim leading whitespace
462
    ///  * Records the `previous_rest_len`
463
    ///  * See if we're now empty
464
23806
    pub fn something_to_yield(&mut self) -> bool {
465
23806
        self.prep_yield();
466
23806
        !self.rest.is_empty()
467
23806
    }
468

            
469
    /// Throw and error if there are further arguments
470
    //
471
    // (We don't take `self` by value because that makes use with `UnparsedItem` annoying.)
472
3942
    pub fn reject_extra_args(&mut self) -> Result<NoFurtherArguments, UnexpectedArgument> {
473
3942
        if self.something_to_yield() {
474
4
            let column = self.next_arg_column();
475
4
            Err(UnexpectedArgument { column })
476
        } else {
477
3938
            Ok(NoFurtherArguments)
478
        }
479
3942
    }
480

            
481
    /// Convert a "length of `rest`" into the corresponding column number.
482
20848
    fn arg_column_from_rest_len(&self, rest_len: usize) -> usize {
483
        // Can't underflow since rest is always part of the whole.
484
        // Can't overflow since that would mean the document was as big as the address space.
485
20848
        self.whole_line_len - rest_len + 1
486
20848
    }
487

            
488
    /// Obtain the column number of the previously yielded argument.
489
    ///
490
    /// (After `into_remaining`, gives the column number
491
    /// of the start of the returned remaining argument string.)
492
20844
    pub fn prev_arg_column(&self) -> usize {
493
20844
        self.arg_column_from_rest_len(self.previous_rest_len)
494
20844
    }
495

            
496
    /// Obtains the column number of the *next* argument.
497
    ///
498
    /// Should be called after `something_to_yield`; otherwise the returned value
499
    /// may point to whitespace which is going to be skipped.
500
    // ^ this possible misuse doesn't seem worth defending against with type-fu,
501
    //   for a private function with few call sites.
502
4
    fn next_arg_column(&self) -> usize {
503
4
        self.arg_column_from_rest_len(self.rest.len())
504
4
    }
505

            
506
    /// Convert an `ArgumentError` to an `ErrorProblem`.
507
    ///
508
    /// The caller must supply the field name.
509
4454
    pub fn handle_error(&self, field: &'static str, ae: ArgumentError) -> ErrorProblem {
510
4454
        self.error_handler(field)(ae)
511
4454
    }
512

            
513
    /// Return a converter from `ArgumentError` to `ErrorProblem`.
514
    ///
515
    /// Useful in `.map_err`.
516
20844
    pub fn error_handler(
517
20844
        &self,
518
20844
        field: &'static str,
519
20844
    ) -> impl Fn(ArgumentError) -> ErrorProblem + 'static {
520
20844
        let column = self.prev_arg_column();
521
4458
        move |ae| match ae {
522
2
            AE::Missing => EP::MissingArgument { field },
523
4456
            AE::Invalid => EP::InvalidArgument { field, column },
524
            AE::Unexpected => EP::UnexpectedArgument { column },
525
4458
        }
526
20844
    }
527
}
528

            
529
impl<'s> Iterator for ArgumentStream<'s> {
530
    type Item = &'s str;
531
19758
    fn next(&mut self) -> Option<&'s str> {
532
19758
        if !self.something_to_yield() {
533
462
            return None;
534
19296
        }
535
        let arg;
536
19296
        (arg, self.rest) = self.rest.split_once(WS).unwrap_or((self.rest, ""));
537
19296
        Some(arg)
538
19758
    }
539
}
540

            
541
impl<'s> UnparsedObject<'s> {
542
    /// Obtain the Object data, as decoded bytes
543
5656
    pub fn decode_data(&self) -> Result<Vec<u8>, EP> {
544
5656
        crate::parse::tokenize::base64_decode_multiline(self.data_b64)
545
5656
            .map_err(|_e| EP::ObjectInvalidBase64)
546
5656
    }
547
}