1
//! Lexing of netdoc elements
2

            
3
use super::*;
4

            
5
/// Linear whitespace as defined by torspec
///
/// Exactly two characters: space and horizontal tab.
/// Usable directly as a `str` pattern, e.g. with `split_once` or `trim_start_matches`.
//
// Only pub via internal_prelude, for benefit of macros
pub const WS: &[char] = &[' ', '\t'];
8

            
9
define_derive_deftly! {
    /// Define `parse_options` accessor
    ///
    /// Generates an inherent method `parse_options(&self) -> &'s ParseOptions`
    /// on the driver type.
    ///
    /// The driver must have a lifetime named `'s`, which is suitable for the returned
    /// `&'s ParseOptions`.
    ///
    /// # Top-level attributes:
    ///
    ///  * **`#[deftly(parse_options(field = ".field.field"))]`**, default `.options`
    ///
    ///    The field path (including the leading `.`) which is appended to `&self`
    ///    to reach the `&'s ParseOptions`.
    ParseOptions beta_deftly, expect items:

    impl<$tgens> $ttype {
        /// Examine the parsing options
        pub fn parse_options(&self) -> &'s ParseOptions {
            // Expands to `&self` followed by the configured field path.
            &self
                ${tmeta(parse_options(field))
                  as token_stream,
                  default { .options }}
        }
    }
}
30

            
31
/// Top-level reader: Netdoc text interpreted as a stream of items
32
#[derive(Debug, Clone, Deftly)]
33
#[derive_deftly(ParseOptions)]
34
pub struct ItemStream<'s> {
35
    /// The whole input document.
36
    whole_input: &'s str,
37
    /// Remaining document, as a stream of lines
38
    lines: Lines<'s>,
39
    /// If we have peeked ahead, what we discovered
40
    peeked: PeekState<'s>,
41
    /// Parsing options.
42
    options: &'s ParseOptions,
43
}
44

            
45
/// Whether an `ItemStream` has peeked ahead, and if so what it discovered
46
#[derive(Debug, Clone)]
47
enum PeekState<'s> {
48
    /// We've peeked a line
49
    Some(ItemStreamPeeked<'s>),
50
    /// We've not peeked, or peeking gave `None`
51
    None {
52
        /// Line number of the last item we yielded.
53
        ///
54
        /// `0` at the start.
55
        yielded_item_lno: usize,
56
    },
57
}
58

            
59
/// If an `ItemStream` has peeked ahead, what it discovered
60
#[derive(Debug, Clone)]
61
struct ItemStreamPeeked<'s> {
62
    /// The next keyword
63
    keyword: KeywordRef<'s>,
64
    /// Token proving that we
65
    line: lines::Peeked,
66
    /// Length of the suffix of the line that is the arguments rather than the keyword
67
    ///
68
    /// Does not include the first whitespace, that terminated the keyword.
69
    args_len: usize,
70
}
71

            
72
/// An Item that has been lexed but not parsed
73
#[derive(Debug, Clone, amplify::Getters, Deftly)]
74
#[derive_deftly(ParseOptions)]
75
#[deftly(parse_options(field = ".args.options"))]
76
pub struct UnparsedItem<'s> {
77
    /// The item's Keyword
78
    #[getter(as_copy)]
79
    keyword: KeywordRef<'s>,
80
    /// The Item's Arguments
81
    #[getter(skip)]
82
    args: ArgumentStream<'s>,
83
    /// The Item's Object, if there was one
84
    #[getter(as_clone)]
85
    object: Option<UnparsedObject<'s>>,
86
}
87

            
88
/// Reader for arguments on an Item
89
///
90
/// Represents the (remaining) arguments.
91
#[derive(Debug, Clone, Deftly)]
92
#[derive_deftly(ParseOptions)]
93
pub struct ArgumentStream<'s> {
94
    /// The remaining unparsed arguments
95
    ///
96
    /// Can start with WS, which is usually trimmed
97
    rest: &'s str,
98

            
99
    /// Original line length
100
    ///
101
    /// Used for reporting column of argument errors.
102
    whole_line_len: usize,
103

            
104
    /// Remaining length *before* we last yielded.
105
    previous_rest_len: usize,
106

            
107
    /// Parsing options.
108
    options: &'s ParseOptions,
109
}
110

            
111
/// An Object that has been lexed but not parsed
112
#[derive(Debug, Clone, amplify::Getters, Deftly)]
113
#[derive_deftly(ParseOptions)]
114
pub struct UnparsedObject<'s> {
115
    /// The Label
116
    #[getter(as_copy)]
117
    label: &'s str,
118

            
119
    /// The portion of the input document which is base64 data (and newlines)
120
    #[getter(skip)]
121
    data_b64: &'s str,
122

            
123
    /// Parsing options.
124
    options: &'s ParseOptions,
125
}
126

            
127
impl<'s> ItemStream<'s> {
128
    /// Start reading a network document as a series of Items
129
682
    pub fn new(input: &'s ParseInput<'s>) -> Result<Self, ParseError> {
130
682
        Ok(ItemStream {
131
682
            whole_input: input.input,
132
682
            lines: Lines::new(input.input),
133
682
            peeked: PeekState::None {
134
682
                yielded_item_lno: 0,
135
682
            },
136
682
            options: &input.options,
137
682
        })
138
682
    }
139

            
140
    /// Line number for reporting an error we have just discovered
141
    ///
142
    /// If we have recent peeked, we report the line number of the peeked keyword line.
143
    ///
144
    /// Otherwise, we report the line number of the most-recently yielded item.
145
98
    pub fn lno_for_error(&self) -> usize {
146
98
        match self.peeked {
147
            PeekState::Some { .. } => {
148
                // The error was presumably caused by whatever was seen in the peek.
149
                // That's the current line number.
150
14
                self.lines.peek_lno()
151
            }
152
84
            PeekState::None { yielded_item_lno } => {
153
                // The error was presumably caused by the results of next_item().
154
84
                yielded_item_lno
155
            }
156
        }
157
98
    }
158

            
159
    /// Core of peeking.  Tries to make `.peeked` be `Some`.
160
172922
    fn peek_internal<'i>(&'i mut self) -> Result<(), EP> {
161
172922
        if matches!(self.peeked, PeekState::None { .. }) {
162
82744
            let Some(peeked) = self.lines.peek() else {
163
826
                return Ok(());
164
            };
165

            
166
81918
            let peeked_line = self.lines.peeked_line(&peeked);
167

            
168
81918
            let (keyword, args) = peeked_line.split_once(WS).unwrap_or((peeked_line, ""));
169
81918
            let keyword = KeywordRef::new(keyword)?;
170

            
171
81918
            self.peeked = PeekState::Some(ItemStreamPeeked {
172
81918
                keyword,
173
81918
                line: peeked,
174
81918
                args_len: args.len(),
175
81918
            });
176
90178
        }
177

            
178
172096
        Ok(())
179
172922
    }
180

            
181
    /// Peek the next keyword
182
88264
    pub fn peek_keyword(&mut self) -> Result<Option<KeywordRef<'s>>, EP> {
183
88264
        self.peek_internal()?;
184
88264
        let PeekState::Some(peeked) = &self.peeked else {
185
816
            return Ok(None);
186
        };
187
87448
        Ok(Some(peeked.keyword))
188
88264
    }
189

            
190
    /// Obtain the body so far, suitable for hashing for an Orderly signature
191
4072
    pub fn body_sofar_for_signature(&self) -> SignedDocumentBody<'s> {
192
4072
        let body = &self.whole_input[0..self.byte_position()];
193
4072
        SignedDocumentBody { body }
194
4072
    }
195

            
196
    /// Byte position, pointing to the start of the next item to yield
197
    ///
198
    /// Offset in bytes from the start of the original input string
199
    /// to the "current" position,
200
    /// ie to just after the item we yielded and just before the next item (or EOF).
201
8196
    pub fn byte_position(&self) -> usize {
202
8196
        self.whole_input.len() - self.lines.remaining().len()
203
8196
    }
204

            
205
    /// Access for the entire input string
206
    ///
207
    /// The original `input: &str` argument to [`ParseInput::new`].
208
    ///
209
    /// Includes both yielded and unyielded items.
210
1966
    pub fn whole_input(&self) -> &'s str {
211
1966
        self.whole_input
212
1966
    }
213

            
214
    /// Parse a (sub-)document with its own signatures
215
    ///
216
    /// Used (mostly) by the
217
    /// [`NetdocParseableUnverified`](derive_deftly_template_NetdocParseableUnverified)
218
    /// derive macro.
219
    ///
220
    /// Generic parameters:
221
    ///
222
    ///  * **`B`**: the body type: the type to which `NetdocParseableUnverified` is applied.
223
    ///  * **`S`**: the signatures section type.
224
    ///  * **`O`**: the `FooUnverified` type, which embodies the parsed body and signatures.
225
958
    pub fn parse_signed<
226
958
        B: HasUnverifiedParsedBody,
227
958
        S: NetdocParseableSignatures,
228
958
        O: NetdocUnverified<Body = B, Signatures = S>,
229
958
    >(
230
958
        &mut self,
231
958
        outer_stop: stop_at!(),
232
958
    ) -> Result<O, EP> {
233
958
        let mut input = ItemStream {
234
958
            whole_input: &self.whole_input[self.whole_input.len() - self.lines.remaining().len()..],
235
958
            ..self.clone()
236
958
        };
237
958
        let r = (|| {
238
958
            let inner_always_stop = outer_stop | StopAt::doc_intro::<B::UnverifiedParsedBody>();
239
958
            let body = B::UnverifiedParsedBody::from_items(
240
958
                &mut input,
241
958
                inner_always_stop | StopAt(S::is_item_keyword),
242
            )?;
243
958
            let signed_doc_body = input.body_sofar_for_signature();
244
958
            let unsigned_body_len = signed_doc_body.body().len();
245
958
            let mut hashes = S::HashesAccu::default();
246
958
            let sigs = S::from_items(&mut input, signed_doc_body, &mut hashes, inner_always_stop)?;
247
958
            let sigs = SignaturesData {
248
958
                sigs,
249
958
                unsigned_body_len,
250
958
                hashes,
251
958
            };
252
            // SECURITY
253
            // We unwrap the UnverifiedParsedBody and immediately wrap it up again
254
            // in FooUnverified, passing on the obligation to verify the signatures,
255
            // and still enforcing that with a newtype.
256
958
            let signed = O::from_parts(B::unverified_into_inner_unchecked(body), sigs);
257
958
            Ok(signed)
258
        })(); // don't exit here
259

            
260
958
        *self = ItemStream {
261
958
            whole_input: self.whole_input,
262
958
            ..input
263
958
        };
264

            
265
958
        r
266
958
    }
267

            
268
    /// Obtain the inputs that would be needed to hash any (even Disorderly) signature
269
    ///
270
    /// These are the hash inputs which would be needed for the next item,
271
    /// assuming it's a signature keyword.
272
2906
    pub fn peek_signature_hash_inputs(
273
2906
        &mut self,
274
2906
        body: SignedDocumentBody<'s>,
275
2906
    ) -> Result<Option<SignatureHashInputs<'s>>, EP> {
276
2906
        self.peek_internal()?;
277
2906
        let PeekState::Some(peeked) = &self.peeked else {
278
            return Ok(None);
279
        };
280
2906
        let document_sofar = self.body_sofar_for_signature().body();
281
2906
        let signature_item_line = self.lines.peeked_line(&peeked.line);
282
2906
        let signature_item_kw_spc = signature_item_line.strip_end_counted(peeked.args_len);
283
2906
        Ok(Some(SignatureHashInputs {
284
2906
            body,
285
2906
            document_sofar,
286
2906
            signature_item_kw_spc,
287
2906
            signature_item_line,
288
2906
        }))
289
2906
    }
290

            
291
    /// Yield the next item.
292
81752
    pub fn next_item(&mut self) -> Result<Option<UnparsedItem<'s>>, EP> {
293
81752
        self.peek_internal()?;
294
81752
        let peeked = match self.peeked {
295
10
            PeekState::None { .. } => return Ok(None),
296
81742
            PeekState::Some { .. } => match mem::replace(
297
81742
                &mut self.peeked,
298
81742
                PeekState::None {
299
81742
                    yielded_item_lno: self.lines.peek_lno(),
300
81742
                },
301
81742
            ) {
302
81742
                PeekState::Some(peeked) => peeked,
303
                PeekState::None { .. } => panic!("it was Some just now"),
304
            },
305
        };
306

            
307
81742
        let keyword = peeked.keyword;
308
81742
        let line = self.lines.consume_peeked(peeked.line);
309
81742
        let args = &line[keyword.len()..];
310
81742
        let options = self.options;
311
81742
        let args = ArgumentStream::new(args, line.len(), options);
312

            
313
81742
        let object = if self.lines.remaining().starts_with('-') {
314
11628
            fn pem_delimiter<'s>(lines: &mut Lines<'s>, start: &str) -> Result<&'s str, EP> {
315
11628
                let line = lines.next().ok_or(
316
                    // If this is the *header*, we already know there's a line,
317
                    // so this error path is only for footers.
318
11628
                    EP::ObjectMissingFooter,
319
                )?;
320
11628
                let label = line
321
11628
                    .strip_prefix(start)
322
11628
                    .ok_or(EP::InvalidObjectDelimiters)?
323
11628
                    .strip_suffix(PEM_AFTER_LABEL)
324
11628
                    .ok_or(EP::InvalidObjectDelimiters)?;
325
11626
                Ok(label)
326
11628
            }
327

            
328
5814
            let label1 = pem_delimiter(&mut self.lines, PEM_HEADER_START)?;
329
5814
            let base64_start_remaining = self.lines.remaining();
330
45104
            while !self.lines.remaining().starts_with('-') {
331
39290
                let _: &str = self.lines.next().ok_or(EP::ObjectMissingFooter)?;
332
            }
333
5814
            let data_b64 = base64_start_remaining.strip_end_counted(self.lines.remaining().len());
334
5814
            let label2 = pem_delimiter(&mut self.lines, PEM_FOOTER_START)?;
335
5812
            let label = [label1, label2]
336
5812
                .into_iter()
337
5812
                .all_equal_value()
338
5812
                .map_err(|_| EP::ObjectMismatchedLabels)?;
339
5810
            Some(UnparsedObject {
340
5810
                label,
341
5810
                data_b64,
342
5810
                options,
343
5810
            })
344
        } else {
345
75928
            None
346
        };
347

            
348
81738
        Ok(Some(UnparsedItem {
349
81738
            keyword,
350
81738
            args,
351
81738
            object,
352
81738
        }))
353
81752
    }
354
}
355

            
356
impl<'s> UnparsedItem<'s> {
357
    /// Access the arguments, mutably (for consuming and parsing them)
358
18768
    pub fn args_mut(&mut self) -> &mut ArgumentStream<'s> {
359
18768
        &mut self.args
360
18768
    }
361
    /// Access a copy of the arguments
362
    ///
363
    /// When using this, be careful not to process any arguments twice.
364
1440
    pub fn args_copy(&self) -> ArgumentStream<'s> {
365
1440
        self.args.clone()
366
1440
    }
367

            
368
    /// Access the arguments (readonly)
369
    ///
370
    /// When using this, be careful not to process any arguments twice.
371
11830
    pub fn args(&self) -> &ArgumentStream<'s> {
372
11830
        &self.args
373
11830
    }
374

            
375
    /// Check that this item has no Object.
376
11202
    pub fn check_no_object(&self) -> Result<(), EP> {
377
11202
        if self.object.is_some() {
378
            return Err(EP::ObjectUnexpected);
379
11202
        }
380
11202
        Ok(())
381
11202
    }
382
    /// Convenience method for handling an error parsing an argument
383
    ///
384
    /// Returns a closure that converts every error into [`ArgumentError::Invalid`]
385
    /// and then to an [`ErrorProblem`] using
386
    /// [`.args().handle_error()`](ArgumentStream::handle_error).
387
    ///
388
    /// Useful in manual `ItemValueParseable` impls, when parsing arguments ad-hoc.
389
4578
    pub fn invalid_argument_handler<E>(
390
4578
        &self,
391
4578
        field: &'static str,
392
4578
    ) -> impl FnOnce(E) -> ErrorProblem {
393
4578
        let error = self.args().handle_error(field, AE::Invalid);
394
        move |_any_error| error
395
4578
    }
396
}
397

            
398
/// Marker for the end of an argument list which must not have any more (unknown) arguments
///
/// Implements `ItemArgumentParseable`: parsing succeeds iff no arguments remain.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
#[allow(clippy::exhaustive_structs)]
pub struct NoFurtherArguments;
404

            
405
impl ItemArgumentParseable for NoFurtherArguments {
406
    fn from_args(args: &mut ArgumentStream) -> Result<Self, AE> {
407
        Ok(args.reject_extra_args()?)
408
    }
409
}
410

            
411
impl<'s> Iterator for ItemStream<'s> {
412
    type Item = Result<UnparsedItem<'s>, EP>;
413
58050
    fn next(&mut self) -> Option<Result<UnparsedItem<'s>, EP>> {
414
58050
        self.next_item().transpose()
415
58050
    }
416
}
417

            
418
impl<'s> ArgumentStream<'s> {
419
    /// Make a new `ArgumentStream` from a string
420
    ///
421
    /// The string may start with whitespace (which will be ignored).
422
84086
    pub fn new(rest: &'s str, whole_line_len: usize, options: &'s ParseOptions) -> Self {
423
84086
        let previous_rest_len = whole_line_len;
424
84086
        ArgumentStream {
425
84086
            rest,
426
84086
            whole_line_len,
427
84086
            previous_rest_len,
428
84086
            options,
429
84086
        }
430
84086
    }
431

            
432
    /// Consume this whole `ArgumentStream`, giving the remaining arguments as a string
433
    ///
434
    /// The returned string won't start with whitespace.
435
    //
436
    /// `self` will be empty on return.
437
    // (We don't take `self` by value because that makes use with `UnparsedItem` annoying.)
438
5822
    pub fn into_remaining(&mut self) -> &'s str {
439
5822
        self.prep_yield();
440
5822
        mem::take(&mut self.rest)
441
5822
    }
442

            
443
    /// Return the component parts of this `ArgumentStream`
444
    ///
445
    /// The returned string might start with whitespace.
446
2344
    pub fn whole_line_len(&self) -> usize {
447
2344
        self.whole_line_len
448
2344
    }
449

            
450
    /// Prepares to yield an argument (or the rest)
451
    ///
452
    ///  * Trims leading WS from `rest`.
453
    ///  * Records the `previous_rest_len`
454
40268
    fn prep_yield(&mut self) {
455
40268
        self.rest = self.rest.trim_start_matches(WS);
456
40268
        self.previous_rest_len = self.rest.len();
457
40268
    }
458

            
459
    /// Prepares to yield, and then determines if there *is* anything to yield.
460
    ///
461
    ///  * Trim leading whitespace
462
    ///  * Records the `previous_rest_len`
463
    ///  * See if we're now empty
464
34446
    pub fn something_to_yield(&mut self) -> bool {
465
34446
        self.prep_yield();
466
34446
        !self.rest.is_empty()
467
34446
    }
468

            
469
    /// Throw and error if there are further arguments
470
    //
471
    // (We don't take `self` by value because that makes use with `UnparsedItem` annoying.)
472
4030
    pub fn reject_extra_args(&mut self) -> Result<NoFurtherArguments, UnexpectedArgument> {
473
4030
        if self.something_to_yield() {
474
4
            let column = self.next_arg_column();
475
4
            Err(UnexpectedArgument { column })
476
        } else {
477
4026
            Ok(NoFurtherArguments)
478
        }
479
4030
    }
480

            
481
    /// Convert a "length of `rest`" into the corresponding column number.
482
31304
    fn arg_column_from_rest_len(&self, rest_len: usize) -> usize {
483
        // Can't underflow since rest is always part of the whole.
484
        // Can't overflow since that would mean the document was as big as the address space.
485
31304
        self.whole_line_len - rest_len + 1
486
31304
    }
487

            
488
    /// Obtain the column number of the previously yielded argument.
489
    ///
490
    /// (After `into_remaining`, gives the column number
491
    /// of the start of the returned remaining argument string.)
492
31300
    pub fn prev_arg_column(&self) -> usize {
493
31300
        self.arg_column_from_rest_len(self.previous_rest_len)
494
31300
    }
495

            
496
    /// Obtains the column number of the *next* argument.
497
    ///
498
    /// Should be called after `something_to_yield`; otherwise the returned value
499
    /// may point to whitespace which is going to be skipped.
500
    // ^ this possible misuse doesn't seem worth defending against with type-fu,
501
    //   for a private function with few call sites.
502
4
    fn next_arg_column(&self) -> usize {
503
4
        self.arg_column_from_rest_len(self.rest.len())
504
4
    }
505

            
506
    /// Convert an `ArgumentError` to an `ErrorProblem`.
507
    ///
508
    /// The caller must supply the field name.
509
4578
    pub fn handle_error(&self, field: &'static str, ae: ArgumentError) -> ErrorProblem {
510
4578
        self.error_handler(field)(ae)
511
4578
    }
512

            
513
    /// Return a converter from `ArgumentError` to `ErrorProblem`.
514
    ///
515
    /// Useful in `.map_err`.
516
31300
    pub fn error_handler(
517
31300
        &self,
518
31300
        field: &'static str,
519
31300
    ) -> impl Fn(ArgumentError) -> ErrorProblem + 'static {
520
31300
        let column = self.prev_arg_column();
521
4606
        move |ae| match ae {
522
4
            AE::Missing => EP::MissingArgument { field },
523
4602
            AE::Invalid => EP::InvalidArgument { field, column },
524
            AE::Unexpected => EP::UnexpectedArgument { column },
525
4606
        }
526
31300
    }
527
}
528

            
529
impl<'s> Iterator for ArgumentStream<'s> {
530
    type Item = &'s str;
531
30294
    fn next(&mut self) -> Option<&'s str> {
532
30294
        if !self.something_to_yield() {
533
486
            return None;
534
29808
        }
535
        let arg;
536
29808
        (arg, self.rest) = self.rest.split_once(WS).unwrap_or((self.rest, ""));
537
29808
        Some(arg)
538
30294
    }
539
}
540

            
541
impl<'s> UnparsedObject<'s> {
542
    /// Obtain the Object data, as decoded bytes
543
5812
    pub fn decode_data(&self) -> Result<Vec<u8>, EP> {
544
5812
        crate::parse::tokenize::base64_decode_multiline(self.data_b64)
545
5812
            .map_err(|_e| EP::ObjectInvalidBase64)
546
5812
    }
547
}