1
//! Lexing of netdoc elements
2

            
3
use super::*;
4

            
5
/// The characters that count as linear whitespace per torspec: space and horizontal tab.
///
/// Usable directly as a `&[char]` string pattern, eg for splitting or trimming.
// Exported only through internal_prelude, so that macros can refer to it.
pub const WS: &[char] = &[' ', '\t'];
8

            
9
define_derive_deftly! {
    /// Generate an inherent `parse_options` accessor
    ///
    /// The driver type must declare a lifetime named `'s`;
    /// the generated method returns `&'s ParseOptions`.
    ///
    /// # Top-level attributes:
    ///
    ///  * **`#[deftly(parse_options(field = ".field.field"))]`**:
    ///    the field path, written after `&self`, at which the options are found.
    ///    Default `.options`.
    ParseOptions beta_deftly, expect items:

    impl<$tgens> $ttype {
        /// Return the parsing options in effect for this value
        pub fn parse_options(&self) -> &'s ParseOptions {
            // Expands to `&self` followed by the configured field path.
            &self ${tmeta(parse_options(field)) as token_stream, default { .options }}
        }
    }
}
30

            
31
/// Top-level reader: Netdoc text interpreted as a stream of items
32
#[derive(Debug, Clone, Deftly)]
33
#[derive_deftly(ParseOptions)]
34
pub struct ItemStream<'s> {
35
    /// The whole input document.
36
    whole_input: &'s str,
37
    /// Remaining document, as a stream of lines
38
    lines: Lines<'s>,
39
    /// If we have peeked ahead, what we discovered
40
    peeked: PeekState<'s>,
41
    /// Parsing options.
42
    options: &'s ParseOptions,
43
}
44

            
45
/// Whether an `ItemStream` has peeked ahead, and if so what it discovered
46
#[derive(Debug, Clone)]
47
enum PeekState<'s> {
48
    /// We've peeked a line
49
    Some(ItemStreamPeeked<'s>),
50
    /// We've not peeked, or peeking gave `None`
51
    None {
52
        /// Line number of the last item we yielded.
53
        ///
54
        /// `0` at the start.
55
        yielded_item_lno: usize,
56
    },
57
}
58

            
59
/// If an `ItemStream` has peeked ahead, what it discovered
60
#[derive(Debug, Clone)]
61
struct ItemStreamPeeked<'s> {
62
    /// The next keyword
63
    keyword: KeywordRef<'s>,
64
    /// Token proving that we
65
    line: lines::Peeked,
66
    /// Length of the suffix of the line that is the arguments rather than the keyword
67
    ///
68
    /// Does not include the first whitespace, that terminated the keyword.
69
    args_len: usize,
70
}
71

            
72
/// An Item that has been lexed but not parsed
73
#[derive(Debug, Clone, amplify::Getters, Deftly)]
74
#[derive_deftly(ParseOptions)]
75
#[deftly(parse_options(field = ".args.options"))]
76
pub struct UnparsedItem<'s> {
77
    /// The item's Keyword
78
    #[getter(as_copy)]
79
    keyword: KeywordRef<'s>,
80
    /// The Item's Arguments
81
    #[getter(skip)]
82
    args: ArgumentStream<'s>,
83
    /// The Item's Object, if there was one
84
    #[getter(as_clone)]
85
    object: Option<UnparsedObject<'s>>,
86
}
87

            
88
/// Reader for arguments on an Item
89
///
90
/// Represents the (remaining) arguments.
91
#[derive(Debug, Clone, Deftly)]
92
#[derive_deftly(ParseOptions)]
93
pub struct ArgumentStream<'s> {
94
    /// The remaining unparsed arguments
95
    ///
96
    /// Can start with WS, which is usually trimmed
97
    rest: &'s str,
98

            
99
    /// Original line length
100
    ///
101
    /// Used for reporting column of argument errors.
102
    whole_line_len: usize,
103

            
104
    /// Remaining length *before* we last yielded.
105
    previous_rest_len: usize,
106

            
107
    /// Parsing options.
108
    options: &'s ParseOptions,
109
}
110

            
111
/// An Object that has been lexed but not parsed
112
#[derive(Debug, Clone, amplify::Getters, Deftly)]
113
#[derive_deftly(ParseOptions)]
114
pub struct UnparsedObject<'s> {
115
    /// The Label
116
    #[getter(as_copy)]
117
    label: &'s str,
118

            
119
    /// The portion of the input document which is base64 data (and newlines)
120
    #[getter(skip)]
121
    data_b64: &'s str,
122

            
123
    /// Parsing options.
124
    options: &'s ParseOptions,
125
}
126

            
127
impl<'s> ItemStream<'s> {
128
    /// Start reading a network document as a series of Items
129
722
    pub fn new(input: &'s ParseInput<'s>) -> Result<Self, ParseError> {
130
722
        Ok(ItemStream {
131
722
            whole_input: input.input,
132
722
            lines: Lines::new(input.input),
133
722
            peeked: PeekState::None {
134
722
                yielded_item_lno: 0,
135
722
            },
136
722
            options: &input.options,
137
722
        })
138
722
    }
139

            
140
    /// Line number for reporting an error we have just discovered
141
    ///
142
    /// If we have recent peeked, we report the line number of the peeked keyword line.
143
    ///
144
    /// Otherwise, we report the line number of the most-recently yielded item.
145
108
    pub fn lno_for_error(&self) -> usize {
146
108
        match self.peeked {
147
            PeekState::Some { .. } => {
148
                // The error was presumably caused by whatever was seen in the peek.
149
                // That's the current line number.
150
16
                self.lines.peek_lno()
151
            }
152
92
            PeekState::None { yielded_item_lno } => {
153
                // The error was presumably caused by the results of next_item().
154
92
                yielded_item_lno
155
            }
156
        }
157
108
    }
158

            
159
    /// Core of peeking.  Tries to make `.peeked` be `Some`.
160
171910
    fn peek_internal<'i>(&'i mut self) -> Result<(), EP> {
161
171910
        if matches!(self.peeked, PeekState::None { .. }) {
162
82304
            let Some(peeked) = self.lines.peek() else {
163
910
                return Ok(());
164
            };
165

            
166
81394
            let peeked_line = self.lines.peeked_line(&peeked);
167

            
168
81394
            let (keyword, args) = peeked_line.split_once(WS).unwrap_or((peeked_line, ""));
169
81394
            let keyword = KeywordRef::new(keyword)?;
170

            
171
81394
            self.peeked = PeekState::Some(ItemStreamPeeked {
172
81394
                keyword,
173
81394
                line: peeked,
174
81394
                args_len: args.len(),
175
81394
            });
176
89606
        }
177

            
178
171000
        Ok(())
179
171910
    }
180

            
181
    /// Peek the next keyword
182
87764
    pub fn peek_keyword(&mut self) -> Result<Option<KeywordRef<'s>>, EP> {
183
87764
        self.peek_internal()?;
184
87764
        let PeekState::Some(peeked) = &self.peeked else {
185
900
            return Ok(None);
186
        };
187
86864
        Ok(Some(peeked.keyword))
188
87764
    }
189

            
190
    /// Obtain the body so far, suitable for hashing for an Orderly signature
191
    #[allow(clippy::string_slice)] // TODO
192
4112
    pub fn body_sofar_for_signature(&self) -> SignedDocumentBody<'s> {
193
4112
        let body = &self.whole_input[0..self.byte_position()];
194
4112
        SignedDocumentBody { body }
195
4112
    }
196

            
197
    /// Byte position, pointing to the start of the next item to yield
198
    ///
199
    /// Offset in bytes from the start of the original input string
200
    /// to the "current" position,
201
    /// ie to just after the item we yielded and just before the next item (or EOF).
202
8108
    pub fn byte_position(&self) -> usize {
203
8108
        self.whole_input.len() - self.lines.remaining().len()
204
8108
    }
205

            
206
    /// Access for the entire input string
207
    ///
208
    /// The original `input: &str` argument to [`ParseInput::new`].
209
    ///
210
    /// Includes both yielded and unyielded items.
211
1898
    pub fn whole_input(&self) -> &'s str {
212
1898
        self.whole_input
213
1898
    }
214

            
215
    /// Parse a (sub-)document with its own signatures
216
    ///
217
    /// Used (mostly) by the
218
    /// [`NetdocParseableUnverified`](derive_deftly_template_NetdocParseableUnverified)
219
    /// derive macro.
220
    ///
221
    /// Generic parameters:
222
    ///
223
    ///  * **`B`**: the body type: the type to which `NetdocParseableUnverified` is applied.
224
    ///  * **`S`**: the signatures section type.
225
    ///  * **`O`**: the `FooUnverified` type, which embodies the parsed body and signatures.
226
    #[allow(clippy::string_slice)] // TODO
227
1000
    pub fn parse_signed<
228
1000
        B: HasUnverifiedParsedBody,
229
1000
        S: NetdocParseableSignatures,
230
1000
        O: NetdocParseableUnverified<Body = B, Signatures = S>,
231
1000
    >(
232
1000
        &mut self,
233
1000
        outer_stop: stop_at!(),
234
1000
    ) -> Result<O, EP> {
235
1000
        let mut input = ItemStream {
236
1000
            whole_input: &self.whole_input[self.whole_input.len() - self.lines.remaining().len()..],
237
1000
            ..self.clone()
238
1000
        };
239
1000
        let r = (|| {
240
1000
            let inner_always_stop = outer_stop | StopAt::doc_intro::<B::UnverifiedParsedBody>();
241
1000
            let body = B::UnverifiedParsedBody::from_items(
242
1000
                &mut input,
243
1000
                inner_always_stop | StopAt(S::is_item_keyword),
244
2
            )?;
245
998
            let signed_doc_body = input.body_sofar_for_signature();
246
998
            let unsigned_body_len = signed_doc_body.body().len();
247
998
            let mut hashes = S::HashesAccu::default();
248
998
            let sigs = S::from_items(&mut input, signed_doc_body, &mut hashes, inner_always_stop)?;
249
998
            let sigs = SignaturesData {
250
998
                sigs,
251
998
                unsigned_body_len,
252
998
                hashes,
253
998
            };
254
            // SECURITY
255
            // We unwrap the UnverifiedParsedBody and immediately wrap it up again
256
            // in FooUnverified, passing on the obligation to verify the signatures,
257
            // and still enforcing that with a newtype.
258
998
            let signed = O::from_parts(B::unverified_into_inner_unchecked(body), sigs);
259
998
            Ok(signed)
260
        })(); // don't exit here
261

            
262
1000
        *self = ItemStream {
263
1000
            whole_input: self.whole_input,
264
1000
            ..input
265
1000
        };
266

            
267
1000
        r
268
1000
    }
269

            
270
    /// Obtain the inputs that would be needed to hash any (even Disorderly) signature
271
    ///
272
    /// These are the hash inputs which would be needed for the next item,
273
    /// assuming it's a signature keyword.
274
2914
    pub fn peek_signature_hash_inputs(
275
2914
        &mut self,
276
2914
        body: SignedDocumentBody<'s>,
277
2914
    ) -> Result<Option<SignatureHashInputs<'s>>, EP> {
278
2914
        self.peek_internal()?;
279
2914
        let PeekState::Some(peeked) = &self.peeked else {
280
            return Ok(None);
281
        };
282
2914
        let document_sofar = self.body_sofar_for_signature().body();
283
2914
        let signature_item_line = self.lines.peeked_line(&peeked.line);
284
2914
        let signature_item_kw_spc = signature_item_line.strip_end_counted(peeked.args_len);
285
2914
        Ok(Some(SignatureHashInputs {
286
2914
            body,
287
2914
            document_sofar,
288
2914
            signature_item_kw_spc,
289
2914
            signature_item_line,
290
2914
        }))
291
2914
    }
292

            
293
    /// Yield the next item.
294
    #[allow(clippy::string_slice)] // TODO
295
81232
    pub fn next_item(&mut self) -> Result<Option<UnparsedItem<'s>>, EP> {
296
81232
        self.peek_internal()?;
297
81232
        let peeked = match self.peeked {
298
10
            PeekState::None { .. } => return Ok(None),
299
81222
            PeekState::Some { .. } => match mem::replace(
300
81222
                &mut self.peeked,
301
81222
                PeekState::None {
302
81222
                    yielded_item_lno: self.lines.peek_lno(),
303
81222
                },
304
81222
            ) {
305
81222
                PeekState::Some(peeked) => peeked,
306
                PeekState::None { .. } => panic!("it was Some just now"),
307
            },
308
        };
309

            
310
81222
        let keyword = peeked.keyword;
311
81222
        let line = self.lines.consume_peeked(peeked.line);
312
81222
        let args = &line[keyword.len()..];
313
81222
        let options = self.options;
314
81222
        let args = ArgumentStream::new(args, line.len(), options);
315

            
316
81222
        let object = if self.lines.remaining().starts_with('-') {
317
12468
            fn pem_delimiter<'s>(lines: &mut Lines<'s>, start: &str) -> Result<&'s str, EP> {
318
12468
                let line = lines.next().ok_or(
319
                    // If this is the *header*, we already know there's a line,
320
                    // so this error path is only for footers.
321
12468
                    EP::ObjectMissingFooter,
322
                )?;
323
12468
                let label = line
324
12468
                    .strip_prefix(start)
325
12468
                    .ok_or(EP::InvalidObjectDelimiters)?
326
12468
                    .strip_suffix(PEM_AFTER_LABEL)
327
12468
                    .ok_or(EP::InvalidObjectDelimiters)?;
328
12466
                Ok(label)
329
12468
            }
330

            
331
6234
            let label1 = pem_delimiter(&mut self.lines, PEM_HEADER_START)?;
332
6234
            let base64_start_remaining = self.lines.remaining();
333
46598
            while !self.lines.remaining().starts_with('-') {
334
40364
                let _: &str = self.lines.next().ok_or(EP::ObjectMissingFooter)?;
335
            }
336
6234
            let data_b64 = base64_start_remaining.strip_end_counted(self.lines.remaining().len());
337
6234
            let label2 = pem_delimiter(&mut self.lines, PEM_FOOTER_START)?;
338
6232
            let label = [label1, label2]
339
6232
                .into_iter()
340
6232
                .all_equal_value()
341
6232
                .map_err(|_| EP::ObjectMismatchedLabels)?;
342
6230
            Some(UnparsedObject {
343
6230
                label,
344
6230
                data_b64,
345
6230
                options,
346
6230
            })
347
        } else {
348
74988
            None
349
        };
350

            
351
81218
        Ok(Some(UnparsedItem {
352
81218
            keyword,
353
81218
            args,
354
81218
            object,
355
81218
        }))
356
81232
    }
357
}
358

            
359
impl<'s> UnparsedItem<'s> {
360
    /// Access the arguments, mutably (for consuming and parsing them)
361
19700
    pub fn args_mut(&mut self) -> &mut ArgumentStream<'s> {
362
19700
        &mut self.args
363
19700
    }
364
    /// Access a copy of the arguments
365
    ///
366
    /// When using this, be careful not to process any arguments twice.
367
1870
    pub fn args_copy(&self) -> ArgumentStream<'s> {
368
1870
        self.args.clone()
369
1870
    }
370

            
371
    /// Access the arguments (readonly)
372
    ///
373
    /// When using this, be careful not to process any arguments twice.
374
12316
    pub fn args(&self) -> &ArgumentStream<'s> {
375
12316
        &self.args
376
12316
    }
377

            
378
    /// Check that this item has no Object.
379
12788
    pub fn check_no_object(&self) -> Result<(), EP> {
380
12788
        if self.object.is_some() {
381
2
            return Err(EP::ObjectUnexpected);
382
12786
        }
383
12786
        Ok(())
384
12788
    }
385
    /// Convenience method for handling an error parsing an argument
386
    ///
387
    /// Returns a closure that converts every error into [`ArgumentError::Invalid`]
388
    /// and then to an [`ErrorProblem`] using
389
    /// [`.args().handle_error()`](ArgumentStream::handle_error).
390
    ///
391
    /// Useful in manual `ItemValueParseable` impls, when parsing arguments ad-hoc.
392
5200
    pub fn invalid_argument_handler<E>(
393
5200
        &self,
394
5200
        field: &'static str,
395
5200
    ) -> impl FnOnce(E) -> ErrorProblem {
396
5200
        let error = self.args().handle_error(field, AE::Invalid);
397
        move |_any_error| error
398
5200
    }
399
}
400

            
401
#[deprecated = "use types::NoFurtherArguments"]
402
pub use crate::types::NoMoreArguments as NoFurtherArguments;
403

            
404
impl<'s> Iterator for ItemStream<'s> {
405
    type Item = Result<UnparsedItem<'s>, EP>;
406
55900
    fn next(&mut self) -> Option<Result<UnparsedItem<'s>, EP>> {
407
55900
        self.next_item().transpose()
408
55900
    }
409
}
410

            
411
impl<'s> ArgumentStream<'s> {
412
    /// Make a new `ArgumentStream` from a string
413
    ///
414
    /// The string may start with whitespace (which will be ignored).
415
83674
    pub fn new(rest: &'s str, whole_line_len: usize, options: &'s ParseOptions) -> Self {
416
83674
        let previous_rest_len = whole_line_len;
417
83674
        ArgumentStream {
418
83674
            rest,
419
83674
            whole_line_len,
420
83674
            previous_rest_len,
421
83674
            options,
422
83674
        }
423
83674
    }
424

            
425
    /// Consume this whole `ArgumentStream`, giving the remaining arguments as a string
426
    ///
427
    /// The returned string won't start with whitespace.
428
    //
429
    /// `self` will be empty on return.
430
    // (We don't take `self` by value because that makes use with `UnparsedItem` annoying.)
431
6318
    pub fn into_remaining(&mut self) -> &'s str {
432
6318
        self.prep_yield();
433
6318
        mem::take(&mut self.rest)
434
6318
    }
435

            
436
    /// Return the component parts of this `ArgumentStream`
437
    ///
438
    /// The returned string might start with whitespace.
439
2452
    pub fn whole_line_len(&self) -> usize {
440
2452
        self.whole_line_len
441
2452
    }
442

            
443
    /// Prepares to yield an argument (or the rest)
444
    ///
445
    ///  * Trims leading WS from `rest`.
446
    ///  * Records the `previous_rest_len`
447
43754
    fn prep_yield(&mut self) {
448
43754
        self.rest = self.rest.trim_start_matches(WS);
449
43754
        self.previous_rest_len = self.rest.len();
450
43754
    }
451

            
452
    /// Prepares to yield, and then determines if there *is* anything to yield.
453
    ///
454
    ///  * Trim leading whitespace
455
    ///  * Records the `previous_rest_len`
456
    ///  * See if we're now empty
457
37436
    pub fn something_to_yield(&mut self) -> bool {
458
37436
        self.prep_yield();
459
37436
        !self.rest.is_empty()
460
37436
    }
461

            
462
    /// Throw and error if there are further arguments
463
    //
464
    // (We don't take `self` by value because that makes use with `UnparsedItem` annoying.)
465
3966
    pub fn reject_extra_args(&mut self) -> Result<NoFurtherArguments, UnexpectedArgument> {
466
3966
        if self.something_to_yield() {
467
4
            let column = self.next_arg_column();
468
4
            Err(UnexpectedArgument { column })
469
        } else {
470
3962
            Ok(NoFurtherArguments)
471
        }
472
3966
    }
473

            
474
    /// Convert a "length of `rest`" into the corresponding column number.
475
34070
    fn arg_column_from_rest_len(&self, rest_len: usize) -> usize {
476
        // Can't underflow since rest is always part of the whole.
477
        // Can't overflow since that would mean the document was as big as the address space.
478
34070
        self.whole_line_len - rest_len + 1
479
34070
    }
480

            
481
    /// Obtain the column number of the previously yielded argument.
482
    ///
483
    /// (After `into_remaining`, gives the column number
484
    /// of the start of the returned remaining argument string.)
485
34066
    pub fn prev_arg_column(&self) -> usize {
486
34066
        self.arg_column_from_rest_len(self.previous_rest_len)
487
34066
    }
488

            
489
    /// Obtains the column number of the *next* argument.
490
    ///
491
    /// Should be called after `something_to_yield`; otherwise the returned value
492
    /// may point to whitespace which is going to be skipped.
493
    // ^ this possible misuse doesn't seem worth defending against with type-fu,
494
    //   for a private function with few call sites.
495
4
    fn next_arg_column(&self) -> usize {
496
4
        self.arg_column_from_rest_len(self.rest.len())
497
4
    }
498

            
499
    /// Convert an `ArgumentError` to an `ErrorProblem`.
500
    ///
501
    /// The caller must supply the field name.
502
5200
    pub fn handle_error(&self, field: &'static str, ae: ArgumentError) -> ErrorProblem {
503
5200
        self.error_handler(field)(ae)
504
5200
    }
505

            
506
    /// Return a converter from `ArgumentError` to `ErrorProblem`.
507
    ///
508
    /// Useful in `.map_err`.
509
34066
    pub fn error_handler(
510
34066
        &self,
511
34066
        field: &'static str,
512
34066
    ) -> impl Fn(ArgumentError) -> ErrorProblem + 'static {
513
34066
        let column = self.prev_arg_column();
514
5230
        move |ae| match ae {
515
4
            AE::Missing => EP::MissingArgument { field },
516
5226
            AE::Invalid => EP::InvalidArgument { field, column },
517
            AE::Unexpected => EP::UnexpectedArgument { column },
518
5230
        }
519
34066
    }
520
}
521

            
522
impl<'s> Iterator for ArgumentStream<'s> {
523
    type Item = &'s str;
524
33200
    fn next(&mut self) -> Option<&'s str> {
525
33200
        if !self.something_to_yield() {
526
766
            return None;
527
32434
        }
528
        let arg;
529
32434
        (arg, self.rest) = self.rest.split_once(WS).unwrap_or((self.rest, ""));
530
32434
        Some(arg)
531
33200
    }
532
}
533

            
534
impl<'s> UnparsedObject<'s> {
535
    /// Obtain the Object data, as decoded bytes
536
5974
    pub fn decode_data(&self) -> Result<Vec<u8>, EP> {
537
5974
        crate::parse::tokenize::base64_decode_multiline(self.data_b64)
538
5974
            .map_err(|_e| EP::ObjectInvalidBase64)
539
5974
    }
540
}