1
//! Lexing of netdoc elements
2

            
3
use super::*;
4

            
5
/// Linear whitespace as defined by torspec: space and horizontal tab
///
/// Usable directly as a `str` pattern, e.g. with `split_once` or `trim_start_matches`.
// Only pub via internal_prelude, for benefit of macros
pub const WS: &[char] = &[' ', '\t'];
8

            
9
define_derive_deftly! {
    /// Define `parse_options` accessor
    ///
    /// The driver must have a lifetime named `'s`, which is suitable for the returned
    /// `&'s ParseOptions`.
    ///
    /// # Top-level attributes:
    ///
    ///  * **`#[deftly(parse_options(field = ".field.field"))]`**, default `.options`
    ///
    ///    The value is pasted, as tokens, directly after `&self`,
    ///    so it must be a field path starting with `.`.
    ParseOptions beta_deftly, expect items:

    impl<$tgens> $ttype {
        /// Examine the parsing options
        pub fn parse_options(&self) -> &'s ParseOptions {
            &self
                ${tmeta(parse_options(field))
                  as token_stream,
                  default { .options }}
        }
    }
}
30

            
31
/// Top-level reader: Netdoc text interpreted as a stream of items
32
#[derive(Debug, Clone, Deftly)]
33
#[derive_deftly(ParseOptions)]
34
pub struct ItemStream<'s> {
35
    /// The whole input document.
36
    whole_input: &'s str,
37
    /// Remaining document, as a stream of lines
38
    lines: Lines<'s>,
39
    /// If we have peeked ahead, what we discovered
40
    peeked: PeekState<'s>,
41
    /// Parsing options.
42
    options: &'s ParseOptions,
43
}
44

            
45
/// Whether an `ItemStream` has peeked ahead, and if so what it discovered
46
#[derive(Debug, Clone)]
47
enum PeekState<'s> {
48
    /// We've peeked a line
49
    Some(ItemStreamPeeked<'s>),
50
    /// We've not peeked, or peeking gave `None`
51
    None {
52
        /// Line number of the last item we yielded.
53
        ///
54
        /// `0` at the start.
55
        yielded_item_lno: usize,
56
    },
57
}
58

            
59
/// If an `ItemStream` has peeked ahead, what it discovered
60
#[derive(Debug, Clone)]
61
struct ItemStreamPeeked<'s> {
62
    /// The next keyword
63
    keyword: KeywordRef<'s>,
64
    /// Token proving that we
65
    line: lines::Peeked,
66
    /// Length of the suffix of the line that is the arguments rather than the keyword
67
    ///
68
    /// Does not include the first whitespace, that terminated the keyword.
69
    args_len: usize,
70
}
71

            
72
/// An Item that has been lexed but not parsed
73
#[derive(Debug, Clone, amplify::Getters, Deftly)]
74
#[derive_deftly(ParseOptions)]
75
#[deftly(parse_options(field = ".args.options"))]
76
pub struct UnparsedItem<'s> {
77
    /// The item's Keyword
78
    #[getter(as_copy)]
79
    keyword: KeywordRef<'s>,
80
    /// The Item's Arguments
81
    #[getter(skip)]
82
    args: ArgumentStream<'s>,
83
    /// The Item's Object, if there was one
84
    #[getter(as_clone)]
85
    object: Option<UnparsedObject<'s>>,
86
}
87

            
88
/// Reader for arguments on an Item
89
///
90
/// Represents the (remaining) arguments.
91
#[derive(Debug, Clone, Deftly)]
92
#[derive_deftly(ParseOptions)]
93
pub struct ArgumentStream<'s> {
94
    /// The remaining unparsed arguments
95
    ///
96
    /// Can start with WS, which is usually trimmed
97
    rest: &'s str,
98

            
99
    /// Original line length
100
    ///
101
    /// Used for reporting column of argument errors.
102
    whole_line_len: usize,
103

            
104
    /// Remaining length *before* we last yielded.
105
    previous_rest_len: usize,
106

            
107
    /// Parsing options.
108
    options: &'s ParseOptions,
109
}
110

            
111
/// An Object that has been lexed but not parsed
112
#[derive(Debug, Clone, amplify::Getters, Deftly)]
113
#[derive_deftly(ParseOptions)]
114
pub struct UnparsedObject<'s> {
115
    /// The Label
116
    #[getter(as_copy)]
117
    label: &'s str,
118

            
119
    /// The portion of the input document which is base64 data (and newlines)
120
    #[getter(skip)]
121
    data_b64: &'s str,
122

            
123
    /// Parsing options.
124
    options: &'s ParseOptions,
125
}
126

            
127
impl<'s> ItemStream<'s> {
128
    /// Start reading a network document as a series of Items
129
700
    pub fn new(input: &'s ParseInput<'s>) -> Result<Self, ParseError> {
130
700
        Ok(ItemStream {
131
700
            whole_input: input.input,
132
700
            lines: Lines::new(input.input),
133
700
            peeked: PeekState::None {
134
700
                yielded_item_lno: 0,
135
700
            },
136
700
            options: &input.options,
137
700
        })
138
700
    }
139

            
140
    /// Line number for reporting an error we have just discovered
141
    ///
142
    /// If we have recent peeked, we report the line number of the peeked keyword line.
143
    ///
144
    /// Otherwise, we report the line number of the most-recently yielded item.
145
106
    pub fn lno_for_error(&self) -> usize {
146
106
        match self.peeked {
147
            PeekState::Some { .. } => {
148
                // The error was presumably caused by whatever was seen in the peek.
149
                // That's the current line number.
150
14
                self.lines.peek_lno()
151
            }
152
92
            PeekState::None { yielded_item_lno } => {
153
                // The error was presumably caused by the results of next_item().
154
92
                yielded_item_lno
155
            }
156
        }
157
106
    }
158

            
159
    /// Core of peeking.  Tries to make `.peeked` be `Some`.
160
174744
    fn peek_internal<'i>(&'i mut self) -> Result<(), EP> {
161
174744
        if matches!(self.peeked, PeekState::None { .. }) {
162
83654
            let Some(peeked) = self.lines.peek() else {
163
846
                return Ok(());
164
            };
165

            
166
82808
            let peeked_line = self.lines.peeked_line(&peeked);
167

            
168
82808
            let (keyword, args) = peeked_line.split_once(WS).unwrap_or((peeked_line, ""));
169
82808
            let keyword = KeywordRef::new(keyword)?;
170

            
171
82808
            self.peeked = PeekState::Some(ItemStreamPeeked {
172
82808
                keyword,
173
82808
                line: peeked,
174
82808
                args_len: args.len(),
175
82808
            });
176
91090
        }
177

            
178
173898
        Ok(())
179
174744
    }
180

            
181
    /// Peek the next keyword
182
89196
    pub fn peek_keyword(&mut self) -> Result<Option<KeywordRef<'s>>, EP> {
183
89196
        self.peek_internal()?;
184
89196
        let PeekState::Some(peeked) = &self.peeked else {
185
836
            return Ok(None);
186
        };
187
88360
        Ok(Some(peeked.keyword))
188
89196
    }
189

            
190
    /// Obtain the body so far, suitable for hashing for an Orderly signature
191
    #[allow(clippy::string_slice)] // TODO
192
4072
    pub fn body_sofar_for_signature(&self) -> SignedDocumentBody<'s> {
193
4072
        let body = &self.whole_input[0..self.byte_position()];
194
4072
        SignedDocumentBody { body }
195
4072
    }
196

            
197
    /// Byte position, pointing to the start of the next item to yield
198
    ///
199
    /// Offset in bytes from the start of the original input string
200
    /// to the "current" position,
201
    /// ie to just after the item we yielded and just before the next item (or EOF).
202
8202
    pub fn byte_position(&self) -> usize {
203
8202
        self.whole_input.len() - self.lines.remaining().len()
204
8202
    }
205

            
206
    /// Access for the entire input string
207
    ///
208
    /// The original `input: &str` argument to [`ParseInput::new`].
209
    ///
210
    /// Includes both yielded and unyielded items.
211
1966
    pub fn whole_input(&self) -> &'s str {
212
1966
        self.whole_input
213
1966
    }
214

            
215
    /// Parse a (sub-)document with its own signatures
216
    ///
217
    /// Used (mostly) by the
218
    /// [`NetdocParseableUnverified`](derive_deftly_template_NetdocParseableUnverified)
219
    /// derive macro.
220
    ///
221
    /// Generic parameters:
222
    ///
223
    ///  * **`B`**: the body type: the type to which `NetdocParseableUnverified` is applied.
224
    ///  * **`S`**: the signatures section type.
225
    ///  * **`O`**: the `FooUnverified` type, which embodies the parsed body and signatures.
226
    #[allow(clippy::string_slice)] // TODO
227
958
    pub fn parse_signed<
228
958
        B: HasUnverifiedParsedBody,
229
958
        S: NetdocParseableSignatures,
230
958
        O: NetdocParseableUnverified<Body = B, Signatures = S>,
231
958
    >(
232
958
        &mut self,
233
958
        outer_stop: stop_at!(),
234
958
    ) -> Result<O, EP> {
235
958
        let mut input = ItemStream {
236
958
            whole_input: &self.whole_input[self.whole_input.len() - self.lines.remaining().len()..],
237
958
            ..self.clone()
238
958
        };
239
958
        let r = (|| {
240
958
            let inner_always_stop = outer_stop | StopAt::doc_intro::<B::UnverifiedParsedBody>();
241
958
            let body = B::UnverifiedParsedBody::from_items(
242
958
                &mut input,
243
958
                inner_always_stop | StopAt(S::is_item_keyword),
244
            )?;
245
958
            let signed_doc_body = input.body_sofar_for_signature();
246
958
            let unsigned_body_len = signed_doc_body.body().len();
247
958
            let mut hashes = S::HashesAccu::default();
248
958
            let sigs = S::from_items(&mut input, signed_doc_body, &mut hashes, inner_always_stop)?;
249
958
            let sigs = SignaturesData {
250
958
                sigs,
251
958
                unsigned_body_len,
252
958
                hashes,
253
958
            };
254
            // SECURITY
255
            // We unwrap the UnverifiedParsedBody and immediately wrap it up again
256
            // in FooUnverified, passing on the obligation to verify the signatures,
257
            // and still enforcing that with a newtype.
258
958
            let signed = O::from_parts(B::unverified_into_inner_unchecked(body), sigs);
259
958
            Ok(signed)
260
        })(); // don't exit here
261

            
262
958
        *self = ItemStream {
263
958
            whole_input: self.whole_input,
264
958
            ..input
265
958
        };
266

            
267
958
        r
268
958
    }
269

            
270
    /// Obtain the inputs that would be needed to hash any (even Disorderly) signature
271
    ///
272
    /// These are the hash inputs which would be needed for the next item,
273
    /// assuming it's a signature keyword.
274
2906
    pub fn peek_signature_hash_inputs(
275
2906
        &mut self,
276
2906
        body: SignedDocumentBody<'s>,
277
2906
    ) -> Result<Option<SignatureHashInputs<'s>>, EP> {
278
2906
        self.peek_internal()?;
279
2906
        let PeekState::Some(peeked) = &self.peeked else {
280
            return Ok(None);
281
        };
282
2906
        let document_sofar = self.body_sofar_for_signature().body();
283
2906
        let signature_item_line = self.lines.peeked_line(&peeked.line);
284
2906
        let signature_item_kw_spc = signature_item_line.strip_end_counted(peeked.args_len);
285
2906
        Ok(Some(SignatureHashInputs {
286
2906
            body,
287
2906
            document_sofar,
288
2906
            signature_item_kw_spc,
289
2906
            signature_item_line,
290
2906
        }))
291
2906
    }
292

            
293
    /// Yield the next item.
294
    #[allow(clippy::string_slice)] // TODO
295
82642
    pub fn next_item(&mut self) -> Result<Option<UnparsedItem<'s>>, EP> {
296
82642
        self.peek_internal()?;
297
82642
        let peeked = match self.peeked {
298
10
            PeekState::None { .. } => return Ok(None),
299
82632
            PeekState::Some { .. } => match mem::replace(
300
82632
                &mut self.peeked,
301
82632
                PeekState::None {
302
82632
                    yielded_item_lno: self.lines.peek_lno(),
303
82632
                },
304
82632
            ) {
305
82632
                PeekState::Some(peeked) => peeked,
306
                PeekState::None { .. } => panic!("it was Some just now"),
307
            },
308
        };
309

            
310
82632
        let keyword = peeked.keyword;
311
82632
        let line = self.lines.consume_peeked(peeked.line);
312
82632
        let args = &line[keyword.len()..];
313
82632
        let options = self.options;
314
82632
        let args = ArgumentStream::new(args, line.len(), options);
315

            
316
82632
        let object = if self.lines.remaining().starts_with('-') {
317
12112
            fn pem_delimiter<'s>(lines: &mut Lines<'s>, start: &str) -> Result<&'s str, EP> {
318
12112
                let line = lines.next().ok_or(
319
                    // If this is the *header*, we already know there's a line,
320
                    // so this error path is only for footers.
321
12112
                    EP::ObjectMissingFooter,
322
                )?;
323
12112
                let label = line
324
12112
                    .strip_prefix(start)
325
12112
                    .ok_or(EP::InvalidObjectDelimiters)?
326
12112
                    .strip_suffix(PEM_AFTER_LABEL)
327
12112
                    .ok_or(EP::InvalidObjectDelimiters)?;
328
12110
                Ok(label)
329
12112
            }
330

            
331
6056
            let label1 = pem_delimiter(&mut self.lines, PEM_HEADER_START)?;
332
6056
            let base64_start_remaining = self.lines.remaining();
333
46066
            while !self.lines.remaining().starts_with('-') {
334
40010
                let _: &str = self.lines.next().ok_or(EP::ObjectMissingFooter)?;
335
            }
336
6056
            let data_b64 = base64_start_remaining.strip_end_counted(self.lines.remaining().len());
337
6056
            let label2 = pem_delimiter(&mut self.lines, PEM_FOOTER_START)?;
338
6054
            let label = [label1, label2]
339
6054
                .into_iter()
340
6054
                .all_equal_value()
341
6054
                .map_err(|_| EP::ObjectMismatchedLabels)?;
342
6052
            Some(UnparsedObject {
343
6052
                label,
344
6052
                data_b64,
345
6052
                options,
346
6052
            })
347
        } else {
348
76576
            None
349
        };
350

            
351
82628
        Ok(Some(UnparsedItem {
352
82628
            keyword,
353
82628
            args,
354
82628
            object,
355
82628
        }))
356
82642
    }
357
}
358

            
359
impl<'s> UnparsedItem<'s> {
360
    /// Access the arguments, mutably (for consuming and parsing them)
361
19236
    pub fn args_mut(&mut self) -> &mut ArgumentStream<'s> {
362
19236
        &mut self.args
363
19236
    }
364
    /// Access a copy of the arguments
365
    ///
366
    /// When using this, be careful not to process any arguments twice.
367
1674
    pub fn args_copy(&self) -> ArgumentStream<'s> {
368
1674
        self.args.clone()
369
1674
    }
370

            
371
    /// Access the arguments (readonly)
372
    ///
373
    /// When using this, be careful not to process any arguments twice.
374
11740
    pub fn args(&self) -> &ArgumentStream<'s> {
375
11740
        &self.args
376
11740
    }
377

            
378
    /// Check that this item has no Object.
379
12286
    pub fn check_no_object(&self) -> Result<(), EP> {
380
12286
        if self.object.is_some() {
381
2
            return Err(EP::ObjectUnexpected);
382
12284
        }
383
12284
        Ok(())
384
12286
    }
385
    /// Convenience method for handling an error parsing an argument
386
    ///
387
    /// Returns a closure that converts every error into [`ArgumentError::Invalid`]
388
    /// and then to an [`ErrorProblem`] using
389
    /// [`.args().handle_error()`](ArgumentStream::handle_error).
390
    ///
391
    /// Useful in manual `ItemValueParseable` impls, when parsing arguments ad-hoc.
392
4812
    pub fn invalid_argument_handler<E>(
393
4812
        &self,
394
4812
        field: &'static str,
395
4812
    ) -> impl FnOnce(E) -> ErrorProblem {
396
4812
        let error = self.args().handle_error(field, AE::Invalid);
397
        move |_any_error| error
398
4812
    }
399
}
400

            
401
#[deprecated = "use types::NoFurtherArguments"]
402
pub use crate::types::NoMoreArguments as NoFurtherArguments;
403

            
404
impl<'s> Iterator for ItemStream<'s> {
405
    type Item = Result<UnparsedItem<'s>, EP>;
406
58050
    fn next(&mut self) -> Option<Result<UnparsedItem<'s>, EP>> {
407
58050
        self.next_item().transpose()
408
58050
    }
409
}
410

            
411
impl<'s> ArgumentStream<'s> {
412
    /// Make a new `ArgumentStream` from a string
413
    ///
414
    /// The string may start with whitespace (which will be ignored).
415
85016
    pub fn new(rest: &'s str, whole_line_len: usize, options: &'s ParseOptions) -> Self {
416
85016
        let previous_rest_len = whole_line_len;
417
85016
        ArgumentStream {
418
85016
            rest,
419
85016
            whole_line_len,
420
85016
            previous_rest_len,
421
85016
            options,
422
85016
        }
423
85016
    }
424

            
425
    /// Consume this whole `ArgumentStream`, giving the remaining arguments as a string
426
    ///
427
    /// The returned string won't start with whitespace.
428
    //
429
    /// `self` will be empty on return.
430
    // (We don't take `self` by value because that makes use with `UnparsedItem` annoying.)
431
6096
    pub fn into_remaining(&mut self) -> &'s str {
432
6096
        self.prep_yield();
433
6096
        mem::take(&mut self.rest)
434
6096
    }
435

            
436
    /// Return the component parts of this `ArgumentStream`
437
    ///
438
    /// The returned string might start with whitespace.
439
2384
    pub fn whole_line_len(&self) -> usize {
440
2384
        self.whole_line_len
441
2384
    }
442

            
443
    /// Prepares to yield an argument (or the rest)
444
    ///
445
    ///  * Trims leading WS from `rest`.
446
    ///  * Records the `previous_rest_len`
447
42326
    fn prep_yield(&mut self) {
448
42326
        self.rest = self.rest.trim_start_matches(WS);
449
42326
        self.previous_rest_len = self.rest.len();
450
42326
    }
451

            
452
    /// Prepares to yield, and then determines if there *is* anything to yield.
453
    ///
454
    ///  * Trim leading whitespace
455
    ///  * Records the `previous_rest_len`
456
    ///  * See if we're now empty
457
36230
    pub fn something_to_yield(&mut self) -> bool {
458
36230
        self.prep_yield();
459
36230
        !self.rest.is_empty()
460
36230
    }
461

            
462
    /// Throw and error if there are further arguments
463
    //
464
    // (We don't take `self` by value because that makes use with `UnparsedItem` annoying.)
465
3852
    pub fn reject_extra_args(&mut self) -> Result<NoFurtherArguments, UnexpectedArgument> {
466
3852
        if self.something_to_yield() {
467
4
            let column = self.next_arg_column();
468
4
            Err(UnexpectedArgument { column })
469
        } else {
470
3848
            Ok(NoFurtherArguments)
471
        }
472
3852
    }
473

            
474
    /// Convert a "length of `rest`" into the corresponding column number.
475
33322
    fn arg_column_from_rest_len(&self, rest_len: usize) -> usize {
476
        // Can't underflow since rest is always part of the whole.
477
        // Can't overflow since that would mean the document was as big as the address space.
478
33322
        self.whole_line_len - rest_len + 1
479
33322
    }
480

            
481
    /// Obtain the column number of the previously yielded argument.
482
    ///
483
    /// (After `into_remaining`, gives the column number
484
    /// of the start of the returned remaining argument string.)
485
33318
    pub fn prev_arg_column(&self) -> usize {
486
33318
        self.arg_column_from_rest_len(self.previous_rest_len)
487
33318
    }
488

            
489
    /// Obtains the column number of the *next* argument.
490
    ///
491
    /// Should be called after `something_to_yield`; otherwise the returned value
492
    /// may point to whitespace which is going to be skipped.
493
    // ^ this possible misuse doesn't seem worth defending against with type-fu,
494
    //   for a private function with few call sites.
495
4
    fn next_arg_column(&self) -> usize {
496
4
        self.arg_column_from_rest_len(self.rest.len())
497
4
    }
498

            
499
    /// Convert an `ArgumentError` to an `ErrorProblem`.
500
    ///
501
    /// The caller must supply the field name.
502
4812
    pub fn handle_error(&self, field: &'static str, ae: ArgumentError) -> ErrorProblem {
503
4812
        self.error_handler(field)(ae)
504
4812
    }
505

            
506
    /// Return a converter from `ArgumentError` to `ErrorProblem`.
507
    ///
508
    /// Useful in `.map_err`.
509
33318
    pub fn error_handler(
510
33318
        &self,
511
33318
        field: &'static str,
512
33318
    ) -> impl Fn(ArgumentError) -> ErrorProblem + 'static {
513
33318
        let column = self.prev_arg_column();
514
4842
        move |ae| match ae {
515
4
            AE::Missing => EP::MissingArgument { field },
516
4838
            AE::Invalid => EP::InvalidArgument { field, column },
517
            AE::Unexpected => EP::UnexpectedArgument { column },
518
4842
        }
519
33318
    }
520
}
521

            
522
impl<'s> Iterator for ArgumentStream<'s> {
523
    type Item = &'s str;
524
32256
    fn next(&mut self) -> Option<&'s str> {
525
32256
        if !self.something_to_yield() {
526
704
            return None;
527
31552
        }
528
        let arg;
529
31552
        (arg, self.rest) = self.rest.split_once(WS).unwrap_or((self.rest, ""));
530
31552
        Some(arg)
531
32256
    }
532
}
533

            
534
impl<'s> UnparsedObject<'s> {
535
    /// Obtain the Object data, as decoded bytes
536
5852
    pub fn decode_data(&self) -> Result<Vec<u8>, EP> {
537
5852
        crate::parse::tokenize::base64_decode_multiline(self.data_b64)
538
5852
            .map_err(|_e| EP::ObjectInvalidBase64)
539
5852
    }
540
}