|
4 | 4 | mod tests;
|
5 | 5 |
|
6 | 6 | use nom::error::{ErrorKind, FromExternalError, ParseError};
|
7 |
| -use nom::{Err, IResult, Parser}; |
| 7 | +use nom::{Check, Err, IResult, Input, Mode, OutputM, OutputMode, Parser}; |
8 | 8 |
|
9 | 9 | /// An unary operator.
|
10 | 10 | pub struct Unary<V, Q: Ord + Copy> {
|
@@ -367,3 +367,138 @@ where
|
367 | 367 | }
|
368 | 368 | }
|
369 | 369 | }
|
| 370 | + |
| 371 | +/// Applies a parser multiple times separated by another parser. |
| 372 | +/// |
| 373 | +/// It is similar to [`separated_list1`][crate::multi::separated_list1] but instead of collecting |
| 374 | +/// into a vector, you have a callback to build the output. |
| 375 | +/// |
| 376 | +/// In a LALR grammar a left recursive operator is usually built with a rule syntax such as: |
| 377 | +/// * A := A op B | B |
| 378 | +/// |
| 379 | +/// If you try to parse that wth [`alt`][crate::branch::alt] it will fail with a stack overflow |
| 380 | +/// because the recusion is unlimited. This function solves this problem by converting the recusion |
| 381 | +/// into an iteration. |
| 382 | +/// |
| 383 | +/// Compare with a right recursive operator, that in LALR would be: |
| 384 | +/// * A := B op A | B |
| 385 | +/// Or equivalently: |
| 386 | +/// * A := B (op A)? |
| 387 | +/// |
| 388 | +/// That can be written in `nom` trivially. |
| 389 | +/// |
| 390 | +/// This stops when either parser returns [`err::error`] and returns the last built value. to instead chain an error up, see |
| 391 | +/// [`cut`][crate::combinator::cut]. |
| 392 | +/// |
| 393 | +/// # Arguments |
| 394 | +/// * `child` The parser to apply. |
| 395 | +/// * `operator` Parses the operator between argument. |
| 396 | +/// * `init` A function returning the initial value. |
| 397 | +/// * `fold` The function that combines a result of `f` with |
| 398 | +/// the current accumulator. |
| 399 | +/// ```rust |
| 400 | +/// # #[macro_use] extern crate nom; |
| 401 | +/// # use nom::{Err, error::ErrorKind, Needed, IResult, Parser}; |
| 402 | +/// use nom::multi::left_assoc; |
| 403 | +/// use nom::branch::alt; |
| 404 | +/// use nom::sequence::delimited; |
| 405 | +/// use nom::character::complete::{char, digit1}; |
| 406 | +/// |
| 407 | +/// fn add(i: &str) -> IResult<&str, String> { |
| 408 | +/// left_assoc(mult, char('+'), |a, o, b| format!("{o}{a}{b}")).parse(i) |
| 409 | +/// } |
| 410 | +/// fn mult(i: &str) -> IResult<&str, String> { |
| 411 | +/// left_assoc(single, char('*'), |a, o, b| format!("{o}{a}{b}")).parse(i) |
| 412 | +/// } |
| 413 | +/// fn single(i: &str) -> IResult<&str, String> { |
| 414 | +/// alt(( |
| 415 | +/// digit1.map(|x: &str| x.to_string()), |
| 416 | +/// delimited(char('('), add, char(')')) |
| 417 | +/// )).parse(i) |
| 418 | +/// } |
| 419 | +/// |
| 420 | +/// assert_eq!(single("(1+2*3)"), Ok(("", String::from("+1*23")))); |
| 421 | +/// assert_eq!(single("((1+2)*3)"), Ok(("", String::from("*+123")))); |
| 422 | +/// assert_eq!(single("(1*2+3)"), Ok(("", String::from("+*123")))); |
| 423 | +/// assert_eq!(single("((1+2*3)+4)"), Ok(("", String::from("++1*234")))); |
| 424 | +/// assert_eq!(single("(1+(2*3+4))"), Ok(("", String::from("+1+*234")))); |
| 425 | +/// ``` |
| 426 | +pub fn left_assoc<I, E, O, OP, G, F, B>( |
| 427 | + child: F, |
| 428 | + operator: G, |
| 429 | + builder: B, |
| 430 | +) -> impl Parser<I, Output = O, Error = E> |
| 431 | +where |
| 432 | + I: Clone + Input, |
| 433 | + E: ParseError<I>, |
| 434 | + F: Parser<I, Output = O, Error = E>, |
| 435 | + G: Parser<I, Output = OP, Error = E>, |
| 436 | + B: FnMut(O, OP, O) -> O, |
| 437 | +{ |
| 438 | + LeftAssoc { |
| 439 | + child, |
| 440 | + operator, |
| 441 | + builder, |
| 442 | + } |
| 443 | +} |
| 444 | + |
/// Parser implementation for the [`left_assoc`] combinator.
///
/// Holds the operand parser, the operator parser and the callback that folds
/// `(left, operator, right)` into the next left-hand value.
pub struct LeftAssoc<F, G, B> {
    /// Parser applied for every operand.
    child: F,
    /// Parser applied between two operands.
    operator: G,
    /// Callback combining the value built so far, the operator and the next operand.
    builder: B,
}
| 451 | + |
| 452 | +impl<I, E, O, OP, G, F, B> Parser<I> for LeftAssoc<F, G, B> |
| 453 | +where |
| 454 | + I: Clone + Input, |
| 455 | + E: ParseError<I>, |
| 456 | + F: Parser<I, Output = O, Error = E>, |
| 457 | + G: Parser<I, Output = OP, Error = E>, |
| 458 | + B: FnMut(O, OP, O) -> O, |
| 459 | +{ |
| 460 | + type Output = O; |
| 461 | + type Error = E; |
| 462 | + |
| 463 | + fn process<OM: OutputMode>( |
| 464 | + &mut self, |
| 465 | + mut i: I, |
| 466 | + ) -> nom::PResult<OM, I, Self::Output, Self::Error> { |
| 467 | + let (i1, mut res) = self.child.process::<OM>(i)?; |
| 468 | + i = i1; |
| 469 | + |
| 470 | + loop { |
| 471 | + let len = i.input_len(); |
| 472 | + match self |
| 473 | + .operator |
| 474 | + .process::<OutputM<OM::Output, Check, OM::Incomplete>>(i.clone()) |
| 475 | + { |
| 476 | + Err(Err::Error(_)) => return Ok((i, res)), |
| 477 | + Err(Err::Failure(e)) => return Err(Err::Failure(e)), |
| 478 | + Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)), |
| 479 | + Ok((i1, op)) => { |
| 480 | + match self |
| 481 | + .child |
| 482 | + .process::<OutputM<OM::Output, Check, OM::Incomplete>>(i1.clone()) |
| 483 | + { |
| 484 | + Err(Err::Error(_)) => return Ok((i, res)), |
| 485 | + Err(Err::Failure(e)) => return Err(Err::Failure(e)), |
| 486 | + Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)), |
| 487 | + Ok((i2, rhs)) => { |
| 488 | + // infinite loop check: the parser must always consume |
| 489 | + if i2.input_len() == len { |
| 490 | + return Err(Err::Error(OM::Error::bind(|| { |
| 491 | + <F as Parser<I>>::Error::from_error_kind(i, ErrorKind::SeparatedList) |
| 492 | + }))); |
| 493 | + } |
| 494 | + // there is no combine() with 3 arguments, fake it with a tuple and two calls |
| 495 | + let op_rhs = OM::Output::combine(op, rhs, |op, rhs| (op, rhs)); |
| 496 | + res = OM::Output::combine(res, op_rhs, |lhs, (op, rhs)| (self.builder)(lhs, op, rhs)); |
| 497 | + i = i2; |
| 498 | + } |
| 499 | + } |
| 500 | + } |
| 501 | + } |
| 502 | + } |
| 503 | + } |
| 504 | +} |
0 commit comments