pest_meta/optimizer/factorizer.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
// pest. The Elegant Parser
// Copyright (c) 2018 DragoČ™ Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
use crate::ast::*;
pub fn factor(rule: Rule) -> Rule {
let Rule { name, ty, expr } = rule;
Rule {
name,
ty,
expr: expr.map_top_down(|expr| {
match expr {
Expr::Choice(lhs, rhs) => match (*lhs, *rhs) {
(Expr::Seq(l1, r1), Expr::Seq(l2, r2)) => {
if l1 == l2 {
Expr::Seq(l1, Box::new(Expr::Choice(r1, r2)))
} else {
Expr::Choice(Box::new(Expr::Seq(l1, r1)), Box::new(Expr::Seq(l2, r2)))
}
}
// Converts `(rule ~ rest) | rule` to `rule ~ rest?`, avoiding trying to match `rule` twice.
// This is only done for atomic rules, because other rule types have implicit whitespaces.
// FIXME: "desugar" implicit whitespace rules before applying any optimizations
(Expr::Seq(l1, l2), r)
if matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic) =>
{
if *l1 == r {
Expr::Seq(l1, Box::new(Expr::Opt(l2)))
} else {
Expr::Choice(Box::new(Expr::Seq(l1, l2)), Box::new(r))
}
}
// Converts `rule | (rule ~ rest)` to `rule` since `(rule ~ rest)`
// will never match if `rule` didn't.
(l, Expr::Seq(r1, r2)) => {
if l == *r1 {
l
} else {
Expr::Choice(Box::new(l), Box::new(Expr::Seq(r1, r2)))
}
}
(lhs, rhs) => Expr::Choice(Box::new(lhs), Box::new(rhs)),
},
expr => expr,
}
}),
}
}