Skip to content

Commit 55fbc36

Browse files
authored
Merge pull request #12 from Tsukuba-Programming-Lab/#11_UpdateSyntaxTrait
#11 TokenSet/Syntax トレイトの定義を修正
2 parents 18aaddb + d6b34e8 commit 55fbc36

File tree

10 files changed

+81
-99
lines changed

10 files changed

+81
-99
lines changed

crates/algorithm_lr1/src/builder.rs

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,8 @@ where
3737
{
3838
pub fn setup() -> anyhow::Result<Self> {
3939
// 1. Pre-process
40-
let rules = S::try_into()?
41-
.into_iter()
42-
.map(|(rule, _)| rule)
43-
.collect::<Vec<_>>();
44-
let ruleset = RuleSet::from(rules);
40+
let rules = S::into_iter().collect::<Vec<_>>();
41+
let ruleset = S::into_ruleset();
4542
let first_set = ruleset.first_set();
4643

4744
// 2. Generate dummy nonterm
@@ -76,7 +73,7 @@ where
7673
let mut goto_table: Vec<Vec<usize>> = Vec::with_capacity(dfa.0.len());
7774
for _ in 0..dfa.0.len() {
7875
action_table.push(HashMap::from_iter(
79-
T::enum_iter()
76+
T::into_iter()
8077
.map(|token| (token, LRAction::None))
8178
.collect::<Vec<(T, LRAction<S>)>>(),
8279
));
@@ -85,7 +82,6 @@ where
8582
}
8683

8784
// 5. Setup tables
88-
let rule_table: Vec<S> = S::enum_iter().collect();
8985
for lritem_set in &dfa.0 {
9086
for (token, next) in &lritem_set.next {
9187
match &token {
@@ -113,7 +109,7 @@ where
113109
let id = lritem_set.id as usize;
114110
let label = action_table[id].get_mut(&t.0).unwrap();
115111
*label = LRAction::Reduce(
116-
rule_table[item.rule.id as usize],
112+
rules[item.rule.id as usize],
117113
*nonterm_table.get(lhs).unwrap(),
118114
item.rule.rhs.len(),
119115
);
@@ -124,7 +120,7 @@ where
124120
LRAction::Accept
125121
} else {
126122
LRAction::Reduce(
127-
rule_table[item.rule.id as usize],
123+
rules[item.rule.id as usize],
128124
*nonterm_table.get(lhs).unwrap(),
129125
item.rule.rhs.len(),
130126
)

crates/algorithm_lr1/src/lib.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,19 +68,14 @@ mod test {
6868
#[derive(Debug, Clone, Copy, Syntax)]
6969
enum TestSyntax {
7070
#[rule("<expr> ::= <expr> Plus <term>")]
71-
ExprPlus,
7271
#[rule("<expr> ::= <expr> Minus <term>")]
73-
ExprMinus,
7472
#[rule("<expr> ::= <term>")]
75-
ExprTerm,
73+
Expr,
7674
#[rule("<term> ::= <term> Mul <num>")]
77-
TermMul,
7875
#[rule("<term> ::= <term> Div <num>")]
79-
TermDiv,
8076
#[rule("<term> ::= <num>")]
81-
TermNum,
77+
Term,
8278
#[rule("<num> ::= BracketL <expr> BracketR")]
83-
NestedNum,
8479
#[rule("<num> ::= Num")]
8580
Num,
8681
}

crates/core/src/cfg/syntax.rs

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,26 @@ use super::token::TokenSet;
77

88
pub trait Syntax<'a>
99
where
10-
Self: Debug + Clone + Copy + Sized,
10+
Self: Debug + Clone + Copy,
1111
{
1212
type TokenSet: TokenSet<'a>;
1313

14-
fn enum_iter() -> impl Iterator<Item = Self>;
15-
fn to_rule(&self) -> Rule<'a, Self::TokenSet>;
14+
fn into_iter() -> impl Iterator<Item = Self>;
15+
fn into_rules(&self) -> Vec<Rule<'a, Self::TokenSet>>;
1616

17-
fn try_into() -> anyhow::Result<Vec<(Rule<'a, Self::TokenSet>, Self)>> {
18-
Self::enum_iter()
19-
.map(|elem| Ok((Self::to_rule(&elem), elem)))
20-
.collect::<anyhow::Result<Vec<_>>>()
17+
fn into_ruleset() -> RuleSet<'a, Self::TokenSet> {
18+
let rules = Self::into_iter()
19+
.enumerate()
20+
.flat_map(|(idx, elem)| {
21+
let mut rules = Self::into_rules(&elem);
22+
for rule in &mut rules {
23+
rule.id = idx;
24+
}
25+
rules
26+
})
27+
.collect::<Vec<_>>();
28+
29+
RuleSet::from(rules)
2130
}
2231
}
2332

@@ -108,16 +117,12 @@ pub struct RuleSet<'a, T: TokenSet<'a>> {
108117
}
109118

110119
impl<'a, T: TokenSet<'a>> From<Vec<Rule<'a, T>>> for RuleSet<'a, T> {
111-
fn from(mut rules: Vec<Rule<'a, T>>) -> Self {
120+
fn from(rules: Vec<Rule<'a, T>>) -> Self {
112121
let top = match &rules[0].lhs {
113122
RuleElem::NonTerm(s) => s.clone(),
114123
_ => unreachable!(),
115124
};
116125

117-
for (idx, rule) in rules.iter_mut().enumerate() {
118-
rule.id = idx;
119-
}
120-
121126
RuleSet {
122127
top,
123128
rules,
@@ -228,7 +233,7 @@ impl<'a, T: TokenSet<'a>> RuleSet<'a, T> {
228233
mod test {
229234
use std::collections::HashMap;
230235

231-
use super::{TokenSet, Syntax, Rule, RuleElem, RuleSet};
236+
use super::{TokenSet, Syntax, Rule, RuleElem};
232237

233238
#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)]
234239
enum TestToken {
@@ -242,7 +247,7 @@ mod test {
242247
}
243248

244249
impl TokenSet<'_> for TestToken {
245-
fn enum_iter() -> impl Iterator<Item = Self> {
250+
fn into_iter() -> impl Iterator<Item = Self> {
246251
Box::new(
247252
vec![
248253
TestToken::Num,
@@ -257,7 +262,7 @@ mod test {
257262
)
258263
}
259264

260-
fn to_regex(&self) -> &'static str {
265+
fn into_regex_str(&self) -> &'static str {
261266
match self {
262267
TestToken::Num => r"^[1-9][0-9]*",
263268
TestToken::Plus => r"^\+",
@@ -289,7 +294,7 @@ mod test {
289294
impl<'a> Syntax<'a> for TestSyntax {
290295
type TokenSet = TestToken;
291296

292-
fn enum_iter() -> impl Iterator<Item = Self> {
297+
fn into_iter() -> impl Iterator<Item = Self> {
293298
Box::new(
294299
vec![
295300
TestSyntax::ExprPlus,
@@ -305,7 +310,7 @@ mod test {
305310
)
306311
}
307312

308-
fn to_rule(&self) -> Rule<'a, Self::TokenSet> {
313+
fn into_rules(&self) -> Vec<Rule<'a, Self::TokenSet>> {
309314
let expr_plus = Rule::from((
310315
RuleElem::new_nonterm("expr"),
311316
vec![
@@ -364,14 +369,14 @@ mod test {
364369
let fact_2_num = Rule::from((RuleElem::new_nonterm("fact"), vec![]));
365370

366371
match self {
367-
TestSyntax::ExprPlus => expr_plus,
368-
TestSyntax::ExprMinus => expr_minus,
369-
TestSyntax::Expr2Term => expr_2_term,
370-
TestSyntax::TermMul => term_mul,
371-
TestSyntax::TermDiv => term_div,
372-
TestSyntax::Term2Fact => term_2_fact,
373-
TestSyntax::Fact2Expr => fact_2_expr,
374-
TestSyntax::Fact2Num => fact_2_num,
372+
TestSyntax::ExprPlus => vec![expr_plus],
373+
TestSyntax::ExprMinus => vec![expr_minus],
374+
TestSyntax::Expr2Term => vec![expr_2_term],
375+
TestSyntax::TermMul => vec![term_mul],
376+
TestSyntax::TermDiv => vec![term_div],
377+
TestSyntax::Term2Fact => vec![term_2_fact],
378+
TestSyntax::Fact2Expr => vec![fact_2_expr],
379+
TestSyntax::Fact2Num => vec![fact_2_num],
375380
}
376381
}
377382
}
@@ -399,12 +404,7 @@ mod test {
399404

400405
#[test]
401406
fn first_set() {
402-
let rules = <TestSyntax as Syntax>::try_into()
403-
.unwrap()
404-
.into_iter()
405-
.map(|(rule, _)| rule)
406-
.collect::<Vec<_>>();
407-
let ruleset = RuleSet::from(rules);
407+
let ruleset = <TestSyntax as Syntax>::into_ruleset();
408408
let first_set = ruleset.first_set();
409409

410410
check(

crates/core/src/cfg/token.rs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,25 @@
11
use std::fmt::Debug;
22
use std::hash::Hash;
33

4-
use regex::Regex;
4+
use regex::{Regex, RegexSet};
55

66
pub trait TokenSet<'a>
77
where
88
Self: Debug + Copy + Clone + Hash + Eq,
99
{
1010
fn ignore_str() -> &'a str;
11-
fn enum_iter() -> impl Iterator<Item = Self>;
12-
fn to_regex(&self) -> &'a str;
11+
fn into_iter() -> impl Iterator<Item = Self>;
12+
fn into_regex_str(&self) -> &'a str;
1313

14-
fn try_into() -> anyhow::Result<Vec<(Regex, Self)>> {
15-
Self::enum_iter()
16-
.map(|token| Ok((Regex::new(Self::to_regex(&token))?, token)))
17-
.collect::<anyhow::Result<Vec<_>>>()
14+
fn into_regex(&self) -> anyhow::Result<Regex> {
15+
Ok(Regex::new(self.into_regex_str())?)
16+
}
17+
18+
fn try_into_regexset() -> anyhow::Result<RegexSet> {
19+
let regex_set = Self::into_iter()
20+
.map(|token| Self::into_regex_str(&token))
21+
.collect::<Vec<_>>();
22+
23+
Ok(RegexSet::new(regex_set)?)
1824
}
1925
}

crates/core/src/lex.rs

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@ impl Lexer {
4141

4242
struct LexDriver<'a, 'b, T: TokenSet<'a>> {
4343
// Regex
44+
regex_istr: Regex,
4445
regex_set: RegexSet,
4546
regex_map: Vec<(Regex, T)>,
46-
regex_istr: Regex,
4747

4848
// State
4949
input: &'b str,
@@ -57,18 +57,16 @@ impl<'a, 'b, T: TokenSet<'a>> TryFrom<&'b str> for LexDriver<'a, 'b, T> {
5757
type Error = anyhow::Error;
5858

5959
fn try_from(input: &'b str) -> anyhow::Result<Self> {
60-
let regex_map = T::try_into()?;
61-
let regex_set = regex_map
62-
.iter()
63-
.map(|(_, token)| T::to_regex(&token))
64-
.collect::<Vec<_>>();
65-
let regex_set = RegexSet::new(regex_set)?;
6660
let regex_istr = Regex::new(T::ignore_str())?;
61+
let regex_set = T::try_into_regexset()?;
62+
let regex_map = T::into_iter()
63+
.map(|token| Ok((token.into_regex()?, token)))
64+
.collect::<anyhow::Result<Vec<_>>>()?;
6765

6866
Ok(LexDriver {
67+
regex_istr,
6968
regex_set,
7069
regex_map,
71-
regex_istr,
7270
input,
7371
pos: 0,
7472
tokenset: PhantomData,
@@ -126,11 +124,11 @@ mod test {
126124
r"^[ \t\n]+"
127125
}
128126

129-
fn enum_iter() -> Box<dyn Iterator<Item = Self>> {
130-
Box::new(vec![TestToken::Num, TestToken::Plus].into_iter())
127+
fn into_iter() -> impl Iterator<Item = Self> {
128+
vec![TestToken::Num, TestToken::Plus].into_iter()
131129
}
132130

133-
fn to_regex(&self) -> &'static str {
131+
fn into_regex_str(&self) -> &'static str {
134132
match self {
135133
TestToken::Num => r"^[1-9][0-9]*",
136134
TestToken::Plus => r"^\+",

crates/core_derive/src/impl/syntax.rs

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,13 @@ pub fn syntax_proc_macro_impl(ast: DeriveInput) -> TokenStream {
3131
impl<'a> Syntax<'a> for #enum_name {
3232
type TokenSet = #enum_assoc_type;
3333

34-
fn enum_iter() -> impl Iterator<Item = Self> {
34+
fn into_iter() -> impl Iterator<Item = Self> {
3535
vec![
3636
#( #enum_variants, )*
3737
].into_iter()
3838
}
3939

40-
fn to_rule(&self) -> Rule<'a, Self::TokenSet> {
40+
fn into_rules(&self) -> Vec<Rule<'a, Self::TokenSet>> {
4141
match self {
4242
#( #enum_rule_table, )*
4343
_ => unimplemented!(),
@@ -50,23 +50,23 @@ pub fn syntax_proc_macro_impl(ast: DeriveInput) -> TokenStream {
5050
struct VariantInfo<'a> {
5151
parent_ident: &'a Ident,
5252
self_ident: &'a Ident,
53-
rule: Option<TokenStream>,
53+
rules: Vec<TokenStream>,
5454
}
5555

5656
impl<'a> VariantInfo<'a> {
5757
fn parse(parent_ident: &'a Ident, variant: &'a Variant) -> VariantInfo<'a> {
5858
let self_ident = &variant.ident;
5959

60-
let mut rule = None;
60+
let mut rules = vec![];
6161
for attr in &variant.attrs {
6262
let attr = attr.parse_args::<LitStr>().unwrap().value();
63-
rule = Some(Self::parse_rule(&attr));
63+
rules.push(Self::parse_rule(&attr));
6464
}
6565

6666
VariantInfo {
6767
parent_ident,
6868
self_ident,
69-
rule,
69+
rules,
7070
}
7171
}
7272

@@ -102,9 +102,11 @@ impl<'a> VariantInfo<'a> {
102102

103103
fn gen_ident_with_rule(&self) -> TokenStream {
104104
let ident = self.gen_ident();
105-
match &self.rule {
106-
Some(rule) => quote! { #ident => #rule },
107-
None => quote! { unimplemented!() },
105+
if self.rules.is_empty() {
106+
quote! { #ident => unimplemented!() }
107+
} else {
108+
let rules = &self.rules;
109+
quote! { #ident => vec![#(#rules),*] }
108110
}
109111
}
110112
}

crates/core_derive/src/impl/tokenset.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,13 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream {
3636
#enum_ignored
3737
}
3838

39-
fn enum_iter() -> impl Iterator<Item = Self> {
39+
fn into_iter() -> impl Iterator<Item = Self> {
4040
vec![
4141
#( #enum_variants, )*
4242
].into_iter()
4343
}
4444

45-
fn to_regex(&self) -> &'static str {
45+
fn into_regex_str(&self) -> &'static str {
4646
match self {
4747
#( #enum_regex_table, )*
4848
_ => unimplemented!(),

examples/expr.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,14 @@ enum ExprTokenSet {
2828
#[derive(Debug, Clone, Copy, Syntax)]
2929
enum ExprSyntax {
3030
#[rule("<expr> ::= <expr> Plus <term>")]
31-
ExprPlus,
3231
#[rule("<expr> ::= <expr> Minus <term>")]
33-
ExprMinus,
3432
#[rule("<expr> ::= <term>")]
35-
ExprTerm,
33+
Expr,
3634
#[rule("<term> ::= <term> Mul <num>")]
37-
TermMul,
3835
#[rule("<term> ::= <term> Div <num>")]
39-
TermDiv,
4036
#[rule("<term> ::= <num>")]
41-
TermNum,
37+
Term,
4238
#[rule("<num> ::= BracketL <expr> BracketR")]
43-
NestedNum,
4439
#[rule("<num> ::= Num")]
4540
Num,
4641
}

0 commit comments

Comments
 (0)