From e4187d3e3c74212e97bfdd581ced2a462a6312f1 Mon Sep 17 00:00:00 2001 From: kould Date: Mon, 10 Nov 2025 14:53:51 +0800 Subject: [PATCH 01/13] refactor: upgrade nom to version 8.0.0 and replace the pratt parser with nom_language. Use the first token check to reduce branch traversal in expr_element. --- Cargo.lock | 21 +- Cargo.toml | 6 +- src/query/ast/Cargo.toml | 2 +- src/query/ast/benches/bench.rs | 10 +- src/query/ast/src/ast/expr.rs | 4 +- src/query/ast/src/lib.rs | 1 + src/query/ast/src/parser/comment.rs | 3 +- src/query/ast/src/parser/common.rs | 274 ++--- src/query/ast/src/parser/copy.rs | 17 +- src/query/ast/src/parser/data_mask.rs | 14 +- src/query/ast/src/parser/dynamic_table.rs | 13 +- src/query/ast/src/parser/expr.rs | 1370 ++++++++++++--------- src/query/ast/src/parser/input.rs | 110 +- src/query/ast/src/parser/parser.rs | 3 +- src/query/ast/src/parser/query.rs | 918 +++++++++----- src/query/ast/src/parser/script.rs | 32 +- src/query/ast/src/parser/sequence.rs | 3 +- src/query/ast/src/parser/stage.rs | 35 +- src/query/ast/src/parser/statement.rs | 337 +++-- src/query/ast/src/parser/stream.rs | 12 +- src/query/ast/src/precedence.rs | 30 + src/query/ast/tests/it/parser.rs | 3 +- 22 files changed, 1927 insertions(+), 1291 deletions(-) create mode 100644 src/query/ast/src/precedence.rs diff --git a/Cargo.lock b/Cargo.lock index 8158c5a13ea8f..9e3cdf1b83732 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3053,11 +3053,11 @@ dependencies = [ "indent", "itertools 0.13.0", "logos", - "nom 7.1.3", + "nom 8.0.0", + "nom-language", "nom-rule", "ordered-float 5.0.0", "percent-encoding", - "pratt", "pretty_assertions", "recursive", "rspack-codespan-reporting", @@ -10723,15 +10723,24 @@ dependencies = [ "memchr", ] +[[package]] +name = "nom-language" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2de2bc5b451bfedaef92c90b8939a8fff5770bdcc1fafd6239d086aab8fa6b29" +dependencies = [ + "nom 8.0.0", +] + [[package]] name = "nom-rule" -version = "0.4.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c72951bd83c76b88d820f03b38b124f981dc2520070d62170da7012d1ce964ab" +checksum = "67df4c3364e754b809f749a0e9c2832154c42f785f187741f536305400b2744c" dependencies = [ - "nom 7.1.3", + "nom 8.0.0", "pratt", - "proc-macro-error 1.0.4", + "proc-macro-error2", "proc-macro2", "quote", "syn 2.0.106", diff --git a/Cargo.toml b/Cargo.toml index 1143327f58ceb..acf6369b73eed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -554,9 +554,9 @@ fast-float2 = "0.2.3" gix = "0.71.0" indent = "0.1.1" logos = "0.12.1" -nom = "7.1.1" -nom-rule = "0.4" -pratt = "0.4.0" +nom = "8.0.0" +nom-language = "0.1.0" +nom-rule = "0.5.1" rspack-codespan-reporting = "0.11" rustc-demangle = "0.1" strsim = "0.10" diff --git a/src/query/ast/Cargo.toml b/src/query/ast/Cargo.toml index 2fbab0490bed9..e17da802a1854 100644 --- a/src/query/ast/Cargo.toml +++ b/src/query/ast/Cargo.toml @@ -19,10 +19,10 @@ indent = { workspace = true } itertools = { workspace = true } logos = { workspace = true } nom = { workspace = true } +nom-language = { workspace = true } nom-rule = { workspace = true } ordered-float = { workspace = true } percent-encoding = { workspace = true } -pratt = { workspace = true } pretty_assertions = { workspace = true } recursive = { workspace = true } rspack-codespan-reporting = { workspace = true } diff --git a/src/query/ast/benches/bench.rs b/src/query/ast/benches/bench.rs index 4d66d6f9e3908..15c735ab3f130 100644 --- a/src/query/ast/benches/bench.rs +++ b/src/query/ast/benches/bench.rs @@ -18,11 +18,11 @@ fn main() { // bench fastest │ slowest │ median │ mean │ samples │ iters // ╰─ dummy │ │ │ │ │ -// ├─ deep_function_call 802.2 µs │ 1.207 ms │ 842 µs │ 850.6 µs │ 100 │ 100 -// ├─ deep_query 242.3 µs │ 426.3 µs │ 254.2 µs │ 257.3 µs │ 100 │ 100 -// ├─ large_query 1.104 ms │ 1.264 ms │ 1.14 ms │ 1.142 ms │ 100 │ 100 -// ├─ large_statement 1.097 ms │ 1.2 ms │ 1.15 ms │ 1.148 ms │ 100 │ 100 -// ╰─ wide_expr 282.4 µs │ 368.6 µs │ 298 µs │ 298.7 µs │ 100 │ 100 +// ├─ deep_function_call 242.8 µs │ 525.3 µs │ 258.9 µs │ 262.8 µs │ 100 │ 100 +// ├─ deep_query 235.6 µs │ 364.8 µs │ 244.8 µs │ 249.3 µs │ 100 │ 100 +// ├─ large_query 362.9 µs │ 451.6 µs │ 376.5 µs │ 379.7 µs │ 100 │ 100 +// ├─ large_statement 364.8 µs │ 418.4 µs │ 380.2 µs │ 382.8 µs │ 100 │ 100 +// ╰─ wide_expr 96.97 µs │ 270.2 µs │ 102.8 µs │ 105.3 µs │ 100 │ 100 #[divan::bench_group(max_time = 0.5)] mod dummy { diff --git a/src/query/ast/src/ast/expr.rs b/src/query/ast/src/ast/expr.rs index 6fd85d563bc93..0d2cb4df53d20 100644 --- a/src/query/ast/src/ast/expr.rs +++ b/src/query/ast/src/ast/expr.rs @@ -20,8 +20,6 @@ use derive_visitor::DriveMut; use educe::Educe; use enum_as_inner::EnumAsInner; use ethnum::i256; -use pratt::Affix; -use pratt::Associativity; use super::ColumnRef; use super::OrderByExpr; @@ -32,6 +30,8 @@ use crate::ast::write_dot_separated_list; use crate::ast::Identifier; use crate::ast::Indirection; use crate::ast::Query; +use crate::precedence::Affix; +use crate::precedence::Associativity; use crate::span::merge_span; use crate::ParseError; use crate::Result; diff --git a/src/query/ast/src/lib.rs b/src/query/ast/src/lib.rs index a5af0f29f2a59..87e8bab927ae8 100644 --- a/src/query/ast/src/lib.rs +++ b/src/query/ast/src/lib.rs @@ -18,6 +18,7 @@ pub mod ast; pub mod parser; mod parser_error; +pub mod precedence; pub mod span; mod visitor; diff --git a/src/query/ast/src/parser/comment.rs b/src/query/ast/src/parser/comment.rs index 44a3ac8c42be7..85e6391444286 100644 --- a/src/query/ast/src/parser/comment.rs +++ b/src/query/ast/src/parser/comment.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use nom::Parser; use nom_rule::rule; use super::expr::literal_string; @@ -33,7 +34,7 @@ pub fn comment(i: Input) -> IResult { | #comment_column: "`COMMENT [IF EXISTS] ON COLUMN . IS ''`" | #comment_network_policy: "`COMMENT [IF EXISTS] ON NETWORK POLICY IS ''`" | #comment_password_policy: "`COMMENT [IF EXISTS] ON PASSWORD POLICY IS ''`" - )(i) + ).parse(i) } fn comment_table(i: Input) -> IResult { diff --git a/src/query/ast/src/parser/common.rs b/src/query/ast/src/parser/common.rs index d54bc796c8045..0349e17b8e22e 100644 --- a/src/query/ast/src/parser/common.rs +++ b/src/query/ast/src/parser/common.rs @@ -12,20 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::cell::RefCell; -use std::rc::Rc; - -use nom::branch::alt; -use nom::combinator::consumed; -use nom::combinator::map; -use nom::multi::many1; +pub use nom::branch::alt; +pub use nom::branch::permutation; +pub use nom::combinator::consumed; +pub use nom::combinator::map; +pub use nom::combinator::not; +pub use nom::combinator::value; +use nom::error::Error as NomError; +use nom::error::ErrorKind as NomErrorKind; +pub use nom::multi::many1; use nom::sequence::terminated; -use nom::Offset; -use nom::Slice; +use nom::Parser; use nom_rule::rule; -use pratt::PrattError; -use pratt::PrattParser; -use pratt::Precedence; + +pub fn parser_fn<'a, O, P>(mut parser: P) -> impl FnMut(Input<'a>) -> IResult<'a, O> +where P: nom::Parser, Output = O, Error = Error<'a>> { + move |input| parser.parse(input) +} use crate::ast::quote::QuotedIdent; use crate::ast::ColumnID; @@ -41,11 +44,78 @@ use crate::parser::query::with_options; use crate::parser::token::*; use crate::parser::Error; use crate::parser::ErrorKind; +pub use crate::precedence::Affix; +pub use crate::precedence::Associativity; +pub use crate::precedence::Precedence; use crate::Range; use crate::Span; pub type IResult<'a, Output> = nom::IResult, Output, Error<'a>>; +pub type ElementsInput<'a, T> = &'a [WithSpan<'a, T>]; +pub type ElementsError<'a, T> = NomError>; +pub type ElementsResult<'a, T, O> = nom::IResult, O, ElementsError<'a, T>>; + +pub fn match_prefix<'a, T>( + affix_fn: impl Fn(&T) -> Affix + Copy, + precedence: Precedence, +) -> impl FnMut(ElementsInput<'a, T>) -> ElementsResult<'a, T, WithSpan<'a, T>> +where + T: Clone, +{ + match_affix( + affix_fn, + move |affix| matches!(affix, Affix::Prefix(p) if p == precedence), + ) +} + +pub fn match_postfix<'a, T>( + affix_fn: impl Fn(&T) -> Affix + Copy, + precedence: Precedence, +) -> impl FnMut(ElementsInput<'a, T>) -> ElementsResult<'a, T, WithSpan<'a, T>> +where + T: Clone, +{ + match_affix( + affix_fn, + move |affix| matches!(affix, Affix::Postfix(p) if p == precedence), + ) +} + +pub fn match_binary<'a, T>( + affix_fn: impl Fn(&T) -> Affix + Copy, + precedence: Precedence, + associativity: Associativity, +) -> impl FnMut(ElementsInput<'a, T>) -> ElementsResult<'a, T, WithSpan<'a, T>> +where + T: Clone, +{ + match_affix( + affix_fn, + move |affix| matches!(affix, Affix::Infix(p, assoc) if p == precedence && assoc == associativity), + ) +} + +pub fn match_nilfix<'a, T>( + affix_fn: impl Fn(&T) -> Affix + Copy, +) -> impl FnMut(ElementsInput<'a, T>) -> ElementsResult<'a, T, WithSpan<'a, T>> +where T: Clone { + match_affix(affix_fn, |affix| matches!(affix, Affix::Nilfix)) +} + +fn match_affix<'a, T>( + affix_fn: impl Fn(&T) -> Affix + Copy, + predicate: impl Fn(Affix) -> bool + Copy, +) -> impl FnMut(ElementsInput<'a, T>) -> ElementsResult<'a, T, WithSpan<'a, T>> +where + T: Clone, +{ + move |input| match input.split_first() { + Some((elem, rest)) if predicate(affix_fn(&elem.elem)) => Ok((rest, elem.clone())), + _ => Err(nom::Err::Error(NomError::new(input, NomErrorKind::Tag))), + } +} + pub fn match_text(text: &'static str) -> impl FnMut(Input) -> IResult<&Token> { move |i| match i.tokens.first().filter(|token| token.text() == text) { Some(token) => Ok((i.slice(1..), token)), @@ -85,7 +155,8 @@ pub fn lambda_params(i: Input) -> IResult> { rule!( #single_param | #multi_params - )(i) + ) + .parse(i) } pub fn ident(i: Input) -> IResult { @@ -116,7 +187,8 @@ pub fn stage_name(i: Input) -> IResult { rule!( #plain_ident | #anonymous_stage - )(i) + ) + .parse(i) } fn plain_identifier( @@ -134,7 +206,8 @@ fn plain_identifier( quote: None, ident_type: IdentifierType::None, }, - )(i) + ) + .parse(i) } } @@ -193,7 +266,8 @@ fn identifier_variable(i: Input) -> IResult { quote: None, ident_type: IdentifierType::Variable, }, - )(i) + ) + .parse(i) } fn non_reserved_identifier( @@ -205,7 +279,8 @@ fn non_reserved_identifier( | #quoted_identifier | #identifier_hole | #identifier_variable - )(i) + ) + .parse(i) } } @@ -228,7 +303,8 @@ fn non_reserved_keyword( pub fn database_ref(i: Input) -> IResult { map(dot_separated_idents_1_to_2, |(catalog, database)| { DatabaseRef { catalog, database } - })(i) + }) + .parse(i) } pub fn table_ref(i: Input) -> IResult { @@ -242,7 +318,8 @@ pub fn table_ref(i: Input) -> IResult { table, with_options, }, - )(i) + ) + .parse(i) } pub fn set_type(i: Input) -> IResult { @@ -259,7 +336,8 @@ pub fn set_type(i: Input) -> IResult { }, None => SetType::SettingsSession, }, - )(i) + ) + .parse(i) } pub fn table_reference_only(i: Input) -> IResult { @@ -279,7 +357,8 @@ pub fn table_reference_only(i: Input) -> IResult { unpivot: None, sample: None, }, - )(i) + ) + .parse(i) } pub fn column_reference_only(i: Input) -> IResult<(TableReference, Identifier)> { @@ -304,7 +383,8 @@ pub fn column_reference_only(i: Input) -> IResult<(TableReference, Identifier)> column, ) }, - )(i) + ) + .parse(i) } pub fn column_id(i: Input) -> IResult { @@ -333,11 +413,12 @@ pub fn column_id(i: Input) -> IResult { ))) }), map_res(rule! { #ident }, |ident| Ok(ColumnID::Name(ident))), - ))(i) + )) + .parse(i) } pub fn variable_ident(i: Input) -> IResult { - map(rule! { IdentVariable }, |t| t.text()[1..].to_string())(i) + map(rule! { IdentVariable }, |t| t.text()[1..].to_string()).parse(i) } /// Parse one to two idents separated by a dot, fulfilling from the right. @@ -352,7 +433,8 @@ pub fn dot_separated_idents_1_to_2(i: Input) -> IResult<(Option, Ide (ident1, None) => (None, ident1), (ident0, Some((_, ident1))) => (Some(ident0), ident1), }, - )(i) + ) + .parse(i) } /// Parse one to three idents separated by a dot, fulfilling from the right. @@ -371,7 +453,8 @@ pub fn dot_separated_idents_1_to_3( (ident1, Some((_, ident2, None))) => (None, Some(ident1), ident2), (ident0, Some((_, ident1, Some((_, ident2))))) => (Some(ident0), Some(ident1), ident2), }, - )(i) + ) + .parse(i) } /// Parse two to four idents separated by a dot, fulfilling from the right. @@ -396,36 +479,37 @@ pub fn dot_separated_idents_2_to_4( (Some(ident0), Some(ident1), ident2, ident3) } }, - )(i) + ) + .parse(i) } pub fn comma_separated_list0<'a, T>( - item: impl FnMut(Input<'a>) -> IResult<'a, T>, + item: impl nom::Parser, Output = T, Error = Error<'a>>, ) -> impl FnMut(Input<'a>) -> IResult<'a, Vec> { separated_list0(match_text(","), item) } pub fn comma_separated_list0_ignore_trailing<'a, T>( - item: impl FnMut(Input<'a>) -> IResult<'a, T>, -) -> impl FnMut(Input<'a>) -> IResult<'a, Vec> { + item: impl nom::Parser, Output = T, Error = Error<'a>>, +) -> impl nom::Parser, Output = Vec, Error = Error<'a>> { nom::multi::separated_list0(match_text(","), item) } pub fn comma_separated_list1_ignore_trailing<'a, T>( - item: impl FnMut(Input<'a>) -> IResult<'a, T>, -) -> impl FnMut(Input<'a>) -> IResult<'a, Vec> { + item: impl nom::Parser, Output = T, Error = Error<'a>>, +) -> impl nom::Parser, Output = Vec, Error = Error<'a>> { nom::multi::separated_list1(match_text(","), item) } pub fn semicolon_terminated_list1<'a, T>( - item: impl FnMut(Input<'a>) -> IResult<'a, T>, -) -> impl FnMut(Input<'a>) -> IResult<'a, Vec> { + item: impl nom::Parser, Output = T, Error = Error<'a>>, +) -> impl nom::Parser, Output = Vec, Error = Error<'a>> { many1(terminated(item, match_text(";"))) } pub fn comma_separated_list1<'a, T>( - item: impl FnMut(Input<'a>) -> IResult<'a, T>, -) -> impl FnMut(Input<'a>) -> IResult<'a, Vec> { + item: impl nom::Parser, Output = T, Error = Error<'a>>, +) -> impl nom::Parser, Output = Vec, Error = Error<'a>> { separated_list1(match_text(","), item) } @@ -437,9 +521,9 @@ pub fn separated_list0( mut f: F, ) -> impl FnMut(I) -> nom::IResult, E> where - I: Clone + nom::InputLength, - F: nom::Parser, - G: nom::Parser, + I: Clone + nom::Input, + F: nom::Parser, + G: nom::Parser, E: nom::error::ParseError, { move |mut i: I| { @@ -487,9 +571,9 @@ pub fn separated_list1( mut f: F, ) -> impl FnMut(I) -> nom::IResult, E> where - I: Clone + nom::InputLength, - F: nom::Parser, - G: nom::Parser, + I: Clone + nom::Input, + F: nom::Parser, + G: nom::Parser, E: nom::error::ParseError, { move |mut i: I| { @@ -537,7 +621,7 @@ pub fn map_res<'a, O1, O2, F, G>( mut f: G, ) -> impl FnMut(Input<'a>) -> IResult<'a, O2> where - F: nom::Parser, O1, Error<'a>>, + F: nom::Parser, Output = O1, Error = Error<'a>>, G: FnMut(O1) -> Result>, { move |input: Input| { @@ -565,7 +649,7 @@ pub fn error_hint<'a, O, F>( message: &'static str, ) -> impl FnMut(Input<'a>) -> IResult<'a, ()> where - F: nom::Parser, O, Error<'a>>, + F: nom::Parser, Output = O, Error = Error<'a>>, { move |input: Input| match match_error.parse(input) { Ok(_) => Err(nom::Err::Error(Error::from_error_kind( @@ -583,106 +667,8 @@ pub fn transform_span(tokens: &[Token]) -> Span { }) } -pub(crate) trait IterProvider<'a> { - type Item; - type Iter: Iterator + ExactSizeIterator; - - fn create_iter(self, span: Rc>>>) -> Self::Iter; -} - -impl<'a, T> IterProvider<'a> for Vec> -where T: Clone -{ - type Item = WithSpan<'a, T>; - type Iter = ErrorSpan<'a, T, std::vec::IntoIter>>; - - fn create_iter(self, span: Rc>>>) -> Self::Iter { - ErrorSpan::new(self.into_iter(), span) - } -} - -pub(crate) struct ErrorSpan<'a, T, I: Iterator>> { - iter: I, - span: Rc>>>, -} - -impl<'a, T, I: Iterator>> ErrorSpan<'a, T, I> { - fn new(iter: I, span: Rc>>>) -> Self { - Self { iter, span } - } -} - -impl<'a, T, I: Iterator>> Iterator for ErrorSpan<'a, T, I> { - type Item = WithSpan<'a, T>; - - fn next(&mut self) -> Option { - self.iter - .next() - .inspect(|item| *self.span.borrow_mut() = Some(item.span)) - } -} - -impl<'a, T, I: Iterator>> ExactSizeIterator for ErrorSpan<'a, T, I> {} - -pub fn run_pratt_parser<'a, I, P, E, T>( - mut parser: P, - parsers: T, - rest: Input<'a>, - input: Input<'a>, -) -> IResult<'a, P::Output> -where - E: std::fmt::Debug, - P: PrattParser, Error = &'static str>, - I: Iterator + ExactSizeIterator, - T: IterProvider<'a, Item = P::Input, Iter = I>, -{ - let span = Rc::new(RefCell::new(None)); - let mut iter = parsers.create_iter(span.clone()).peekable(); - let expr = parser - .parse_input(&mut iter, Precedence(0)) - .map_err(|err| { - // Rollback parsing footprint on unused expr elements. - input.backtrace.clear(); - - let err_kind = match err { - PrattError::EmptyInput => ErrorKind::Other("expecting an operand"), - PrattError::UnexpectedNilfix(i) => { - *span.borrow_mut() = Some(i.span); - ErrorKind::Other("unable to parse the element") - } - PrattError::UnexpectedPrefix(i) => { - *span.borrow_mut() = Some(i.span); - ErrorKind::Other("unable to parse the prefix operator") - } - PrattError::UnexpectedInfix(i) => { - *span.borrow_mut() = Some(i.span); - ErrorKind::Other("missing lhs or rhs for the binary operator") - } - PrattError::UnexpectedPostfix(i) => { - *span.borrow_mut() = Some(i.span); - ErrorKind::Other("unable to parse the postfix operator") - } - PrattError::UserError(err) => ErrorKind::Other(err), - }; - - let span = span - .take() - // It's safe to slice one more token because input must contain EOI. - .unwrap_or_else(|| rest.slice(..1)); - - nom::Err::Error(Error::from_error_kind(span, err_kind)) - })?; - if let Some(elem) = iter.peek() { - // Rollback parsing footprint on unused expr elements. - input.backtrace.clear(); - Ok((input.slice(input.offset(&elem.span)..), expr)) - } else { - Ok((rest, expr)) - } -} - pub fn check_template_mode<'a, O, F>(mut parser: F) -> impl FnMut(Input<'a>) -> IResult<'a, O> -where F: nom::Parser, O, Error<'a>> { +where F: nom::Parser, Output = O, Error = Error<'a>> { move |input: Input| { parser.parse(input).and_then(|(i, res)| { if input.mode.is_template() { @@ -715,7 +701,7 @@ macro_rules! declare_experimental_feature { mut parser: F, ) -> impl FnMut(Input<'a>) -> IResult<'a, O> where - F: nom::Parser, O, Error<'a>>, + F: nom::Parser, Output = O, Error = Error<'a>>, { move |input: Input| { parser.parse(input).and_then(|(i, res)| { diff --git a/src/query/ast/src/parser/copy.rs b/src/query/ast/src/parser/copy.rs index 42118484fba4d..021f12dc903fb 100644 --- a/src/query/ast/src/parser/copy.rs +++ b/src/query/ast/src/parser/copy.rs @@ -12,8 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use nom::branch::alt; -use nom::combinator::map; +use nom::Parser; use nom_rule::rule; use super::query::alias_name; @@ -120,7 +119,8 @@ fn copy_into_location(i: Input) -> IResult { } CopyIntoLocation(copy_stmt) }, - )(i) + ) + .parse(i) } pub fn copy_into(i: Input) -> IResult { rule!( @@ -138,14 +138,15 @@ pub fn copy_into(i: Input) -> IResult { [ FILES = ( '' [ , '' ] [ , ... ] ) ] [ PATTERN = '' ] [ copyOptions ]`" - )(i) + ).parse(i) } pub fn literal_string_or_variable(i: Input) -> IResult { alt(( map(literal_string, LiteralStringOrVariable::Literal), map(variable_ident, LiteralStringOrVariable::Variable), - ))(i) + )) + .parse(i) } fn copy_into_table_option(i: Input) -> IResult { @@ -196,7 +197,8 @@ fn copy_into_table_option(i: Input) -> IResult { rule! { RETURN_FAILED_ONLY ~ "=" ~ #literal_bool }, |(_, _, return_failed_only)| CopyIntoTableOption::ReturnFailedOnly(return_failed_only), ), - ))(i) + )) + .parse(i) } fn copy_into_location_option(i: Input) -> IResult { @@ -227,5 +229,6 @@ fn copy_into_location_option(i: Input) -> IResult { map(rule! { #file_format_clause }, |options| { CopyIntoLocationOption::FileFormat(options) }), - ))(i) + )) + .parse(i) } diff --git a/src/query/ast/src/parser/data_mask.rs b/src/query/ast/src/parser/data_mask.rs index 57ec3c144db39..30ff5071a2949 100644 --- a/src/query/ast/src/parser/data_mask.rs +++ b/src/query/ast/src/parser/data_mask.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use nom::combinator::map; +use nom::Parser; use nom_rule::rule; use crate::ast::DataMaskArg; @@ -30,22 +30,24 @@ fn data_mask_arg(i: Input) -> IResult { arg_name: arg_name.name, arg_type, } - })(i) + }) + .parse(i) } fn data_mask_args(i: Input) -> IResult> { map( rule! { AS ~ "(" ~ #comma_separated_list1(data_mask_arg) ~ ")" }, |(_, _, args, _)| args, - )(i) + ) + .parse(i) } fn data_mask_body(i: Input) -> IResult { - map(rule! { #expr }, |expr| expr)(i) + map(rule! { #expr }, |expr| expr).parse(i) } fn data_mask_return_type(i: Input) -> IResult { - map(rule! { RETURNS ~ #type_name }, |(_, type_name)| type_name)(i) + map(rule! { RETURNS ~ #type_name }, |(_, type_name)| type_name).parse(i) } pub fn data_mask_policy(i: Input) -> IResult { @@ -60,5 +62,5 @@ pub fn data_mask_policy(i: Input) -> IResult { None => None, }, }, - )(i) + ).parse(i) } diff --git a/src/query/ast/src/parser/dynamic_table.rs b/src/query/ast/src/parser/dynamic_table.rs index b960a64c4ce30..2e6af1dc350e6 100644 --- a/src/query/ast/src/parser/dynamic_table.rs +++ b/src/query/ast/src/parser/dynamic_table.rs @@ -12,10 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use nom::branch::alt; -use nom::branch::permutation; -use nom::combinator::map; -use nom::combinator::value; +use nom::Parser; use nom_rule::rule; use crate::ast::ClusterOption; @@ -53,7 +50,7 @@ pub fn dynamic_table(i: Input) -> IResult { [ COMMENT = '' ] AS `" - )(i) + ).parse(i) } fn create_dynamic_table(i: Input) -> IResult { @@ -148,7 +145,8 @@ fn dynamic_table_options( task_warehouse_option, refresh_mode_opt, initialize_opt, - ))(i) + )) + .parse(i) } fn target_lag(i: Input) -> IResult { @@ -188,5 +186,6 @@ fn target_lag(i: Input) -> IResult { | #interval_hour | #interval_day | #downstream - )(i) + ) + .parse(i) } diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index a69c913fd1388..e28577bb2eafa 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -12,19 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::Reverse; + use ethnum::i256; use itertools::Itertools; -use nom::branch::alt; use nom::combinator::consumed; -use nom::combinator::map; -use nom::combinator::value; use nom::error::context; -use nom::Slice; +use nom::error::Error as NomError; +use nom::error::ErrorKind as NomErrorKind; +use nom::Offset; +use nom::Parser; +use nom_language::precedence::binary_op as precedence_binary_op; +use nom_language::precedence::precedence; +use nom_language::precedence::unary_op as precedence_unary_op; +use nom_language::precedence::Assoc as NomAssoc; +use nom_language::precedence::Operation; use nom_rule::rule; -use pratt::Affix; -use pratt::Associativity; -use pratt::PrattParser; -use pratt::Precedence; use crate::ast::quote::AtString; use crate::ast::*; @@ -38,12 +41,12 @@ use crate::parser::ErrorKind; use crate::Span; pub fn expr(i: Input) -> IResult { - context("expression", subexpr(0))(i) + context("expression", subexpr(0)).parse(i) } pub fn values(i: Input) -> IResult> { let values = comma_separated_list0(expr); - map(rule! { ( "(" ~ #values ~ ")" ) }, |(_, v, _)| v)(i) + map(rule! { ( "(" ~ #values ~ ")" ) }, |(_, v, _)| v).parse(i) } pub fn subexpr(min_precedence: u32) -> impl FnMut(Input) -> IResult { @@ -62,7 +65,7 @@ pub fn subexpr(min_precedence: u32) -> impl FnMut(Input) -> IResult { }) }; - let (rest, mut expr_elements) = rule! { #higher_prec_expr_element+ }(i)?; + let (rest, mut expr_elements) = rule! { #higher_prec_expr_element+ }.parse(i)?; for (prev, curr) in (-1..(expr_elements.len() as isize)).tuple_windows() { // If it's following a prefix or infix element or it's the first element, ... @@ -127,7 +130,171 @@ pub fn subexpr(min_precedence: u32) -> impl FnMut(Input) -> IResult { } } - run_pratt_parser(ExprParser, expr_elements, rest, i) + parse_expr_elements(expr_elements, rest, i) + } +} + +type ExprElementsInput<'a> = ElementsInput<'a, ExprElement>; +type ExprElementsError<'a> = ElementsError<'a, ExprElement>; +type ExprElementsResult<'a, O> = ElementsResult<'a, ExprElement, O>; + +fn expr_prefix_parser<'a>() -> impl nom::Parser< + ExprElementsInput<'a>, + Output = nom_language::precedence::Unary, Reverse>, + Error = ExprElementsError<'a>, +> { + alt(( + precedence_unary_op( + Reverse(Precedence(NOT_PREC)), + match_prefix(ExprElement::affix, Precedence(NOT_PREC)), + ), + precedence_unary_op( + Reverse(Precedence(50)), + match_prefix(ExprElement::affix, Precedence(50)), + ), + precedence_unary_op( + Reverse(Precedence(60)), + match_prefix(ExprElement::affix, Precedence(60)), + ), + )) +} + +fn expr_postfix_parser<'a>() -> impl nom::Parser< + ExprElementsInput<'a>, + Output = nom_language::precedence::Unary, Reverse>, + Error = ExprElementsError<'a>, +> { + alt(( + precedence_unary_op( + Reverse(Precedence(61)), + match_postfix(ExprElement::affix, Precedence(61)), + ), + precedence_unary_op( + Reverse(Precedence(60)), + match_postfix(ExprElement::affix, Precedence(60)), + ), + precedence_unary_op( + Reverse(Precedence(BETWEEN_PREC)), + match_postfix(ExprElement::affix, Precedence(BETWEEN_PREC)), + ), + precedence_unary_op( + Reverse(Precedence(17)), + match_postfix(ExprElement::affix, Precedence(17)), + ), + )) +} + +fn expr_binary_parser<'a>() -> impl nom::Parser< + ExprElementsInput<'a>, + Output = nom_language::precedence::Binary, Reverse>, + Error = ExprElementsError<'a>, +> { + alt(( + precedence_binary_op( + Reverse(Precedence(5)), + NomAssoc::Left, + match_binary(ExprElement::affix, Precedence(5), Associativity::Left), + ), + precedence_binary_op( + Reverse(Precedence(10)), + NomAssoc::Left, + match_binary(ExprElement::affix, Precedence(10), Associativity::Left), + ), + precedence_binary_op( + Reverse(Precedence(20)), + NomAssoc::Left, + match_binary(ExprElement::affix, Precedence(20), Associativity::Left), + ), + precedence_binary_op( + Reverse(Precedence(22)), + NomAssoc::Left, + match_binary(ExprElement::affix, Precedence(22), Associativity::Left), + ), + precedence_binary_op( + Reverse(Precedence(23)), + NomAssoc::Left, + match_binary(ExprElement::affix, Precedence(23), Associativity::Left), + ), + precedence_binary_op( + Reverse(Precedence(24)), + NomAssoc::Left, + match_binary(ExprElement::affix, Precedence(24), Associativity::Left), + ), + precedence_binary_op( + Reverse(Precedence(30)), + NomAssoc::Left, + match_binary(ExprElement::affix, Precedence(30), Associativity::Left), + ), + precedence_binary_op( + Reverse(Precedence(40)), + NomAssoc::Left, + match_binary(ExprElement::affix, Precedence(40), Associativity::Left), + ), + precedence_binary_op( + Reverse(Precedence(40)), + NomAssoc::Right, + match_binary(ExprElement::affix, Precedence(40), Associativity::Right), + ), + )) +} + +fn expr_operand_parser<'a>(input: ExprElementsInput<'a>) -> ExprElementsResult<'a, Expr> { + match_nilfix(ExprElement::affix)(input).and_then(|(rest, elem)| match expr_primary(elem) { + Ok(expr) => Ok((rest, expr)), + Err(_) => Err(nom::Err::Failure(NomError::new( + input, + NomErrorKind::Verify, + ))), + }) +} + +fn parse_expr_elements<'a>( + expr_elements: Vec>, + rest: Input<'a>, + input: Input<'a>, +) -> IResult<'a, Expr> { + let mut parser = precedence( + expr_prefix_parser(), + expr_postfix_parser(), + expr_binary_parser(), + expr_operand_parser, + expr_fold, + ); + + match parser(expr_elements.as_slice()) { + Ok((remaining, expr)) => { + if remaining.is_empty() { + Ok((rest, expr)) + } else { + input.backtrace.clear(); + let total = input.offset(&rest); + let unused = remaining.iter().map(|e| e.span.tokens.len()).sum::(); + let consumed = total.saturating_sub(unused); + Ok((input.slice(consumed..), expr)) + } + } + Err(_) => { + input.backtrace.clear(); + Err(nom::Err::Error(Error::from_error_kind( + rest, + ErrorKind::Other("unable to parse the expression"), + ))) + } + } +} + +fn expr_fold<'a>( + operation: Operation< + WithSpan<'a, ExprElement>, + WithSpan<'a, ExprElement>, + WithSpan<'a, ExprElement>, + Expr, + >, +) -> Result { + match operation { + Operation::Prefix(op, rhs) => expr_prefix(op, rhs), + Operation::Postfix(lhs, op) => expr_postfix(lhs, op), + Operation::Binary(lhs, op, rhs) => expr_infix(lhs, op, rhs), } } @@ -480,6 +647,409 @@ impl ExprElement { } } +fn expr_primary(elem: WithSpan<'_, ExprElement>) -> Result { + let expr = match elem.elem { + ExprElement::ColumnRef { column } => Expr::ColumnRef { + span: transform_span(elem.span.tokens), + column, + }, + ExprElement::Cast { expr, target_type } => Expr::Cast { + span: transform_span(elem.span.tokens), + expr, + target_type, + pg_style: false, + }, + ExprElement::TryCast { expr, target_type } => Expr::TryCast { + span: transform_span(elem.span.tokens), + expr, + target_type, + }, + ExprElement::Extract { field, expr } => Expr::Extract { + span: transform_span(elem.span.tokens), + kind: field, + expr, + }, + ExprElement::DatePart { field, expr } => Expr::DatePart { + span: transform_span(elem.span.tokens), + kind: field, + expr, + }, + ExprElement::Position { + substr_expr, + str_expr, + } => Expr::Position { + span: transform_span(elem.span.tokens), + substr_expr, + str_expr, + }, + ExprElement::SubString { + expr, + substring_from, + substring_for, + } => Expr::Substring { + span: transform_span(elem.span.tokens), + expr, + substring_from, + substring_for, + }, + ExprElement::Trim { expr, trim_where } => Expr::Trim { + span: transform_span(elem.span.tokens), + expr, + trim_where, + }, + ExprElement::Literal { value } => Expr::Literal { + span: transform_span(elem.span.tokens), + value, + }, + ExprElement::CountAll { qualified, window } => Expr::CountAll { + span: transform_span(elem.span.tokens), + qualified, + window, + }, + ExprElement::Tuple { exprs } => Expr::Tuple { + span: transform_span(elem.span.tokens), + exprs, + }, + ExprElement::FunctionCall { func } => Expr::FunctionCall { + span: transform_span(elem.span.tokens), + func, + }, + ExprElement::Case { + operand, + conditions, + results, + else_result, + } => Expr::Case { + span: transform_span(elem.span.tokens), + operand, + conditions, + results, + else_result, + }, + ExprElement::Exists { subquery, not } => Expr::Exists { + span: transform_span(elem.span.tokens), + not, + subquery: Box::new(subquery), + }, + ExprElement::Subquery { subquery, modifier } => Expr::Subquery { + span: transform_span(elem.span.tokens), + modifier, + subquery: Box::new(subquery), + }, + ExprElement::Group(expr) => expr, + ExprElement::Array { exprs } => Expr::Array { + span: transform_span(elem.span.tokens), + exprs, + }, + ExprElement::ListComprehension { + source, + param, + filter, + result, + } => { + let span = transform_span(elem.span.tokens); + let mut source = source; + + if let Some(filter) = filter { + source = Expr::FunctionCall { + span, + func: FunctionCall { + distinct: false, + name: Identifier::from_name( + transform_span(elem.span.tokens), + "array_filter", + ), + args: vec![source], + params: vec![], + order_by: vec![], + window: None, + lambda: Some(Lambda { + params: vec![param.clone()], + expr: Box::new(filter), + }), + }, + }; + } + Expr::FunctionCall { + span, + func: FunctionCall { + distinct: false, + name: Identifier::from_name(transform_span(elem.span.tokens), "array_map"), + args: vec![source], + params: vec![], + order_by: vec![], + window: None, + lambda: Some(Lambda { + params: vec![param.clone()], + expr: Box::new(result), + }), + }, + } + } + ExprElement::Map { kvs } => Expr::Map { + span: transform_span(elem.span.tokens), + kvs, + }, + ExprElement::Interval { expr, unit } => Expr::Interval { + span: transform_span(elem.span.tokens), + expr: Box::new(expr), + unit, + }, + ExprElement::DateAdd { + unit, + interval, + date, + } => Expr::DateAdd { + span: transform_span(elem.span.tokens), + unit, + interval: Box::new(interval), + date: Box::new(date), + }, + ExprElement::DateDiff { + unit, + date_start, + date_end, + } => Expr::DateDiff { + span: transform_span(elem.span.tokens), + unit, + date_start: Box::new(date_start), + date_end: Box::new(date_end), + }, + ExprElement::DateBetween { + unit, + date_start, + date_end, + } => Expr::DateBetween { + span: transform_span(elem.span.tokens), + unit, + date_start: Box::new(date_start), + date_end: Box::new(date_end), + }, + ExprElement::DateSub { + unit, + interval, + date, + } => Expr::DateSub { + span: transform_span(elem.span.tokens), + unit, + interval: Box::new(interval), + date: Box::new(date), + }, + ExprElement::DateTrunc { unit, date } => Expr::DateTrunc { + span: transform_span(elem.span.tokens), + unit, + date: Box::new(date), + }, + ExprElement::TimeSlice { + unit, + date, + slice_length, + start_or_end, + } => Expr::TimeSlice { + span: transform_span(elem.span.tokens), + unit, + date: Box::new(date), + slice_length, + start_or_end: start_or_end.unwrap_or("start".to_string()), + }, + ExprElement::LastDay { unit, date } => Expr::LastDay { + span: transform_span(elem.span.tokens), + unit, + date: Box::new(date), + }, + ExprElement::PreviousDay { unit, date } => Expr::PreviousDay { + span: transform_span(elem.span.tokens), + unit, + date: Box::new(date), + }, + ExprElement::NextDay { unit, date } => Expr::NextDay { + span: transform_span(elem.span.tokens), + unit, + date: Box::new(date), + }, + ExprElement::Hole { name } => Expr::Hole { + span: transform_span(elem.span.tokens), + name, + }, + ExprElement::Placeholder => Expr::Placeholder { + span: transform_span(elem.span.tokens), + }, + ExprElement::VariableAccess(name) => { + let span = transform_span(elem.span.tokens); + make_func_get_variable(span, name) + } + ExprElement::StageLocation { location } => Expr::StageLocation { + span: transform_span(elem.span.tokens), + location, + }, + _ => unreachable!(), + }; + Ok(expr) +} + +fn expr_infix(lhs: Expr, elem: WithSpan<'_, ExprElement>, rhs: Expr) -> Result { + let expr = match elem.elem { + ExprElement::BinaryOp { op } => Expr::BinaryOp { + span: transform_span(elem.span.tokens), + left: Box::new(lhs), + right: Box::new(rhs), + op, + }, + ExprElement::IsDistinctFrom { not } => Expr::IsDistinctFrom { + span: transform_span(elem.span.tokens), + left: Box::new(lhs), + right: Box::new(rhs), + not, + }, + ExprElement::JsonOp { op } => Expr::JsonOp { + span: transform_span(elem.span.tokens), + left: Box::new(lhs), + right: Box::new(rhs), + op, + }, + _ => unreachable!(), + }; + Ok(expr) +} + +fn expr_prefix(elem: WithSpan<'_, ExprElement>, rhs: Expr) -> Result { + let expr = match elem.elem { + ExprElement::UnaryOp { op } => Expr::UnaryOp { + span: transform_span(elem.span.tokens), + op, + expr: Box::new(rhs), + }, + _ => unreachable!(), + }; + Ok(expr) +} + +fn expr_postfix(mut lhs: Expr, elem: WithSpan<'_, ExprElement>) -> Result { + let expr = match elem.elem { + ExprElement::MapAccess { accessor } => Expr::MapAccess { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + accessor, + }, + ExprElement::DotAccess { key } => { + if let Expr::ColumnRef { column, .. } = &mut lhs { + if let ColumnID::Name(name) = &column.column { + column.database = column.table.take(); + column.table = Some(name.clone()); + column.column = key.clone(); + return Ok(lhs); + } + } + match key { + ColumnID::Name(id) => Expr::MapAccess { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + accessor: MapAccessor::Colon { key: id }, + }, + _ => return Err("dot access position must be after ident"), + } + } + ExprElement::ChainFunctionCall { name, args, lambda } => Expr::FunctionCall { + span: transform_span(elem.span.tokens), + func: FunctionCall { + distinct: false, + name, + args: [vec![lhs], args].concat(), + params: vec![], + order_by: vec![], + window: None, + lambda, + }, + }, + ExprElement::IsNull { not } => Expr::IsNull { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + not, + }, + ExprElement::InList { list, not } => Expr::InList { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + list, + not, + }, + ExprElement::InSubquery { subquery, not } => Expr::InSubquery { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + subquery, + not, + }, + ExprElement::LikeSubquery { + subquery, + modifier, + escape, + } => Expr::LikeSubquery { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + subquery, + modifier, + escape, + }, + ExprElement::Escape { escape } => match lhs { + Expr::BinaryOp { + span, + op: BinaryOperator::Like(_), + left, + right, + } => Expr::LikeWithEscape { + span, + left, + right, + is_not: false, + escape, + }, + Expr::BinaryOp { + span, + op: BinaryOperator::NotLike(_), + left, + right, + } => Expr::LikeWithEscape { + span, + left, + right, + is_not: true, + escape, + }, + Expr::BinaryOp { + span, + op: BinaryOperator::LikeAny(_), + left, + right, + } => Expr::LikeAnyWithEscape { + span, + left, + right, + escape, + }, + _ => return Err("escape clause must be after LIKE/NOT LIKE/LIKE ANY binary expr"), + }, + ExprElement::Between { low, high, not } => Expr::Between { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + low, + high, + not, + }, + ExprElement::PgCast { target_type } => Expr::Cast { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + target_type, + pg_style: true, + }, + ExprElement::UnaryOp { op } => Expr::UnaryOp { + span: transform_span(elem.span.tokens), + op, + expr: Box::new(lhs), + }, + _ => unreachable!(), + }; + Ok(expr) +} + impl Expr { pub fn affix(&self) -> Affix { match self { @@ -531,438 +1101,7 @@ impl Expr { } } } - -struct ExprParser; - -impl<'a, I: Iterator>> PrattParser for ExprParser { - type Error = &'static str; - type Input = WithSpan<'a, ExprElement>; - type Output = Expr; - - fn query(&mut self, elem: &WithSpan) -> Result { - Ok(elem.elem.affix()) - } - - fn primary(&mut self, elem: WithSpan<'a, ExprElement>) -> Result { - let expr = match elem.elem { - ExprElement::ColumnRef { column } => Expr::ColumnRef { - span: transform_span(elem.span.tokens), - column, - }, - ExprElement::Cast { expr, target_type } => Expr::Cast { - span: transform_span(elem.span.tokens), - expr, - target_type, - pg_style: false, - }, - ExprElement::TryCast { expr, target_type } => Expr::TryCast { - span: transform_span(elem.span.tokens), - expr, - target_type, - }, - ExprElement::Extract { field, expr } => Expr::Extract { - span: transform_span(elem.span.tokens), - kind: field, - expr, - }, - ExprElement::DatePart { field, expr } => Expr::DatePart { - span: transform_span(elem.span.tokens), - kind: field, - expr, - }, - ExprElement::Position { - substr_expr, - str_expr, - } => Expr::Position { - span: transform_span(elem.span.tokens), - substr_expr, - str_expr, - }, - ExprElement::SubString { - expr, - substring_from, - substring_for, - } => Expr::Substring { - span: transform_span(elem.span.tokens), - expr, - substring_from, - substring_for, - }, - ExprElement::Trim { expr, trim_where } => Expr::Trim { - span: transform_span(elem.span.tokens), - expr, - trim_where, - }, - ExprElement::Literal { value } => Expr::Literal { - span: transform_span(elem.span.tokens), - value, - }, - ExprElement::CountAll { qualified, window } => Expr::CountAll { - span: transform_span(elem.span.tokens), - qualified, - window, - }, - ExprElement::Tuple { exprs } => Expr::Tuple { - span: transform_span(elem.span.tokens), - exprs, - }, - ExprElement::FunctionCall { func } => Expr::FunctionCall { - span: transform_span(elem.span.tokens), - func, - }, - ExprElement::Case { - operand, - conditions, - results, - else_result, - } => Expr::Case { - span: transform_span(elem.span.tokens), - operand, - conditions, - results, - else_result, - }, - ExprElement::Exists { subquery, not } => Expr::Exists { - span: transform_span(elem.span.tokens), - not, - subquery: Box::new(subquery), - }, - ExprElement::Subquery { subquery, modifier } => Expr::Subquery { - span: transform_span(elem.span.tokens), - modifier, - subquery: Box::new(subquery), - }, - ExprElement::Group(expr) => expr, - ExprElement::Array { exprs } => Expr::Array { - span: transform_span(elem.span.tokens), - exprs, - }, - ExprElement::ListComprehension { - source, - param, - filter, - result, - } => { - let span = transform_span(elem.span.tokens); - let mut source = source; - - // array_filter(source, filter) - if let Some(filter) = filter { - source = Expr::FunctionCall { - span, - func: FunctionCall { - distinct: false, - name: Identifier::from_name( - transform_span(elem.span.tokens), - "array_filter", - ), - args: vec![source], - params: vec![], - order_by: vec![], - window: None, - lambda: Some(Lambda { - params: vec![param.clone()], - expr: Box::new(filter), - }), - }, - }; - } - // array_map(source, result) - Expr::FunctionCall { - span, - func: FunctionCall { - distinct: false, - name: Identifier::from_name(transform_span(elem.span.tokens), "array_map"), - args: vec![source], - params: vec![], - order_by: vec![], - window: None, - lambda: Some(Lambda { - params: vec![param.clone()], - expr: Box::new(result), - }), - }, - } - } - ExprElement::Map { kvs } => Expr::Map { - span: transform_span(elem.span.tokens), - kvs, - }, - ExprElement::Interval { expr, unit } => Expr::Interval { - span: transform_span(elem.span.tokens), - expr: Box::new(expr), - unit, - }, - ExprElement::DateAdd { - unit, - interval, - date, - } => Expr::DateAdd { - span: transform_span(elem.span.tokens), - unit, - interval: Box::new(interval), - date: Box::new(date), - }, - ExprElement::DateDiff { - unit, - date_start, - date_end, - } => Expr::DateDiff { - span: transform_span(elem.span.tokens), - unit, - date_start: Box::new(date_start), - date_end: Box::new(date_end), - }, - ExprElement::DateBetween { - unit, - date_start, - date_end, - } => Expr::DateBetween { - span: transform_span(elem.span.tokens), - unit, - date_start: Box::new(date_start), - date_end: Box::new(date_end), - }, - ExprElement::DateSub { - unit, - interval, - date, - } => Expr::DateSub { - span: transform_span(elem.span.tokens), - unit, - interval: Box::new(interval), - date: Box::new(date), - }, - ExprElement::DateTrunc { unit, date } => Expr::DateTrunc { - span: transform_span(elem.span.tokens), - unit, - date: Box::new(date), - }, - ExprElement::TimeSlice { - unit, - date, - slice_length, - start_or_end, - } => Expr::TimeSlice { - span: transform_span(elem.span.tokens), - unit, - date: Box::new(date), - slice_length, - start_or_end: start_or_end.unwrap_or("start".to_string()), - }, - ExprElement::LastDay { unit, date } => Expr::LastDay { - span: transform_span(elem.span.tokens), - unit, - date: Box::new(date), - }, - ExprElement::PreviousDay { unit, date } => Expr::PreviousDay { - span: transform_span(elem.span.tokens), - unit, - date: Box::new(date), - }, - ExprElement::NextDay { unit, date } => Expr::NextDay { - span: transform_span(elem.span.tokens), - unit, - date: Box::new(date), - }, - ExprElement::Hole { name } => Expr::Hole { - span: transform_span(elem.span.tokens), - name, - }, - ExprElement::Placeholder => Expr::Placeholder { - span: transform_span(elem.span.tokens), - }, - ExprElement::VariableAccess(name) => { - let span = transform_span(elem.span.tokens); - make_func_get_variable(span, name) - } - ExprElement::StageLocation { location } => Expr::StageLocation { - span: transform_span(elem.span.tokens), - location, - }, - _ => unreachable!(), - }; - Ok(expr) - } - - fn infix( - &mut self, - lhs: Expr, - elem: WithSpan<'a, ExprElement>, - rhs: Expr, - ) -> Result { - let expr = match elem.elem { - ExprElement::BinaryOp { op } => Expr::BinaryOp { - span: transform_span(elem.span.tokens), - left: Box::new(lhs), - right: Box::new(rhs), - op, - }, - ExprElement::IsDistinctFrom { not } => Expr::IsDistinctFrom { - span: transform_span(elem.span.tokens), - left: Box::new(lhs), - right: Box::new(rhs), - not, - }, - ExprElement::JsonOp { op } => Expr::JsonOp { - span: transform_span(elem.span.tokens), - left: Box::new(lhs), - right: Box::new(rhs), - op, - }, - _ => unreachable!(), - }; - Ok(expr) - } - - fn prefix(&mut self, elem: WithSpan<'a, ExprElement>, rhs: Expr) -> Result { - let expr = match elem.elem { - ExprElement::UnaryOp { op } => Expr::UnaryOp { - span: transform_span(elem.span.tokens), - op, - expr: Box::new(rhs), - }, - _ => unreachable!(), - }; - Ok(expr) - } - - fn postfix( - &mut self, - mut lhs: Expr, - elem: WithSpan<'a, ExprElement>, - ) -> Result { - let expr = match elem.elem { - ExprElement::MapAccess { accessor } => Expr::MapAccess { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - accessor, - }, - ExprElement::DotAccess { key } => { - // `database.table.column` is parsed into [database] [.table] [.column], - // so we need to transform it into the right `ColumnRef` form. - if let Expr::ColumnRef { column, .. } = &mut lhs { - if let ColumnID::Name(name) = &column.column { - column.database = column.table.take(); - column.table = Some(name.clone()); - column.column = key.clone(); - return Ok(lhs); - } - } - - match key { - ColumnID::Name(id) => Expr::MapAccess { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - accessor: MapAccessor::Colon { key: id }, - }, - _ => { - return Err("dot access position must be after ident"); - } - } - } - ExprElement::ChainFunctionCall { name, args, lambda } => Expr::FunctionCall { - span: transform_span(elem.span.tokens), - func: FunctionCall { - distinct: false, - name, - args: [vec![lhs], args].concat(), - params: vec![], - order_by: vec![], - window: None, - lambda, - }, - }, - ExprElement::IsNull { not } => Expr::IsNull { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - not, - }, - ExprElement::InList { list, not } => Expr::InList { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - list, - not, - }, - ExprElement::InSubquery { subquery, not } => Expr::InSubquery { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - subquery, - not, - }, - ExprElement::LikeSubquery { - subquery, - modifier, - escape, - } => Expr::LikeSubquery { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - subquery, - modifier, - escape, - }, - ExprElement::Escape { escape } => match lhs { - Expr::BinaryOp { - span, - op: BinaryOperator::Like(_), - left, - right, - } => Expr::LikeWithEscape { - span, - left, - right, - is_not: false, - escape, - }, - Expr::BinaryOp { - span, - op: BinaryOperator::NotLike(_), - left, - right, - } => Expr::LikeWithEscape { - span, - left, - right, - is_not: true, - escape, - }, - Expr::BinaryOp { - span, - op: BinaryOperator::LikeAny(_), - left, - right, - } => Expr::LikeAnyWithEscape { - span, - left, - right, - escape, - }, - _ => return Err("escape clause must be after LIKE/NOT LIKE/LIKE ANY binary expr"), - }, - ExprElement::Between { low, high, not } => Expr::Between { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - low, - high, - not, - }, - ExprElement::PgCast { target_type } => Expr::Cast { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - target_type, - pg_style: true, - }, - ExprElement::UnaryOp { op } => Expr::UnaryOp { - span: transform_span(elem.span.tokens), - op, - expr: Box::new(lhs), - }, - _ => unreachable!(), - }; - Ok(expr) - } -} - +#[allow(unreachable_code)] pub fn expr_element(i: Input) -> IResult> { let column_ref = map(column_id, |column| ExprElement::ColumnRef { column: ColumnRef { @@ -1003,9 +1142,9 @@ pub fn expr_element(i: Input) -> IResult> { }, |(_, m, _, subquery, _, option_escape)| { let modifier = match m.kind { - TokenKind::ALL => SubqueryModifier::All, - TokenKind::ANY => SubqueryModifier::Any, - TokenKind::SOME => SubqueryModifier::Some, + ALL => SubqueryModifier::All, + ANY => SubqueryModifier::Any, + SOME => SubqueryModifier::Some, _ => unreachable!(), }; ExprElement::LikeSubquery { @@ -1174,9 +1313,9 @@ pub fn expr_element(i: Input) -> IResult> { }, |(modifier, _, subquery, _)| { let modifier = modifier.map(|m| match m.kind { - TokenKind::ALL => SubqueryModifier::All, - TokenKind::ANY => SubqueryModifier::Any, - TokenKind::SOME => SubqueryModifier::Some, + ALL => SubqueryModifier::All, + ANY => SubqueryModifier::Any, + SOME => SubqueryModifier::Some, _ => unreachable!(), }); ExprElement::Subquery { modifier, subquery } @@ -1345,13 +1484,17 @@ pub fn expr_element(i: Input) -> IResult> { let interval = map( rule! { - INTERVAL ~ #subexpr(0) ~ #interval_kind + INTERVAL ~ ^#subexpr(0) ~ #interval_kind? }, - |(_, operand, unit)| ExprElement::Interval { - expr: operand, - unit, + |(_, expr, unit)| match unit { + None => ExprElement::Cast { + expr: Box::new(expr), + target_type: TypeName::Interval, + }, + Some(unit) => ExprElement::Interval { expr, unit }, }, ); + let date_trunc = map( rule! { DATE_TRUNC ~ "(" ~ #interval_kind ~ "," ~ #subexpr(0) ~ ")" @@ -1375,38 +1518,36 @@ pub fn expr_element(i: Input) -> IResult> { rule! { TRUNC ~ "(" ~ (#subexpr(0) ~ "," ~ #interval_kind)? ~ (#subexpr(0) ~ ("," ~ #subexpr(0))?)? ~ ")" }, - |(s, _, opt_date, opt_numeric, _)| { - return match (opt_date, opt_numeric) { - (Some((date, _, unit)), None) => ExprElement::DateTrunc { unit, date }, - (None, Some((expr, opt_expr2))) => { - if let Some((_, expr2)) = opt_expr2 { - ExprElement::FunctionCall { - func: FunctionCall { - distinct: false, - name: Identifier::from_name(Some(s.span), "TRUNCATE"), - args: vec![expr, expr2], - ..Default::default() - }, - } - } else { - ExprElement::FunctionCall { - func: FunctionCall { - distinct: false, - name: Identifier::from_name(Some(s.span), "TRUNCATE"), - args: vec![expr], - ..Default::default() - }, - } + |(s, _, opt_date, opt_numeric, _)| match (opt_date, opt_numeric) { + (Some((date, _, unit)), None) => ExprElement::DateTrunc { unit, date }, + (None, Some((expr, opt_expr2))) => { + if let Some((_, expr2)) = opt_expr2 { + ExprElement::FunctionCall { + func: FunctionCall { + distinct: false, + name: Identifier::from_name(Some(s.span), "TRUNCATE"), + args: vec![expr, expr2], + ..Default::default() + }, + } + } else { + ExprElement::FunctionCall { + func: FunctionCall { + distinct: false, + name: Identifier::from_name(Some(s.span), "TRUNCATE"), + args: vec![expr], + ..Default::default() + }, } } - _ => ExprElement::DateTrunc { - unit: IntervalKind::UnknownIntervalKind, - date: Expr::Literal { - span: None, - value: Literal::Null, - }, + } + _ => ExprElement::DateTrunc { + unit: IntervalKind::UnknownIntervalKind, + date: Expr::Literal { + span: None, + value: Literal::Null, }, - }; + }, }, ); @@ -1479,9 +1620,9 @@ pub fn expr_element(i: Input) -> IResult> { }, ); - let interval_expr = map( + let inverted_expr = map( rule! { - INTERVAL ~ #consumed(literal_string) + INVERTED ~ #consumed(literal_string) }, |(_, (span, date))| ExprElement::Cast { expr: Box::new(Expr::Literal { @@ -1545,71 +1686,145 @@ pub fn expr_element(i: Input) -> IResult> { ExprElement::StageLocation { location } }); - map( - consumed(alt(( - // Note: each `alt` call supports maximum of 21 parsers - rule!( - #is_null : "`... IS [NOT] NULL`" - | #in_list : "`[NOT] IN (, ...)`" - | #in_subquery : "`[NOT] IN (SELECT ...)`" - | #like_subquery: "`LIKE ANY | ALL | SOME (SELECT ...)`" - | #exists : "`[NOT] EXISTS (SELECT ...)`" - | #between : "`[NOT] BETWEEN ... AND ...`" - | #binary_op : "" - | #json_op : "" - | #unary_op : "" - | #cast : "`CAST(... AS ...)`" - | #pg_cast : "`::`" - | #position : "`POSITION(... IN ...)`" - | #variable_access: "`$`" - ), - rule! ( - #date_add : "`DATE_ADD(..., ..., (YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | DOY | DOW))`" - | #date_diff : "`DATE_DIFF(..., ..., (YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | DOY | DOW))`" - | #date_sub : "`DATE_SUB(..., ..., (YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | DOY | DOW))`" - | #date_between : "`DATE_BETWEEN((YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | DOY | DOW), ..., ...,)`" - | #date_trunc : "`DATE_TRUNC((YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | WEEK), ...)`" - | #time_slice : "`TIME_SLICE(, , (YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | WEEK) [ , ] )`" - | #trunc : "`TRUNC(..., (YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | WEEK))`" - | #last_day : "`LAST_DAY(..., (YEAR | QUARTER | MONTH | WEEK)))`" - | #previous_day : "`PREVIOUS_DAY(..., (Sunday | Monday | Tuesday | Wednesday | Thursday | Friday | Saturday))`" - | #next_day : "`NEXT_DAY(..., (Sunday | Monday | Tuesday | Wednesday | Thursday | Friday | Saturday))`" - | #date_expr : "`DATE `" - | #timestamp_expr : "`TIMESTAMP `" - | #timestamp_tz_expr : "`TIMESTAMP_TZ `" - | #interval : "`INTERVAL ... (YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | DOY | DOW)`" - | #interval_expr : "`INTERVAL `" - | #extract : "`EXTRACT((YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | WEEK) FROM ...)`" - | #date_part : "`DATE_PART((YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | WEEK), ...)`" - ), - rule!( - #substring : "`SUBSTRING(... [FROM ...] [FOR ...])`" - | #trim_from : "`TRIM([(BOTH | LEADEING | TRAILING) ... FROM ...)`" - | #is_distinct_from: "`... IS [NOT] DISTINCT FROM ...`" - | #chain_function_call : "x.function(...)" - | #list_comprehensions: "[expr for x in ... [if ...]]" - | #count_all_with_window : "`COUNT(*) OVER ...`" - | #function_call - | #escape: "`ESCAPE ''`" - ), - rule!( - #case : "`CASE ... END`" - | #tuple : "`( [, ...])`" - | #subquery : "`(SELECT ...)`" - | #stage_location: "@" - | #column_ref : "" - | #dot_access : "" - | #map_access : "[] | . | :" - | #literal : "" - | #current_date: "CURRENT_DATE" - | #current_time: "CURRENT_TIME" - | #current_timestamp: "CURRENT_TIMESTAMP" - | #array : "`[, ...]`" - | #map_expr : "`{ : , ... }`" - ), - ))), - |(span, elem)| WithSpan { span, elem }, - )(i) + macro_rules! with_span { + ($parser:expr) => { + map(consumed($parser), |(span, elem)| WithSpan { span, elem }) + }; + } + + macro_rules! try_token { + ($token_0:expr, $($pat:pat => $body:expr),+ $(,)?) => {{ + match $token_0.kind { + $( + $pat => Some($body), + )+ + _ => None, + } + }}; + } + + if let Some(token_0) = i.tokens.first() { + use TokenKind::*; + + macro_rules! try_dispatch { + ($($pat:pat => $body:expr),+ $(,)?) => {{ + if let Some(result) = try_token!(token_0, $($pat => $body),+) { + if matches!(&result, Ok(_) | Err(nom::Err::Failure(_))) { + return result; + } + } + }}; + } + + try_dispatch!( + IS => with_span!(rule!(#is_null | #is_distinct_from)).parse(i), + NOT => with_span!(rule!( + #in_list + | #in_subquery + | #exists + | #between + | #binary_op + | #unary_op + )) + .parse(i), + IN => with_span!(rule!(#in_list | #in_subquery)).parse(i), + LIKE => with_span!(rule!(#like_subquery | #binary_op)).parse(i), + EXISTS => with_span!(exists).parse(i), + BETWEEN => with_span!(between).parse(i), + CAST | TRY_CAST => with_span!(cast).parse(i), + DoubleColon => with_span!(pg_cast).parse(i), + POSITION => with_span!(position).parse(i), + IdentVariable => with_span!(variable_access).parse(i), + ESCAPE => with_span!(escape).parse(i), + COUNT => with_span!(rule!{ #count_all_with_window | #function_call}).parse(i), + SUBSTRING | SUBSTR => with_span!(substring).parse(i), + TRIM => with_span!(trim_from).parse(i), + CASE => with_span!(case).parse(i), + LParen => with_span!(rule!(#tuple | #subquery)).parse(i), + ANY | SOME | ALL => with_span!(subquery).parse(i), + Dot => { + return with_span!(rule!(#chain_function_call | #dot_access | #map_access)).parse(i); + }, + Colon => { + return with_span!(map_access).parse(i); + }, + LBracket => { + return with_span!(rule!(#list_comprehensions | #map_access | #array)).parse(i); + }, + LBrace => with_span!(map_expr).parse(i), + LiteralAtString => with_span!(stage_location).parse(i), + DATEADD | DATE_ADD => with_span!(date_add).parse(i), + DATE_DIFF | DATEDIFF => with_span!(date_diff).parse(i), + DATESUB | DATE_SUB => with_span!(date_sub).parse(i), + DATEBETWEEN | DATE_BETWEEN => with_span!(date_between).parse(i), + DATE_TRUNC => with_span!(date_trunc).parse(i), + TIME_SLICE => with_span!(time_slice).parse(i), + TRUNC => with_span!(trunc).parse(i), + LAST_DAY => with_span!(last_day).parse(i), + PREVIOUS_DAY => with_span!(previous_day).parse(i), + NEXT_DAY => with_span!(next_day).parse(i), + DATE => with_span!(date_expr).parse(i), + TIMESTAMP => with_span!(timestamp_expr).parse(i), + TIMESTAMP_TZ => with_span!(timestamp_tz_expr).parse(i), + INVERTED => with_span!(inverted_expr).parse(i), + INTERVAL => with_span!(interval).parse(i), + DATE_PART | DATEPART => with_span!(date_part).parse(i), + EXTRACT => with_span!(extract).parse(i), + CURRENT_DATE => with_span!(rule!{ #function_call | #current_date }).parse(i), + CURRENT_TIME => with_span!(rule!{ #function_call | #current_time }).parse(i), + CURRENT_TIMESTAMP => with_span!(rule!{ #function_call | #current_timestamp }).parse(i), + Plus + | Minus + | Multiply + | Divide + | IntDiv + | DIV + | Modulo + | StringConcat + | Spaceship + | L1DISTANCE + | L2DISTANCE + | Gt + | Lt + | Gte + | Lte + | Eq + | NotEq + | Caret + | AND + | OR + | XOR + | REGEXP + | RLIKE + | BitWiseOr + | BitWiseAnd + | BitWiseXor + | ShiftLeft + | ShiftRight + | SOUNDS => with_span!(rule!{ #binary_op | #unary_op }).parse(i), + RArrow + | LongRArrow + | HashRArrow + | HashLongRArrow + | Placeholder + | QuestionOr + | QuestionAnd + | AtArrow + | ArrowAt + | AtQuestion + | AtAt + | HashMinus => with_span!(json_op).parse(i), + Factorial | SquareRoot | BitWiseNot | CubeRoot | Abs => with_span!(unary_op).parse(i), + ); + } + + with_span!(alt((rule!( + #function_call + | #map_access : "[] | . | :" + | #literal : "" + | #column_ref : "" + ),))) + .parse(i) } #[inline] @@ -1681,30 +1896,27 @@ pub fn binary_op(i: Input) -> IResult { ShiftRight => BinaryOperator::BitwiseShiftRight, ); match token_0.kind { - TokenKind::LIKE => { - return if matches!( - i.tokens.get(1).map(|first| first.kind == TokenKind::ANY), - Some(true) - ) { + LIKE => { + return if matches!(i.tokens.get(1).map(|first| first.kind == ANY), Some(true)) { return_op(i, 2, BinaryOperator::LikeAny(None)) } else { return_op(i, 1, BinaryOperator::Like(None)) } } - TokenKind::NOT => match i.tokens.get(1).map(|first| first.kind) { - Some(TokenKind::LIKE) => { + NOT => match i.tokens.get(1).map(|first| first.kind) { + Some(LIKE) => { return return_op(i, 2, BinaryOperator::NotLike(None)); } - Some(TokenKind::REGEXP) => { + Some(REGEXP) => { return return_op(i, 2, BinaryOperator::NotRegexp); } - Some(TokenKind::RLIKE) => { + Some(RLIKE) => { return return_op(i, 2, BinaryOperator::NotRLike); } _ => (), }, - TokenKind::SOUNDS => { - if let Some(TokenKind::LIKE) = i.tokens.get(1).map(|first| first.kind) { + SOUNDS => { + if let Some(LIKE) = i.tokens.get(1).map(|first| first.kind) { return return_op(i, 2, BinaryOperator::SoundsLike); } } @@ -1714,7 +1926,7 @@ pub fn binary_op(i: Input) -> IResult { Err(nom::Err::Error(Error::from_error_kind(i, ErrorKind::Other("expecting `IS`, `IN`, `LIKE`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<=>`, `<+>`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, or more ...")))) } -pub(crate) fn json_op(i: Input) -> IResult { +pub fn json_op(i: Input) -> IResult { if let Some(token_0) = i.tokens.first() { op_branch!( i, token_0, @@ -1747,7 +1959,8 @@ pub fn literal(i: Input) -> IResult { | #boolean | #literal_number | #null - )(i) + ) + .parse(i) } pub fn literal_hex_str(i: Input) -> IResult<&str> { @@ -1769,7 +1982,8 @@ pub fn literal_hex_str(i: Input) -> IResult<&str> { rule!( #mysql_hex | #pg_hex - )(i) + ) + .parse(i) } #[allow(clippy::from_str_radix_10)] @@ -1787,7 +2001,8 @@ pub fn literal_u64(i: Input) -> IResult { rule!( #decimal | #hex - )(i) + ) + .parse(i) } #[allow(clippy::from_str_radix_10)] @@ -1805,7 +2020,8 @@ pub fn literal_i64(i: Input) -> IResult { rule!( #decimal | #hex - )(i) + ) + .parse(i) } pub fn literal_number(i: Input) -> IResult { @@ -1831,11 +2047,12 @@ pub fn literal_number(i: Input) -> IResult { #decimal_uint | #decimal_float | #hex_uint - )(i) + ) + .parse(i) } pub fn literal_bool(i: Input) -> IResult { - alt((value(true, rule! { TRUE }), value(false, rule! { FALSE })))(i) + alt((value(true, rule! { TRUE }), value(false, rule! { FALSE }))).parse(i) } pub fn literal_string(i: Input) -> IResult { @@ -1892,7 +2109,8 @@ pub fn nullable(i: Input) -> IResult { alt(( value(true, rule! { NULL }), value(false, rule! { NOT ~ NULL }), - ))(i) + )) + .parse(i) } pub fn type_name(i: Input) -> IResult { @@ -2119,7 +2337,8 @@ pub fn weekday(i: Input) -> IResult { Weekday::Saturday, rule! { #literal_string_eq_ignore_case("SATURDAY") }, ), - ))(i) + )) + .parse(i) } pub fn interval_kind(i: Input) -> IResult { @@ -2335,7 +2554,8 @@ pub fn interval_kind(i: Input) -> IResult { | #yearweek_str | #millennium_str ), - ))(i) + )) + .parse(i) } pub fn map_access(i: Input) -> IResult { @@ -2369,7 +2589,8 @@ pub fn map_access(i: Input) -> IResult { #bracket | #dot_number | #colon - )(i) + ) + .parse(i) } pub fn map_element(i: Input) -> IResult<(Literal, Expr)> { @@ -2378,7 +2599,8 @@ pub fn map_element(i: Input) -> IResult<(Literal, Expr)> { #literal ~ ":" ~ #subexpr(0) }, |(key, _, value)| (key, value), - )(i) + ) + .parse(i) } pub fn function_call(i: Input) -> IResult { @@ -2593,7 +2815,7 @@ pub fn function_call(i: Input) -> IResult { }, }, }, - )(i) + ).parse(i) } pub fn parse_float(text: &str) -> Result { diff --git a/src/query/ast/src/parser/input.rs b/src/query/ast/src/parser/input.rs index 923c59677d891..d9a69af0f0c50 100644 --- a/src/query/ast/src/parser/input.rs +++ b/src/query/ast/src/parser/input.rs @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::ops::Range; -use std::ops::RangeFrom; -use std::ops::RangeFull; -use std::ops::RangeTo; +use std::iter::Cloned; +use std::iter::Enumerate; +use std::ops::Bound; +use std::ops::RangeBounds; use enum_as_inner::EnumAsInner; +use nom::Needed; +use nom::Offset; use crate::parser::token::Token; use crate::parser::Backtrace; @@ -40,60 +42,94 @@ impl<'a> std::ops::Deref for Input<'a> { } } -impl nom::InputLength for Input<'_> { - fn input_len(&self) -> usize { - self.tokens.input_len() +impl<'a> Input<'a> { + pub fn slice(&self, range: R) -> Self + where R: RangeBounds { + let len = self.tokens.len(); + let start = match range.start_bound() { + Bound::Included(&idx) => idx, + Bound::Excluded(&idx) => idx + 1, + Bound::Unbounded => 0, + }; + let end = match range.end_bound() { + Bound::Included(&idx) => idx + 1, + Bound::Excluded(&idx) => idx, + Bound::Unbounded => len, + }; + + Input { + tokens: &self.tokens[start.min(len)..end.min(len)], + ..*self + } } } -impl nom::Offset for Input<'_> { +impl<'a> Offset for Input<'a> { fn offset(&self, second: &Self) -> usize { - let fst = self.tokens.as_ptr(); - let snd = second.tokens.as_ptr(); - - (snd as usize - fst as usize) / std::mem::size_of::() + self.tokens.len().saturating_sub(second.tokens.len()) } } -impl nom::Slice> for Input<'_> { - fn slice(&self, range: Range) -> Self { - Input { - tokens: &self.tokens[range], - ..*self - } +impl<'a> nom::Input for Input<'a> { + type Item = Token<'a>; + type Iter = Cloned>>; + type IterIndices = Enumerate; + + fn input_len(&self) -> usize { + self.tokens.len() } -} -impl nom::Slice> for Input<'_> { - fn slice(&self, range: RangeTo) -> Self { - Input { - tokens: &self.tokens[range], - ..*self - } + fn take(&self, index: usize) -> Self { + self.slice(..index) } -} -impl nom::Slice> for Input<'_> { - fn slice(&self, range: RangeFrom) -> Self { - Input { - tokens: &self.tokens[range], - ..*self - } + fn take_from(&self, index: usize) -> Self { + self.slice(index..) } -} -impl nom::Slice for Input<'_> { - fn slice(&self, _: RangeFull) -> Self { - *self + fn take_split(&self, index: usize) -> (Self, Self) { + (self.slice(index..), self.slice(..index)) } -} + fn position

(&self, predicate: P) -> Option + where P: Fn(Self::Item) -> bool { + self.tokens + .iter() + .position(|token| predicate(token.clone())) + } + + fn iter_elements(&self) -> Self::Iter { + self.tokens.iter().cloned() + } + + fn iter_indices(&self) -> Self::IterIndices { + self.iter_elements().enumerate() + } + + fn slice_index(&self, count: usize) -> Result { + if self.tokens.len() >= count { + Ok(count) + } else { + Err(Needed::new(count - self.tokens.len())) + } + } +} #[derive(Clone, Debug)] pub struct WithSpan<'a, T> { pub(crate) span: Input<'a>, pub(crate) elem: T, } +impl<'a, T: PartialEq> PartialEq for WithSpan<'a, T> { + fn eq(&self, other: &Self) -> bool { + self.elem == other.elem + && self.span.tokens.as_ptr() == other.span.tokens.as_ptr() + && self.span.tokens.len() == other.span.tokens.len() + } +} + +impl<'a, T: Eq> Eq for WithSpan<'a, T> {} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, EnumAsInner)] pub enum ParseMode { #[default] diff --git a/src/query/ast/src/parser/parser.rs b/src/query/ast/src/parser/parser.rs index 9c34317c6a218..9253b18d27df6 100644 --- a/src/query/ast/src/parser/parser.rs +++ b/src/query/ast/src/parser/parser.rs @@ -14,6 +14,7 @@ use derive_visitor::DriveMut; use derive_visitor::VisitorMut; +use nom::Parser; use pretty_assertions::assert_eq; use crate::ast::ExplainKind; @@ -73,7 +74,7 @@ pub fn parse_comma_separated_exprs(tokens: &[Token], dialect: Dialect) -> Result pub fn parse_comma_separated_idents(tokens: &[Token], dialect: Dialect) -> Result> { run_parser(tokens, dialect, ParseMode::Default, true, |i| { - comma_separated_list1(ident)(i) + comma_separated_list1(ident).parse(i) }) } diff --git a/src/query/ast/src/parser/query.rs b/src/query/ast/src/parser/query.rs index ec4dc6b5380f1..7fced541a9be7 100644 --- a/src/query/ast/src/parser/query.rs +++ b/src/query/ast/src/parser/query.rs @@ -12,18 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::Reverse; use std::collections::BTreeMap; -use nom::branch::alt; -use nom::combinator::consumed; -use nom::combinator::map; -use nom::combinator::value; use nom::error::context; +use nom::error::Error as NomError; +use nom::error::ErrorKind as NomErrorKind; +use nom::Parser; +use nom_language::precedence::binary_op as precedence_binary_op; +use nom_language::precedence::precedence; +use nom_language::precedence::unary_op as precedence_unary_op; +use nom_language::precedence::Assoc as NomAssoc; +use nom_language::precedence::Operation; use nom_rule::rule; -use pratt::Affix; -use pratt::Associativity; -use pratt::PrattParser; -use pratt::Precedence; use crate::ast::*; use crate::parser::common::*; @@ -36,6 +37,7 @@ use crate::parser::statement::hint; use crate::parser::statement::set_table_option; use crate::parser::statement::top_n; use crate::parser::token::*; +use crate::parser::Error; use crate::parser::ErrorKind; use crate::Range; @@ -43,12 +45,13 @@ pub fn query(i: Input) -> IResult { context( "`SELECT ...`", map(set_operation, |set_expr| set_expr.into_query()), - )(i) + ) + .parse(i) } pub fn set_operation(i: Input) -> IResult { - let (rest, set_operation_elements) = rule! { #set_operation_element+ }(i)?; - run_pratt_parser(SetOperationParser, set_operation_elements, rest, i) + let (rest, set_operation_elements) = rule! { #set_operation_element+ }.parse(i)?; + parse_set_operation_elements(set_operation_elements, rest, i) } #[derive(Debug, Clone, PartialEq)] @@ -85,6 +88,25 @@ pub enum SetOperationElement { Group(SetExpr), } +impl SetOperationElement { + fn affix(&self) -> Affix { + match self { + SetOperationElement::SetOperation { op, .. } => match op { + SetOperator::Union | SetOperator::Except => { + Affix::Infix(Precedence(10), Associativity::Left) + } + SetOperator::Intersect => Affix::Infix(Precedence(20), Associativity::Left), + }, + SetOperationElement::With(_) => Affix::Prefix(Precedence(5)), + SetOperationElement::OrderBy { .. } + | SetOperationElement::Limit { .. } + | SetOperationElement::Offset { .. } + | SetOperationElement::IgnoreResult => Affix::Postfix(Precedence(5)), + _ => Affix::Nilfix, + } + } +} + pub fn set_operation_element(i: Input) -> IResult> { let with = map(with, SetOperationElement::With); let set_operator = map( @@ -232,162 +254,259 @@ pub fn set_operation_element(i: Input) -> IResult> | #ignore_result }), |(span, elem)| WithSpan { span, elem }, - )(i) + ) + .parse(i) } -struct SetOperationParser; - -impl<'a, I: Iterator>> PrattParser - for SetOperationParser -{ - type Error = &'static str; - type Input = WithSpan<'a, SetOperationElement>; - type Output = SetExpr; - - fn query(&mut self, input: &Self::Input) -> Result { - let affix = match &input.elem { - // https://learn.microsoft.com/en-us/sql/t-sql/language-elements/set-operators-except-and-intersect-transact-sql?view=sql-server-2017 - // If EXCEPT or INTERSECT is used together with other operators in an expression, it's evaluated in the context of the following precedence: - // 1. Expressions in parentheses - // 2. The INTERSECT operator - // 3. EXCEPT and UNION evaluated from left to right based on their position in the expression - SetOperationElement::SetOperation { op, .. } => match op { - SetOperator::Union | SetOperator::Except => { - Affix::Infix(Precedence(10), Associativity::Left) - } - SetOperator::Intersect => Affix::Infix(Precedence(20), Associativity::Left), - }, - SetOperationElement::With(_) => Affix::Prefix(Precedence(5)), - SetOperationElement::OrderBy { .. } => Affix::Postfix(Precedence(5)), - SetOperationElement::Limit { .. } => Affix::Postfix(Precedence(5)), - SetOperationElement::Offset { .. } => Affix::Postfix(Precedence(5)), - SetOperationElement::IgnoreResult => Affix::Postfix(Precedence(5)), - _ => Affix::Nilfix, - }; - Ok(affix) - } +type SetOpInput<'a> = ElementsInput<'a, SetOperationElement>; +type SetOpError<'a> = ElementsError<'a, SetOperationElement>; +type SetOpResult<'a, O> = ElementsResult<'a, SetOperationElement, O>; + +fn set_operation_prefix_parser<'a>() -> impl nom::Parser< + SetOpInput<'a>, + Output = nom_language::precedence::Unary< + WithSpan<'a, SetOperationElement>, + Reverse, + >, + Error = SetOpError<'a>, +> { + precedence_unary_op( + Reverse(Precedence(5)), + match_prefix(SetOperationElement::affix, Precedence(5)), + ) +} - fn primary(&mut self, input: Self::Input) -> Result { - let set_expr = match input.elem { - SetOperationElement::Group(expr) => expr, - SetOperationElement::SelectStmt { - hints, - distinct, - top_n, - select_list, - from, - selection, - group_by, - having, - window_list, - qualify, - } => SetExpr::Select(Box::new(SelectStmt { - span: transform_span(input.span.tokens), - hints, - top_n, - distinct, - select_list, - from, - selection, - group_by, - having, - window_list, - qualify, - })), - SetOperationElement::Values(values) => SetExpr::Values { - span: transform_span(input.span.tokens), - values, - }, - _ => unreachable!(), - }; - Ok(set_expr) +fn set_operation_postfix_parser<'a>() -> impl nom::Parser< + SetOpInput<'a>, + Output = nom_language::precedence::Unary< + WithSpan<'a, SetOperationElement>, + Reverse, + >, + Error = SetOpError<'a>, +> { + precedence_unary_op( + Reverse(Precedence(5)), + match_postfix(SetOperationElement::affix, Precedence(5)), + ) +} + +fn set_operation_binary_parser<'a>() -> impl nom::Parser< + SetOpInput<'a>, + Output = nom_language::precedence::Binary< + WithSpan<'a, SetOperationElement>, + Reverse, + >, + Error = SetOpError<'a>, +> { + alt(( + precedence_binary_op( + Reverse(Precedence(10)), + NomAssoc::Left, + match_binary( + SetOperationElement::affix, + Precedence(10), + Associativity::Left, + ), + ), + precedence_binary_op( + Reverse(Precedence(20)), + NomAssoc::Left, + match_binary( + SetOperationElement::affix, + Precedence(20), + Associativity::Left, + ), + ), + )) +} + +fn set_operation_operand_parser<'a>(input: SetOpInput<'a>) -> SetOpResult<'a, SetExpr> { + match_nilfix(SetOperationElement::affix)(input).and_then(|(rest, elem)| { + match set_operation_primary_expr(elem) { + Ok(expr) => Ok((rest, expr)), + Err(_) => Err(nom::Err::Failure(NomError::new( + input, + NomErrorKind::Verify, + ))), + } + }) +} + +fn parse_set_operation_elements<'a>( + elements: Vec>, + rest: Input<'a>, + input: Input<'a>, +) -> IResult<'a, SetExpr> { + let mut parser = precedence( + set_operation_prefix_parser(), + set_operation_postfix_parser(), + set_operation_binary_parser(), + set_operation_operand_parser, + set_operation_fold, + ); + + match parser(elements.as_slice()) { + Ok((remaining, expr)) if remaining.is_empty() => Ok((rest, expr)), + Ok((_, _)) => { + input.backtrace.clear(); + Err(nom::Err::Error(Error::from_error_kind( + rest, + ErrorKind::Other("unable to parse the set expression"), + ))) + } + Err(_) => { + input.backtrace.clear(); + Err(nom::Err::Error(Error::from_error_kind( + rest, + ErrorKind::Other("unable to parse the set expression"), + ))) + } } +} - fn infix( - &mut self, - lhs: Self::Output, - input: Self::Input, - rhs: Self::Output, - ) -> Result { - let set_expr = match input.elem { - SetOperationElement::SetOperation { op, all, .. } => { - SetExpr::SetOperation(Box::new(SetOperation { - span: transform_span(input.span.tokens), - op, - all, - left: Box::new(lhs), - right: Box::new(rhs), - })) - } - _ => unreachable!(), - }; - Ok(set_expr) +fn set_operation_fold<'a>( + operation: Operation< + WithSpan<'a, SetOperationElement>, + WithSpan<'a, SetOperationElement>, + WithSpan<'a, SetOperationElement>, + SetExpr, + >, +) -> Result { + match operation { + Operation::Prefix(op, rhs) => apply_set_operation_prefix(op, rhs), + Operation::Postfix(lhs, op) => apply_set_operation_postfix(lhs, op), + Operation::Binary(lhs, op, rhs) => apply_set_operation_infix(lhs, op, rhs), } +} - fn prefix(&mut self, op: Self::Input, rhs: Self::Output) -> Result { - let mut query = rhs.into_query(); - match op.elem { - SetOperationElement::With(with) => { - if query.with.is_some() { - return Err("duplicated WITH clause"); - } - query.with = Some(with); +fn set_operation_primary_expr( + input: WithSpan<'_, SetOperationElement>, +) -> Result { + let set_expr = match input.elem { + SetOperationElement::Group(expr) => expr, + SetOperationElement::SelectStmt { + hints, + distinct, + top_n, + select_list, + from, + selection, + group_by, + having, + window_list, + qualify, + } => SetExpr::Select(Box::new(SelectStmt { + span: transform_span(input.span.tokens), + hints, + top_n, + distinct, + select_list, + from, + selection, + group_by, + having, + window_list, + qualify, + })), + SetOperationElement::Values(values) => SetExpr::Values { + span: transform_span(input.span.tokens), + values, + }, + _ => unreachable!(), + }; + Ok(set_expr) +} + +fn apply_set_operation_infix( + lhs: SetExpr, + input: WithSpan<'_, SetOperationElement>, + rhs: SetExpr, +) -> Result { + let set_expr = match input.elem { + SetOperationElement::SetOperation { op, all, .. } => { + SetExpr::SetOperation(Box::new(SetOperation { + span: transform_span(input.span.tokens), + op, + all, + left: Box::new(lhs), + right: Box::new(rhs), + })) + } + _ => unreachable!(), + }; + Ok(set_expr) +} + +fn apply_set_operation_prefix( + op: WithSpan<'_, SetOperationElement>, + rhs: SetExpr, +) -> Result { + let mut query = rhs.into_query(); + match op.elem { + SetOperationElement::With(with) => { + if query.with.is_some() { + return Err("duplicated WITH clause"); } - _ => unreachable!(), + query.with = Some(with); } - Ok(SetExpr::Query(Box::new(query))) + _ => unreachable!(), } + Ok(SetExpr::Query(Box::new(query))) +} - fn postfix(&mut self, lhs: Self::Output, op: Self::Input) -> Result { - let mut query = lhs.into_query(); - match op.elem { - SetOperationElement::OrderBy { order_by } => { - if !query.order_by.is_empty() { - return Err("duplicated ORDER BY clause"); - } - if !query.limit.is_empty() { - return Err("ORDER BY must appear before LIMIT"); - } - if query.offset.is_some() { - return Err("ORDER BY must appear before OFFSET"); - } - query.order_by = order_by; +fn apply_set_operation_postfix( + lhs: SetExpr, + op: WithSpan<'_, SetOperationElement>, +) -> Result { + let mut query = lhs.into_query(); + match op.elem { + SetOperationElement::OrderBy { order_by } => { + if !query.order_by.is_empty() { + return Err("duplicated ORDER BY clause"); } - SetOperationElement::Limit { limit } => { - if query.limit.is_empty() && limit.len() > 2 { - return Err("[LIMIT n OFFSET m] or [LIMIT n,m]"); - } - if !query.limit.is_empty() { - return Err("duplicated LIMIT clause"); - } - if query.offset.is_some() { - return Err("LIMIT must appear before OFFSET"); - } - query.limit = limit; + if !query.limit.is_empty() { + return Err("ORDER BY must appear before LIMIT"); } - SetOperationElement::Offset { offset } => { - if query.limit.len() == 2 { - return Err("LIMIT n,m should not appear OFFSET"); - } - if query.offset.is_some() { - return Err("duplicated OFFSET clause"); - } - query.offset = Some(offset); + if query.offset.is_some() { + return Err("ORDER BY must appear before OFFSET"); + } + query.order_by = order_by; + } + SetOperationElement::Limit { limit } => { + if query.limit.is_empty() && limit.len() > 2 { + return Err("[LIMIT n OFFSET m] or [LIMIT n,m]"); + } + if !query.limit.is_empty() { + return Err("duplicated LIMIT clause"); } - SetOperationElement::IgnoreResult => { - query.ignore_result = true; + if query.offset.is_some() { + return Err("LIMIT must appear before OFFSET"); } - _ => unreachable!(), + query.limit = limit; } - Ok(SetExpr::Query(Box::new(query))) + SetOperationElement::Offset { offset } => { + if query.limit.len() == 2 { + return Err("LIMIT n,m should not appear OFFSET"); + } + if query.offset.is_some() { + return Err("duplicated OFFSET clause"); + } + query.offset = Some(offset); + } + SetOperationElement::IgnoreResult => { + query.ignore_result = true; + } + _ => unreachable!(), } + Ok(SetExpr::Query(Box::new(query))) } pub fn row_values(i: Input) -> IResult> { map( rule! {"(" ~ #comma_separated_list1(expr) ~ ")"}, |(_, row_values, _)| row_values, - )(i) + ) + .parse(i) } pub fn with(i: Input) -> IResult { @@ -413,7 +532,8 @@ pub fn with(i: Input) -> IResult { recursive: recursive.is_some(), ctes, }, - )(i) + ) + .parse(i) } pub fn exclude_col(i: Input) -> IResult> { @@ -433,7 +553,8 @@ pub fn exclude_col(i: Input) -> IResult> { rule!( #var | #vars - )(i) + ) + .parse(i) } #[allow(clippy::type_complexity)] @@ -544,7 +665,8 @@ pub fn select_target(i: Input) -> IResult { | #columns_regexp | #columns_lambda | #projection - )(i) + ) + .parse(i) } pub fn travel_point(i: Input) -> IResult { @@ -559,7 +681,8 @@ pub fn travel_point(i: Input) -> IResult { rule!( #at_stream | #at_snapshot_or_ts - )(i) + ) + .parse(i) } pub fn at_snapshot_or_ts(i: Input) -> IResult { @@ -578,7 +701,8 @@ pub fn at_snapshot_or_ts(i: Input) -> IResult { rule!( #at_snapshot | #at_timestamp | #at_offset - )(i) + ) + .parse(i) } pub fn temporal_clause(i: Input) -> IResult { @@ -606,7 +730,8 @@ pub fn temporal_clause(i: Input) -> IResult { rule!( #time_travel | #changes - )(i) + ) + .parse(i) } pub fn alias_name(i: Input) -> IResult { @@ -631,7 +756,8 @@ pub fn alias_name(i: Input) -> IResult { rule!( #short_alias | #as_alias - )(i) + ) + .parse(i) } pub fn with_options(i: Input) -> IResult { @@ -645,7 +771,8 @@ pub fn with_options(i: Input) -> IResult { }, |(_, _, options, _)| WithOptions { options }, ), - ))(i) + )) + .parse(i) } pub fn table_alias(i: Input) -> IResult { @@ -655,7 +782,8 @@ pub fn table_alias(i: Input) -> IResult { name, columns: opt_columns.map(|(_, cols, _)| cols).unwrap_or_default(), }, - )(i) + ) + .parse(i) } pub fn table_alias_without_as(i: Input) -> IResult { @@ -665,7 +793,8 @@ pub fn table_alias_without_as(i: Input) -> IResult { name, columns: opt_columns.map(|(_, cols, _)| cols).unwrap_or_default(), }, - )(i) + ) + .parse(i) } pub fn join_operator(i: Input) -> IResult { @@ -685,7 +814,8 @@ pub fn join_operator(i: Input) -> IResult { value(JoinOperator::LeftAsof, rule! { ASOF ~ LEFT }), value(JoinOperator::RightAsof, rule! { ASOF ~ RIGHT }), value(JoinOperator::Asof, rule! { ASOF }), - ))(i) + )) + .parse(i) } pub fn order_by_expr(i: Input) -> IResult { @@ -705,12 +835,13 @@ pub fn order_by_expr(i: Input) -> IResult { asc: opt_asc.map(|asc| asc.kind == ASC), nulls_first: opt_nulls_first, }, - )(i) + ) + .parse(i) } pub fn table_reference(i: Input) -> IResult { - let (rest, table_reference_elements) = rule! { #table_reference_element+ }(i)?; - run_pratt_parser(TableReferenceParser, table_reference_elements, rest, i) + let (rest, table_reference_elements) = rule! { #table_reference_element+ }.parse(i)?; + parse_table_reference_elements(table_reference_elements, rest, i) } #[derive(Debug, Clone, PartialEq)] @@ -729,7 +860,8 @@ pub fn table_function_param(i: Input) -> IResult { rule!( #named | #normal - )(i) + ) + .parse(i) } #[derive(Debug, Clone, PartialEq)] @@ -778,6 +910,16 @@ pub enum TableReferenceElement { }, } +impl TableReferenceElement { + fn affix(&self) -> Affix { + match self { + TableReferenceElement::Join { .. } => Affix::Infix(Precedence(10), Associativity::Left), + TableReferenceElement::JoinCondition(..) => Affix::Postfix(Precedence(5)), + _ => Affix::Nilfix, + } + } +} + pub fn table_reference_element(i: Input) -> IResult> { let aliased_table = map( rule! { @@ -888,10 +1030,264 @@ pub fn table_reference_element(i: Input) -> IResult = ElementsInput<'a, TableReferenceElement>; +type TableRefError<'a> = ElementsError<'a, TableReferenceElement>; +type TableRefResult<'a, O> = ElementsResult<'a, TableReferenceElement, O>; + +fn table_reference_prefix_parser<'a>() -> impl nom::Parser< + TableRefInput<'a>, + Output = nom_language::precedence::Unary< + WithSpan<'a, TableReferenceElement>, + Reverse, + >, + Error = TableRefError<'a>, +> { + nom::combinator::fail::< + TableRefInput<'a>, + nom_language::precedence::Unary, Reverse>, + TableRefError<'a>, + >() +} + +fn table_reference_postfix_parser<'a>() -> impl nom::Parser< + TableRefInput<'a>, + Output = nom_language::precedence::Unary< + WithSpan<'a, TableReferenceElement>, + Reverse, + >, + Error = TableRefError<'a>, +> { + precedence_unary_op( + Reverse(Precedence(5)), + match_postfix(TableReferenceElement::affix, Precedence(5)), + ) +} + +fn table_reference_binary_parser<'a>() -> impl nom::Parser< + TableRefInput<'a>, + Output = nom_language::precedence::Binary< + WithSpan<'a, TableReferenceElement>, + Reverse, + >, + Error = TableRefError<'a>, +> { + precedence_binary_op( + Reverse(Precedence(10)), + NomAssoc::Left, + match_binary( + TableReferenceElement::affix, + Precedence(10), + Associativity::Left, + ), + ) +} + +fn table_reference_operand_parser<'a>( + input: TableRefInput<'a>, +) -> TableRefResult<'a, TableReference> { + match_nilfix(TableReferenceElement::affix)(input).and_then(|(rest, elem)| { + match table_reference_primary_expr(elem) { + Ok(expr) => Ok((rest, expr)), + Err(_) => Err(nom::Err::Failure(NomError::new( + input, + NomErrorKind::Verify, + ))), + } + }) +} + +fn parse_table_reference_elements<'a>( + elements: Vec>, + rest: Input<'a>, + input: Input<'a>, +) -> IResult<'a, TableReference> { + let mut parser = precedence( + table_reference_prefix_parser(), + table_reference_postfix_parser(), + table_reference_binary_parser(), + table_reference_operand_parser, + table_reference_fold, + ); + + match parser(elements.as_slice()) { + Ok((remaining, expr)) if remaining.is_empty() => Ok((rest, expr)), + Ok((_, _)) => { + input.backtrace.clear(); + Err(nom::Err::Error(Error::from_error_kind( + rest, + ErrorKind::Other("unable to parse the table reference"), + ))) + } + Err(_) => { + input.backtrace.clear(); + Err(nom::Err::Error(Error::from_error_kind( + rest, + ErrorKind::Other("unable to parse the table reference"), + ))) + } + } +} + +fn table_reference_fold<'a>( + operation: Operation< + WithSpan<'a, TableReferenceElement>, + WithSpan<'a, TableReferenceElement>, + WithSpan<'a, TableReferenceElement>, + TableReference, + >, +) -> Result { + match operation { + Operation::Prefix(_, _) => Err("unexpected prefix operator"), + Operation::Postfix(lhs, op) => apply_table_reference_postfix(lhs, op), + Operation::Binary(lhs, op, rhs) => apply_table_reference_infix(lhs, op, rhs), + } +} + +fn table_reference_primary_expr( + input: WithSpan<'_, TableReferenceElement>, +) -> Result { + let table_ref = match input.elem { + TableReferenceElement::Group(table_ref) => table_ref, + TableReferenceElement::Table { + catalog, + database, + table, + alias, + temporal, + with_options, + pivot, + unpivot, + sample, + } => TableReference::Table { + span: transform_span(input.span.tokens), + catalog, + database, + table, + alias, + temporal, + with_options, + pivot, + unpivot, + sample, + }, + TableReferenceElement::TableFunction { + lateral, + name, + params, + alias, + sample, + } => { + let normal_params = params + .iter() + .filter_map(|p| match p { + TableFunctionParam::Normal(p) => Some(p.clone()), + _ => None, + }) + .collect(); + let named_params = params + .into_iter() + .filter_map(|p| match p { + TableFunctionParam::Named { name, value } => Some((name, value)), + _ => None, + }) + .collect(); + TableReference::TableFunction { + span: transform_span(input.span.tokens), + lateral, + name, + params: normal_params, + named_params, + alias, + sample, + } + } + TableReferenceElement::Subquery { + lateral, + subquery, + alias, + pivot, + unpivot, + } => TableReference::Subquery { + span: transform_span(input.span.tokens), + lateral, + subquery, + alias, + pivot, + unpivot, + }, + TableReferenceElement::Stage { + location, + options, + alias, + } => { + let options = SelectStageOptions::from(options); + TableReference::Location { + span: transform_span(input.span.tokens), + location, + options, + alias, + } + } + _ => unreachable!(), + }; + Ok(table_ref) +} + +fn apply_table_reference_infix( + lhs: TableReference, + input: WithSpan<'_, TableReferenceElement>, + rhs: TableReference, +) -> Result { + let table_ref = match input.elem { + TableReferenceElement::Join { op, natural } => { + let condition = if natural { + JoinCondition::Natural + } else { + JoinCondition::None + }; + TableReference::Join { + span: transform_span(input.span.tokens), + join: Join { + op, + condition, + left: Box::new(lhs), + right: Box::new(rhs), + }, + } + } + _ => unreachable!(), + }; + Ok(table_ref) +} + +fn apply_table_reference_postfix( + mut lhs: TableReference, + op: WithSpan<'_, TableReferenceElement>, +) -> Result { + match op.elem { + TableReferenceElement::JoinCondition(new_condition) => match &mut lhs { + TableReference::Join { + join: Join { condition, .. }, + .. + } => match *condition { + JoinCondition::None => { + *condition = new_condition; + Ok(lhs) + } + JoinCondition::Natural => Err("join condition conflicting with NATURAL"), + _ => Err("join condition already set"), + }, + _ => Err("join condition must apply to a join"), + }, + _ => unreachable!(), + } +} + // PIVOT(expr FOR col IN (ident, ... | subquery)) fn pivot(i: Input) -> IResult { map( @@ -903,7 +1299,8 @@ fn pivot(i: Input) -> IResult { value_column, values, }, - )(i) + ) + .parse(i) } fn unpivot_name(i: Input) -> IResult { @@ -927,7 +1324,8 @@ fn unpivot_name(i: Input) -> IResult { map( rule! {#ident ~ (#short_alias | #as_alias)?}, |(ident, alias)| UnpivotName { ident, alias }, - )(i) + ) + .parse(i) } // UNPIVOT(ident for ident IN (ident, ...)) @@ -941,7 +1339,7 @@ fn unpivot(i: Input) -> IResult { unpivot_column, column_names, }, - )(i) + ).parse(i) } fn pivot_values(i: Input) -> IResult { @@ -960,7 +1358,8 @@ fn pivot_values(i: Input) -> IResult { map(query, |q| PivotValues::Subquery(Box::new(q))), // Parse expression list - must be last map(comma_separated_list1(expr), PivotValues::ColumnValues), - ))(i) + )) + .parse(i) } fn get_table_sample( @@ -982,173 +1381,6 @@ fn get_table_sample( None } -struct TableReferenceParser; - -impl<'a, I: Iterator>> PrattParser - for TableReferenceParser -{ - type Error = &'static str; - type Input = WithSpan<'a, TableReferenceElement>; - type Output = TableReference; - - fn query(&mut self, input: &Self::Input) -> Result { - let affix = match &input.elem { - TableReferenceElement::Join { .. } => Affix::Infix(Precedence(10), Associativity::Left), - TableReferenceElement::JoinCondition(..) => Affix::Postfix(Precedence(5)), - _ => Affix::Nilfix, - }; - Ok(affix) - } - - fn primary(&mut self, input: Self::Input) -> Result { - let table_ref = match input.elem { - TableReferenceElement::Group(table_ref) => table_ref, - TableReferenceElement::Table { - catalog, - database, - table, - alias, - temporal, - with_options, - pivot, - unpivot, - sample, - } => TableReference::Table { - span: transform_span(input.span.tokens), - catalog, - database, - table, - alias, - temporal, - with_options, - pivot, - unpivot, - sample, - }, - TableReferenceElement::TableFunction { - lateral, - name, - params, - alias, - sample, - } => { - let normal_params = params - .iter() - .filter_map(|p| match p { - TableFunctionParam::Normal(p) => Some(p.clone()), - _ => None, - }) - .collect(); - let named_params = params - .into_iter() - .filter_map(|p| match p { - TableFunctionParam::Named { name, value } => Some((name, value)), - _ => None, - }) - .collect(); - TableReference::TableFunction { - span: transform_span(input.span.tokens), - lateral, - name, - params: normal_params, - named_params, - alias, - sample, - } - } - TableReferenceElement::Subquery { - lateral, - subquery, - alias, - pivot, - unpivot, - } => TableReference::Subquery { - span: transform_span(input.span.tokens), - lateral, - subquery, - alias, - pivot, - unpivot, - }, - TableReferenceElement::Stage { - location, - options, - alias, - } => { - let options = SelectStageOptions::from(options); - TableReference::Location { - span: transform_span(input.span.tokens), - location, - options, - alias, - } - } - _ => unreachable!(), - }; - Ok(table_ref) - } - - fn infix( - &mut self, - lhs: Self::Output, - input: Self::Input, - rhs: Self::Output, - ) -> Result { - let table_ref = match input.elem { - TableReferenceElement::Join { op, natural } => { - let condition = if natural { - JoinCondition::Natural - } else { - JoinCondition::None - }; - TableReference::Join { - span: transform_span(input.span.tokens), - join: Join { - op, - condition, - left: Box::new(lhs), - right: Box::new(rhs), - }, - } - } - _ => unreachable!(), - }; - Ok(table_ref) - } - - fn prefix( - &mut self, - _op: Self::Input, - _rhs: Self::Output, - ) -> Result { - unreachable!() - } - - fn postfix( - &mut self, - mut lhs: Self::Output, - op: Self::Input, - ) -> Result { - match op.elem { - TableReferenceElement::JoinCondition(new_condition) => match &mut lhs { - TableReference::Join { - join: Join { condition, .. }, - .. - } => match *condition { - JoinCondition::None => { - *condition = new_condition; - Ok(lhs) - } - JoinCondition::Natural => Err("join condition conflicting with NATURAL"), - _ => Err("join condition already set"), - }, - _ => Err("join condition must apply to a join"), - }, - _ => unreachable!(), - } - } -} - pub fn group_by_items(i: Input) -> IResult { let all = map(rule! { ALL }, |_| GroupBy::All); @@ -1193,7 +1425,8 @@ pub fn group_by_items(i: Input) -> IResult { } else { items.into_iter().next().unwrap() } - })(i) + }) + .parse(i) } pub fn window_frame_bound(i: Input) -> IResult { @@ -1213,7 +1446,8 @@ pub fn window_frame_bound(i: Input) -> IResult { map(rule! { #subexpr(0) ~ FOLLOWING }, |(expr, _)| { WindowFrameBound::Following(Some(Box::new(expr))) }), - ))(i) + )) + .parse(i) } pub fn window_frame_between(i: Input) -> IResult<(WindowFrameBound, WindowFrameBound)> { @@ -1225,7 +1459,8 @@ pub fn window_frame_between(i: Input) -> IResult<(WindowFrameBound, WindowFrameB map(rule! { #window_frame_bound }, |s| { (s, WindowFrameBound::CurrentRow) }), - ))(i) + )) + .parse(i) } pub fn window_spec(i: Input) -> IResult { @@ -1254,7 +1489,8 @@ pub fn window_spec(i: Input) -> IResult { } }), }, - )(i) + ) + .parse(i) } pub fn window_spec_ident(i: Input) -> IResult { @@ -1271,7 +1507,8 @@ pub fn window_spec_ident(i: Input) -> IResult { }, |window_name| Window::WindowReference(WindowRef { window_name }), ), - ))(i) + )) + .parse(i) } pub fn within_group(i: Input) -> IResult> { @@ -1280,7 +1517,8 @@ pub fn within_group(i: Input) -> IResult> { WITHIN ~ GROUP ~ "(" ~ ORDER ~ ^BY ~ ^#comma_separated_list1(order_by_expr) ~ ")" }, |(_, _, _, _, _, order_by, _)| order_by, - )(i) + ) + .parse(i) } pub fn window_function(i: Input) -> IResult { @@ -1292,7 +1530,8 @@ pub fn window_function(i: Input) -> IResult { ignore_nulls: opt_ignore_nulls.map(|key| key.0.kind == IGNORE), window: window.1, }, - )(i) + ) + .parse(i) } pub fn window_clause(i: Input) -> IResult { @@ -1304,5 +1543,6 @@ pub fn window_clause(i: Input) -> IResult { name: ident, spec: window, }, - )(i) + ) + .parse(i) } diff --git a/src/query/ast/src/parser/script.rs b/src/query/ast/src/parser/script.rs index 44b5c308f4a9b..a346704f18153 100644 --- a/src/query/ast/src/parser/script.rs +++ b/src/query/ast/src/parser/script.rs @@ -12,9 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use nom::branch::alt; -use nom::combinator::consumed; -use nom::combinator::map; +use nom::Parser; use nom_rule::rule; use crate::ast::*; @@ -40,7 +38,8 @@ pub fn script_block_or_stmt(i: Input) -> IResult { }), |(_, stmt)| ScriptBlockOrStmt::Statement(stmt.stmt), ), - ))(i) + )) + .parse(i) } pub fn script_block(i: Input) -> IResult { @@ -60,7 +59,8 @@ pub fn script_block(i: Input) -> IResult { body, } }, - )(i) + ) + .parse(i) } pub fn declare_item(i: Input) -> IResult { @@ -70,7 +70,8 @@ pub fn declare_item(i: Input) -> IResult { rule!( #declare_var | #declare_set - )(i) + ) + .parse(i) } pub fn declare_var(i: Input) -> IResult { @@ -84,7 +85,8 @@ pub fn declare_var(i: Input) -> IResult { data_type, default: default.map(|(_, default)| default), }, - )(i) + ) + .parse(i) } pub fn declare_set(i: Input) -> IResult { @@ -97,7 +99,8 @@ pub fn declare_set(i: Input) -> IResult { name, stmt, }, - )(i) + ) + .parse(i) } pub fn declare_cursor(i: Input) -> IResult { @@ -119,7 +122,8 @@ pub fn declare_cursor(i: Input) -> IResult { resultset: None, }, }, - )(i) + ) + .parse(i) } #[allow(clippy::large_enum_variant)] @@ -137,18 +141,19 @@ pub(crate) fn cursor_target(i: Input) -> IResult { rule!( #resultset | #statement - )(i) + ) + .parse(i) } pub(crate) fn iterable_item(i: Input) -> IResult { // For now, we'll treat all identifiers as potential iterables // The compiler will determine if it's a cursor or resultset // based on what was actually declared - map(ident, IterableItem::Resultset)(i) + map(ident, IterableItem::Resultset).parse(i) } pub fn script_stmts(i: Input) -> IResult> { - semicolon_terminated_list1(script_stmt)(i) + semicolon_terminated_list1(script_stmt).parse(i) } pub fn script_stmt(i: Input) -> IResult { @@ -448,5 +453,6 @@ pub fn script_stmt(i: Input) -> IResult { | #loop_stmts | #conditional_stmts | #run_stmt - )(i) + ) + .parse(i) } diff --git a/src/query/ast/src/parser/sequence.rs b/src/query/ast/src/parser/sequence.rs index e2062c4bede71..865892d6a58af 100644 --- a/src/query/ast/src/parser/sequence.rs +++ b/src/query/ast/src/parser/sequence.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use nom::Parser; use nom_rule::rule; use super::common::ident; @@ -34,7 +35,7 @@ pub fn sequence(i: Input) -> IResult { | #drop_sequence: "`DROP [IF EXISTS] `" | #show_sequences: "`SHOW SEQUENCES []`" | #desc_sequence: "`DESCRIBE SEQUENCE `" - )(i) + ).parse(i) } fn create_sequence(i: Input) -> IResult { diff --git a/src/query/ast/src/parser/stage.rs b/src/query/ast/src/parser/stage.rs index 199a241ab521d..dfd502ef00adb 100644 --- a/src/query/ast/src/parser/stage.rs +++ b/src/query/ast/src/parser/stage.rs @@ -14,8 +14,7 @@ use std::collections::BTreeMap; -use nom::branch::alt; -use nom::combinator::map; +use nom::Parser; use nom_rule::rule; use crate::ast::FileFormatOptions; @@ -32,28 +31,30 @@ use crate::parser::ErrorKind; pub fn parameter_to_grant_string(i: Input) -> IResult { let ident_to_string = |i| map_res(grant_ident, |ident| Ok(ident.name))(i); - let u64_to_string = |i| map(literal_u64, |v| v.to_string())(i); - let boolean_to_string = |i| map(literal_bool, |v| v.to_string())(i); + let u64_to_string = |i| map(literal_u64, |v| v.to_string()).parse(i); + let boolean_to_string = |i| map(literal_bool, |v| v.to_string()).parse(i); rule!( #literal_string | #ident_to_string | #u64_to_string | #boolean_to_string - )(i) + ) + .parse(i) } pub fn parameter_to_string(i: Input) -> IResult { let ident_to_string = |i| map_res(ident, |ident| Ok(ident.name))(i); - let u64_to_string = |i| map(literal_u64, |v| v.to_string())(i); - let boolean_to_string = |i| map(literal_bool, |v| v.to_string())(i); + let u64_to_string = |i| map(literal_u64, |v| v.to_string()).parse(i); + let boolean_to_string = |i| map(literal_bool, |v| v.to_string()).parse(i); rule!( #literal_string | #ident_to_string | #u64_to_string | #boolean_to_string - )(i) + ) + .parse(i) } pub fn connection_opt(sep: &'static str) -> impl FnMut(Input) -> IResult<(String, String)> { @@ -74,7 +75,8 @@ pub fn connection_opt(sep: &'static str) -> impl FnMut(Input) -> IResult<(String rule!( #string_options | #bool_options - )(i) + ) + .parse(i) } } @@ -85,7 +87,8 @@ pub fn connection_options(i: Input) -> IResult> { |(_, opts, _)| { BTreeMap::from_iter(opts.iter().map(|((k, v), _)| (k.to_lowercase(), v.clone()))) }, - )(i) + ) + .parse(i) } pub fn format_options(i: Input) -> IResult { @@ -194,21 +197,24 @@ pub fn format_options(i: Input) -> IResult { .map(|((k, v), _)| (k.to_lowercase(), v.clone())) .collect(), }, - )(i) + ) + .parse(i) } pub fn file_format_clause(i: Input) -> IResult { map( rule! { FILE_FORMAT ~ ^"=" ~ ^"(" ~ ^#format_options ~ ^")" }, |(_, _, _, opts, _)| opts, - )(i) + ) + .parse(i) } pub fn file_location(i: Input) -> IResult { alt(( string_location, map_res(at_string, |location| Ok(FileLocation::Stage(location))), - ))(i) + )) + .parse(i) } pub fn stage_location(i: Input) -> IResult { @@ -279,5 +285,6 @@ pub fn select_stage_option(i: Input) -> IResult { rule! { CASE_SENSITIVE ~ ^"=>" ~ ^#literal_bool }, |(_, _, case_sensitive)| SelectStageOption::CaseSensitive(case_sensitive), ), - ))(i) + )) + .parse(i) } diff --git a/src/query/ast/src/parser/statement.rs b/src/query/ast/src/parser/statement.rs index 53896aebec130..94f76d907c37d 100644 --- a/src/query/ast/src/parser/statement.rs +++ b/src/query/ast/src/parser/statement.rs @@ -16,12 +16,7 @@ use std::collections::BTreeMap; use std::time::Duration; use educe::Educe; -use nom::branch::alt; -use nom::combinator::consumed; -use nom::combinator::map; -use nom::combinator::not; -use nom::combinator::value; -use nom::Slice; +use nom::Parser; use nom_rule::rule; use super::sequence::sequence; @@ -71,7 +66,8 @@ fn procedure_type_name(i: Input) -> IResult> { |(_, _)| vec![], ); rule!(#procedure_empty_types: "()" - | #procedure_type_names: "(, ...)")(i) + | #procedure_type_names: "(, ...)") + .parse(i) } pub fn statement_body(i: Input) -> IResult { @@ -903,7 +899,8 @@ pub fn statement_body(i: Input) -> IResult { rule!( #from_table | #from_dot_table - )(i) + ) + .parse(i) } let show_columns = map( @@ -1705,7 +1702,8 @@ pub fn statement_body(i: Input) -> IResult { rule!( #set_comment | #unset_comment - )(i) + ) + .parse(i) } let drop_role = map( rule! { @@ -2423,7 +2421,8 @@ pub fn statement_body(i: Input) -> IResult { name: Some(name.to_string()), data_type, } - })(i) + }) + .parse(i) } fn procedure_return(i: Input) -> IResult> { @@ -2440,7 +2439,8 @@ pub fn statement_body(i: Input) -> IResult { }] }); rule!(#procedure_single_return: "" - | #procedure_table_return: "TABLE( , ...)")(i) + | #procedure_table_return: "TABLE( , ...)") + .parse(i) } fn procedure_arg(i: Input) -> IResult>> { @@ -2457,7 +2457,8 @@ pub fn statement_body(i: Input) -> IResult { |(_, _)| None, ); rule!(#procedure_empty_args: "()" - | #procedure_args: "( , ...)")(i) + | #procedure_args: "( , ...)") + .parse(i) } // CREATE [ OR REPLACE ] PROCEDURE () @@ -2534,7 +2535,7 @@ pub fn statement_body(i: Input) -> IResult { // |(_, _)| vec![], // ); // rule!(#procedure_empty_types: "()" - // | #procedure_type_names: "(, ...)")(i) + // | #procedure_type_names: "(, ...)").parse(i) // } let call_procedure = map( @@ -2840,7 +2841,7 @@ AS ), rule!(#comment), rule!(#vacuum_temporary_tables), - ))(i) + )).parse(i) } pub fn statement(i: Input) -> IResult { @@ -2852,7 +2853,8 @@ pub fn statement(i: Input) -> IResult { stmt, format: opt_format.map(|(_, format)| format.name), }, - )(i) + ) + .parse(i) } pub fn parse_create_option( @@ -2937,7 +2939,8 @@ pub fn conditional_multi_table_insert() -> impl FnMut(Input) -> IResult impl FnMut(Input) -> IResult IResult { condition: expr, into_clauses, }, - )(i) + ) + .parse(i) } fn into_clause(i: Input) -> IResult { @@ -2996,7 +3001,8 @@ fn into_clause(i: Input) -> IResult { .map(|(_, _, columns, _)| columns) .unwrap_or_default(), }, - )(i) + ) + .parse(i) } fn else_clause(i: Input) -> IResult { @@ -3005,7 +3011,8 @@ fn else_clause(i: Input) -> IResult { ELSE ~ (#into_clause)+ }, |(_, into_clauses)| ElseClause { into_clauses }, - )(i) + ) + .parse(i) } pub fn replace_stmt(allow_raw: bool) -> impl FnMut(Input) -> IResult { @@ -3052,33 +3059,35 @@ pub fn replace_stmt(allow_raw: bool) -> impl FnMut(Input) -> IResult delete_when: opt_delete_when.map(|(_, _, expr)| expr), }) }, - )(i) + ) + .parse(i) } } // `VALUES (expr, expr), (expr, expr)` pub fn insert_source(i: Input) -> IResult { - let row = map( + let row = parser_fn(map( rule! { "(" ~ #comma_separated_list1(expr) ~ ")" }, |(_, values, _)| values, - ); - let values = map( + )); + let values = parser_fn(map( rule! { VALUES ~ #comma_separated_list0(row) }, |(_, rows)| InsertSource::Values { rows }, - ); + )); - let query = map(query, |query| InsertSource::Select { + let query = parser_fn(map(query, |query| InsertSource::Select { query: Box::new(query), - }); + })); rule!( #values | #query - )(i) + ) + .parse(i) } pub fn insert_source_file(i: Input) -> IResult { @@ -3097,7 +3106,8 @@ pub fn insert_source_file(i: Input) -> IResult { location, format_options, }, - )(i) + ) + .parse(i) } // `INSERT INTO ... VALUES` statement will @@ -3124,7 +3134,8 @@ pub fn insert_source_fast_values(i: Input) -> IResult { #insert_source_file | #values | #query - )(i) + ) + .parse(i) } pub fn mutation_source(i: Input) -> IResult { @@ -3149,7 +3160,8 @@ pub fn mutation_source(i: Input) -> IResult { rule!( #query | #source_table - )(i) + ) + .parse(i) } pub fn unset_source(i: Input) -> IResult> { @@ -3170,7 +3182,8 @@ pub fn unset_source(i: Input) -> IResult> { rule!( #var | #vars - )(i) + ) + .parse(i) } pub fn set_stmt_args(i: Input) -> IResult<(Identifier, Box)> { @@ -3179,7 +3192,8 @@ pub fn set_stmt_args(i: Input) -> IResult<(Identifier, Box)> { #ident ~ "=" ~ #subexpr(0) }, |(id, _, expr)| (id, Box::new(expr)), - )(i) + ) + .parse(i) } pub fn set_var_hints(i: Input) -> IResult { @@ -3188,7 +3202,8 @@ pub fn set_var_hints(i: Input) -> IResult { SET_VAR ~ ^"(" ~ ^#ident ~ ^"=" ~ #subexpr(0) ~ ^")" }, |(_, _, name, _, expr, _)| HintItem { name, expr }, - )(i) + ) + .parse(i) } pub fn hint(i: Input) -> IResult { @@ -3204,7 +3219,7 @@ pub fn hint(i: Input) -> IResult { }, |_| Hint { hints_list: vec![] }, ); - rule!(#hint|#invalid_hint)(i) + rule!(#hint|#invalid_hint).parse(i) } pub fn query_setting(i: Input) -> IResult<(Identifier, Expr)> { @@ -3213,7 +3228,8 @@ pub fn query_setting(i: Input) -> IResult<(Identifier, Expr)> { #ident ~ "=" ~ #subexpr(0) }, |(id, _, value)| (id, value), - )(i) + ) + .parse(i) } pub fn query_statement_setting(i: Input) -> IResult { @@ -3235,7 +3251,7 @@ pub fn query_statement_setting(i: Input) -> IResult { } }, ); - rule!(#query_set: "(SETTING_NAME = VALUE, ...)")(i) + rule!(#query_set: "(SETTING_NAME = VALUE, ...)").parse(i) } pub fn top_n(i: Input) -> IResult { map( @@ -3250,7 +3266,7 @@ pub fn top_n(i: Input) -> IResult { : "TOP " }, |(_, _, n)| n, - )(i) + ).parse(i) } pub fn rest_str(i: Input) -> IResult<(String, usize)> { @@ -3371,7 +3387,7 @@ pub fn column_def(i: Input) -> IResult { }; (def, constraints) }, - )(i)?; + ).parse(i)?; for constraint in constraints { match constraint { @@ -3472,7 +3488,8 @@ pub fn create_def(i: Input) -> IResult { map(rule! { #column_def }, CreateDefinition::Column), map(rule! { #table_index_def }, CreateDefinition::TableIndex), map(rule! { #constraint_def }, CreateDefinition::Constraint), - ))(i) + )) + .parse(i) } pub fn role_name(i: Input) -> IResult { @@ -3514,7 +3531,8 @@ pub fn role_name(i: Input) -> IResult { rule!( #role_ident : "" | #role_lit : "''" - )(i) + ) + .parse(i) } pub fn grant_source(i: Input) -> IResult { @@ -3683,7 +3701,8 @@ pub fn grant_source(i: Input) -> IResult { | #procedure_privs: "ACCESS PROCEDURE ON PROCEDURE " | #procedure_all_privs: "ALL [ PRIVILEGES ] ON PROCEDURE " | #all : "ALL [ PRIVILEGES ] ON " - )(i) + ) + .parse(i) } pub fn priv_type(i: Input) -> IResult { @@ -3765,14 +3784,16 @@ pub fn priv_type(i: Input) -> IResult { | #drop | #create ), - ))(i) + )) + .parse(i) } pub fn stage_priv_type(i: Input) -> IResult { alt(( value(UserPrivilegeType::Read, rule! { READ }), value(UserPrivilegeType::Write, rule! { WRITE }), - ))(i) + )) + .parse(i) } pub fn priv_share_type(i: Input) -> IResult { @@ -3783,11 +3804,12 @@ pub fn priv_share_type(i: Input) -> IResult { ShareGrantObjectPrivilege::ReferenceUsage, rule! { REFERENCE_USAGE }, ), - ))(i) + )) + .parse(i) } pub fn alter_add_share_accounts(i: Input) -> IResult { - alt((value(true, rule! { ADD }), value(false, rule! { REMOVE })))(i) + alt((value(true, rule! { ADD }), value(false, rule! { REMOVE }))).parse(i) } pub fn on_object_name(i: Input) -> IResult { @@ -3854,7 +3876,8 @@ pub fn on_object_name(i: Input) -> IResult { | #connection : "CONNECTION " | #seq : "SEQUENCE " | #procedure : "PROCEDURE " - )(i) + ) + .parse(i) } pub fn grant_level(i: Input) -> IResult { @@ -3883,7 +3906,8 @@ pub fn grant_level(i: Input) -> IResult { #global : "*.*" | #db : ".*" | #table : "." - )(i) + ) + .parse(i) } pub fn grant_all_level(i: Input) -> IResult { @@ -3921,7 +3945,8 @@ pub fn grant_all_level(i: Input) -> IResult { | #table : ".
" | #stage : "STAGE " | #warehouse : "WAREHOUSE " - )(i) + ) + .parse(i) } pub fn grant_ownership_level(i: Input) -> IResult { @@ -3996,7 +4021,8 @@ pub fn grant_ownership_level(i: Input) -> IResult { | #table : ".
" | #object : "STAGE | UDF | WAREHOUSE | CONNECTION | SEQUENCE " | #procedure : "PROCEDURE " - )(i) + ) + .parse(i) } pub fn show_grant_option(i: Input) -> IResult { @@ -4025,7 +4051,7 @@ pub fn show_grant_option(i: Input) -> IResult { #grant_role: "FOR { ROLE | [USER] }" | #share_object_name: "ON {DATABASE | TABLE . | UDF | STAGE | CONNECTION | SEQUENCE }" | #role_granted: "OF ROLE " - )(i) + ).parse(i) } pub fn grant_option(i: Input) -> IResult { @@ -4046,7 +4072,8 @@ pub fn grant_option(i: Input) -> IResult { rule!( #role | #user - )(i) + ) + .parse(i) } pub fn create_table_source(i: Input) -> IResult { @@ -4115,7 +4142,8 @@ pub fn create_table_source(i: Input) -> IResult { rule!( #columns | #like - )(i) + ) + .parse(i) } pub fn alter_database_action(i: Input) -> IResult { @@ -4136,7 +4164,8 @@ pub fn alter_database_action(i: Input) -> IResult { rule!( #rename_database | #refresh_cache - )(i) + ) + .parse(i) } pub fn modify_column_type(i: Input) -> IResult { @@ -4285,7 +4314,8 @@ pub fn modify_column_action(i: Input) -> IResult { | #convert_stored_computed_column | #modify_column_type | #modify_column_comment - )(i) + ) + .parse(i) } pub fn alter_table_action(i: Input) -> IResult { @@ -4465,7 +4495,8 @@ pub fn alter_table_action(i: Input) -> IResult { | #drop_row_access_policy | #add_row_access_policy | #add_constraint - )(i) + ) + .parse(i) } pub fn match_clause(i: Input) -> IResult { @@ -4483,7 +4514,8 @@ pub fn match_clause(i: Input) -> IResult { operation: match_operation, }), }, - )(i) + ) + .parse(i) } fn match_operation(i: Input) -> IResult { @@ -4507,7 +4539,8 @@ fn match_operation(i: Input) -> IResult { is_star: true, }, ), - ))(i) + )) + .parse(i) } pub fn unmatch_clause(i: Input) -> IResult { @@ -4561,7 +4594,7 @@ pub fn unmatch_clause(i: Input) -> IResult { }) }, ), - ))(i) + )).parse(i) } pub fn add_column_option(i: Input) -> IResult { @@ -4570,7 +4603,8 @@ pub fn add_column_option(i: Input) -> IResult { map(rule! { AFTER ~ #ident }, |(_, ident)| { AddColumnOption::After(ident) }), - ))(i) + )) + .parse(i) } pub fn optimize_table_action(i: Input) -> IResult { @@ -4587,7 +4621,8 @@ pub fn optimize_table_action(i: Input) -> IResult { target: opt_segment.map_or(CompactTarget::Block, |_| CompactTarget::Segment), } }), - ))(i) + )) + .parse(i) } pub fn literal_duration(i: Input) -> IResult { @@ -4608,7 +4643,8 @@ pub fn literal_duration(i: Input) -> IResult { rule!( #days | #seconds - )(i) + ) + .parse(i) } pub fn vacuum_drop_table_option(i: Input) -> IResult { @@ -4620,7 +4656,8 @@ pub fn vacuum_drop_table_option(i: Input) -> IResult { dry_run: opt_dry_run.map(|dry_run| dry_run.2.is_some()), limit: opt_limit.map(|(_, limit)| limit as usize), }, - ),))(i) + ),)) + .parse(i) } pub fn vacuum_table_option(i: Input) -> IResult { @@ -4631,7 +4668,8 @@ pub fn vacuum_table_option(i: Input) -> IResult { |opt_dry_run| VacuumTableOption { dry_run: opt_dry_run.map(|dry_run| dry_run.2.is_some()), }, - ),))(i) + ),)) + .parse(i) } pub fn task_sql_block(i: Input) -> IResult { @@ -4658,7 +4696,7 @@ pub fn task_sql_block(i: Input) -> IResult { TaskSql::ScriptBlock(sql) }, ); - alt((single_statement, task_block))(i) + alt((single_statement, task_block)).parse(i) } pub fn alter_task_option(i: Input) -> IResult { @@ -4735,7 +4773,8 @@ pub fn alter_task_option(i: Input) -> IResult { | #modify_when | #add_after | #remove_after - )(i) + ) + .parse(i) } pub fn alter_pipe_option(i: Input) -> IResult { @@ -4764,7 +4803,8 @@ pub fn alter_pipe_option(i: Input) -> IResult { rule!( #set | #refresh - )(i) + ) + .parse(i) } pub fn task_warehouse_option(i: Input) -> IResult { @@ -4779,7 +4819,8 @@ pub fn task_warehouse_option(i: Input) -> IResult { }; WarehouseOptions { warehouse } }, - ),))(i) + ),)) + .parse(i) } pub fn assign_nodes_list(i: Input) -> IResult, u64)>> { @@ -4792,7 +4833,8 @@ pub fn assign_nodes_list(i: Input) -> IResult, u64)>> { map(comma_separated_list1(nodes_list), |opts| { opts.into_iter().collect() - })(i) + }) + .parse(i) } pub fn assign_warehouse_nodes_list(i: Input) -> IResult, u64)>> { @@ -4807,7 +4849,8 @@ pub fn assign_warehouse_nodes_list(i: Input) -> IResult IResult, u64)>> { @@ -4822,7 +4865,8 @@ pub fn unassign_warehouse_nodes_list(i: Input) -> IResult IResult> { @@ -4836,7 +4880,8 @@ pub fn warehouse_cluster_option(i: Input) -> IResult> { opts.into_iter() .map(|(k, v)| (k.name.to_lowercase(), v.clone())) .collect() - })(i) + }) + .parse(i) } pub fn workload_quotas(i: Input) -> IResult> { @@ -4895,14 +4940,16 @@ pub fn task_schedule_option(i: Input) -> IResult { | #cron_expr | #interval_sec | #interval_millis - )(i) + ) + .parse(i) } pub fn kill_target(i: Input) -> IResult { alt(( value(KillTarget::Query, rule! { QUERY }), value(KillTarget::Connection, rule! { CONNECTION }), - ))(i) + )) + .parse(i) } pub fn priority(i: Input) -> IResult { @@ -4910,34 +4957,38 @@ pub fn priority(i: Input) -> IResult { value(Priority::LOW, rule! { LOW }), value(Priority::MEDIUM, rule! { MEDIUM }), value(Priority::HIGH, rule! { HIGH }), - ))(i) + )) + .parse(i) } pub fn action(i: Input) -> IResult { - let mut backtrace = map( + let mut backtrace = parser_fn(map( rule! { #switch ~ EXCEPTION_BACKTRACE }, |(switch, _)| SystemAction::Backtrace(switch), - ); + )); // add other system action type here rule!( #backtrace - )(i) + ) + .parse(i) } pub fn switch(i: Input) -> IResult { alt(( value(true, rule! { ENABLE }), value(false, rule! { DISABLE }), - ))(i) + )) + .parse(i) } pub fn cluster_type(i: Input) -> IResult { alt(( value(ClusterType::Linear, rule! { LINEAR }), value(ClusterType::Hilbert, rule! { HILBERT }), - ))(i) + )) + .parse(i) } pub fn limit_where(i: Input) -> IResult { @@ -4948,7 +4999,8 @@ pub fn limit_where(i: Input) -> IResult { |(_, selection)| ShowLimit::Where { selection: Box::new(selection), }, - )(i) + ) + .parse(i) } pub fn limit_like(i: Input) -> IResult { @@ -4957,14 +5009,16 @@ pub fn limit_like(i: Input) -> IResult { LIKE ~ #literal_string }, |(_, pattern)| ShowLimit::Like { pattern }, - )(i) + ) + .parse(i) } pub fn show_limit(i: Input) -> IResult { rule!( #limit_like | #limit_where - )(i) + ) + .parse(i) } pub fn show_options(i: Input) -> IResult { @@ -4976,7 +5030,8 @@ pub fn show_options(i: Input) -> IResult { show_limit, limit: opt_limit.map(|(_, limit)| limit), }, - )(i) + ) + .parse(i) } pub fn show_stats_stmt(i: Input) -> IResult { @@ -5001,7 +5056,8 @@ pub fn show_stats_stmt(i: Input) -> IResult { target: ShowStatsTarget::Table(table), }, ), - ))(i) + )) + .parse(i) } pub fn table_option(i: Input) -> IResult> { @@ -5015,7 +5071,8 @@ pub fn table_option(i: Input) -> IResult> { .map(|(k, _, v)| (k.name.to_lowercase(), v.clone())), ) }, - )(i) + ) + .parse(i) } pub fn set_table_option(i: Input) -> IResult> { @@ -5030,16 +5087,18 @@ pub fn set_table_option(i: Input) -> IResult> { opts.into_iter() .map(|(k, v)| (k.name.to_lowercase(), v.clone())) .collect() - })(i) + }) + .parse(i) } pub fn option_to_string(i: Input) -> IResult { - let bool_to_string = |i| map(literal_bool, |v| v.to_string())(i); + let bool_to_string = |i| map(literal_bool, |v| v.to_string()).parse(i); rule!( #bool_to_string | #parameter_to_string - )(i) + ) + .parse(i) } pub fn engine(i: Input) -> IResult { @@ -5058,24 +5117,26 @@ pub fn engine(i: Input) -> IResult { ENGINE ~ ^"=" ~ ^#engine }, |(_, _, engine)| engine, - )(i) + ) + .parse(i) } pub fn database_engine(i: Input) -> IResult { - value(DatabaseEngine::Default, rule! { DEFAULT })(i) + value(DatabaseEngine::Default, rule! { DEFAULT }).parse(i) } pub fn create_database_option(i: Input) -> IResult { - let mut create_db_engine = map( + let mut create_db_engine = parser_fn(map( rule! { ENGINE ~ ^"=" ~ ^#database_engine }, |(_, _, option)| CreateDatabaseOption::DatabaseEngine(option), - ); + )); rule!( #create_db_engine - )(i) + ) + .parse(i) } pub fn catalog_type(i: Input) -> IResult { @@ -5083,7 +5144,8 @@ pub fn catalog_type(i: Input) -> IResult { value(CatalogType::Default, rule! { DEFAULT }), value(CatalogType::Hive, rule! { HIVE }), value(CatalogType::Iceberg, rule! { ICEBERG }), - ))(i) + )) + .parse(i) } pub fn user_option(i: Input) -> IResult { @@ -5159,7 +5221,8 @@ pub fn user_option(i: Input) -> IResult { | #must_change_password | #set_workload_group | #unset_workload_group - )(i) + ) + .parse(i) } pub fn user_identity(i: Input) -> IResult { @@ -5171,7 +5234,8 @@ pub fn user_identity(i: Input) -> IResult { let hostname = "%".to_string(); UserIdentity { username, hostname } }, - )(i) + ) + .parse(i) } pub fn auth_type(i: Input) -> IResult { @@ -5180,14 +5244,16 @@ pub fn auth_type(i: Input) -> IResult { value(AuthType::Sha256Password, rule! { SHA256_PASSWORD }), value(AuthType::DoubleSha1Password, rule! { DOUBLE_SHA1_PASSWORD }), value(AuthType::JWT, rule! { JWT }), - ))(i) + )) + .parse(i) } pub fn presign_action(i: Input) -> IResult { alt(( value(PresignAction::Download, rule! { DOWNLOAD }), value(PresignAction::Upload, rule! { UPLOAD }), - ))(i) + )) + .parse(i) } pub fn presign_location(i: Input) -> IResult { @@ -5208,7 +5274,8 @@ pub fn presign_option(i: Input) -> IResult { rule! { CONTENT_TYPE ~ ^"=" ~ ^#literal_string }, |(_, _, v)| PresignOption::ContentType(v), ), - ))(i) + )) + .parse(i) } pub fn table_reference_with_alias(i: Input) -> IResult { @@ -5231,13 +5298,15 @@ pub fn table_reference_with_alias(i: Input) -> IResult { unpivot: None, sample: None, }, - )(i) + ) + .parse(i) } pub fn update_expr(i: Input) -> IResult { map(rule! { ( #ident ~ "=" ~ ^#expr ) }, |(name, _, expr)| { UpdateExpr { name, expr } - })(i) + }) + .parse(i) } pub fn udaf_state_field(i: Input) -> IResult { @@ -5248,7 +5317,8 @@ pub fn udaf_state_field(i: Input) -> IResult { : "` `" }, |(name, type_name)| UDAFStateField { name, type_name }, - )(i) + ) + .parse(i) } pub fn udf_header(i: Input) -> IResult<(String, String)> { @@ -5257,7 +5327,8 @@ pub fn udf_header(i: Input) -> IResult<(String, String)> { #literal_string ~ #match_text("=") ~ ^#literal_string }, |(k, _, v)| (k, v), - )(i) + ) + .parse(i) } pub fn udf_script_or_address(i: Input) -> IResult<(String, bool)> { @@ -5278,7 +5349,8 @@ pub fn udf_script_or_address(i: Input) -> IResult<(String, bool)> { rule!( #script: "AS " | #address: "ADDRESS=" - )(i) + ) + .parse(i) } pub fn udf_definition(i: Input) -> IResult { @@ -5304,7 +5376,8 @@ pub fn udf_definition(i: Input) -> IResult { rule!( #scalar: "" | #table: "TABLE (, ...)" - )(i) + ) + .parse(i) } let lambda_udf = map( @@ -5462,7 +5535,7 @@ pub fn udf_definition(i: Input) -> IResult { | #udaf: "(<[arg_name] arg_type>, ...) STATE {, ...} RETURNS LANGUAGE { ADDRESS= | AS } " | #udf: "(<[arg_name] arg_type>, ...) RETURNS LANGUAGE HANDLER= { ADDRESS= | AS } " | #scalar_udf_or_udtf: "(, ...) RETURNS AS }" - )(i) + ).parse(i) } fn lambda_udf_params(i: Input) -> IResult { @@ -5482,7 +5555,8 @@ fn lambda_udf_params(i: Input) -> IResult { rule!( #names: "(, ...)" | #name_with_types: "(, ...)" - )(i) + ) + .parse(i) } fn udf_args(i: Input) -> IResult { @@ -5502,18 +5576,20 @@ fn udf_args(i: Input) -> IResult { rule!( #types: "(, ...)" | #name_with_types: "(, ...)" - )(i) + ) + .parse(i) } fn udtf_arg(i: Input) -> IResult<(Identifier, TypeName)> { - map(rule! { #ident ~ ^#type_name }, |(name, ty)| (name, ty))(i) + map(rule! { #ident ~ ^#type_name }, |(name, ty)| (name, ty)).parse(i) } fn udf_immutable(i: Input) -> IResult { alt(( value(false, rule! { VOLATILE }), value(true, rule! { IMMUTABLE }), - ))(i) + )) + .parse(i) } pub fn row_access_definition(i: Input) -> IResult { @@ -5523,7 +5599,8 @@ pub fn row_access_definition(i: Input) -> IResult { name: name.to_string(), data_type, } - })(i) + }) + .parse(i) } let row_access_def = map( @@ -5539,14 +5616,16 @@ pub fn row_access_definition(i: Input) -> IResult { rule!( #row_access_def: "AS ( [ , ... ]) RETURNS BOOLEAN -> " - )(i) + ) + .parse(i) } pub fn mutation_update_expr(i: Input) -> IResult { map( rule! { #dot_separated_idents_1_to_2 ~ "=" ~ ^#expr }, |((table, name), _, expr)| MutationUpdateExpr { table, name, expr }, - )(i) + ) + .parse(i) } pub fn password_set_options(i: Input) -> IResult { @@ -5594,7 +5673,8 @@ pub fn password_set_options(i: Input) -> IResult { comment: opt_comment.map(|opt| opt.2), } }, - )(i) + ) + .parse(i) } pub fn password_unset_options(i: Input) -> IResult { @@ -5642,7 +5722,8 @@ pub fn password_unset_options(i: Input) -> IResult { comment: opt_comment.is_some(), } }, - )(i) + ) + .parse(i) } pub fn alter_password_action(i: Input) -> IResult { @@ -5662,7 +5743,8 @@ pub fn alter_password_action(i: Input) -> IResult { rule!( #set_options | #unset_options - )(i) + ) + .parse(i) } pub fn explain_option(i: Input) -> IResult { @@ -5677,7 +5759,8 @@ pub fn explain_option(i: Input) -> IResult { DECORRELATED => ExplainOption::Decorrelated, _ => unreachable!(), }, - )(i) + ) + .parse(i) } pub fn create_task_option(i: Input) -> IResult { @@ -5735,7 +5818,8 @@ pub fn create_task_option(i: Input) -> IResult { | #comment_opt }, |opt| opt, - )(i) + ) + .parse(i) } fn alter_task_set_option(i: Input) -> IResult { @@ -5779,7 +5863,8 @@ fn alter_task_set_option(i: Input) -> IResult { | #comment_opt }, |opt| opt, - )(i) + ) + .parse(i) } pub fn notification_webhook_options(i: Input) -> IResult { @@ -5812,14 +5897,16 @@ pub fn notification_webhook_options(i: Input) -> IResult IResult { map( rule! { WEBHOOK ~ ^"=" ~ ^"(" ~ ^#notification_webhook_options ~ ^")" }, |(_, _, _, opts, _)| opts, - )(i) + ) + .parse(i) } pub fn alter_notification_options(i: Input) -> IResult { @@ -5854,7 +5941,8 @@ pub fn alter_notification_options(i: Input) -> IResult | #comment }, |opts| opts, - )(i) + ) + .parse(i) } fn index_type(i: Input) -> IResult { @@ -5862,5 +5950,6 @@ fn index_type(i: Input) -> IResult { value(TableIndexType::Inverted, rule! { INVERTED }), value(TableIndexType::Ngram, rule! { NGRAM }), value(TableIndexType::Vector, rule! { VECTOR }), - ))(i) + )) + .parse(i) } diff --git a/src/query/ast/src/parser/stream.rs b/src/query/ast/src/parser/stream.rs index 93e42c4ae5894..0aab80b501f84 100644 --- a/src/query/ast/src/parser/stream.rs +++ b/src/query/ast/src/parser/stream.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use nom::combinator::map; +use nom::Parser; use nom_rule::rule; use crate::ast::CreateStreamStmt; @@ -39,7 +39,7 @@ pub fn stream_table(i: Input) -> IResult { | #drop_stream: "`DROP STREAM [IF EXISTS] [.]`" | #show_streams: "`SHOW [FULL] STREAMS [FROM ] []`" | #describe_stream: "`DESCRIBE STREAM [.]`" - )(i) + ).parse(i) } fn create_stream(i: Input) -> IResult { @@ -97,7 +97,8 @@ fn drop_stream(i: Input) -> IResult { stream, }) }, - )(i) + ) + .parse(i) } fn show_streams(i: Input) -> IResult { @@ -118,7 +119,7 @@ fn show_streams(i: Input) -> IResult { limit, }) }, - )(i) + ).parse(i) } fn describe_stream(i: Input) -> IResult { @@ -133,5 +134,6 @@ fn describe_stream(i: Input) -> IResult { stream, }) }, - )(i) + ) + .parse(i) } diff --git a/src/query/ast/src/precedence.rs b/src/query/ast/src/precedence.rs new file mode 100644 index 0000000000000..8b85d7d41548c --- /dev/null +++ b/src/query/ast/src/precedence.rs @@ -0,0 +1,30 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Precedence(pub u32); + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Associativity { + Left, + Right, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Affix { + Nilfix, + Prefix(Precedence), + Postfix(Precedence), + Infix(Precedence, Associativity), +} diff --git a/src/query/ast/tests/it/parser.rs b/src/query/ast/tests/it/parser.rs index bd366a85c5da8..53f583beadd6b 100644 --- a/src/query/ast/tests/it/parser.rs +++ b/src/query/ast/tests/it/parser.rs @@ -26,6 +26,7 @@ use databend_common_ast::parser::statement::insert_stmt; use databend_common_ast::parser::token::*; use databend_common_ast::parser::*; use goldenfile::Mint; +use nom::Parser; use nom_rule::rule; fn run_parser(file: &mut dyn Write, parser: P, src: &str) @@ -58,7 +59,7 @@ fn run_parser_with_dialect( }; let parser = parser; let mut parser = rule! { #parser ~ &EOI }; - match parser(input) { + match parser.parse(input) { Ok((i, (output, _))) => { assert_eq!(i[0].kind, TokenKind::EOI); writeln!(file, "---------- Input ----------").unwrap(); From 222c1bcfe4676ea2eb9ddd2c067650e027bef657 Mon Sep 17 00:00:00 2001 From: kould Date: Mon, 10 Nov 2025 15:00:35 +0800 Subject: [PATCH 02/13] chore: codefmt --- src/query/ast/src/parser/input.rs | 2 +- src/query/ast/src/parser/query.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/query/ast/src/parser/input.rs b/src/query/ast/src/parser/input.rs index d9a69af0f0c50..e789b614f1ac6 100644 --- a/src/query/ast/src/parser/input.rs +++ b/src/query/ast/src/parser/input.rs @@ -123,7 +123,7 @@ pub struct WithSpan<'a, T> { impl<'a, T: PartialEq> PartialEq for WithSpan<'a, T> { fn eq(&self, other: &Self) -> bool { self.elem == other.elem - && self.span.tokens.as_ptr() == other.span.tokens.as_ptr() + && std::ptr::eq(self.span.tokens.as_ptr(), other.span.tokens.as_ptr()) && self.span.tokens.len() == other.span.tokens.len() } } diff --git a/src/query/ast/src/parser/query.rs b/src/query/ast/src/parser/query.rs index 7fced541a9be7..5903ad1e3695a 100644 --- a/src/query/ast/src/parser/query.rs +++ b/src/query/ast/src/parser/query.rs @@ -346,7 +346,7 @@ fn parse_set_operation_elements<'a>( ); match parser(elements.as_slice()) { - Ok((remaining, expr)) if remaining.is_empty() => Ok((rest, expr)), + Ok(([], expr)) => Ok((rest, expr)), Ok((_, _)) => { input.backtrace.clear(); Err(nom::Err::Error(Error::from_error_kind( @@ -1115,7 +1115,7 @@ fn parse_table_reference_elements<'a>( ); match parser(elements.as_slice()) { - Ok((remaining, expr)) if remaining.is_empty() => Ok((rest, expr)), + Ok(([], expr)) => Ok((rest, expr)), Ok((_, _)) => { input.backtrace.clear(); Err(nom::Err::Error(Error::from_error_kind( From 41203ffc2fc50088afd547456f22dbff3d2da33d Mon Sep 17 00:00:00 2001 From: kould Date: Mon, 10 Nov 2025 19:21:46 +0800 Subject: [PATCH 03/13] refactor: replace nom-language to pratt-parser --- Cargo.lock | 11 +- Cargo.toml | 2 +- src/query/ast/Cargo.toml | 2 +- src/query/ast/benches/bench.rs | 10 +- src/query/ast/src/ast/expr.rs | 4 +- src/query/ast/src/lib.rs | 1 - src/query/ast/src/parser/common.rs | 174 ++- src/query/ast/src/parser/expr.rs | 1260 ++++++++--------- src/query/ast/src/parser/input.rs | 45 +- src/query/ast/src/parser/query.rs | 823 ++++------- src/query/ast/src/precedence.rs | 30 - src/query/ast/tests/it/testdata/dialect.txt | 2 +- .../ast/tests/it/testdata/expr-error.txt | 14 +- .../ast/tests/it/testdata/query-error.txt | 1 - .../ast/tests/it/testdata/stmt-error.txt | 7 +- 15 files changed, 1011 insertions(+), 1375 deletions(-) delete mode 100644 src/query/ast/src/precedence.rs diff --git a/Cargo.lock b/Cargo.lock index 9e3cdf1b83732..8cb788276849e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3054,10 +3054,10 @@ dependencies = [ "itertools 0.13.0", "logos", "nom 8.0.0", - "nom-language", "nom-rule", "ordered-float 5.0.0", "percent-encoding", + "pratt", "pretty_assertions", "recursive", "rspack-codespan-reporting", @@ -10723,15 +10723,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "nom-language" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2de2bc5b451bfedaef92c90b8939a8fff5770bdcc1fafd6239d086aab8fa6b29" -dependencies = [ - "nom 8.0.0", -] - [[package]] name = "nom-rule" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index acf6369b73eed..2ea8645022b88 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -555,8 +555,8 @@ gix = "0.71.0" indent = "0.1.1" logos = "0.12.1" nom = "8.0.0" -nom-language = "0.1.0" nom-rule = "0.5.1" +pratt = "0.4.0" rspack-codespan-reporting = "0.11" rustc-demangle = "0.1" strsim = "0.10" diff --git a/src/query/ast/Cargo.toml b/src/query/ast/Cargo.toml index e17da802a1854..2fbab0490bed9 100644 --- a/src/query/ast/Cargo.toml +++ b/src/query/ast/Cargo.toml @@ -19,10 +19,10 @@ indent = { workspace = true } itertools = { workspace = true } logos = { workspace = true } nom = { workspace = true } -nom-language = { workspace = true } nom-rule = { workspace = true } ordered-float = { workspace = true } percent-encoding = { workspace = true } +pratt = { workspace = true } pretty_assertions = { workspace = true } recursive = { workspace = true } rspack-codespan-reporting = { workspace = true } diff --git a/src/query/ast/benches/bench.rs b/src/query/ast/benches/bench.rs index 15c735ab3f130..12c3711ac659f 100644 --- a/src/query/ast/benches/bench.rs +++ b/src/query/ast/benches/bench.rs @@ -18,11 +18,11 @@ fn main() { // bench fastest │ slowest │ median │ mean │ samples │ iters // ╰─ dummy │ │ │ │ │ -// ├─ deep_function_call 242.8 µs │ 525.3 µs │ 258.9 µs │ 262.8 µs │ 100 │ 100 -// ├─ deep_query 235.6 µs │ 364.8 µs │ 244.8 µs │ 249.3 µs │ 100 │ 100 -// ├─ large_query 362.9 µs │ 451.6 µs │ 376.5 µs │ 379.7 µs │ 100 │ 100 -// ├─ large_statement 364.8 µs │ 418.4 µs │ 380.2 µs │ 382.8 µs │ 100 │ 100 -// ╰─ wide_expr 96.97 µs │ 270.2 µs │ 102.8 µs │ 105.3 µs │ 100 │ 100 +// ├─ deep_function_call 174.4 µs │ 405.3 µs │ 186.3 µs │ 189.7 µs │ 100 │ 100 +// ├─ deep_query 226.1 µs │ 396.8 µs │ 239.3 µs │ 242.2 µs │ 100 │ 100 +// ├─ large_query 270.5 µs │ 362.8 µs │ 289.7 µs │ 294 µs │ 100 │ 100 +// ├─ large_statement 283.4 µs │ 313.5 µs │ 294.6 µs │ 295 µs │ 100 │ 100 +// ╰─ wide_expr 38.01 µs │ 189 µs │ 39.04 µs │ 40.81 µs │ 100 │ 100 #[divan::bench_group(max_time = 0.5)] mod dummy { diff --git a/src/query/ast/src/ast/expr.rs b/src/query/ast/src/ast/expr.rs index 0d2cb4df53d20..6fd85d563bc93 100644 --- a/src/query/ast/src/ast/expr.rs +++ b/src/query/ast/src/ast/expr.rs @@ -20,6 +20,8 @@ use derive_visitor::DriveMut; use educe::Educe; use enum_as_inner::EnumAsInner; use ethnum::i256; +use pratt::Affix; +use pratt::Associativity; use super::ColumnRef; use super::OrderByExpr; @@ -30,8 +32,6 @@ use crate::ast::write_dot_separated_list; use crate::ast::Identifier; use crate::ast::Indirection; use crate::ast::Query; -use crate::precedence::Affix; -use crate::precedence::Associativity; use crate::span::merge_span; use crate::ParseError; use crate::Result; diff --git a/src/query/ast/src/lib.rs b/src/query/ast/src/lib.rs index 87e8bab927ae8..a5af0f29f2a59 100644 --- a/src/query/ast/src/lib.rs +++ b/src/query/ast/src/lib.rs @@ -18,7 +18,6 @@ pub mod ast; pub mod parser; mod parser_error; -pub mod precedence; pub mod span; mod visitor; diff --git a/src/query/ast/src/parser/common.rs b/src/query/ast/src/parser/common.rs index 0349e17b8e22e..8addd71099be8 100644 --- a/src/query/ast/src/parser/common.rs +++ b/src/query/ast/src/parser/common.rs @@ -12,18 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cell::RefCell; +use std::rc::Rc; + pub use nom::branch::alt; pub use nom::branch::permutation; pub use nom::combinator::consumed; pub use nom::combinator::map; pub use nom::combinator::not; pub use nom::combinator::value; -use nom::error::Error as NomError; -use nom::error::ErrorKind as NomErrorKind; pub use nom::multi::many1; use nom::sequence::terminated; +use nom::Offset; use nom::Parser; use nom_rule::rule; +use pratt::PrattError; +use pratt::PrattParser; +use pratt::Precedence; pub fn parser_fn<'a, O, P>(mut parser: P) -> impl FnMut(Input<'a>) -> IResult<'a, O> where P: nom::Parser, Output = O, Error = Error<'a>> { @@ -44,78 +49,11 @@ use crate::parser::query::with_options; use crate::parser::token::*; use crate::parser::Error; use crate::parser::ErrorKind; -pub use crate::precedence::Affix; -pub use crate::precedence::Associativity; -pub use crate::precedence::Precedence; use crate::Range; use crate::Span; pub type IResult<'a, Output> = nom::IResult, Output, Error<'a>>; -pub type ElementsInput<'a, T> = &'a [WithSpan<'a, T>]; -pub type ElementsError<'a, T> = NomError>; -pub type ElementsResult<'a, T, O> = nom::IResult, O, ElementsError<'a, T>>; - -pub fn match_prefix<'a, T>( - affix_fn: impl Fn(&T) -> Affix + Copy, - precedence: Precedence, -) -> impl FnMut(ElementsInput<'a, T>) -> ElementsResult<'a, T, WithSpan<'a, T>> -where - T: Clone, -{ - match_affix( - affix_fn, - move |affix| matches!(affix, Affix::Prefix(p) if p == precedence), - ) -} - -pub fn match_postfix<'a, T>( - affix_fn: impl Fn(&T) -> Affix + Copy, - precedence: Precedence, -) -> impl FnMut(ElementsInput<'a, T>) -> ElementsResult<'a, T, WithSpan<'a, T>> -where - T: Clone, -{ - match_affix( - affix_fn, - move |affix| matches!(affix, Affix::Postfix(p) if p == precedence), - ) -} - -pub fn match_binary<'a, T>( - affix_fn: impl Fn(&T) -> Affix + Copy, - precedence: Precedence, - associativity: Associativity, -) -> impl FnMut(ElementsInput<'a, T>) -> ElementsResult<'a, T, WithSpan<'a, T>> -where - T: Clone, -{ - match_affix( - affix_fn, - move |affix| matches!(affix, Affix::Infix(p, assoc) if p == precedence && assoc == associativity), - ) -} - -pub fn match_nilfix<'a, T>( - affix_fn: impl Fn(&T) -> Affix + Copy, -) -> impl FnMut(ElementsInput<'a, T>) -> ElementsResult<'a, T, WithSpan<'a, T>> -where T: Clone { - match_affix(affix_fn, |affix| matches!(affix, Affix::Nilfix)) -} - -fn match_affix<'a, T>( - affix_fn: impl Fn(&T) -> Affix + Copy, - predicate: impl Fn(Affix) -> bool + Copy, -) -> impl FnMut(ElementsInput<'a, T>) -> ElementsResult<'a, T, WithSpan<'a, T>> -where - T: Clone, -{ - move |input| match input.split_first() { - Some((elem, rest)) if predicate(affix_fn(&elem.elem)) => Ok((rest, elem.clone())), - _ => Err(nom::Err::Error(NomError::new(input, NomErrorKind::Tag))), - } -} - pub fn match_text(text: &'static str) -> impl FnMut(Input) -> IResult<&Token> { move |i| match i.tokens.first().filter(|token| token.text() == text) { Some(token) => Ok((i.slice(1..), token)), @@ -667,6 +605,104 @@ pub fn transform_span(tokens: &[Token]) -> Span { }) } +pub(crate) trait IterProvider<'a> { + type Item; + type Iter: Iterator + ExactSizeIterator; + + fn create_iter(self, span: Rc>>>) -> Self::Iter; +} + +impl<'a, T> IterProvider<'a> for Vec> +where T: Clone +{ + type Item = WithSpan<'a, T>; + type Iter = ErrorSpan<'a, T, std::vec::IntoIter>>; + + fn create_iter(self, span: Rc>>>) -> Self::Iter { + ErrorSpan::new(self.into_iter(), span) + } +} + +pub(crate) struct ErrorSpan<'a, T, I: Iterator>> { + iter: I, + span: Rc>>>, +} + +impl<'a, T, I: Iterator>> ErrorSpan<'a, T, I> { + fn new(iter: I, span: Rc>>>) -> Self { + Self { iter, span } + } +} + +impl<'a, T, I: Iterator>> Iterator for ErrorSpan<'a, T, I> { + type Item = WithSpan<'a, T>; + + fn next(&mut self) -> Option { + self.iter + .next() + .inspect(|item| *self.span.borrow_mut() = Some(item.span)) + } +} + +impl<'a, T, I: Iterator>> ExactSizeIterator for ErrorSpan<'a, T, I> {} + +pub fn run_pratt_parser<'a, I, P, E, T>( + mut parser: P, + parsers: T, + rest: Input<'a>, + input: Input<'a>, +) -> IResult<'a, P::Output> +where + E: std::fmt::Debug, + P: PrattParser, Error = &'static str>, + I: Iterator + ExactSizeIterator, + T: IterProvider<'a, Item = P::Input, Iter = I>, +{ + let span = Rc::new(RefCell::new(None)); + let mut iter = parsers.create_iter(span.clone()).peekable(); + let expr = parser + .parse_input(&mut iter, Precedence(0)) + .map_err(|err| { + // Rollback parsing footprint on unused expr elements. + input.backtrace.clear(); + + let err_kind = match err { + PrattError::EmptyInput => ErrorKind::Other("expecting an operand"), + PrattError::UnexpectedNilfix(i) => { + *span.borrow_mut() = Some(i.span); + ErrorKind::Other("unable to parse the element") + } + PrattError::UnexpectedPrefix(i) => { + *span.borrow_mut() = Some(i.span); + ErrorKind::Other("unable to parse the prefix operator") + } + PrattError::UnexpectedInfix(i) => { + *span.borrow_mut() = Some(i.span); + ErrorKind::Other("missing lhs or rhs for the binary operator") + } + PrattError::UnexpectedPostfix(i) => { + *span.borrow_mut() = Some(i.span); + ErrorKind::Other("unable to parse the postfix operator") + } + PrattError::UserError(err) => ErrorKind::Other(err), + }; + + let span = span + .take() + // It's safe to slice one more token because input must contain EOI. + .unwrap_or_else(|| rest.slice(..1)); + + nom::Err::Error(Error::from_error_kind(span, err_kind)) + })?; + if let Some(elem) = iter.peek() { + // Rollback parsing footprint on unused expr elements. + input.backtrace.clear(); + Ok((input.slice(input.offset(&elem.span)..), expr)) + } else { + Ok((rest, expr)) + } +} + pub fn check_template_mode<'a, O, F>(mut parser: F) -> impl FnMut(Input<'a>) -> IResult<'a, O> where F: nom::Parser, Output = O, Error = Error<'a>> { move |input: Input| { diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index e28577bb2eafa..332dab3ef3591 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -12,22 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::cmp::Reverse; - use ethnum::i256; use itertools::Itertools; use nom::combinator::consumed; use nom::error::context; -use nom::error::Error as NomError; -use nom::error::ErrorKind as NomErrorKind; -use nom::Offset; use nom::Parser; -use nom_language::precedence::binary_op as precedence_binary_op; -use nom_language::precedence::precedence; -use nom_language::precedence::unary_op as precedence_unary_op; -use nom_language::precedence::Assoc as NomAssoc; -use nom_language::precedence::Operation; use nom_rule::rule; +use pratt::Affix; +use pratt::Associativity; +use pratt::PrattParser; +use pratt::Precedence; use crate::ast::quote::AtString; use crate::ast::*; @@ -130,171 +124,7 @@ pub fn subexpr(min_precedence: u32) -> impl FnMut(Input) -> IResult { } } - parse_expr_elements(expr_elements, rest, i) - } -} - -type ExprElementsInput<'a> = ElementsInput<'a, ExprElement>; -type ExprElementsError<'a> = ElementsError<'a, ExprElement>; -type ExprElementsResult<'a, O> = ElementsResult<'a, ExprElement, O>; - -fn expr_prefix_parser<'a>() -> impl nom::Parser< - ExprElementsInput<'a>, - Output = nom_language::precedence::Unary, Reverse>, - Error = ExprElementsError<'a>, -> { - alt(( - precedence_unary_op( - Reverse(Precedence(NOT_PREC)), - match_prefix(ExprElement::affix, Precedence(NOT_PREC)), - ), - precedence_unary_op( - Reverse(Precedence(50)), - match_prefix(ExprElement::affix, Precedence(50)), - ), - precedence_unary_op( - Reverse(Precedence(60)), - match_prefix(ExprElement::affix, Precedence(60)), - ), - )) -} - -fn expr_postfix_parser<'a>() -> impl nom::Parser< - ExprElementsInput<'a>, - Output = nom_language::precedence::Unary, Reverse>, - Error = ExprElementsError<'a>, -> { - alt(( - precedence_unary_op( - Reverse(Precedence(61)), - match_postfix(ExprElement::affix, Precedence(61)), - ), - precedence_unary_op( - Reverse(Precedence(60)), - match_postfix(ExprElement::affix, Precedence(60)), - ), - precedence_unary_op( - Reverse(Precedence(BETWEEN_PREC)), - match_postfix(ExprElement::affix, Precedence(BETWEEN_PREC)), - ), - precedence_unary_op( - Reverse(Precedence(17)), - match_postfix(ExprElement::affix, Precedence(17)), - ), - )) -} - -fn expr_binary_parser<'a>() -> impl nom::Parser< - ExprElementsInput<'a>, - Output = nom_language::precedence::Binary, Reverse>, - Error = ExprElementsError<'a>, -> { - alt(( - precedence_binary_op( - Reverse(Precedence(5)), - NomAssoc::Left, - match_binary(ExprElement::affix, Precedence(5), Associativity::Left), - ), - precedence_binary_op( - Reverse(Precedence(10)), - NomAssoc::Left, - match_binary(ExprElement::affix, Precedence(10), Associativity::Left), - ), - precedence_binary_op( - Reverse(Precedence(20)), - NomAssoc::Left, - match_binary(ExprElement::affix, Precedence(20), Associativity::Left), - ), - precedence_binary_op( - Reverse(Precedence(22)), - NomAssoc::Left, - match_binary(ExprElement::affix, Precedence(22), Associativity::Left), - ), - precedence_binary_op( - Reverse(Precedence(23)), - NomAssoc::Left, - match_binary(ExprElement::affix, Precedence(23), Associativity::Left), - ), - precedence_binary_op( - Reverse(Precedence(24)), - NomAssoc::Left, - match_binary(ExprElement::affix, Precedence(24), Associativity::Left), - ), - precedence_binary_op( - Reverse(Precedence(30)), - NomAssoc::Left, - match_binary(ExprElement::affix, Precedence(30), Associativity::Left), - ), - precedence_binary_op( - Reverse(Precedence(40)), - NomAssoc::Left, - match_binary(ExprElement::affix, Precedence(40), Associativity::Left), - ), - precedence_binary_op( - Reverse(Precedence(40)), - NomAssoc::Right, - match_binary(ExprElement::affix, Precedence(40), Associativity::Right), - ), - )) -} - -fn expr_operand_parser<'a>(input: ExprElementsInput<'a>) -> ExprElementsResult<'a, Expr> { - match_nilfix(ExprElement::affix)(input).and_then(|(rest, elem)| match expr_primary(elem) { - Ok(expr) => Ok((rest, expr)), - Err(_) => Err(nom::Err::Failure(NomError::new( - input, - NomErrorKind::Verify, - ))), - }) -} - -fn parse_expr_elements<'a>( - expr_elements: Vec>, - rest: Input<'a>, - input: Input<'a>, -) -> IResult<'a, Expr> { - let mut parser = precedence( - expr_prefix_parser(), - expr_postfix_parser(), - expr_binary_parser(), - expr_operand_parser, - expr_fold, - ); - - match parser(expr_elements.as_slice()) { - Ok((remaining, expr)) => { - if remaining.is_empty() { - Ok((rest, expr)) - } else { - input.backtrace.clear(); - let total = input.offset(&rest); - let unused = remaining.iter().map(|e| e.span.tokens.len()).sum::(); - let consumed = total.saturating_sub(unused); - Ok((input.slice(consumed..), expr)) - } - } - Err(_) => { - input.backtrace.clear(); - Err(nom::Err::Error(Error::from_error_kind( - rest, - ErrorKind::Other("unable to parse the expression"), - ))) - } - } -} - -fn expr_fold<'a>( - operation: Operation< - WithSpan<'a, ExprElement>, - WithSpan<'a, ExprElement>, - WithSpan<'a, ExprElement>, - Expr, - >, -) -> Result { - match operation { - Operation::Prefix(op, rhs) => expr_prefix(op, rhs), - Operation::Postfix(lhs, op) => expr_postfix(lhs, op), - Operation::Binary(lhs, op, rhs) => expr_infix(lhs, op, rhs), + run_pratt_parser(ExprParser, expr_elements, rest, i) } } @@ -647,409 +477,6 @@ impl ExprElement { } } -fn expr_primary(elem: WithSpan<'_, ExprElement>) -> Result { - let expr = match elem.elem { - ExprElement::ColumnRef { column } => Expr::ColumnRef { - span: transform_span(elem.span.tokens), - column, - }, - ExprElement::Cast { expr, target_type } => Expr::Cast { - span: transform_span(elem.span.tokens), - expr, - target_type, - pg_style: false, - }, - ExprElement::TryCast { expr, target_type } => Expr::TryCast { - span: transform_span(elem.span.tokens), - expr, - target_type, - }, - ExprElement::Extract { field, expr } => Expr::Extract { - span: transform_span(elem.span.tokens), - kind: field, - expr, - }, - ExprElement::DatePart { field, expr } => Expr::DatePart { - span: transform_span(elem.span.tokens), - kind: field, - expr, - }, - ExprElement::Position { - substr_expr, - str_expr, - } => Expr::Position { - span: transform_span(elem.span.tokens), - substr_expr, - str_expr, - }, - ExprElement::SubString { - expr, - substring_from, - substring_for, - } => Expr::Substring { - span: transform_span(elem.span.tokens), - expr, - substring_from, - substring_for, - }, - ExprElement::Trim { expr, trim_where } => Expr::Trim { - span: transform_span(elem.span.tokens), - expr, - trim_where, - }, - ExprElement::Literal { value } => Expr::Literal { - span: transform_span(elem.span.tokens), - value, - }, - ExprElement::CountAll { qualified, window } => Expr::CountAll { - span: transform_span(elem.span.tokens), - qualified, - window, - }, - ExprElement::Tuple { exprs } => Expr::Tuple { - span: transform_span(elem.span.tokens), - exprs, - }, - ExprElement::FunctionCall { func } => Expr::FunctionCall { - span: transform_span(elem.span.tokens), - func, - }, - ExprElement::Case { - operand, - conditions, - results, - else_result, - } => Expr::Case { - span: transform_span(elem.span.tokens), - operand, - conditions, - results, - else_result, - }, - ExprElement::Exists { subquery, not } => Expr::Exists { - span: transform_span(elem.span.tokens), - not, - subquery: Box::new(subquery), - }, - ExprElement::Subquery { subquery, modifier } => Expr::Subquery { - span: transform_span(elem.span.tokens), - modifier, - subquery: Box::new(subquery), - }, - ExprElement::Group(expr) => expr, - ExprElement::Array { exprs } => Expr::Array { - span: transform_span(elem.span.tokens), - exprs, - }, - ExprElement::ListComprehension { - source, - param, - filter, - result, - } => { - let span = transform_span(elem.span.tokens); - let mut source = source; - - if let Some(filter) = filter { - source = Expr::FunctionCall { - span, - func: FunctionCall { - distinct: false, - name: Identifier::from_name( - transform_span(elem.span.tokens), - "array_filter", - ), - args: vec![source], - params: vec![], - order_by: vec![], - window: None, - lambda: Some(Lambda { - params: vec![param.clone()], - expr: Box::new(filter), - }), - }, - }; - } - Expr::FunctionCall { - span, - func: FunctionCall { - distinct: false, - name: Identifier::from_name(transform_span(elem.span.tokens), "array_map"), - args: vec![source], - params: vec![], - order_by: vec![], - window: None, - lambda: Some(Lambda { - params: vec![param.clone()], - expr: Box::new(result), - }), - }, - } - } - ExprElement::Map { kvs } => Expr::Map { - span: transform_span(elem.span.tokens), - kvs, - }, - ExprElement::Interval { expr, unit } => Expr::Interval { - span: transform_span(elem.span.tokens), - expr: Box::new(expr), - unit, - }, - ExprElement::DateAdd { - unit, - interval, - date, - } => Expr::DateAdd { - span: transform_span(elem.span.tokens), - unit, - interval: Box::new(interval), - date: Box::new(date), - }, - ExprElement::DateDiff { - unit, - date_start, - date_end, - } => Expr::DateDiff { - span: transform_span(elem.span.tokens), - unit, - date_start: Box::new(date_start), - date_end: Box::new(date_end), - }, - ExprElement::DateBetween { - unit, - date_start, - date_end, - } => Expr::DateBetween { - span: transform_span(elem.span.tokens), - unit, - date_start: Box::new(date_start), - date_end: Box::new(date_end), - }, - ExprElement::DateSub { - unit, - interval, - date, - } => Expr::DateSub { - span: transform_span(elem.span.tokens), - unit, - interval: Box::new(interval), - date: Box::new(date), - }, - ExprElement::DateTrunc { unit, date } => Expr::DateTrunc { - span: transform_span(elem.span.tokens), - unit, - date: Box::new(date), - }, - ExprElement::TimeSlice { - unit, - date, - slice_length, - start_or_end, - } => Expr::TimeSlice { - span: transform_span(elem.span.tokens), - unit, - date: Box::new(date), - slice_length, - start_or_end: start_or_end.unwrap_or("start".to_string()), - }, - ExprElement::LastDay { unit, date } => Expr::LastDay { - span: transform_span(elem.span.tokens), - unit, - date: Box::new(date), - }, - ExprElement::PreviousDay { unit, date } => Expr::PreviousDay { - span: transform_span(elem.span.tokens), - unit, - date: Box::new(date), - }, - ExprElement::NextDay { unit, date } => Expr::NextDay { - span: transform_span(elem.span.tokens), - unit, - date: Box::new(date), - }, - ExprElement::Hole { name } => Expr::Hole { - span: transform_span(elem.span.tokens), - name, - }, - ExprElement::Placeholder => Expr::Placeholder { - span: transform_span(elem.span.tokens), - }, - ExprElement::VariableAccess(name) => { - let span = transform_span(elem.span.tokens); - make_func_get_variable(span, name) - } - ExprElement::StageLocation { location } => Expr::StageLocation { - span: transform_span(elem.span.tokens), - location, - }, - _ => unreachable!(), - }; - Ok(expr) -} - -fn expr_infix(lhs: Expr, elem: WithSpan<'_, ExprElement>, rhs: Expr) -> Result { - let expr = match elem.elem { - ExprElement::BinaryOp { op } => Expr::BinaryOp { - span: transform_span(elem.span.tokens), - left: Box::new(lhs), - right: Box::new(rhs), - op, - }, - ExprElement::IsDistinctFrom { not } => Expr::IsDistinctFrom { - span: transform_span(elem.span.tokens), - left: Box::new(lhs), - right: Box::new(rhs), - not, - }, - ExprElement::JsonOp { op } => Expr::JsonOp { - span: transform_span(elem.span.tokens), - left: Box::new(lhs), - right: Box::new(rhs), - op, - }, - _ => unreachable!(), - }; - Ok(expr) -} - -fn expr_prefix(elem: WithSpan<'_, ExprElement>, rhs: Expr) -> Result { - let expr = match elem.elem { - ExprElement::UnaryOp { op } => Expr::UnaryOp { - span: transform_span(elem.span.tokens), - op, - expr: Box::new(rhs), - }, - _ => unreachable!(), - }; - Ok(expr) -} - -fn expr_postfix(mut lhs: Expr, elem: WithSpan<'_, ExprElement>) -> Result { - let expr = match elem.elem { - ExprElement::MapAccess { accessor } => Expr::MapAccess { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - accessor, - }, - ExprElement::DotAccess { key } => { - if let Expr::ColumnRef { column, .. } = &mut lhs { - if let ColumnID::Name(name) = &column.column { - column.database = column.table.take(); - column.table = Some(name.clone()); - column.column = key.clone(); - return Ok(lhs); - } - } - match key { - ColumnID::Name(id) => Expr::MapAccess { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - accessor: MapAccessor::Colon { key: id }, - }, - _ => return Err("dot access position must be after ident"), - } - } - ExprElement::ChainFunctionCall { name, args, lambda } => Expr::FunctionCall { - span: transform_span(elem.span.tokens), - func: FunctionCall { - distinct: false, - name, - args: [vec![lhs], args].concat(), - params: vec![], - order_by: vec![], - window: None, - lambda, - }, - }, - ExprElement::IsNull { not } => Expr::IsNull { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - not, - }, - ExprElement::InList { list, not } => Expr::InList { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - list, - not, - }, - ExprElement::InSubquery { subquery, not } => Expr::InSubquery { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - subquery, - not, - }, - ExprElement::LikeSubquery { - subquery, - modifier, - escape, - } => Expr::LikeSubquery { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - subquery, - modifier, - escape, - }, - ExprElement::Escape { escape } => match lhs { - Expr::BinaryOp { - span, - op: BinaryOperator::Like(_), - left, - right, - } => Expr::LikeWithEscape { - span, - left, - right, - is_not: false, - escape, - }, - Expr::BinaryOp { - span, - op: BinaryOperator::NotLike(_), - left, - right, - } => Expr::LikeWithEscape { - span, - left, - right, - is_not: true, - escape, - }, - Expr::BinaryOp { - span, - op: BinaryOperator::LikeAny(_), - left, - right, - } => Expr::LikeAnyWithEscape { - span, - left, - right, - escape, - }, - _ => return Err("escape clause must be after LIKE/NOT LIKE/LIKE ANY binary expr"), - }, - ExprElement::Between { low, high, not } => Expr::Between { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - low, - high, - not, - }, - ExprElement::PgCast { target_type } => Expr::Cast { - span: transform_span(elem.span.tokens), - expr: Box::new(lhs), - target_type, - pg_style: true, - }, - ExprElement::UnaryOp { op } => Expr::UnaryOp { - span: transform_span(elem.span.tokens), - op, - expr: Box::new(lhs), - }, - _ => unreachable!(), - }; - Ok(expr) -} - impl Expr { pub fn affix(&self) -> Affix { match self { @@ -1101,6 +528,437 @@ impl Expr { } } } + +struct ExprParser; + +impl<'a, I: Iterator>> PrattParser for ExprParser { + type Error = &'static str; + type Input = WithSpan<'a, ExprElement>; + type Output = Expr; + + fn query(&mut self, elem: &WithSpan) -> Result { + Ok(elem.elem.affix()) + } + + fn primary(&mut self, elem: WithSpan<'a, ExprElement>) -> Result { + let expr = match elem.elem { + ExprElement::ColumnRef { column } => Expr::ColumnRef { + span: transform_span(elem.span.tokens), + column, + }, + ExprElement::Cast { expr, target_type } => Expr::Cast { + span: transform_span(elem.span.tokens), + expr, + target_type, + pg_style: false, + }, + ExprElement::TryCast { expr, target_type } => Expr::TryCast { + span: transform_span(elem.span.tokens), + expr, + target_type, + }, + ExprElement::Extract { field, expr } => Expr::Extract { + span: transform_span(elem.span.tokens), + kind: field, + expr, + }, + ExprElement::DatePart { field, expr } => Expr::DatePart { + span: transform_span(elem.span.tokens), + kind: field, + expr, + }, + ExprElement::Position { + substr_expr, + str_expr, + } => Expr::Position { + span: transform_span(elem.span.tokens), + substr_expr, + str_expr, + }, + ExprElement::SubString { + expr, + substring_from, + substring_for, + } => Expr::Substring { + span: transform_span(elem.span.tokens), + expr, + substring_from, + substring_for, + }, + ExprElement::Trim { expr, trim_where } => Expr::Trim { + span: transform_span(elem.span.tokens), + expr, + trim_where, + }, + ExprElement::Literal { value } => Expr::Literal { + span: transform_span(elem.span.tokens), + value, + }, + ExprElement::CountAll { qualified, window } => Expr::CountAll { + span: transform_span(elem.span.tokens), + qualified, + window, + }, + ExprElement::Tuple { exprs } => Expr::Tuple { + span: transform_span(elem.span.tokens), + exprs, + }, + ExprElement::FunctionCall { func } => Expr::FunctionCall { + span: transform_span(elem.span.tokens), + func, + }, + ExprElement::Case { + operand, + conditions, + results, + else_result, + } => Expr::Case { + span: transform_span(elem.span.tokens), + operand, + conditions, + results, + else_result, + }, + ExprElement::Exists { subquery, not } => Expr::Exists { + span: transform_span(elem.span.tokens), + not, + subquery: Box::new(subquery), + }, + ExprElement::Subquery { subquery, modifier } => Expr::Subquery { + span: transform_span(elem.span.tokens), + modifier, + subquery: Box::new(subquery), + }, + ExprElement::Group(expr) => expr, + ExprElement::Array { exprs } => Expr::Array { + span: transform_span(elem.span.tokens), + exprs, + }, + ExprElement::ListComprehension { + source, + param, + filter, + result, + } => { + let span = transform_span(elem.span.tokens); + let mut source = source; + + // array_filter(source, filter) + if let Some(filter) = filter { + source = Expr::FunctionCall { + span, + func: FunctionCall { + distinct: false, + name: Identifier::from_name( + transform_span(elem.span.tokens), + "array_filter", + ), + args: vec![source], + params: vec![], + order_by: vec![], + window: None, + lambda: Some(Lambda { + params: vec![param.clone()], + expr: Box::new(filter), + }), + }, + }; + } + // array_map(source, result) + Expr::FunctionCall { + span, + func: FunctionCall { + distinct: false, + name: Identifier::from_name(transform_span(elem.span.tokens), "array_map"), + args: vec![source], + params: vec![], + order_by: vec![], + window: None, + lambda: Some(Lambda { + params: vec![param.clone()], + expr: Box::new(result), + }), + }, + } + } + ExprElement::Map { kvs } => Expr::Map { + span: transform_span(elem.span.tokens), + kvs, + }, + ExprElement::Interval { expr, unit } => Expr::Interval { + span: transform_span(elem.span.tokens), + expr: Box::new(expr), + unit, + }, + ExprElement::DateAdd { + unit, + interval, + date, + } => Expr::DateAdd { + span: transform_span(elem.span.tokens), + unit, + interval: Box::new(interval), + date: Box::new(date), + }, + ExprElement::DateDiff { + unit, + date_start, + date_end, + } => Expr::DateDiff { + span: transform_span(elem.span.tokens), + unit, + date_start: Box::new(date_start), + date_end: Box::new(date_end), + }, + ExprElement::DateBetween { + unit, + date_start, + date_end, + } => Expr::DateBetween { + span: transform_span(elem.span.tokens), + unit, + date_start: Box::new(date_start), + date_end: Box::new(date_end), + }, + ExprElement::DateSub { + unit, + interval, + date, + } => Expr::DateSub { + span: transform_span(elem.span.tokens), + unit, + interval: Box::new(interval), + date: Box::new(date), + }, + ExprElement::DateTrunc { unit, date } => Expr::DateTrunc { + span: transform_span(elem.span.tokens), + unit, + date: Box::new(date), + }, + ExprElement::TimeSlice { + unit, + date, + slice_length, + start_or_end, + } => Expr::TimeSlice { + span: transform_span(elem.span.tokens), + unit, + date: Box::new(date), + slice_length, + start_or_end: start_or_end.unwrap_or("start".to_string()), + }, + ExprElement::LastDay { unit, date } => Expr::LastDay { + span: transform_span(elem.span.tokens), + unit, + date: Box::new(date), + }, + ExprElement::PreviousDay { unit, date } => Expr::PreviousDay { + span: transform_span(elem.span.tokens), + unit, + date: Box::new(date), + }, + ExprElement::NextDay { unit, date } => Expr::NextDay { + span: transform_span(elem.span.tokens), + unit, + date: Box::new(date), + }, + ExprElement::Hole { name } => Expr::Hole { + span: transform_span(elem.span.tokens), + name, + }, + ExprElement::Placeholder => Expr::Placeholder { + span: transform_span(elem.span.tokens), + }, + ExprElement::VariableAccess(name) => { + let span = transform_span(elem.span.tokens); + make_func_get_variable(span, name) + } + ExprElement::StageLocation { location } => Expr::StageLocation { + span: transform_span(elem.span.tokens), + location, + }, + _ => unreachable!(), + }; + Ok(expr) + } + + fn infix( + &mut self, + lhs: Expr, + elem: WithSpan<'a, ExprElement>, + rhs: Expr, + ) -> Result { + let expr = match elem.elem { + ExprElement::BinaryOp { op } => Expr::BinaryOp { + span: transform_span(elem.span.tokens), + left: Box::new(lhs), + right: Box::new(rhs), + op, + }, + ExprElement::IsDistinctFrom { not } => Expr::IsDistinctFrom { + span: transform_span(elem.span.tokens), + left: Box::new(lhs), + right: Box::new(rhs), + not, + }, + ExprElement::JsonOp { op } => Expr::JsonOp { + span: transform_span(elem.span.tokens), + left: Box::new(lhs), + right: Box::new(rhs), + op, + }, + _ => unreachable!(), + }; + Ok(expr) + } + + fn prefix(&mut self, elem: WithSpan<'a, ExprElement>, rhs: Expr) -> Result { + let expr = match elem.elem { + ExprElement::UnaryOp { op } => Expr::UnaryOp { + span: transform_span(elem.span.tokens), + op, + expr: Box::new(rhs), + }, + _ => unreachable!(), + }; + Ok(expr) + } + + fn postfix( + &mut self, + mut lhs: Expr, + elem: WithSpan<'a, ExprElement>, + ) -> Result { + let expr = match elem.elem { + ExprElement::MapAccess { accessor } => Expr::MapAccess { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + accessor, + }, + ExprElement::DotAccess { key } => { + // `database.table.column` is parsed into [database] [.table] [.column], + // so we need to transform it into the right `ColumnRef` form. + if let Expr::ColumnRef { column, .. } = &mut lhs { + if let ColumnID::Name(name) = &column.column { + column.database = column.table.take(); + column.table = Some(name.clone()); + column.column = key.clone(); + return Ok(lhs); + } + } + + match key { + ColumnID::Name(id) => Expr::MapAccess { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + accessor: MapAccessor::Colon { key: id }, + }, + _ => { + return Err("dot access position must be after ident"); + } + } + } + ExprElement::ChainFunctionCall { name, args, lambda } => Expr::FunctionCall { + span: transform_span(elem.span.tokens), + func: FunctionCall { + distinct: false, + name, + args: [vec![lhs], args].concat(), + params: vec![], + order_by: vec![], + window: None, + lambda, + }, + }, + ExprElement::IsNull { not } => Expr::IsNull { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + not, + }, + ExprElement::InList { list, not } => Expr::InList { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + list, + not, + }, + ExprElement::InSubquery { subquery, not } => Expr::InSubquery { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + subquery, + not, + }, + ExprElement::LikeSubquery { + subquery, + modifier, + escape, + } => Expr::LikeSubquery { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + subquery, + modifier, + escape, + }, + ExprElement::Escape { escape } => match lhs { + Expr::BinaryOp { + span, + op: BinaryOperator::Like(_), + left, + right, + } => Expr::LikeWithEscape { + span, + left, + right, + is_not: false, + escape, + }, + Expr::BinaryOp { + span, + op: BinaryOperator::NotLike(_), + left, + right, + } => Expr::LikeWithEscape { + span, + left, + right, + is_not: true, + escape, + }, + Expr::BinaryOp { + span, + op: BinaryOperator::LikeAny(_), + left, + right, + } => Expr::LikeAnyWithEscape { + span, + left, + right, + escape, + }, + _ => return Err("escape clause must be after LIKE/NOT LIKE/LIKE ANY binary expr"), + }, + ExprElement::Between { low, high, not } => Expr::Between { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + low, + high, + not, + }, + ExprElement::PgCast { target_type } => Expr::Cast { + span: transform_span(elem.span.tokens), + expr: Box::new(lhs), + target_type, + pg_style: true, + }, + ExprElement::UnaryOp { op } => Expr::UnaryOp { + span: transform_span(elem.span.tokens), + op, + expr: Box::new(lhs), + }, + _ => unreachable!(), + }; + Ok(expr) + } +} #[allow(unreachable_code)] pub fn expr_element(i: Input) -> IResult> { let column_ref = map(column_id, |column| ExprElement::ColumnRef { @@ -1692,137 +1550,141 @@ pub fn expr_element(i: Input) -> IResult> { }; } - macro_rules! try_token { - ($token_0:expr, $($pat:pat => $body:expr),+ $(,)?) => {{ - match $token_0.kind { - $( - $pat => Some($body), - )+ - _ => None, - } - }}; - } - - if let Some(token_0) = i.tokens.first() { - use TokenKind::*; + macro_rules! try_dispatch { + ($($pat:pat => $body:expr),+ $(,)?) => {{ + if let Some(token_0) = i.tokens.first() { + use TokenKind::*; - macro_rules! try_dispatch { - ($($pat:pat => $body:expr),+ $(,)?) => {{ - if let Some(result) = try_token!(token_0, $($pat => $body),+) { - if matches!(&result, Ok(_) | Err(nom::Err::Failure(_))) { + if let Some(result) = match token_0.kind { + $($pat => Some($body),)+ + _ => None, + } { + if result.is_ok() { return result; } } - }}; - } + } + }}; + } - try_dispatch!( - IS => with_span!(rule!(#is_null | #is_distinct_from)).parse(i), - NOT => with_span!(rule!( - #in_list - | #in_subquery - | #exists - | #between - | #binary_op - | #unary_op - )) - .parse(i), - IN => with_span!(rule!(#in_list | #in_subquery)).parse(i), - LIKE => with_span!(rule!(#like_subquery | #binary_op)).parse(i), - EXISTS => with_span!(exists).parse(i), - BETWEEN => with_span!(between).parse(i), - CAST | TRY_CAST => with_span!(cast).parse(i), - DoubleColon => with_span!(pg_cast).parse(i), - POSITION => with_span!(position).parse(i), - IdentVariable => with_span!(variable_access).parse(i), - ESCAPE => with_span!(escape).parse(i), - COUNT => with_span!(rule!{ #count_all_with_window | #function_call}).parse(i), - SUBSTRING | SUBSTR => with_span!(substring).parse(i), - TRIM => with_span!(trim_from).parse(i), - CASE => with_span!(case).parse(i), - LParen => with_span!(rule!(#tuple | #subquery)).parse(i), - ANY | SOME | ALL => with_span!(subquery).parse(i), - Dot => { - return with_span!(rule!(#chain_function_call | #dot_access | #map_access)).parse(i); - }, - Colon => { - return with_span!(map_access).parse(i); - }, - LBracket => { - return with_span!(rule!(#list_comprehensions | #map_access | #array)).parse(i); - }, - LBrace => with_span!(map_expr).parse(i), - LiteralAtString => with_span!(stage_location).parse(i), - DATEADD | DATE_ADD => with_span!(date_add).parse(i), - DATE_DIFF | DATEDIFF => with_span!(date_diff).parse(i), - DATESUB | DATE_SUB => with_span!(date_sub).parse(i), - DATEBETWEEN | DATE_BETWEEN => with_span!(date_between).parse(i), - DATE_TRUNC => with_span!(date_trunc).parse(i), - TIME_SLICE => with_span!(time_slice).parse(i), - TRUNC => with_span!(trunc).parse(i), - LAST_DAY => with_span!(last_day).parse(i), - PREVIOUS_DAY => with_span!(previous_day).parse(i), - NEXT_DAY => with_span!(next_day).parse(i), - DATE => with_span!(date_expr).parse(i), - TIMESTAMP => with_span!(timestamp_expr).parse(i), - TIMESTAMP_TZ => with_span!(timestamp_tz_expr).parse(i), - INVERTED => with_span!(inverted_expr).parse(i), - INTERVAL => with_span!(interval).parse(i), - DATE_PART | DATEPART => with_span!(date_part).parse(i), - EXTRACT => with_span!(extract).parse(i), - CURRENT_DATE => with_span!(rule!{ #function_call | #current_date }).parse(i), - CURRENT_TIME => with_span!(rule!{ #function_call | #current_time }).parse(i), - CURRENT_TIMESTAMP => with_span!(rule!{ #function_call | #current_timestamp }).parse(i), - Plus - | Minus - | Multiply - | Divide - | IntDiv - | DIV - | Modulo - | StringConcat - | Spaceship - | L1DISTANCE - | L2DISTANCE - | Gt - | Lt - | Gte - | Lte - | Eq - | NotEq - | Caret - | AND - | OR - | XOR - | REGEXP - | RLIKE - | BitWiseOr - | BitWiseAnd - | BitWiseXor - | ShiftLeft - | ShiftRight - | SOUNDS => with_span!(rule!{ #binary_op | #unary_op }).parse(i), - RArrow - | LongRArrow - | HashRArrow - | HashLongRArrow - | Placeholder - | QuestionOr - | QuestionAnd - | AtArrow - | ArrowAt - | AtQuestion - | AtAt - | HashMinus => with_span!(json_op).parse(i), - Factorial | SquareRoot | BitWiseNot | CubeRoot | Abs => with_span!(unary_op).parse(i), - ); + try_dispatch!( + IS => with_span!(rule!(#is_null | #is_distinct_from)).parse(i), + NOT => with_span!(rule!( + #in_list + | #in_subquery + | #exists + | #between + | #binary_op + | #unary_op + )) + .parse(i), + IN => with_span!(rule!(#in_list | #in_subquery)).parse(i), + LIKE => with_span!(rule!(#like_subquery | #binary_op)).parse(i), + EXISTS => with_span!(exists).parse(i), + BETWEEN => with_span!(between).parse(i), + CAST | TRY_CAST => with_span!(cast).parse(i), + DoubleColon => with_span!(pg_cast).parse(i), + POSITION => with_span!(position).parse(i), + IDENTIFIER => { + return with_span!(column_ref).parse(i); + }, + IdentVariable => with_span!(variable_access).parse(i), + ESCAPE => with_span!(escape).parse(i), + COUNT => with_span!(rule!{ #count_all_with_window | #function_call}).parse(i), + SUBSTRING | SUBSTR => with_span!(substring).parse(i), + TRIM => with_span!(trim_from).parse(i), + CASE => with_span!(case).parse(i), + LParen => with_span!(rule!(#tuple | #subquery)).parse(i), + ANY | SOME | ALL => with_span!(subquery).parse(i), + Dot => { + return with_span!(rule!(#chain_function_call | #dot_access | #map_access)).parse(i); + }, + Colon => { + return with_span!(map_access).parse(i); + }, + LBracket => { + return with_span!(rule!(#list_comprehensions | #map_access | #array)).parse(i); + }, + LBrace => with_span!(map_expr).parse(i), + LiteralAtString => with_span!(stage_location).parse(i), + DATEADD | DATE_ADD => with_span!(date_add).parse(i), + DATE_DIFF | DATEDIFF => with_span!(date_diff).parse(i), + DATESUB | DATE_SUB => with_span!(date_sub).parse(i), + DATEBETWEEN | DATE_BETWEEN => with_span!(date_between).parse(i), + DATE_TRUNC => with_span!(date_trunc).parse(i), + TIME_SLICE => with_span!(time_slice).parse(i), + TRUNC => with_span!(trunc).parse(i), + LAST_DAY => with_span!(last_day).parse(i), + PREVIOUS_DAY => with_span!(previous_day).parse(i), + NEXT_DAY => with_span!(next_day).parse(i), + DATE => with_span!(date_expr).parse(i), + TIMESTAMP => with_span!(timestamp_expr).parse(i), + TIMESTAMP_TZ => with_span!(timestamp_tz_expr).parse(i), + INVERTED => with_span!(inverted_expr).parse(i), + INTERVAL => with_span!(interval).parse(i), + DATE_PART | DATEPART => with_span!(date_part).parse(i), + EXTRACT => with_span!(extract).parse(i), + CURRENT_DATE => with_span!(rule!{ #function_call | #current_date }).parse(i), + CURRENT_TIME => with_span!(rule!{ #function_call | #current_time }).parse(i), + CURRENT_TIMESTAMP => with_span!(rule!{ #function_call | #current_timestamp }).parse(i), + Plus + | Minus + | Multiply + | Divide + | IntDiv + | DIV + | Modulo + | StringConcat + | Spaceship + | L1DISTANCE + | L2DISTANCE + | Gt + | Lt + | Gte + | Lte + | Eq + | NotEq + | Caret + | AND + | OR + | XOR + | REGEXP + | RLIKE + | BitWiseOr + | BitWiseAnd + | BitWiseXor + | ShiftLeft + | ShiftRight + | SOUNDS => with_span!(rule!{ #binary_op | #unary_op }).parse(i), + RArrow + | LongRArrow + | HashRArrow + | HashLongRArrow + | Placeholder + | QuestionOr + | QuestionAnd + | AtArrow + | ArrowAt + | AtQuestion + | AtAt + | HashMinus => with_span!(json_op).parse(i), + Factorial | SquareRoot | BitWiseNot | CubeRoot | Abs => with_span!(unary_op).parse(i), + ); + + // The try-parse operation in the function call is very expensive, easy to stack overflow + // so we manually check here whether the second token exists in LParen to avoid entering the loop + if i.tokens + .get(1) + .map(|token| token.kind == LParen) + .unwrap_or(false) + { + return with_span!(function_call).parse(i); } with_span!(alt((rule!( - #function_call + #column_ref : "" | #map_access : "[] | . | :" | #literal : "" - | #column_ref : "" ),))) .parse(i) } diff --git a/src/query/ast/src/parser/input.rs b/src/query/ast/src/parser/input.rs index e789b614f1ac6..c38b8f1d34436 100644 --- a/src/query/ast/src/parser/input.rs +++ b/src/query/ast/src/parser/input.rs @@ -12,14 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::iter::Cloned; use std::iter::Enumerate; use std::ops::Bound; use std::ops::RangeBounds; use enum_as_inner::EnumAsInner; use nom::Needed; -use nom::Offset; use crate::parser::token::Token; use crate::parser::Backtrace; @@ -64,15 +62,18 @@ impl<'a> Input<'a> { } } -impl<'a> Offset for Input<'a> { +impl nom::Offset for Input<'_> { fn offset(&self, second: &Self) -> usize { - self.tokens.len().saturating_sub(second.tokens.len()) + let fst = self.tokens.as_ptr(); + let snd = second.tokens.as_ptr(); + + (snd as usize - fst as usize) / std::mem::size_of::() } } impl<'a> nom::Input for Input<'a> { - type Item = Token<'a>; - type Iter = Cloned>>; + type Item = &'a Token<'a>; + type Iter = std::slice::Iter<'a, Token<'a>>; type IterIndices = Enumerate; fn input_len(&self) -> usize { @@ -80,7 +81,7 @@ impl<'a> nom::Input for Input<'a> { } fn take(&self, index: usize) -> Self { - self.slice(..index) + self.slice(0..index) } fn take_from(&self, index: usize) -> Self { @@ -88,18 +89,27 @@ impl<'a> nom::Input for Input<'a> { } fn take_split(&self, index: usize) -> (Self, Self) { - (self.slice(index..), self.slice(..index)) + let (prefix, suffix) = self.tokens.split_at(index); + + ( + Input { + tokens: prefix, + ..*self + }, + Input { + tokens: suffix, + ..*self + }, + ) } fn position

(&self, predicate: P) -> Option where P: Fn(Self::Item) -> bool { - self.tokens - .iter() - .position(|token| predicate(token.clone())) + self.tokens.iter().position(predicate) } fn iter_elements(&self) -> Self::Iter { - self.tokens.iter().cloned() + self.tokens.iter() } fn iter_indices(&self) -> Self::IterIndices { @@ -114,22 +124,13 @@ impl<'a> nom::Input for Input<'a> { } } } + #[derive(Clone, Debug)] pub struct WithSpan<'a, T> { pub(crate) span: Input<'a>, pub(crate) elem: T, } -impl<'a, T: PartialEq> PartialEq for WithSpan<'a, T> { - fn eq(&self, other: &Self) -> bool { - self.elem == other.elem - && std::ptr::eq(self.span.tokens.as_ptr(), other.span.tokens.as_ptr()) - && self.span.tokens.len() == other.span.tokens.len() - } -} - -impl<'a, T: Eq> Eq for WithSpan<'a, T> {} - #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, EnumAsInner)] pub enum ParseMode { #[default] diff --git a/src/query/ast/src/parser/query.rs b/src/query/ast/src/parser/query.rs index 5903ad1e3695a..adb67e2a729e2 100644 --- a/src/query/ast/src/parser/query.rs +++ b/src/query/ast/src/parser/query.rs @@ -12,19 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::cmp::Reverse; use std::collections::BTreeMap; use nom::error::context; -use nom::error::Error as NomError; -use nom::error::ErrorKind as NomErrorKind; use nom::Parser; -use nom_language::precedence::binary_op as precedence_binary_op; -use nom_language::precedence::precedence; -use nom_language::precedence::unary_op as precedence_unary_op; -use nom_language::precedence::Assoc as NomAssoc; -use nom_language::precedence::Operation; use nom_rule::rule; +use pratt::Affix; +use pratt::Associativity; +use pratt::PrattParser; +use pratt::Precedence; use crate::ast::*; use crate::parser::common::*; @@ -37,7 +33,6 @@ use crate::parser::statement::hint; use crate::parser::statement::set_table_option; use crate::parser::statement::top_n; use crate::parser::token::*; -use crate::parser::Error; use crate::parser::ErrorKind; use crate::Range; @@ -51,7 +46,7 @@ pub fn query(i: Input) -> IResult { pub fn set_operation(i: Input) -> IResult { let (rest, set_operation_elements) = rule! { #set_operation_element+ }.parse(i)?; - parse_set_operation_elements(set_operation_elements, rest, i) + run_pratt_parser(SetOperationParser, set_operation_elements, rest, i) } #[derive(Debug, Clone, PartialEq)] @@ -88,25 +83,6 @@ pub enum SetOperationElement { Group(SetExpr), } -impl SetOperationElement { - fn affix(&self) -> Affix { - match self { - SetOperationElement::SetOperation { op, .. } => match op { - SetOperator::Union | SetOperator::Except => { - Affix::Infix(Precedence(10), Associativity::Left) - } - SetOperator::Intersect => Affix::Infix(Precedence(20), Associativity::Left), - }, - SetOperationElement::With(_) => Affix::Prefix(Precedence(5)), - SetOperationElement::OrderBy { .. } - | SetOperationElement::Limit { .. } - | SetOperationElement::Offset { .. } - | SetOperationElement::IgnoreResult => Affix::Postfix(Precedence(5)), - _ => Affix::Nilfix, - } - } -} - pub fn set_operation_element(i: Input) -> IResult> { let with = map(with, SetOperationElement::With); let set_operator = map( @@ -258,247 +234,152 @@ pub fn set_operation_element(i: Input) -> IResult> .parse(i) } -type SetOpInput<'a> = ElementsInput<'a, SetOperationElement>; -type SetOpError<'a> = ElementsError<'a, SetOperationElement>; -type SetOpResult<'a, O> = ElementsResult<'a, SetOperationElement, O>; - -fn set_operation_prefix_parser<'a>() -> impl nom::Parser< - SetOpInput<'a>, - Output = nom_language::precedence::Unary< - WithSpan<'a, SetOperationElement>, - Reverse, - >, - Error = SetOpError<'a>, -> { - precedence_unary_op( - Reverse(Precedence(5)), - match_prefix(SetOperationElement::affix, Precedence(5)), - ) -} - -fn set_operation_postfix_parser<'a>() -> impl nom::Parser< - SetOpInput<'a>, - Output = nom_language::precedence::Unary< - WithSpan<'a, SetOperationElement>, - Reverse, - >, - Error = SetOpError<'a>, -> { - precedence_unary_op( - Reverse(Precedence(5)), - match_postfix(SetOperationElement::affix, Precedence(5)), - ) -} - -fn set_operation_binary_parser<'a>() -> impl nom::Parser< - SetOpInput<'a>, - Output = nom_language::precedence::Binary< - WithSpan<'a, SetOperationElement>, - Reverse, - >, - Error = SetOpError<'a>, -> { - alt(( - precedence_binary_op( - Reverse(Precedence(10)), - NomAssoc::Left, - match_binary( - SetOperationElement::affix, - Precedence(10), - Associativity::Left, - ), - ), - precedence_binary_op( - Reverse(Precedence(20)), - NomAssoc::Left, - match_binary( - SetOperationElement::affix, - Precedence(20), - Associativity::Left, - ), - ), - )) -} - -fn set_operation_operand_parser<'a>(input: SetOpInput<'a>) -> SetOpResult<'a, SetExpr> { - match_nilfix(SetOperationElement::affix)(input).and_then(|(rest, elem)| { - match set_operation_primary_expr(elem) { - Ok(expr) => Ok((rest, expr)), - Err(_) => Err(nom::Err::Failure(NomError::new( - input, - NomErrorKind::Verify, - ))), - } - }) -} - -fn parse_set_operation_elements<'a>( - elements: Vec>, - rest: Input<'a>, - input: Input<'a>, -) -> IResult<'a, SetExpr> { - let mut parser = precedence( - set_operation_prefix_parser(), - set_operation_postfix_parser(), - set_operation_binary_parser(), - set_operation_operand_parser, - set_operation_fold, - ); - - match parser(elements.as_slice()) { - Ok(([], expr)) => Ok((rest, expr)), - Ok((_, _)) => { - input.backtrace.clear(); - Err(nom::Err::Error(Error::from_error_kind( - rest, - ErrorKind::Other("unable to parse the set expression"), - ))) - } - Err(_) => { - input.backtrace.clear(); - Err(nom::Err::Error(Error::from_error_kind( - rest, - ErrorKind::Other("unable to parse the set expression"), - ))) - } - } -} - -fn set_operation_fold<'a>( - operation: Operation< - WithSpan<'a, SetOperationElement>, - WithSpan<'a, SetOperationElement>, - WithSpan<'a, SetOperationElement>, - SetExpr, - >, -) -> Result { - match operation { - Operation::Prefix(op, rhs) => apply_set_operation_prefix(op, rhs), - Operation::Postfix(lhs, op) => apply_set_operation_postfix(lhs, op), - Operation::Binary(lhs, op, rhs) => apply_set_operation_infix(lhs, op, rhs), +struct SetOperationParser; + +impl<'a, I: Iterator>> PrattParser + for SetOperationParser +{ + type Error = &'static str; + type Input = WithSpan<'a, SetOperationElement>; + type Output = SetExpr; + + fn query(&mut self, input: &Self::Input) -> Result { + let affix = match &input.elem { + // https://learn.microsoft.com/en-us/sql/t-sql/language-elements/set-operators-except-and-intersect-transact-sql?view=sql-server-2017 + // If EXCEPT or INTERSECT is used together with other operators in an expression, it's evaluated in the context of the following precedence: + // 1. Expressions in parentheses + // 2. The INTERSECT operator + // 3. EXCEPT and UNION evaluated from left to right based on their position in the expression + SetOperationElement::SetOperation { op, .. } => match op { + SetOperator::Union | SetOperator::Except => { + Affix::Infix(Precedence(10), Associativity::Left) + } + SetOperator::Intersect => Affix::Infix(Precedence(20), Associativity::Left), + }, + SetOperationElement::With(_) => Affix::Prefix(Precedence(5)), + SetOperationElement::OrderBy { .. } => Affix::Postfix(Precedence(5)), + SetOperationElement::Limit { .. } => Affix::Postfix(Precedence(5)), + SetOperationElement::Offset { .. } => Affix::Postfix(Precedence(5)), + SetOperationElement::IgnoreResult => Affix::Postfix(Precedence(5)), + _ => Affix::Nilfix, + }; + Ok(affix) } -} -fn set_operation_primary_expr( - input: WithSpan<'_, SetOperationElement>, -) -> Result { - let set_expr = match input.elem { - SetOperationElement::Group(expr) => expr, - SetOperationElement::SelectStmt { - hints, - distinct, - top_n, - select_list, - from, - selection, - group_by, - having, - window_list, - qualify, - } => SetExpr::Select(Box::new(SelectStmt { - span: transform_span(input.span.tokens), - hints, - top_n, - distinct, - select_list, - from, - selection, - group_by, - having, - window_list, - qualify, - })), - SetOperationElement::Values(values) => SetExpr::Values { - span: transform_span(input.span.tokens), - values, - }, - _ => unreachable!(), - }; - Ok(set_expr) -} - -fn apply_set_operation_infix( - lhs: SetExpr, - input: WithSpan<'_, SetOperationElement>, - rhs: SetExpr, -) -> Result { - let set_expr = match input.elem { - SetOperationElement::SetOperation { op, all, .. } => { - SetExpr::SetOperation(Box::new(SetOperation { + fn primary(&mut self, input: Self::Input) -> Result { + let set_expr = match input.elem { + SetOperationElement::Group(expr) => expr, + SetOperationElement::SelectStmt { + hints, + distinct, + top_n, + select_list, + from, + selection, + group_by, + having, + window_list, + qualify, + } => SetExpr::Select(Box::new(SelectStmt { span: transform_span(input.span.tokens), - op, - all, - left: Box::new(lhs), - right: Box::new(rhs), - })) - } - _ => unreachable!(), - }; - Ok(set_expr) -} + hints, + top_n, + distinct, + select_list, + from, + selection, + group_by, + having, + window_list, + qualify, + })), + SetOperationElement::Values(values) => SetExpr::Values { + span: transform_span(input.span.tokens), + values, + }, + _ => unreachable!(), + }; + Ok(set_expr) + } -fn apply_set_operation_prefix( - op: WithSpan<'_, SetOperationElement>, - rhs: SetExpr, -) -> Result { - let mut query = rhs.into_query(); - match op.elem { - SetOperationElement::With(with) => { - if query.with.is_some() { - return Err("duplicated WITH clause"); + fn infix( + &mut self, + lhs: Self::Output, + input: Self::Input, + rhs: Self::Output, + ) -> Result { + let set_expr = match input.elem { + SetOperationElement::SetOperation { op, all, .. } => { + SetExpr::SetOperation(Box::new(SetOperation { + span: transform_span(input.span.tokens), + op, + all, + left: Box::new(lhs), + right: Box::new(rhs), + })) } - query.with = Some(with); - } - _ => unreachable!(), + _ => unreachable!(), + }; + Ok(set_expr) } - Ok(SetExpr::Query(Box::new(query))) -} -fn apply_set_operation_postfix( - lhs: SetExpr, - op: WithSpan<'_, SetOperationElement>, -) -> Result { - let mut query = lhs.into_query(); - match op.elem { - SetOperationElement::OrderBy { order_by } => { - if !query.order_by.is_empty() { - return Err("duplicated ORDER BY clause"); - } - if !query.limit.is_empty() { - return Err("ORDER BY must appear before LIMIT"); - } - if query.offset.is_some() { - return Err("ORDER BY must appear before OFFSET"); + fn prefix(&mut self, op: Self::Input, rhs: Self::Output) -> Result { + let mut query = rhs.into_query(); + match op.elem { + SetOperationElement::With(with) => { + if query.with.is_some() { + return Err("duplicated WITH clause"); + } + query.with = Some(with); } - query.order_by = order_by; + _ => unreachable!(), } - SetOperationElement::Limit { limit } => { - if query.limit.is_empty() && limit.len() > 2 { - return Err("[LIMIT n OFFSET m] or [LIMIT n,m]"); - } - if !query.limit.is_empty() { - return Err("duplicated LIMIT clause"); + Ok(SetExpr::Query(Box::new(query))) + } + + fn postfix(&mut self, lhs: Self::Output, op: Self::Input) -> Result { + let mut query = lhs.into_query(); + match op.elem { + SetOperationElement::OrderBy { order_by } => { + if !query.order_by.is_empty() { + return Err("duplicated ORDER BY clause"); + } + if !query.limit.is_empty() { + return Err("ORDER BY must appear before LIMIT"); + } + if query.offset.is_some() { + return Err("ORDER BY must appear before OFFSET"); + } + query.order_by = order_by; } - if query.offset.is_some() { - return Err("LIMIT must appear before OFFSET"); + SetOperationElement::Limit { limit } => { + if query.limit.is_empty() && limit.len() > 2 { + return Err("[LIMIT n OFFSET m] or [LIMIT n,m]"); + } + if !query.limit.is_empty() { + return Err("duplicated LIMIT clause"); + } + if query.offset.is_some() { + return Err("LIMIT must appear before OFFSET"); + } + query.limit = limit; } - query.limit = limit; - } - SetOperationElement::Offset { offset } => { - if query.limit.len() == 2 { - return Err("LIMIT n,m should not appear OFFSET"); + SetOperationElement::Offset { offset } => { + if query.limit.len() == 2 { + return Err("LIMIT n,m should not appear OFFSET"); + } + if query.offset.is_some() { + return Err("duplicated OFFSET clause"); + } + query.offset = Some(offset); } - if query.offset.is_some() { - return Err("duplicated OFFSET clause"); + SetOperationElement::IgnoreResult => { + query.ignore_result = true; } - query.offset = Some(offset); - } - SetOperationElement::IgnoreResult => { - query.ignore_result = true; + _ => unreachable!(), } - _ => unreachable!(), + Ok(SetExpr::Query(Box::new(query))) } - Ok(SetExpr::Query(Box::new(query))) } pub fn row_values(i: Input) -> IResult> { @@ -841,7 +722,7 @@ pub fn order_by_expr(i: Input) -> IResult { pub fn table_reference(i: Input) -> IResult { let (rest, table_reference_elements) = rule! { #table_reference_element+ }.parse(i)?; - parse_table_reference_elements(table_reference_elements, rest, i) + run_pratt_parser(TableReferenceParser, table_reference_elements, rest, i) } #[derive(Debug, Clone, PartialEq)] @@ -910,16 +791,6 @@ pub enum TableReferenceElement { }, } -impl TableReferenceElement { - fn affix(&self) -> Affix { - match self { - TableReferenceElement::Join { .. } => Affix::Infix(Precedence(10), Associativity::Left), - TableReferenceElement::JoinCondition(..) => Affix::Postfix(Precedence(5)), - _ => Affix::Nilfix, - } - } -} - pub fn table_reference_element(i: Input) -> IResult> { let aliased_table = map( rule! { @@ -1035,259 +906,6 @@ pub fn table_reference_element(i: Input) -> IResult = ElementsInput<'a, TableReferenceElement>; -type TableRefError<'a> = ElementsError<'a, TableReferenceElement>; -type TableRefResult<'a, O> = ElementsResult<'a, TableReferenceElement, O>; - -fn table_reference_prefix_parser<'a>() -> impl nom::Parser< - TableRefInput<'a>, - Output = nom_language::precedence::Unary< - WithSpan<'a, TableReferenceElement>, - Reverse, - >, - Error = TableRefError<'a>, -> { - nom::combinator::fail::< - TableRefInput<'a>, - nom_language::precedence::Unary, Reverse>, - TableRefError<'a>, - >() -} - -fn table_reference_postfix_parser<'a>() -> impl nom::Parser< - TableRefInput<'a>, - Output = nom_language::precedence::Unary< - WithSpan<'a, TableReferenceElement>, - Reverse, - >, - Error = TableRefError<'a>, -> { - precedence_unary_op( - Reverse(Precedence(5)), - match_postfix(TableReferenceElement::affix, Precedence(5)), - ) -} - -fn table_reference_binary_parser<'a>() -> impl nom::Parser< - TableRefInput<'a>, - Output = nom_language::precedence::Binary< - WithSpan<'a, TableReferenceElement>, - Reverse, - >, - Error = TableRefError<'a>, -> { - precedence_binary_op( - Reverse(Precedence(10)), - NomAssoc::Left, - match_binary( - TableReferenceElement::affix, - Precedence(10), - Associativity::Left, - ), - ) -} - -fn table_reference_operand_parser<'a>( - input: TableRefInput<'a>, -) -> TableRefResult<'a, TableReference> { - match_nilfix(TableReferenceElement::affix)(input).and_then(|(rest, elem)| { - match table_reference_primary_expr(elem) { - Ok(expr) => Ok((rest, expr)), - Err(_) => Err(nom::Err::Failure(NomError::new( - input, - NomErrorKind::Verify, - ))), - } - }) -} - -fn parse_table_reference_elements<'a>( - elements: Vec>, - rest: Input<'a>, - input: Input<'a>, -) -> IResult<'a, TableReference> { - let mut parser = precedence( - table_reference_prefix_parser(), - table_reference_postfix_parser(), - table_reference_binary_parser(), - table_reference_operand_parser, - table_reference_fold, - ); - - match parser(elements.as_slice()) { - Ok(([], expr)) => Ok((rest, expr)), - Ok((_, _)) => { - input.backtrace.clear(); - Err(nom::Err::Error(Error::from_error_kind( - rest, - ErrorKind::Other("unable to parse the table reference"), - ))) - } - Err(_) => { - input.backtrace.clear(); - Err(nom::Err::Error(Error::from_error_kind( - rest, - ErrorKind::Other("unable to parse the table reference"), - ))) - } - } -} - -fn table_reference_fold<'a>( - operation: Operation< - WithSpan<'a, TableReferenceElement>, - WithSpan<'a, TableReferenceElement>, - WithSpan<'a, TableReferenceElement>, - TableReference, - >, -) -> Result { - match operation { - Operation::Prefix(_, _) => Err("unexpected prefix operator"), - Operation::Postfix(lhs, op) => apply_table_reference_postfix(lhs, op), - Operation::Binary(lhs, op, rhs) => apply_table_reference_infix(lhs, op, rhs), - } -} - -fn table_reference_primary_expr( - input: WithSpan<'_, TableReferenceElement>, -) -> Result { - let table_ref = match input.elem { - TableReferenceElement::Group(table_ref) => table_ref, - TableReferenceElement::Table { - catalog, - database, - table, - alias, - temporal, - with_options, - pivot, - unpivot, - sample, - } => TableReference::Table { - span: transform_span(input.span.tokens), - catalog, - database, - table, - alias, - temporal, - with_options, - pivot, - unpivot, - sample, - }, - TableReferenceElement::TableFunction { - lateral, - name, - params, - alias, - sample, - } => { - let normal_params = params - .iter() - .filter_map(|p| match p { - TableFunctionParam::Normal(p) => Some(p.clone()), - _ => None, - }) - .collect(); - let named_params = params - .into_iter() - .filter_map(|p| match p { - TableFunctionParam::Named { name, value } => Some((name, value)), - _ => None, - }) - .collect(); - TableReference::TableFunction { - span: transform_span(input.span.tokens), - lateral, - name, - params: normal_params, - named_params, - alias, - sample, - } - } - TableReferenceElement::Subquery { - lateral, - subquery, - alias, - pivot, - unpivot, - } => TableReference::Subquery { - span: transform_span(input.span.tokens), - lateral, - subquery, - alias, - pivot, - unpivot, - }, - TableReferenceElement::Stage { - location, - options, - alias, - } => { - let options = SelectStageOptions::from(options); - TableReference::Location { - span: transform_span(input.span.tokens), - location, - options, - alias, - } - } - _ => unreachable!(), - }; - Ok(table_ref) -} - -fn apply_table_reference_infix( - lhs: TableReference, - input: WithSpan<'_, TableReferenceElement>, - rhs: TableReference, -) -> Result { - let table_ref = match input.elem { - TableReferenceElement::Join { op, natural } => { - let condition = if natural { - JoinCondition::Natural - } else { - JoinCondition::None - }; - TableReference::Join { - span: transform_span(input.span.tokens), - join: Join { - op, - condition, - left: Box::new(lhs), - right: Box::new(rhs), - }, - } - } - _ => unreachable!(), - }; - Ok(table_ref) -} - -fn apply_table_reference_postfix( - mut lhs: TableReference, - op: WithSpan<'_, TableReferenceElement>, -) -> Result { - match op.elem { - TableReferenceElement::JoinCondition(new_condition) => match &mut lhs { - TableReference::Join { - join: Join { condition, .. }, - .. - } => match *condition { - JoinCondition::None => { - *condition = new_condition; - Ok(lhs) - } - JoinCondition::Natural => Err("join condition conflicting with NATURAL"), - _ => Err("join condition already set"), - }, - _ => Err("join condition must apply to a join"), - }, - _ => unreachable!(), - } -} - // PIVOT(expr FOR col IN (ident, ... | subquery)) fn pivot(i: Input) -> IResult { map( @@ -1381,6 +999,173 @@ fn get_table_sample( None } +struct TableReferenceParser; + +impl<'a, I: Iterator>> PrattParser + for TableReferenceParser +{ + type Error = &'static str; + type Input = WithSpan<'a, TableReferenceElement>; + type Output = TableReference; + + fn query(&mut self, input: &Self::Input) -> Result { + let affix = match &input.elem { + TableReferenceElement::Join { .. } => Affix::Infix(Precedence(10), Associativity::Left), + TableReferenceElement::JoinCondition(..) => Affix::Postfix(Precedence(5)), + _ => Affix::Nilfix, + }; + Ok(affix) + } + + fn primary(&mut self, input: Self::Input) -> Result { + let table_ref = match input.elem { + TableReferenceElement::Group(table_ref) => table_ref, + TableReferenceElement::Table { + catalog, + database, + table, + alias, + temporal, + with_options, + pivot, + unpivot, + sample, + } => TableReference::Table { + span: transform_span(input.span.tokens), + catalog, + database, + table, + alias, + temporal, + with_options, + pivot, + unpivot, + sample, + }, + TableReferenceElement::TableFunction { + lateral, + name, + params, + alias, + sample, + } => { + let normal_params = params + .iter() + .filter_map(|p| match p { + TableFunctionParam::Normal(p) => Some(p.clone()), + _ => None, + }) + .collect(); + let named_params = params + .into_iter() + .filter_map(|p| match p { + TableFunctionParam::Named { name, value } => Some((name, value)), + _ => None, + }) + .collect(); + TableReference::TableFunction { + span: transform_span(input.span.tokens), + lateral, + name, + params: normal_params, + named_params, + alias, + sample, + } + } + TableReferenceElement::Subquery { + lateral, + subquery, + alias, + pivot, + unpivot, + } => TableReference::Subquery { + span: transform_span(input.span.tokens), + lateral, + subquery, + alias, + pivot, + unpivot, + }, + TableReferenceElement::Stage { + location, + options, + alias, + } => { + let options = SelectStageOptions::from(options); + TableReference::Location { + span: transform_span(input.span.tokens), + location, + options, + alias, + } + } + _ => unreachable!(), + }; + Ok(table_ref) + } + + fn infix( + &mut self, + lhs: Self::Output, + input: Self::Input, + rhs: Self::Output, + ) -> Result { + let table_ref = match input.elem { + TableReferenceElement::Join { op, natural } => { + let condition = if natural { + JoinCondition::Natural + } else { + JoinCondition::None + }; + TableReference::Join { + span: transform_span(input.span.tokens), + join: Join { + op, + condition, + left: Box::new(lhs), + right: Box::new(rhs), + }, + } + } + _ => unreachable!(), + }; + Ok(table_ref) + } + + fn prefix( + &mut self, + _op: Self::Input, + _rhs: Self::Output, + ) -> Result { + unreachable!() + } + + fn postfix( + &mut self, + mut lhs: Self::Output, + op: Self::Input, + ) -> Result { + match op.elem { + TableReferenceElement::JoinCondition(new_condition) => match &mut lhs { + TableReference::Join { + join: Join { condition, .. }, + .. + } => match *condition { + JoinCondition::None => { + *condition = new_condition; + Ok(lhs) + } + JoinCondition::Natural => Err("join condition conflicting with NATURAL"), + _ => Err("join condition already set"), + }, + _ => Err("join condition must apply to a join"), + }, + _ => unreachable!(), + } + } +} + pub fn group_by_items(i: Input) -> IResult { let all = map(rule! { ALL }, |_| GroupBy::All); diff --git a/src/query/ast/src/precedence.rs b/src/query/ast/src/precedence.rs deleted file mode 100644 index 8b85d7d41548c..0000000000000 --- a/src/query/ast/src/precedence.rs +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub struct Precedence(pub u32); - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Associativity { - Left, - Right, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum Affix { - Nilfix, - Prefix(Precedence), - Postfix(Precedence), - Infix(Precedence, Associativity), -} diff --git a/src/query/ast/tests/it/testdata/dialect.txt b/src/query/ast/tests/it/testdata/dialect.txt index 74955cc981620..afc11169e433f 100644 --- a/src/query/ast/tests/it/testdata/dialect.txt +++ b/src/query/ast/tests/it/testdata/dialect.txt @@ -237,7 +237,7 @@ error: 1 | "a\"b" | ^^^^^^ | | - | expecting `IS`, `IN`, `LIKE`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<=>`, `<+>`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, or more ... + | invalid identifier | while parsing expression diff --git a/src/query/ast/tests/it/testdata/expr-error.txt b/src/query/ast/tests/it/testdata/expr-error.txt index 93527ebddb07f..aa497ee42b1e9 100644 --- a/src/query/ast/tests/it/testdata/expr-error.txt +++ b/src/query/ast/tests/it/testdata/expr-error.txt @@ -31,7 +31,6 @@ error: 1 | CAST(col1 AS foo) | ---- ^^^ unexpected `foo`, expecting `BOOL`, `FLOAT`, `BOOLEAN`, `FLOAT32`, `FLOAT64`, `BLOB`, `JSON`, `DOUBLE`, `VECTOR`, `LONGBLOB`, `GEOMETRY`, `GEOGRAPHY`, `STAGE_LOCATION`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `REAL`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `TIMESTAMP`, `TIMESTAMP_TZ`, `DATETIME`, `INTERVAL`, `NUMERIC`, `BINARY`, `VARBINARY`, `MEDIUMBLOB`, `TINYBLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, or `NULLABLE` | | - | while parsing `CAST(... AS ...)` | while parsing expression @@ -52,9 +51,8 @@ error: --> SQL:1:10 | 1 | CAST(col1) - | ---- ^ expecting `IS`, `IN`, `LIKE`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<=>`, `<+>`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, or more ... + | ---- ^ unexpected `)`, expecting `AS`, `,`, `ColumnPosition`, `ROW`, , , `IDENTIFIER`, `[`, , `:`, , `TRUE`, `FALSE`, , , , or `NULL` | | - | while parsing `CAST(... AS ...)` | while parsing expression @@ -65,10 +63,8 @@ error: --> SQL:1:2 | 1 | a.add(b) - | -^ - | || - | |chain function only works in experimental dialect, try `set sql_dialect = 'experimental'` - | |while parsing x.function(...) + | -^ chain function only works in experimental dialect, try `set sql_dialect = 'experimental'` + | | | while parsing expression @@ -81,7 +77,7 @@ error: 1 | $ abc + 3 | ^ | | - | expecting `IS`, `IN`, `LIKE`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<=>`, `<+>`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, or more ... + | unexpected `$`, expecting `ColumnPosition`, `ROW`, , , `IDENTIFIER`, `[`, , `:`, , `TRUE`, `FALSE`, , , , or `NULL` | while parsing expression @@ -96,7 +92,6 @@ error: | | | list comprehension only works in experimental dialect, try `set sql_dialect = 'experimental'` | while parsing expression - | while parsing [expr for x in ... [if ...]] ---------- Input ---------- @@ -110,7 +105,6 @@ error: 1 | G.E.B IS NOT NULL | - while parsing expression 2 | AND col1 NOT BETWEEN col2 AND - | --- while parsing `[NOT] BETWEEN ... AND ...` 3 | AND 1 + col3 DIV sum(col4) | ^^^ expected more tokens for expression diff --git a/src/query/ast/tests/it/testdata/query-error.txt b/src/query/ast/tests/it/testdata/query-error.txt index a47c79410f48e..60e894ed4e444 100644 --- a/src/query/ast/tests/it/testdata/query-error.txt +++ b/src/query/ast/tests/it/testdata/query-error.txt @@ -93,7 +93,6 @@ error: 1 | select number + 5 as a, cast(number as float(255)) | ------ ---- ^ unexpected `(`, expecting `)`, `NULL`, or `NOT` | | | - | | while parsing `CAST(... AS ...)` | | while parsing expression | while parsing `SELECT ...` diff --git a/src/query/ast/tests/it/testdata/stmt-error.txt b/src/query/ast/tests/it/testdata/stmt-error.txt index 1d505f46020ef..409ccebfeb44d 100644 --- a/src/query/ast/tests/it/testdata/stmt-error.txt +++ b/src/query/ast/tests/it/testdata/stmt-error.txt @@ -608,7 +608,7 @@ error: --> SQL:1:41 | 1 | SELECT * FROM t GROUP BY GROUPING SETS () - | ------ ^ expecting `IS`, `IN`, `LIKE`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<=>`, `<+>`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, or more ... + | ------ ^ unexpected `)`, expecting `(`, `ColumnPosition`, `ROW`, , , `IDENTIFIER`, `[`, , `:`, , `TRUE`, `FALSE`, , , , or `NULL` | | | while parsing `SELECT ...` @@ -1039,9 +1039,8 @@ error: --> SQL:1:65 | 1 | CREATE FUNCTION IF NOT EXISTS isnotempty AS(p) -> not(is_null(p) - | ------ -- ---- ^ expecting `IS`, `IN`, `LIKE`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<=>`, `<+>`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, or more ... - | | | | | - | | | | while parsing `( [, ...])` + | ------ -- --- ^ unexpected end of input, expecting `)`, `(`, `WITHIN`, `IGNORE`, `RESPECT`, `OVER`, `ColumnPosition`, `ROW`, , , `IDENTIFIER`, `[`, , `:`, , `TRUE`, `FALSE`, , , , `NULL`, or `,` + | | | | | | | while parsing expression | | while parsing AS (, ...) -> | while parsing `CREATE [OR REPLACE] FUNCTION [IF NOT EXISTS] [DESC = ]` From 962c24f27009502555d31fc391ec2a35b4f5e80c Mon Sep 17 00:00:00 2001 From: kould Date: Tue, 11 Nov 2025 00:41:59 +0800 Subject: [PATCH 04/13] chore: fix unit test --- src/query/ast/src/parser/expr.rs | 14 -------------- src/query/ast/tests/it/testdata/expr-error.txt | 7 ++++--- src/query/ast/tests/it/testdata/query-error.txt | 5 ++--- src/query/ast/tests/it/testdata/stmt-error.txt | 7 ++++--- 4 files changed, 10 insertions(+), 23 deletions(-) diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index 332dab3ef3591..87e02f29d817e 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -1478,19 +1478,6 @@ pub fn expr_element(i: Input) -> IResult> { }, ); - let inverted_expr = map( - rule! { - INVERTED ~ #consumed(literal_string) - }, - |(_, (span, date))| ExprElement::Cast { - expr: Box::new(Expr::Literal { - span: transform_span(span.tokens), - value: Literal::String(date), - }), - target_type: TypeName::Interval, - }, - ); - let is_distinct_from = map( rule! { IS ~ NOT? ~ DISTINCT ~ FROM @@ -1620,7 +1607,6 @@ pub fn expr_element(i: Input) -> IResult> { DATE => with_span!(date_expr).parse(i), TIMESTAMP => with_span!(timestamp_expr).parse(i), TIMESTAMP_TZ => with_span!(timestamp_tz_expr).parse(i), - INVERTED => with_span!(inverted_expr).parse(i), INTERVAL => with_span!(interval).parse(i), DATE_PART | DATEPART => with_span!(date_part).parse(i), EXTRACT => with_span!(extract).parse(i), diff --git a/src/query/ast/tests/it/testdata/expr-error.txt b/src/query/ast/tests/it/testdata/expr-error.txt index aa497ee42b1e9..fb1f524a562c1 100644 --- a/src/query/ast/tests/it/testdata/expr-error.txt +++ b/src/query/ast/tests/it/testdata/expr-error.txt @@ -29,8 +29,9 @@ error: --> SQL:1:14 | 1 | CAST(col1 AS foo) - | ---- ^^^ unexpected `foo`, expecting `BOOL`, `FLOAT`, `BOOLEAN`, `FLOAT32`, `FLOAT64`, `BLOB`, `JSON`, `DOUBLE`, `VECTOR`, `LONGBLOB`, `GEOMETRY`, `GEOGRAPHY`, `STAGE_LOCATION`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `REAL`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `TIMESTAMP`, `TIMESTAMP_TZ`, `DATETIME`, `INTERVAL`, `NUMERIC`, `BINARY`, `VARBINARY`, `MEDIUMBLOB`, `TINYBLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, or `NULLABLE` + | ---- ^^^ unexpected `foo`, expecting `BOOL`, `FLOAT`, `BOOLEAN`, `FLOAT32`, `FLOAT64`, `BLOB`, `JSON`, `DOUBLE`, `VECTOR`, `LONGBLOB`, `GEOMETRY`, `GEOGRAPHY`, `STAGE_LOCATION`, , , `IDENTIFIER`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `REAL`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `TIMESTAMP`, `TIMESTAMP_TZ`, `DATETIME`, `INTERVAL`, `NUMERIC`, `BINARY`, `VARBINARY`, `MEDIUMBLOB`, `TINYBLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, or `NULLABLE` | | + | while parsing `function(... [ , x -> ... ] ) [ (...) ] [ WITHIN GROUP ( ORDER BY , ... ) ] [ OVER ([ PARTITION BY , ... ] [ ORDER BY , ... ] [ ]) ]` | while parsing expression @@ -51,8 +52,9 @@ error: --> SQL:1:10 | 1 | CAST(col1) - | ---- ^ unexpected `)`, expecting `AS`, `,`, `ColumnPosition`, `ROW`, , , `IDENTIFIER`, `[`, , `:`, , `TRUE`, `FALSE`, , , , or `NULL` + | ---- ^ unexpected `)`, expecting , , `IDENTIFIER`, `ColumnPosition`, `ROW`, `[`, , `:`, , `TRUE`, `FALSE`, , , , `NULL`, `AS`, or `,` | | + | while parsing `function(... [ , x -> ... ] ) [ (...) ] [ WITHIN GROUP ( ORDER BY , ... ) ] [ OVER ([ PARTITION BY , ... ] [ ORDER BY , ... ] [ ]) ]` | while parsing expression @@ -103,7 +105,6 @@ error: --> SQL:3:1 | 1 | G.E.B IS NOT NULL - | - while parsing expression 2 | AND col1 NOT BETWEEN col2 AND 3 | AND 1 + col3 DIV sum(col4) | ^^^ expected more tokens for expression diff --git a/src/query/ast/tests/it/testdata/query-error.txt b/src/query/ast/tests/it/testdata/query-error.txt index 60e894ed4e444..56272e6a52565 100644 --- a/src/query/ast/tests/it/testdata/query-error.txt +++ b/src/query/ast/tests/it/testdata/query-error.txt @@ -91,9 +91,8 @@ error: --> SQL:1:45 | 1 | select number + 5 as a, cast(number as float(255)) - | ------ ---- ^ unexpected `(`, expecting `)`, `NULL`, or `NOT` - | | | - | | while parsing expression + | ------ ^ unexpected `(`, expecting `*`, `COLUMNS`, , , `IDENTIFIER`, `NULL`, `NOT`, or `)` + | | | while parsing `SELECT ...` diff --git a/src/query/ast/tests/it/testdata/stmt-error.txt b/src/query/ast/tests/it/testdata/stmt-error.txt index 409ccebfeb44d..97067acb77b6f 100644 --- a/src/query/ast/tests/it/testdata/stmt-error.txt +++ b/src/query/ast/tests/it/testdata/stmt-error.txt @@ -1036,11 +1036,12 @@ error: CREATE FUNCTION IF NOT EXISTS isnotempty AS(p) -> not(is_null(p) ---------- Output --------- error: - --> SQL:1:65 + --> SQL:1:51 | 1 | CREATE FUNCTION IF NOT EXISTS isnotempty AS(p) -> not(is_null(p) - | ------ -- --- ^ unexpected end of input, expecting `)`, `(`, `WITHIN`, `IGNORE`, `RESPECT`, `OVER`, `ColumnPosition`, `ROW`, , , `IDENTIFIER`, `[`, , `:`, , `TRUE`, `FALSE`, , , , `NULL`, or `,` - | | | | + | ------ -- ^^^ + | | | | + | | | expecting an operand | | | while parsing expression | | while parsing AS (, ...) -> | while parsing `CREATE [OR REPLACE] FUNCTION [IF NOT EXISTS] [DESC = ]` From 21eb23f5f7563f31c05ea5f92b41af16cf3fce58 Mon Sep 17 00:00:00 2001 From: kould Date: Tue, 11 Nov 2025 11:44:16 +0800 Subject: [PATCH 05/13] perf: optimize parse embedding --- src/query/ast/benches/bench.rs | 19 +- src/query/ast/src/parser/expr.rs | 184 +++++++++--------- src/query/ast/src/parser/script.rs | 11 ++ .../script/tests/it/testdata/script-error.txt | 4 +- 4 files changed, 119 insertions(+), 99 deletions(-) diff --git a/src/query/ast/benches/bench.rs b/src/query/ast/benches/bench.rs index 12c3711ac659f..781000e569c52 100644 --- a/src/query/ast/benches/bench.rs +++ b/src/query/ast/benches/bench.rs @@ -18,11 +18,12 @@ fn main() { // bench fastest │ slowest │ median │ mean │ samples │ iters // ╰─ dummy │ │ │ │ │ -// ├─ deep_function_call 174.4 µs │ 405.3 µs │ 186.3 µs │ 189.7 µs │ 100 │ 100 -// ├─ deep_query 226.1 µs │ 396.8 µs │ 239.3 µs │ 242.2 µs │ 100 │ 100 -// ├─ large_query 270.5 µs │ 362.8 µs │ 289.7 µs │ 294 µs │ 100 │ 100 -// ├─ large_statement 283.4 µs │ 313.5 µs │ 294.6 µs │ 295 µs │ 100 │ 100 -// ╰─ wide_expr 38.01 µs │ 189 µs │ 39.04 µs │ 40.81 µs │ 100 │ 100 +// ├─ deep_function_call 132.5 µs │ 470.9 µs │ 162.3 µs │ 166.2 µs │ 100 │ 100 +// ├─ deep_query 232.3 µs │ 495 µs │ 283.6 µs │ 283.4 µs │ 100 │ 100 +// ├─ large_query 196.4 µs │ 348.2 µs │ 240.1 µs │ 240.1 µs │ 100 │ 100 +// ├─ large_statement 197.6 µs │ 286.6 µs │ 234.6 µs │ 236.1 µs │ 100 │ 100 +// ├─ wide_embedding 3.964 ms │ 7.229 ms │ 4.3 ms │ 4.397 ms │ 100 │ 100 +// ╰─ wide_expr 40.59 µs │ 56.32 µs │ 43.06 µs │ 43.68 µs │ 100 │ 100 #[divan::bench_group(max_time = 0.5)] mod dummy { @@ -70,4 +71,12 @@ mod dummy { let expr = parse_expr(&tokens, Dialect::PostgreSQL).unwrap(); divan::black_box(expr); } + + #[divan::bench] + fn wide_embedding() { + let case = r#"cosine_distance(embedding, [0.01603204570710659, -0.015531889162957668, 0.011922650970518589, -0.03914738819003105, -0.015275051817297935, 0.041202086955308914, -0.00041524821426719427, 0.013767823576927185, -0.008347207680344582, -0.0045554800890386105, -0.012064587324857712, -0.015193945728242397, -0.01120621059089899, -0.011963204480707645, -0.0003058389702346176, 0.00012799614341929555, 0.03520020842552185, -0.013686716556549072, 0.0024281248915940523, -0.0016457854071632028, -0.03209112584590912, 0.026346085593104362, 0.004193880595266819, 0.004663622006773949, 0.026332566514611244, 0.004339196253567934, 0.021331001073122025, -0.004856249783188105, 0.013734028674662113, -0.003283122321590781, 0.020722702145576477, -0.006529070902615786, 0.0037410359364002943, -0.023034237325191498, -0.03744415193796158, -0.028982045128941536, -0.008800052106380463, -0.01226735394448042, 0.012510673142969608, -0.01237549539655447, 0.025061899796128273, -0.014964143745601177, -0.003229051362723112, -0.015518371015787125, 0.01133462879806757, -0.007549660746008158, -0.06191127002239227, 0.016937734559178352, -0.01592390425503254, 0.010503287427127361, -0.00023001288354862481, -0.0014937107916921377, -0.036660123616456985, 0.013402843847870827, 0.003078666515648365, 0.0011219728039577603, 0.007434759754687548, 0.01616722345352173, 0.002958696335554123, -0.030387891456484795, 0.0008604889735579491, 0.00894874706864357, -0.010706054046750069, 0.022466491907835007, -0.012226800434291363, 0.018573381006717682, -0.012977034784853458, -0.008583768270909786, -0.01732974871993065, -0.00694136181846261, 0.020884916186332703, 0.016126669943332672, -0.00696839764714241, 0.012449842877686024, 0.018032671883702278, -0.016315918415784836, -0.033307723701000214, 0.007218475919216871, -0.014883036725223064, 0.004427061416208744, -0.008333689533174038, -0.014788413420319557, -0.02787359058856964, 0.04255386069417, 0.018803182989358902, 0.01893836073577404, 0.0001457381877116859, 0.018546346575021744, -0.007434759754687548, -0.0048934235237538815, 0.026900311931967735, 0.01364616397768259, 0.011645536869764328, -0.004535203333944082, -0.0012858754489570856, 0.012645849958062172, -0.02218261919915676, 0.006657489575445652, -0.016816074028611183, -0.0092393783852458, -0.006921085529029369, 0.004775143228471279, -0.002711997600272298, -0.007083298172801733, -0.029333505779504776, -0.014369362965226173, -0.0010602980619296432, 0.006927844136953354, 0.013342014513909817, -0.04360824450850487, -0.006772390101104975, 0.015491335652768612, 0.00725902896374464, -0.028657618910074234, 0.0203442070633173, -0.029252400621771812, 0.008604045026004314, 0.026210907846689224, 0.017032358795404434, -0.015139874070882797, 0.030766388401389122, 0.02393992617726326, 0.012699921615421772, -0.02302072010934353, -0.0024129175581038, 0.009732776321470737, -0.01987108588218689, -0.009367797523736954, 0.016586273908615112, -0.004954253323376179, -0.01364616397768259, -0.0032053953036665916, 0.005038739647716284, 0.004761625546962023, -0.0066811456345021725, -0.0013965520774945617, -0.006069467402994633, 0.003974216990172863, -0.003129357937723398, -0.04041805863380432, 0.008387761190533638, 0.00252612866461277, -0.0014024660922586918, -0.0002338147460250184, 0.01395031251013279, 0.02137155458331108, 0.031334131956100464, -0.0012292698957026005, 0.017343265935778618, 0.004356093239039183, 0.017343265935778618, 0.004869767464697361, 0.018275991082191467, -0.010942614637315273, 0.0041431887075304985, 0.0017252021934837103, -0.005126604810357094, 0.009894989430904388, -0.03730897605419159, -0.01377458218485117, 0.016532201319932938, 0.01190913375467062, -0.01904650218784809, -0.0044507174752652645, 0.0006944741471670568, 0.017951564863324165, 0.0011819577775895596, 0.007117092609405518, -0.02534577250480652, 0.02077677473425865, 0.017965083941817284, 0.02311534434556961, -0.023534394800662994, 0.011260281316936016, -0.002367295091971755, 0.01458564680069685, -0.03395657613873482, 0.02998235821723938, -0.02427786961197853, -0.006782528478652239, 0.029603861272335052, -0.00484949117526412, 0.012240317650139332, 0.0147478599101305, 0.000327805319102481, -0.0202360637485981, 0.01569410227239132, 0.012801304459571838, 0.028630584478378296, 0.0012275802437216043, 0.001503004226833582, 0.008556732907891273, -0.002074973890557885, 0.0006387134199030697, -0.648851752281189, -0.023182932287454605, 0.015072285197675228, -0.02162839099764824, 0.0022371867671608925, 0.00624519819393754, 0.0033929538913071156, 0.0022490147966891527, -0.03474060446023941, 0.016329435631632805, 0.0030904945451766253, 0.020411795005202293, 0.008529696613550186, 0.009692222811281681, -0.01057763583958149, -0.009949060156941414, 0.0037072414997965097, -0.016342952847480774, 0.002674823859706521, -0.015139874070882797, -0.03044196218252182, 0.026170354336500168, 0.004454096779227257, -0.01988460309803486, 0.01214569341391325, 0.019208716228604317, 0.009394832886755466, -0.023615499958395958, -0.027468057349324226, 0.007002192083746195, 0.0031040122266858816, 0.02089843340218067, 0.005843045189976692, 0.029603861272335052, 0.05882922559976578, 0.015748172998428345, -0.03146931156516075, 0.011746920645236969, -0.00044524073018692434, 0.04450041800737381, -0.027562681585550308, -0.015626512467861176, 0.0154643002897501, -0.00432905787602067, -0.02046586573123932, 0.014044936746358871, 0.011233245953917503, -0.001311221276409924, -0.0010780401062220335, 0.003991113975644112, -0.001809688052162528, 0.0011675952700898051, -0.008435073308646679, -0.010219414718449116, 0.004099256359040737, -0.01626184768974781, 0.019925156608223915, -0.023209968581795692, 0.006052570417523384, 0.027373433113098145, -0.00795519258826971, 0.00595118710771203, -0.006319545675069094, -0.004349334631115198, -0.033172547817230225, 0.008827087469398975, -0.010658741928637028, -0.007684838026762009, 0.04058026894927025, -0.015126356855034828, 0.02707604318857193, 0.016315918415784836, -0.018208403140306473, -0.0051130871288478374, 0.0018401030683889985, 0.022601669654250145, 0.003524752100929618, -0.02499430999159813, 0.005964704789221287, 0.004447338171303272, 0.003132737474516034, -0.040282879024744034, -0.00352813140489161, -0.004639965947717428, 0.014139560982584953, -0.004254710394889116, -0.007015709765255451, 0.02091195061802864, 0.02871168963611126, -0.0034014026168733835, 0.03163152188062668, 0.007103574927896261, -0.002127355197444558, -0.02496727555990219, 0.004771763924509287, -0.008218788541853428, -0.014261220581829548, 0.01098316814750433, -0.012159211561083794, -0.0261568371206522, -0.006556106265634298, 0.00012134288408560678, 0.018167849630117416, -0.00493735633790493, 0.009786847047507763, 0.004298642743378878, -0.01016534399241209, -0.017113465815782547, 0.017289195209741592, -0.015558924525976181, -0.00554565479978919, -0.030982671305537224, -0.016248328611254692, -0.013652922585606575, -6.948965892661363e-05, -0.014950625598430634, -0.013396085239946842, 0.003937043249607086, 0.001566368737258017, -0.009225861169397831, -0.013801617547869682, 0.0012706679990515113, 0.007448277436196804, -0.0027424125000834465, -0.006278992630541325, 0.0006910947267897427, 0.0047582462430000305, -0.014977660961449146, -0.014017901383340359, 0.011294076219201088, 0.023750677704811096, -0.018884290009737015, 0.009766570292413235, -0.0006606797687709332, 0.018654488027095795, 0.014491022564470768, 0.021168788895010948, -0.010090996511280537, 0.016883663833141327, -0.031901877373456955, -0.03684937208890915, 0.0028302778955549, -4.88962177769281e-05, 0.0028353470843285322, -0.022452974691987038, -0.009482698515057564, 0.006921085529029369, 0.00018967085634358227, -0.01382189430296421, 0.00025894929422065616, -0.020952504128217697, -0.016424059867858887, -0.012118658050894737, 0.0030381132382899523, -0.004227674566209316, 0.005630140658468008, 0.00026972126215696335, -0.017802869901061058, -0.003967457916587591, -0.006762251723557711, -0.009888230822980404, 0.020411795005202293, -0.030009394511580467, -0.005491584073752165, 0.0026731339748948812, -0.01476137712597847, 0.012470119632780552, -0.003714000340551138, -0.005021842196583748, -0.021560803055763245, -0.00024838856188580394, -0.0057991123758256435, -0.011713125742971897, -0.017965083941817284, 0.017289195209741592, 0.0028877281583845615, -0.0013467053649947047, 0.006474999710917473, -0.0154643002897501, 0.014612682163715363, 0.007671320345252752, 0.006975156255066395, 0.006877152714878321, -0.00595118710771203, 0.015775207430124283, 0.01718105375766754, 0.01616722345352173, 0.009935542941093445, 0.009604358114302158, 0.013747546821832657, -0.022033924236893654, 0.018343579024076462, -0.008387761190533638, 0.016802556812763214, -0.016207776963710785, -0.0018874151865020394, 0.004910320974886417, 0.012916205450892448, 0.021412108093500137, 0.040147703140974045, 0.004004632122814655, 0.010381627827882767, 0.04985344409942627, -0.015842797234654427, 0.004650104325264692, -0.012240317650139332, -0.002874210476875305, -0.029117222875356674, 0.03698455169796944, -0.008529696613550186, 0.004078979603946209, -0.014666752889752388, -0.0007375619607046247, -0.023547912016510963, 0.011699608527123928, 0.0021983233746141195, -0.0057585593312978745, 0.019343892112374306, -0.0130784185603261, -0.017843423411250114, 0.008691909722983837, 0.008813569322228432, -0.009638152085244656, -0.0037917273584753275, 0.013794858939945698, 0.0095975985750556, -0.00998961366713047, 0.031171919777989388, -0.010969650000333786, -0.006001878529787064, 0.012355218641459942, -0.004302022513002157, 0.012429566122591496, 0.011395459063351154, 0.004332437179982662, 0.0068264612928032875, 0.006701421923935413, -0.016315918415784836, 0.04082359001040459, -0.017924530431628227, 0.014720824547111988, 0.017573067918419838, 0.012071345932781696, -0.008982541970908642, 0.005353027023375034, -0.013436638750135899, 0.0270895604044199, 0.004099256359040737, -0.027332879602909088, -0.010584394447505474, -0.005718006286770105, 0.00737392995506525, 0.031658560037612915, -0.0011202831519767642, 0.026346085593104362, -0.017938047647476196, -0.005467928014695644, 0.00021385494619607925, 0.053395092487335205, 0.025940552353858948, 0.022709811106324196, 0.0012706679990515113, 0.017464926466345787, -0.012889170087873936, -0.0018417927203699946, 0.011746920645236969, 0.008225548081099987, -0.021168788895010948, -0.010192379355430603, -0.0168295931071043, 0.01662682555615902, -0.028387265279889107, -0.010320798493921757, -0.0056706941686570644, 0.010341075249016285, 0.016126669943332672, -0.01659979112446308, -0.029306471347808838, 0.006789287552237511, 0.017018841579556465, -0.0168295931071043, -0.02092546969652176, 0.027495093643665314, 0.003266225103288889, -0.008975782431662083, 0.01812729611992836, 0.007306341081857681, -0.009327244013547897, -0.01883021928369999, -0.005221229046583176, -0.005893736612051725, -0.0009006197797134519, -0.008151200599968433, 0.005657176487147808, 0.032821085304021835, 0.008752739988267422, 0.02602165937423706, -0.004400026053190231, 0.01881670020520687, -0.03314550966024399, -0.0027018592227250338, -0.023331627249717712, -0.016464613378047943, 0.0018417927203699946, 0.031117849051952362, 0.017856940627098083, -0.0022997064515948296, -0.012787786312401295, -0.012294389307498932, -0.01284861657768488, -0.005146881565451622, -0.007488830480724573, -0.012111899442970753, 0.011557672172784805, -0.0037883480545133352, 6.0460224631242454e-05, -0.01628888212144375, -0.02102009393274784, 0.02010088600218296, 0.0037951068952679634, 0.0035010960418730974, -0.017262160778045654, -0.018965397030115128, 0.003173290519043803, 0.11819916218519211, 0.001608611666597426, -0.0216013565659523, 0.02976607345044613, -0.011800991371273994, 0.022682776674628258, -0.00332536525093019, -0.031117849051952362, 0.02380474843084812, 0.0092934500426054, 0.02207447774708271, 0.011233245953917503, 0.004785281606018543, -0.030955635011196136, 0.015829280018806458, 0.01708642952144146, -0.021290447562932968, -0.015626512467861176, -0.025507984682917595, -0.0016753554809838533, 0.0029434890020638704, -0.02124989591538906, 0.022750364616513252, 0.028441336005926132, 0.00829989556223154, 0.030604174360632896, 0.02057400718331337, -0.0016457854071632028, 0.02022254653275013, -0.015437264926731586, -0.010557359084486961, -0.002465298632159829, 0.006549347657710314, 0.011821268126368523, 0.00784705113619566, 0.04193204641342163, 0.0023351905401796103, -0.0038931104354560375, 0.012274112552404404, 0.0016297331312671304, 0.03455135598778725, 0.02416972815990448, -0.006069467402994633, -0.019925156608223915, 0.01659979112446308, -0.033280689269304276, 0.004281745757907629, 0.024467118084430695, -0.009671946056187153, -0.01732974871993065, 0.046879541128873825, 0.0030296645127236843, 0.009462421759963036, 0.002314913785085082, 0.014436951838433743, -0.009320485405623913, 0.011956444941461086, 0.021276930347085, 0.01023293286561966, -0.009888230822980404, -0.01709994673728943, -0.01812729611992836, -0.01104399748146534, -0.01939796470105648, -0.028873903676867485, -0.022696293890476227, -0.033632148057222366, 0.01718105375766754, -0.008671632967889309, 0.004957633092999458, -0.01091557927429676, -0.01753251440823078, -0.019019467756152153, -0.004673760384321213, 0.0052584027871489525, 0.015518371015787125, 0.029603861272335052, -0.012341701425611973, 0.013754305429756641, 0.018208403140306473, 0.004940735641866922, -0.016424059867858887, 0.0023335006553679705, -0.0038322806358337402, -0.0029485581908375025, 0.015423746779561043, 0.003771450836211443, -0.004771763924509287, 0.017586586996912956, 0.014436951838433743, 0.02521059475839138, 0.008914953097701073, 0.0168295931071043, -0.025061899796128273, -0.00028450629906728864, -0.0036362733226269484, 0.02233131416141987, 0.03379436209797859, -0.0037410359364002943, -0.012517431750893593, 0.00047523321700282395, 0.013639404438436031, 0.014869519509375095, -0.03068528138101101, 0.03484874591231346, -0.009847677312791348, -0.0011135241948068142, 0.020601043477654457, -0.01592390425503254, 0.004190500825643539, 0.002000626176595688, 0.004136430099606514, 0.02150673232972622, -0.0034132306464016438, 0.014328809455037117, 0.020411795005202293, 0.004051944240927696, 0.007752426899969578, 0.012301147915422916, -0.008245824836194515, 0.008928470313549042, -0.025413360446691513, 0.04787985235452652, 0.01628888212144375, -0.01928982138633728, 0.03455135598778725, -0.01673496887087822, 0.012186246924102306, -0.008002504706382751, -0.017464926466345787, -0.017032358795404434, 0.027251774445176125, -0.014653235673904419, -0.017505479976534843, -0.02683272399008274, -0.02906315214931965, -0.004592653829604387, 0.013004071079194546, -0.015491335652768612, -0.009908506646752357, -0.031225990504026413, 0.007063021883368492, 0.010962891392409801, 0.0031513243447989225, 0.012774269096553326, -0.026886794716119766, -0.010618188418447971, 0.03303736820816994, 0.008637838996946812, 0.0413372628390789, -0.0261568371206522, -0.03682233765721321, 0.0011659055016934872, 0.014504540711641312, -0.028279121965169907, -0.029360542073845863, 0.008381001651287079, 0.0075766961090266705, 0.03382139652967453, -0.006910947151482105, 0.01719457097351551, -0.0024348837323486805, 0.014274738729000092, -0.003879592753946781, 0.014788413420319557, -0.0060187759809195995, -0.004974530078470707, 0.010239691473543644, -0.015775207430124283, 0.021763568744063377, 0.012341701425611973, 0.010320798493921757, 0.011821268126368523, 0.009820641949772835, -0.006481758784502745, 0.002612304175272584, -0.0052009522914886475, -0.044581521302461624, -0.01719457097351551, -0.009462421759963036, 0.00773215014487505, -0.00423781294375658, -0.008908193558454514, 0.0017894115298986435, 0.0060965027660131454, 0.014504540711641312, 0.0585588701069355, 0.015383194200694561, 0.011077792383730412, -0.00946918036788702, 0.0157887265086174, -0.011138621717691422, 0.018532827496528625, 0.015058767981827259, -0.013206836767494678, 0.007657802663743496, -0.021587839350104332, -0.035740919411182404, 0.015585959888994694, -0.003120909444987774, 0.013281184248626232, -0.013781340792775154, -1.8454889868735336e-05, -0.024467118084430695, -0.035037994384765625, 0.012253835797309875, -0.012997311539947987, -0.034118786454200745, 0.018519310280680656, -0.023777713999152184, -0.001063677598722279, -0.027170667424798012, -0.02857651188969612, -0.013524503447115421, 0.00525502348318696, -0.012862134724855423, -0.003666688222438097, -0.005305714905261993, 0.0016246639424934983, -0.01451805792748928, 0.017221607267856598, -0.03233444690704346, 0.0174378901720047, 0.012111899442970753, 0.002250704448670149, 0.010300521738827229, 0.0015418677357956767, -0.01719457097351551, 0.041202086955308914, 0.02811690978705883, 0.006691283546388149, 0.021655427291989326, 0.025305218994617462, -0.0035044753458350897, -0.02683272399008274, 0.008556732907891273, 0.009922024793922901, -0.005133363418281078, -0.02253408171236515, 0.016315918415784836, 0.011672573164105415, -0.007400965318083763, -0.007292823400348425, 0.011192692443728447, -0.011625261045992374, 0.0068534966558218, 0.015896867960691452, -0.012902687303721905, -0.0021256653126329184, -0.0011321111815050244, -0.01778935268521309, 0.015248016454279423, -0.009746294468641281, 0.013632645830512047, 0.004525064956396818, -0.019330374896526337, -0.016883663833141327, -0.008381001651287079, 0.008935229852795601, 0.010949373245239258, -0.0063837552443146706, 0.010618188418447971, -0.011077792383730412, 0.0016263537108898163, -0.016221294179558754, 0.00048072481877170503, -0.02754916436970234, -0.008975782431662083, 0.0012858754489570856, 0.02683272399008274, -0.011307593435049057, 0.007184681482613087, -0.02963089756667614, 0.02857651188969612, -0.024602295830845833, 0.014301774092018604, -0.01185506209731102, -0.002607234986498952, -0.002517679939046502, -0.01214569341391325, 0.018330061808228493, 0.018154330551624298, -0.010246451012790203, -0.0033608493395149708, 0.0005132518708705902, -0.009016335941851139, -0.008894676342606544, -0.01777583546936512, -0.011016962118446827, -0.007711873389780521, -0.02462933212518692, -0.0016204396961256862, -0.006177609320729971, 0.004741349257528782, 0.020628077909350395, 0.0014371052384376526, -0.019573694095015526, 0.0037477947771549225, 0.007684838026762009, 0.0054611689411103725, -0.009949060156941414, -6.653265154454857e-05, -0.016126669943332672, 0.0008296516025438905, -0.014044936746358871, 0.0019938673358410597, 0.024453600868582726, -0.004761625546962023, -0.04639289900660515, -0.04020177200436592, 0.013227113522589207, 0.017992118373513222, -0.01179423276335001, -0.023507358506321907, 0.009685464203357697, 0.014653235673904419, 0.02115527167916298, -0.005511860363185406, -0.042932357639074326, 0.02358846552670002, -0.031090812757611275, -0.0046433452516794205, -0.0011363354278728366, -0.020492902025580406, 0.020506419241428375, 0.005119845736771822, 0.005778836086392403, 0.008671632967889309, -0.029441649094223976, -0.029360542073845863, -0.010530323721468449, 0.009232619777321815, -0.03265887126326561, -0.020857879891991615, 0.016572754830121994, -0.030306784436106682, -0.002777896588668227, 0.010327557101845741, -0.011429253034293652, 0.0014506230363622308, -0.006495276466012001, 0.002270981203764677, 0.0059038749895989895, 0.014896554872393608, -0.0078943632543087, 0.011618501506745815, -0.01260529737919569, 0.008752739988267422, -0.05139446631073952, -0.005498342681676149, -0.009854435920715332, 0.02139859087765217, 0.022844988852739334, -0.010733089409768581, -0.030820459127426147, -0.032821085304021835, -0.06310082972049713, -0.014031419530510902, -0.01539671141654253, -0.002223669085651636, 0.015072285197675228, 0.014207149855792522, 0.009590839967131615, -0.011848303489387035, -0.009766570292413235, 0.009462421759963036, -0.03763340041041374, -0.011949686333537102, -0.007448277436196804, -0.006796046160161495, 0.025399843230843544, -0.02185819298028946, -0.044554486870765686, -0.023723643273115158, -0.007705114781856537, 0.003960699308663607, 0.01383541151881218, 0.014477504417300224, -0.033388830721378326, 0.008651357144117355, -9.367374877911061e-05, 0.020749738439917564, 0.0057991123758256435, 0.033632148057222366, -0.0013695164816454053, -0.020587526261806488, -0.01045597530901432, 0.00818499457091093, 0.010746607556939125, -0.011463047936558723, -0.0039032488130033016, 0.013565056957304478, -0.011199451982975006, 0.039688099175691605, 0.01601852849125862, -0.004606171511113644, 0.005413856822997332, -0.001509763184003532, 0.0050083245150744915, 0.00042644262430258095, -0.00586332194507122, 0.003379436209797859, 0.012990552932024002, -0.008036299608647823, -0.003676826599985361, 0.010374869219958782, -0.008401278406381607, 0.01766769215464592, 0.0015756621723994613, -0.014720824547111988, 0.022290760651230812, -0.017113465815782547, 0.012706680223345757, -0.01045597530901432, -0.01962776482105255, -0.0020023160614073277, 0.01581576094031334, -0.012206523679196835, 0.017275677993893623, 0.0015215912135317922, -0.025480948388576508, -0.02114175260066986, 0.003060079412534833, -0.007035986054688692, 0.02787359058856964, -0.030739352107048035, 0.006167470943182707, -0.003274673828855157, 0.006126917898654938, 0.009273173287510872, -0.0184787567704916, -0.006478379480540752, 0.021466178819537163, 0.0038998695090413094, -0.002216910244897008, 0.018181366845965385, 0.21390479803085327, -0.01741085574030876, 0.005893736612051725, -0.007657802663743496, -0.0035382697824388742, 0.008725704625248909, -0.010902061127126217, 0.014977660961449146, -0.014477504417300224, 0.01884373649954796, -0.002004005713388324, -0.006731837056577206, -0.00514012249186635, 0.0039032488130033016, 0.005535516422241926, -0.0350920669734478, -0.007833532989025116, -0.005437512882053852, -0.013146006502211094, -0.001560454722493887, 0.010300521738827229, -0.01051004696637392, 0.009658428840339184, 0.0006285751587711275, 0.03228037431836128, 0.018803182989358902, -0.002453470602631569, -0.001361067988909781, 0.012591779232025146, 0.021885229274630547, -0.01789749413728714, 0.01284861657768488, 0.005626761354506016, -0.012679644860327244, 0.01638350635766983, -0.006637212820351124, 0.0022828092332929373, -0.014558611437678337, 0.0054037184454500675, -0.004933977033942938, 0.025143004953861237, -0.018789665773510933, 0.006586521398276091, 0.004379749298095703, -0.00942186824977398, 0.03384843468666077, 0.006981915328651667, 0.00035526324063539505, 0.0001783708721632138, 0.03238851577043533, -0.05063747242093086, 0.0026460986118763685, 0.003107391530647874, 0.006809563841670752, 0.0051130871288478374, 0.006694663316011429, 0.0032881915103644133, -0.015058767981827259, -0.007563178427517414, 0.032956261187791824, -0.007225234527140856, 0.035632774233818054, -0.0012174418661743402, 0.028738725930452347, -0.018424686044454575, 0.010543840937316418, 0.0033422624692320824, 0.007975469343364239, 0.01318656001240015, -0.010881784372031689, 0.00630940729752183, -0.009002817794680595, 0.004690657369792461, 0.015072285197675228, 0.002054697135463357, -0.022250209003686905, 0.009976095519959927, 0.012179488316178322, 0.0216013565659523, -0.0023909511510282755, 0.0058464244939386845, -0.009881471283733845, -0.012639091350138187, -0.015383194200694561, -0.006295889616012573, -0.0502319410443306, 0.017870459705591202, -0.005011703819036484, 0.014937108382582664, -0.02266925759613514, 0.010131550021469593, 0.011659055016934872, -0.001459916471503675, -0.023399217054247856, 0.010185620747506618, 0.005778836086392403, -0.008130923844873905, 0.02427786961197853, -0.013990866020321846, 0.0032577766105532646, 0.004944115411490202, 0.04523037374019623, 0.008962265215814114, -0.006556106265634298, -0.02358846552670002, 0.028333192691206932, -0.006613556761294603, 0.028279121965169907, 0.006184368394315243, 0.0010856438893824816, 0.016329435631632805, -0.04915051907300949, -0.004606171511113644, -0.01605908013880253, 0.00807009357959032, 0.010354592464864254, 0.01778935268521309, -0.027103079482913017, 0.033767327666282654, -0.000505225732922554, 0.0015680583892390132, -0.020411795005202293, 0.005995119921863079, 0.021925782784819603, -0.0012960138265043497, -0.023534394800662994, -0.036200519651174545, 0.0005922461859881878, -0.011260281316936016, -0.01973590813577175, 0.004744728561490774, -0.0018451721407473087, 0.003169911215081811, -0.0008283843053504825, 0.004058702848851681, -0.009949060156941414, -0.016437577083706856, -0.019452035427093506, 0.006718319375067949, 0.01528856996446848, 0.015315605327486992, 0.009475938975811005, 0.01103048026561737, -0.0023537774104624987, -0.002012454206123948, -0.007624008227139711, 0.026521814987063408, -0.00574842095375061, 0.015153392218053341, -0.020290134474635124, -0.002515990287065506, 0.006981915328651667, -0.023723643273115158, 0.001364447409287095, 0.016126669943332672, 0.005413856822997332, -0.005714626517146826, -0.028387265279889107, -0.030496032908558846, 0.019019467756152153, -0.03768747299909592, -0.003474060446023941, 0.05147557333111763, -0.011375182308256626, -0.006397272925823927, 0.009212343022227287, -0.17291899025440216, 0.029928287491202354, 0.005626761354506016, -0.010178862139582634, 0.022966649383306503, 0.0029941804241389036, 0.006039052736014128, -0.014450469054281712, -0.013179801404476166, 0.0005884443526156247, 0.02860354818403721, 0.017478443682193756, -0.005917392671108246, -0.005190813913941383, 0.01927630417048931, -0.003065148601308465, 0.012794545851647854, 0.017978601157665253, 0.011429253034293652, 0.014247703365981579, -0.0037883480545133352, 0.0008921711705625057, 0.0023875716142356396, -0.01481544878333807, 0.01636998914182186, 0.006498655769973993, -0.025197075679898262, -0.007340135518461466, 0.0004676294920500368, -0.0017505480209365487, -0.014910073019564152, 0.004423682112246752, 0.014139560982584953, -0.023155897855758667, 0.008455349132418633, -0.029684968292713165, -0.030766388401389122, -0.006867014337331057, -0.024345459416508675, 0.03160448744893074, 0.0185869000852108, 0.019844049587845802, 0.0026900311931967735, -0.009915266185998917, -0.015383194200694561, 0.005819389130920172, 0.015004697255790234, 0.013673199340701103, -0.015599478036165237, -0.018505793064832687, -0.005072533618658781, -0.008259342052042484, 0.011861821636557579, 0.01709994673728943, -0.025021346285939217, 0.019100572913885117, -0.005782215390354395, 0.015667065978050232, -0.013118971139192581, -0.002113837283104658, 0.01649164967238903, -0.0013712062500417233, 0.021912263706326485, 0.005160399246960878, 0.01731623150408268, -0.022277243435382843, -0.012699921615421772, 0.027711376547813416, -0.014531576074659824, 0.006502035539597273, 0.02591351792216301, -0.02208799496293068, 0.0009513113182038069, -0.017248641699552536, -0.007684838026762009, -0.015139874070882797, -0.014828965999186039, 0.015585959888994694, 0.005947807803750038, -0.008387761190533638, -0.01418011449277401, 0.030171606689691544, -0.02314237877726555, -0.018559863790869713, -0.018573381006717682, -0.03209112584590912, 0.0031428756192326546, -0.005454409867525101, 0.0309286005795002, -0.0010231243213638663, 0.0026376498863101006, -0.03555167093873024, -0.009861194528639317, -0.03195594996213913, 0.007475312799215317, 0.034091752022504807, 0.011280558072030544, -0.012118658050894737, 0.02207447774708271, -0.01628888212144375, 0.005042118951678276, -0.012868893332779408, -0.03146931156516075, 0.022507045418024063, -0.00040658842772245407, -0.007130610290914774, 0.003272983944043517, 0.011659055016934872, 0.0009884850587695837, -0.021547285839915276, -0.009753053076565266, 0.045635905116796494, 0.03192891180515289, 0.0016179051017388701, 0.0076915971003472805, 0.0021510112565010786, -0.006005258299410343, -0.0261568371206522, 0.0021476317197084427, -0.027116596698760986, 0.033388830721378326, 0.027481574565172195, -0.03168559446930885, -0.0350920669734478, 0.014639717526733875, 0.028495406731963158, -0.10327557474374771, -0.01731623150408268, 0.0049204593524336815, 0.0067690107971429825, 0.0025362668093293905, 0.01359885185956955, 0.006954879965633154, 0.012220041826367378, -0.04285125061869621, 0.0274139866232872, -0.016897181048989296, -0.039444778114557266, 0.020519936457276344, -0.006583141628652811, 0.012726956978440285, -0.00010629383177729324, 0.0026292013935744762, -0.037822648882865906, -0.02380474843084812, 0.017518997192382812, 0.0010172103065997362, -0.011976721696555614, -0.02941461279988289, 0.01418011449277401, -0.004775143228471279, -0.025021346285939217, -0.01673496887087822, 0.036092378199100494, 0.021195823326706886, 0.011219728738069534, -0.021412108093500137, -0.0031141506042331457, 0.03403768315911293, -0.02066863141953945, -0.006728457752615213, -0.012713438831269741, -0.021344520151615143, 0.0005385976401157677, 0.024683402851223946, -0.024480635300278664, 0.003521372564136982, -0.004021529108285904, 0.03684937208890915, -0.04604144021868706, 0.02556205540895462, -0.003026285208761692, 0.013963830657303333, 0.0025987864937633276, -0.005984981544315815, -0.004339196253567934, -0.04368935152888298, -0.020425312221050262, -0.010016649030148983, 0.00764428498223424, 0.02685975842177868, -0.0025362668093293905, -0.00511646643280983, 0.01778935268521309, -0.024129174649715424, -0.01916816271841526, 0.02011440508067608, 0.01109130959957838, -0.010719572193920612, -0.0033929538913071156, 0.04787985235452652, -0.020019780844449997, -0.010699295438826084, 0.0022439456079155207, -0.019492588937282562, -0.015437264926731586, -0.012902687303721905, 0.008941988460719585, -0.015275051817297935, -0.004683898761868477, -0.01284861657768488, -0.008908193558454514, -0.02415621094405651, -0.03360511362552643, 0.006248577497899532, -0.015423746779561043, -0.014085490256547928, -0.02779248356819153, 0.013713751919567585, -0.03763340041041374, -0.0022861885372549295, -0.019127609208226204, -0.011125104501843452, -0.007428000681102276, 0.0077794622629880905, -0.019573694095015526, 0.011476565152406693, -0.005353027023375034, 0.009955818764865398, -0.008908193558454514, -0.006839978974312544, 0.021412108093500137, 0.009442145004868507, 0.0012917894637212157, 0.00924613792449236, 0.019479069858789444, -0.021290447562932968, -0.0016516994219273329, -0.06239791214466095, 0.03173966333270073, -0.013538021594285965, -0.01718105375766754, -0.010090996511280537, -0.0019448655657470226, 0.009624633938074112, 0.0002259998000226915, 0.0030364233534783125, -0.00842831376940012, -0.007563178427517414, 0.01800563558936119, 0.0024331940803676844, 0.018262473866343498, -0.014058454893529415, -0.005133363418281078, 0.016870146617293358, -0.016072599217295647, 0.029441649094223976, 0.005511860363185406, -0.012794545851647854, 0.01522098109126091, -0.012091622687876225, 0.000906533794477582, -0.02173653431236744, -0.009840918704867363, -0.02323700301349163, -0.004254710394889116, -0.0027407228481024504, -0.005393580067902803, -0.019587213173508644, 0.004322298802435398, 0.006613556761294603, 0.056882672011852264, 0.000604496628511697, -0.013152766041457653, 0.014099008403718472, 0.04263496771454811, -0.004491270985454321, 0.015410229563713074, -0.03522724285721779, -0.0017708245432004333, 0.017289195209741592, -0.013058141805231571, -0.027630271390080452, 0.021060647442936897, 0.006515553221106529, 0.0015401780838146806, 0.028387265279889107, -0.015829280018806458, 0.02175005152821541, 0.01522098109126091, -0.008124164305627346, -0.010503287427127361, -0.009854435920715332, -0.02077677473425865, 0.013761064037680626, 0.008056576363742352, 0.006894049700349569, -0.04395970702171326, 0.030144570395350456, -0.0024416425731033087, 0.004656862933188677, -0.0056537967175245285, 0.013666439801454544, -0.01649164967238903, 0.022804435342550278, 0.0006378685939125717, -0.003267914755269885, 0.01214569341391325, -0.010226174257695675, -0.0060491906479001045, -0.003521372564136982, 0.010287003591656685, 0.0046129305846989155, -0.012855375185608864, 0.025710750371217728, -0.0048934235237538815, -0.024304905906319618, 0.01388272363692522, -0.007651043590158224, -0.006150573957711458, -0.00037955291918478906, 0.007624008227139711, 0.022452974691987038, -0.01603204570710659, -0.003425058675929904, 0.014328809455037117, 0.013605610467493534, 0.011098068207502365, -0.023966962471604347, 0.010854749009013176, 0.0008338759071193635, -0.003161462489515543, 0.01156443078070879, -0.00039032488712109625, 0.021925782784819603, -0.015653548762202263, 0.007921398617327213, 0.021574320271611214, -0.0010822644690051675, 0.004866388160735369, -0.00525502348318696, -0.04893423616886139, -0.0015384883154183626, -0.007495589554309845, -0.0137069933116436, -0.028170980513095856, -0.0023402594961225986, 0.010138308629393578, 0.023615499958395958, 0.0048731472343206406, 0.016924217343330383, 0.016667379066348076, -0.02210151217877865, 0.01974942535161972, 0.010496528819203377, -0.018640970811247826, -0.0325777642428875, 0.038877032697200775, 0.016424059867858887, -0.018803182989358902, 0.028873903676867485, -0.006995433010160923, -0.010084237903356552, 0.034470248967409134, 0.005102948751300573, -0.012321424670517445, 0.015410229563713074, -0.015977974981069565, 0.02173653431236744, -0.010787160135805607, -0.029009081423282623, -0.010090996511280537, -0.013781340792775154, 0.014396398328244686, 0.004727831110358238, 0.0034808192867785692, -0.014477504417300224, 0.05415208637714386, 0.014477504417300224, -0.005934290122240782, -0.005332750268280506, -0.005778836086392403, 0.02475099079310894, 0.011773956008255482, 0.016505166888237, 0.027981732040643692, -0.02172301523387432, 0.00714412797242403, -0.025710750371217728, 0.019032984972000122, -0.02045234851539135, -0.022507045418024063, 0.002061456209048629, 0.001507228589616716, 0.014369362965226173, -0.008448590524494648, -0.0012225110549479723, 0.02592703513801098, 0.006860255729407072, 0.01731623150408268, -0.00414994778111577, -0.05266513302922249, -0.041202086955308914, 0.007637525908648968, -0.01481544878333807, -0.014639717526733875, -0.03384843468666077, 0.007536142598837614, 0.003171600867062807, -0.02498079277575016, 0.01492359023541212, 0.0013374119298532605, -0.006235059816390276, -0.018884290009737015, -0.009604358114302158, 0.036092378199100494, 0.01435584481805563, -0.011226487345993519, 0.01411252561956644, -0.005650417413562536, -0.011773956008255482, -0.006775769870728254, 0.003027974860742688, -0.008786533959209919, -0.020290134474635124, -0.01528856996446848]::Vector(1536))"#; + let tokens = tokenize_sql(case).unwrap(); + let expr = parse_expr(&tokens, Dialect::PostgreSQL).unwrap(); + divan::black_box(expr); + } } diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index 87e02f29d817e..e88da257bfc6e 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -34,6 +34,29 @@ use crate::parser::Error; use crate::parser::ErrorKind; use crate::Span; +macro_rules! with_span { + ($parser:expr) => { + map(consumed($parser), |(span, elem)| WithSpan { span, elem }) + }; +} + +macro_rules! try_dispatch { + ($input:expr, $($pat:pat => $body:expr),+ $(,)?) => {{ + if let Some(token_0) = $input.tokens.first() { + use TokenKind::*; + + if let Some(result) = match token_0.kind { + $($pat => Some($body),)+ + _ => None, + } { + if result.is_ok() { + return result; + } + } + } + }}; +} + pub fn expr(i: Input) -> IResult { context("expression", subexpr(0)).parse(i) } @@ -1214,7 +1237,15 @@ pub fn expr_element(i: Input) -> IResult> { let variable_access = map(variable_ident, ExprElement::VariableAccess); let unary_op = map(unary_op, |op| ExprElement::UnaryOp { op }); - let map_access = map(map_access, |accessor| ExprElement::MapAccess { accessor }); + let bracket_map_access = map(map_access_bracket, |accessor| ExprElement::MapAccess { + accessor, + }); + let dot_number_map_access = map(map_access_dot_number, |accessor| ExprElement::MapAccess { + accessor, + }); + let colon_map_access = map(map_access_colon, |accessor| ExprElement::MapAccess { + accessor, + }); let dot_access = map( rule! { "." ~ #column_id @@ -1531,30 +1562,7 @@ pub fn expr_element(i: Input) -> IResult> { ExprElement::StageLocation { location } }); - macro_rules! with_span { - ($parser:expr) => { - map(consumed($parser), |(span, elem)| WithSpan { span, elem }) - }; - } - - macro_rules! try_dispatch { - ($($pat:pat => $body:expr),+ $(,)?) => {{ - if let Some(token_0) = i.tokens.first() { - use TokenKind::*; - - if let Some(result) = match token_0.kind { - $($pat => Some($body),)+ - _ => None, - } { - if result.is_ok() { - return result; - } - } - } - }}; - } - - try_dispatch!( + try_dispatch!(i, IS => with_span!(rule!(#is_null | #is_distinct_from)).parse(i), NOT => with_span!(rule!( #in_list @@ -1584,13 +1592,17 @@ pub fn expr_element(i: Input) -> IResult> { LParen => with_span!(rule!(#tuple | #subquery)).parse(i), ANY | SOME | ALL => with_span!(subquery).parse(i), Dot => { - return with_span!(rule!(#chain_function_call | #dot_access | #map_access)).parse(i); + return with_span!(rule!(#chain_function_call | #dot_access | #dot_number_map_access)) + .parse(i); }, Colon => { - return with_span!(map_access).parse(i); + return with_span!(colon_map_access).parse(i); }, LBracket => { - return with_span!(rule!(#list_comprehensions | #map_access | #array)).parse(i); + return with_span!(rule!( + #list_comprehensions | #bracket_map_access | #array + )) + .parse(i); }, LBrace => with_span!(map_expr).parse(i), LiteralAtString => with_span!(stage_location).parse(i), @@ -1669,7 +1681,7 @@ pub fn expr_element(i: Input) -> IResult> { with_span!(alt((rule!( #column_ref : "" - | #map_access : "[] | . | :" + | #dot_number_map_access : "." | #literal : "" ),))) .parse(i) @@ -1796,19 +1808,37 @@ pub fn json_op(i: Input) -> IResult { } pub fn literal(i: Input) -> IResult { - let string = map(literal_string, Literal::String); - let code_string = map(code_string, Literal::String); - let boolean = map(literal_bool, Literal::Boolean); - let null = value(Literal::Null, rule! { NULL }); + let mut string = map(literal_string, Literal::String); + let mut code_string = map(code_string, Literal::String); + let mut boolean = map(literal_bool, Literal::Boolean); + let mut null = value(Literal::Null, rule! { NULL }); + let mut decimal_uint = map_res( + rule! { + LiteralInteger + }, + |token| parse_uint(token.text(), 10).map_err(nom::Err::Failure), + ); + let mut hex_uint = map_res(literal_hex_str, |str| { + parse_uint(str, 16).map_err(nom::Err::Failure) + }); + let mut decimal_float = map_res( + rule! { + LiteralFloat + }, + |token| parse_float(token.text()).map_err(nom::Err::Failure), + ); - rule!( - #string - | #code_string - | #boolean - | #literal_number - | #null - ) - .parse(i) + try_dispatch!(i, + LiteralString => string.parse(i), + LiteralCodeString => code_string.parse(i), + LiteralInteger => decimal_uint.parse(i), + LiteralFloat => decimal_float.parse(i), + MySQLLiteralHex | PGLiteralHex => hex_uint(i), + TRUE | FALSE => boolean.parse(i), + NULL => null.parse(i), + ); + + Err(nom::Err::Error(Error::from_error_kind(i, ErrorKind::Other("expecting `LiteralString`, 'LiteralCodeString', 'LiteralInteger', 'LiteralFloat', 'TRUE', 'FALSE', or more ...")))) } pub fn literal_hex_str(i: Input) -> IResult<&str> { @@ -1872,33 +1902,6 @@ pub fn literal_i64(i: Input) -> IResult { .parse(i) } -pub fn literal_number(i: Input) -> IResult { - let decimal_uint = map_res( - rule! { - LiteralInteger - }, - |token| parse_uint(token.text(), 10).map_err(nom::Err::Failure), - ); - - let hex_uint = map_res(literal_hex_str, |str| { - parse_uint(str, 16).map_err(nom::Err::Failure) - }); - - let decimal_float = map_res( - rule! { - LiteralFloat - }, - |token| parse_float(token.text()).map_err(nom::Err::Failure), - ); - - rule!( - #decimal_uint - | #decimal_float - | #hex_uint - ) - .parse(i) -} - pub fn literal_bool(i: Input) -> IResult { alt((value(true, rule! { TRUE }), value(false, rule! { FALSE }))).parse(i) } @@ -2406,37 +2409,34 @@ pub fn interval_kind(i: Input) -> IResult { .parse(i) } -pub fn map_access(i: Input) -> IResult { - let bracket = map( +fn map_access_bracket(i: Input) -> IResult { + map( rule! { - "[" ~ #subexpr(0) ~ "]" + "[" ~ #subexpr(0) ~ "]" }, |(_, key, _)| MapAccessor::Bracket { key: Box::new(key) }, - ); - let dot_number = map_res( - rule! { - LiteralFloat - }, - |key| { - if key.text().starts_with('.') { - if let Ok(key) = (key.text()[1..]).parse::() { - return Ok(MapAccessor::DotNumber { key }); - } + ) + .parse(i) +} + +fn map_access_dot_number(i: Input) -> IResult { + map_res(rule! { LiteralFloat }, |key| { + if key.text().starts_with('.') { + if let Ok(key) = (key.text()[1..]).parse::() { + return Ok(MapAccessor::DotNumber { key }); } - Err(nom::Err::Error(ErrorKind::ExpectText("."))) - }, - ); - let colon = map( + } + Err(nom::Err::Error(ErrorKind::ExpectText("."))) + }) + .parse(i) +} + +fn map_access_colon(i: Input) -> IResult { + map( rule! { - ":" ~ #ident + ":" ~ #ident }, |(_, key)| MapAccessor::Colon { key }, - ); - - rule!( - #bracket - | #dot_number - | #colon ) .parse(i) } diff --git a/src/query/ast/src/parser/script.rs b/src/query/ast/src/parser/script.rs index a346704f18153..5ebefcbe0aabe 100644 --- a/src/query/ast/src/parser/script.rs +++ b/src/query/ast/src/parser/script.rs @@ -17,6 +17,8 @@ use nom_rule::rule; use crate::ast::*; use crate::parser::common::*; +use crate::parser::error::Error; +use crate::parser::error::ErrorKind; use crate::parser::expr::*; use crate::parser::input::Input; use crate::parser::statement::*; @@ -157,6 +159,15 @@ pub fn script_stmts(i: Input) -> IResult> { } pub fn script_stmt(i: Input) -> IResult { + if let Some(token) = i.tokens.first() { + let kind = token.kind; + if matches!(kind, END | ELSE | ELSEIF | WHEN | UNTIL) { + return Err(nom::Err::Error(Error::from_error_kind( + i, + ErrorKind::Other("block terminator"), + ))); + } + } let let_var_stmt = map( rule! { LET ~ #declare_var diff --git a/src/query/script/tests/it/testdata/script-error.txt b/src/query/script/tests/it/testdata/script-error.txt index c76af3a83d98d..2907332a211e9 100644 --- a/src/query/script/tests/it/testdata/script-error.txt +++ b/src/query/script/tests/it/testdata/script-error.txt @@ -73,11 +73,11 @@ LET x := 'min'; LET y := IDENTIFIER(:x)([1,2]); ---------- Output ---------- error: - --> SQL:2:10 + --> SQL:2:1 | 1 | LET x := 'min'; 2 | LET y := IDENTIFIER(:x)([1,2]); - | ^^^^^^^^^^^^^^ variable is not allowed in this context + | ^^^ unable to parse rest of the sql, rest tokens: [LET(16..19), Ident(20..21), ColonEqual(22..24), IDENTIFIER(25..35), LParen(35..36), Colon(36..37), Ident(37..38), RParen(38..39), LParen(39..40), LBracket(40..41), LiteralInteger(41..42), Comma(42..43), LiteralInteger(43..44), RBracket(44..45), RParen(45..46), SemiColon(46..47), EOI(47..47)] ---------- Input ---------- From e3911c4ad75b33d9224b70acfc7c426492b31d93 Mon Sep 17 00:00:00 2001 From: kould Date: Tue, 11 Nov 2025 12:39:45 +0800 Subject: [PATCH 06/13] chore: fix unit test --- src/query/ast/src/parser/expr.rs | 2 +- src/query/ast/tests/it/testdata/expr-error.txt | 4 ++-- src/query/ast/tests/it/testdata/stmt-error.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index e88da257bfc6e..1e63e45c6d09c 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -1838,7 +1838,7 @@ pub fn literal(i: Input) -> IResult { NULL => null.parse(i), ); - Err(nom::Err::Error(Error::from_error_kind(i, ErrorKind::Other("expecting `LiteralString`, 'LiteralCodeString', 'LiteralInteger', 'LiteralFloat', 'TRUE', 'FALSE', or more ...")))) + Err(nom::Err::Error(Error::from_error_kind(i, ErrorKind::Other("expecting ``, '', '', '', 'TRUE', 'FALSE', or more ...")))) } pub fn literal_hex_str(i: Input) -> IResult<&str> { diff --git a/src/query/ast/tests/it/testdata/expr-error.txt b/src/query/ast/tests/it/testdata/expr-error.txt index fb1f524a562c1..6c826ef40fffe 100644 --- a/src/query/ast/tests/it/testdata/expr-error.txt +++ b/src/query/ast/tests/it/testdata/expr-error.txt @@ -52,7 +52,7 @@ error: --> SQL:1:10 | 1 | CAST(col1) - | ---- ^ unexpected `)`, expecting , , `IDENTIFIER`, `ColumnPosition`, `ROW`, `[`, , `:`, , `TRUE`, `FALSE`, , , , `NULL`, `AS`, or `,` + | ---- ^ expecting ``, '', '', '', 'TRUE', 'FALSE', or more ... | | | while parsing `function(... [ , x -> ... ] ) [ (...) ] [ WITHIN GROUP ( ORDER BY , ... ) ] [ OVER ([ PARTITION BY , ... ] [ ORDER BY , ... ] [ ]) ]` | while parsing expression @@ -79,7 +79,7 @@ error: 1 | $ abc + 3 | ^ | | - | unexpected `$`, expecting `ColumnPosition`, `ROW`, , , `IDENTIFIER`, `[`, , `:`, , `TRUE`, `FALSE`, , , , or `NULL` + | expecting ``, '', '', '', 'TRUE', 'FALSE', or more ... | while parsing expression diff --git a/src/query/ast/tests/it/testdata/stmt-error.txt b/src/query/ast/tests/it/testdata/stmt-error.txt index 97067acb77b6f..ce3ded5ccf557 100644 --- a/src/query/ast/tests/it/testdata/stmt-error.txt +++ b/src/query/ast/tests/it/testdata/stmt-error.txt @@ -608,7 +608,7 @@ error: --> SQL:1:41 | 1 | SELECT * FROM t GROUP BY GROUPING SETS () - | ------ ^ unexpected `)`, expecting `(`, `ColumnPosition`, `ROW`, , , `IDENTIFIER`, `[`, , `:`, , `TRUE`, `FALSE`, , , , or `NULL` + | ------ ^ expecting ``, '', '', '', 'TRUE', 'FALSE', or more ... | | | while parsing `SELECT ...` From 27c66ce706543536e0c4c5a6ae386b802d47b5ef Mon Sep 17 00:00:00 2001 From: kould Date: Wed, 12 Nov 2025 02:32:46 +0800 Subject: [PATCH 07/13] perf: use try_dispatch to categorize the statement_body, reducing the number of branches and stack usage (otherwise, stack overflow is extremely likely). --- src/query/ast/benches/bench.rs | 12 +- src/query/ast/src/parser/expr.rs | 21 +- src/query/ast/src/parser/mod.rs | 17 + src/query/ast/src/parser/statement.rs | 496 ++++++++++-------- src/query/ast/src/parser/stream.rs | 17 +- .../ast/tests/it/testdata/stmt-error.txt | 10 +- 6 files changed, 306 insertions(+), 267 deletions(-) diff --git a/src/query/ast/benches/bench.rs b/src/query/ast/benches/bench.rs index 781000e569c52..8bad12b57fba1 100644 --- a/src/query/ast/benches/bench.rs +++ b/src/query/ast/benches/bench.rs @@ -18,12 +18,12 @@ fn main() { // bench fastest │ slowest │ median │ mean │ samples │ iters // ╰─ dummy │ │ │ │ │ -// ├─ deep_function_call 132.5 µs │ 470.9 µs │ 162.3 µs │ 166.2 µs │ 100 │ 100 -// ├─ deep_query 232.3 µs │ 495 µs │ 283.6 µs │ 283.4 µs │ 100 │ 100 -// ├─ large_query 196.4 µs │ 348.2 µs │ 240.1 µs │ 240.1 µs │ 100 │ 100 -// ├─ large_statement 197.6 µs │ 286.6 µs │ 234.6 µs │ 236.1 µs │ 100 │ 100 -// ├─ wide_embedding 3.964 ms │ 7.229 ms │ 4.3 ms │ 4.397 ms │ 100 │ 100 -// ╰─ wide_expr 40.59 µs │ 56.32 µs │ 43.06 µs │ 43.68 µs │ 100 │ 100 +// ├─ deep_function_call 117.2 µs │ 407.3 µs │ 127.5 µs │ 132.2 µs │ 100 │ 100 +// ├─ deep_query 229.7 µs │ 395.8 µs │ 242.6 µs │ 244.6 µs │ 100 │ 100 +// ├─ large_query 203.8 µs │ 275.7 µs │ 210.9 µs │ 212.8 µs │ 100 │ 100 +// ├─ large_statement 206.7 µs │ 238.3 µs │ 212.9 µs │ 213.7 µs │ 100 │ 100 +// ├─ wide_embedding 3.628 ms │ 4.088 ms │ 3.761 ms │ 3.762 ms │ 100 │ 100 +// ╰─ wide_expr 39.76 µs │ 46.04 µs │ 40.18 µs │ 40.43 µs │ 100 │ 100 #[divan::bench_group(max_time = 0.5)] mod dummy { diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index 1e63e45c6d09c..987ac5dc6c348 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -40,23 +40,6 @@ macro_rules! with_span { }; } -macro_rules! try_dispatch { - ($input:expr, $($pat:pat => $body:expr),+ $(,)?) => {{ - if let Some(token_0) = $input.tokens.first() { - use TokenKind::*; - - if let Some(result) = match token_0.kind { - $($pat => Some($body),)+ - _ => None, - } { - if result.is_ok() { - return result; - } - } - } - }}; -} - pub fn expr(i: Input) -> IResult { context("expression", subexpr(0)).parse(i) } @@ -1562,7 +1545,7 @@ pub fn expr_element(i: Input) -> IResult> { ExprElement::StageLocation { location } }); - try_dispatch!(i, + try_dispatch!(i, true, IS => with_span!(rule!(#is_null | #is_distinct_from)).parse(i), NOT => with_span!(rule!( #in_list @@ -1828,7 +1811,7 @@ pub fn literal(i: Input) -> IResult { |token| parse_float(token.text()).map_err(nom::Err::Failure), ); - try_dispatch!(i, + try_dispatch!(i, true, LiteralString => string.parse(i), LiteralCodeString => code_string.parse(i), LiteralInteger => decimal_uint.parse(i), diff --git a/src/query/ast/src/parser/mod.rs b/src/query/ast/src/parser/mod.rs index 7c8a62d80ef91..6d7ac8c96384b 100644 --- a/src/query/ast/src/parser/mod.rs +++ b/src/query/ast/src/parser/mod.rs @@ -12,6 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. +macro_rules! try_dispatch { + ($input:expr, $return_if_ok:literal, $($pat:pat => $body:expr),+ $(,)?) => {{ + if let Some(token_0) = $input.tokens.first() { + use TokenKind::*; + + if let Some(result) = match token_0.kind { + $($pat => Some($body),)+ + _ => None, + } { + if !$return_if_ok || result.is_ok() { + return result; + } + } + } + }}; +} + mod comment; mod common; mod copy; diff --git a/src/query/ast/src/parser/statement.rs b/src/query/ast/src/parser/statement.rs index 94f76d907c37d..7bb92c74cca29 100644 --- a/src/query/ast/src/parser/statement.rs +++ b/src/query/ast/src/parser/statement.rs @@ -20,6 +20,9 @@ use nom::Parser; use nom_rule::rule; use super::sequence::sequence; +use super::stream::describe_stream; +use super::stream::show_streams; +use super::stream::create_stream; use crate::ast::*; use crate::parser::comment::comment; use crate::parser::common::*; @@ -32,7 +35,7 @@ use crate::parser::expr::*; use crate::parser::input::Input; use crate::parser::query::*; use crate::parser::stage::*; -use crate::parser::stream::stream_table; +use crate::parser::stream::{drop_stream}; use crate::parser::token::*; use crate::parser::Error; use crate::parser::ErrorKind; @@ -70,6 +73,10 @@ fn procedure_type_name(i: Input) -> IResult> { .parse(i) } +fn query_statement(i: Input) -> IResult { + map(query, |query| Statement::Query(Box::new(query))).parse(i) +} + pub fn statement_body(i: Input) -> IResult { let explain = map_res( rule! { @@ -2397,9 +2404,9 @@ pub fn statement_body(i: Input) -> IResult { }, ); - let begin = value(Statement::Begin, rule! { BEGIN ~ TRANSACTION? }); - let commit = value(Statement::Commit, rule! { COMMIT }); - let abort = value(Statement::Abort, rule! { ABORT | ROLLBACK }); + let mut begin = value(Statement::Begin, rule! { BEGIN ~ TRANSACTION? }); + let mut commit = value(Statement::Commit, rule! { COMMIT }); + let mut abort = value(Statement::Abort, rule! { ABORT | ROLLBACK }); let execute_immediate = map( rule! { @@ -2574,7 +2581,7 @@ pub fn statement_body(i: Input) -> IResult { let describe_procedure = map( rule! { - ( DESC | DESCRIBE ) ~ ^PROCEDURE ~ #ident ~ #procedure_type_name + ( DESC | DESCRIBE ) ~ PROCEDURE ~ #ident ~ #procedure_type_name }, |(_, _, name, args)| { Statement::DescProcedure(DescProcedureStmt { @@ -2593,199 +2600,174 @@ pub fn statement_body(i: Input) -> IResult { }, ); - alt(( - // query, explain, report, show - rule!( - #map(query, |query| Statement::Query(Box::new(query))) - | #explain : "`EXPLAIN [PIPELINE | GRAPH] `" + try_dispatch!(i, false, + SELECT | VALUES => query_statement(i), + WITH => rule!( + #delete + | #update + | #insert_stmt(false, false) + | #copy_into + | #query_statement + ).parse(i), + HintPrefix | LParen | FROM => query_statement(i), + EXPLAIN => rule!( + #explain : "`EXPLAIN [PIPELINE | GRAPH] `" | #explain_analyze : "`EXPLAIN ANALYZE `" - | #report: "`REPORT ISSUE `" - | #show_settings : "`SHOW SETTINGS []`" - | #show_variables : "`SHOW VARIABLES []`" - | #show_stages : "`SHOW STAGES`" - | #show_engines : "`SHOW ENGINES`" - | #show_process_list : "`SHOW PROCESSLIST`" - | #show_metrics : "`SHOW METRICS`" - | #show_functions : "`SHOW FUNCTIONS []`" - | #show_indexes : "`SHOW INDEXES`" - | #show_locks : "`SHOW LOCKS [IN ACCOUNT] [WHERE ...]`" - | #kill_stmt : "`KILL (QUERY | CONNECTION) `" - | #vacuum_temp_files : "VACUUM TEMPORARY FILES [RETAIN number SECONDS|DAYS] [LIMIT number]" - | #set_priority: "`SET PRIORITY (HIGH | MEDIUM | LOW) `" - | #system_action: "`SYSTEM (ENABLE | DISABLE) EXCEPTION_BACKTRACE`" - ), - // use - rule!( + ).parse(i), + REPORT => rule!(#report: "`REPORT ISSUE `").parse(i), + SETTINGS => rule!(#query_setting : "SETTINGS ( { = | (, ...) = (, ...)} ) Statement").parse(i), + CATALOG => rule!(#use_catalog: "`USE CATALOG `").parse(i), + SHOW => rule!( + ( + #show_tasks : "`SHOW TASKS []`" + | #show_settings : "`SHOW SETTINGS []`" + | #show_variables : "`SHOW VARIABLES []`" + | #show_stages : "`SHOW STAGES`" + | #show_process_list : "`SHOW PROCESSLIST`" + | #show_metrics : "`SHOW METRICS`" + | #show_engines : "`SHOW ENGINES`" + | #show_functions : "`SHOW FUNCTIONS []`" + | #show_user_functions : "`SHOW USER FUNCTIONS []`" + | #show_table_functions : "`SHOW TABLE_FUNCTIONS []`" + | #show_indexes : "`SHOW INDEXES`" + | #show_locks : "`SHOW LOCKS [IN ACCOUNT] [WHERE ...]`" + ) + | ( + #show_catalogs : "`SHOW CATALOGS []`" + | #show_create_catalog : "`SHOW CREATE CATALOG `" + | #show_online_nodes: "`SHOW ONLINE NODES`" + | #show_warehouses: "`SHOW WAREHOUSES`" + | #show_workload_groups: "`SHOW WORKLOAD GROUPS`" + | #show_databases : "`SHOW [FULL] DATABASES [(FROM | IN) ] []`" + | #show_drop_databases : "`SHOW DROP DATABASES [FROM ] []`" + | #show_create_database : "`SHOW CREATE DATABASE `" + ) + | ( + #show_tables : "`SHOW [FULL] TABLES [FROM ] []`" + | #show_columns : "`SHOW [FULL] COLUMNS FROM

[FROM|IN .] []`" + | #show_create_table : "`SHOW CREATE TABLE [.]
`" + | #show_fields : "`SHOW FIELDS FROM [.]
`" + | #show_statistics: "`SHOW STATISTICS [FROM DATABASE [.] | FROM TABLE [.].
]`" + | #show_tables_status : "`SHOW TABLES STATUS [FROM ] []`" + | #show_drop_tables_status : "`SHOW DROP TABLES [FROM ]`" + | #show_views : "`SHOW [FULL] VIEWS [FROM ] []`" + | #show_virtual_columns : "`SHOW VIRTUAL COLUMNS FROM
[FROM|IN .] []`" + ) + | ( + #show_dictionaries : "`SHOW DICTIONARIES [, ...]`" + | #show_create_dictionary : "`SHOW CREATE DICTIONARY `" + | #show_users : "`SHOW USERS`" + | #show_roles : "`SHOW ROLES`" + | #show_grants : "`SHOW GRANTS {FOR { ROLE | USER }] | ON {DATABASE | TABLE .} }`" + | #show_connections: "`SHOW CONNECTIONS`" + | #show_file_formats: "`SHOW FILE FORMATS`" + | #show_network_policies: "`SHOW NETWORK POLICIES`" + | #show_password_policies: "`SHOW PASSWORD POLICIES []`" + | #show_procedures : "`SHOW PROCEDURES []()`" + | #show_streams: "`SHOW [FULL] STREAMS [FROM ] []`" + | #sequence + ) + ).parse(i), + USE => rule!( #use_catalog: "`USE CATALOG `" | #use_warehouse: "`USE WAREHOUSE `" | #use_database : "`USE `" - ), - // warehouse - rule!( - #show_warehouses: "`SHOW WAREHOUSES`" - | #show_online_nodes: "`SHOW ONLINE NODES`" - | #create_warehouse: "`CREATE WAREHOUSE [(ASSIGN NODES [FROM ] [, ...])] WITH [warehouse_size = ]`" - | #drop_warehouse: "`DROP WAREHOUSE `" - | #rename_warehouse: "`RENAME WAREHOUSE TO `" - | #resume_warehouse: "`RESUME WAREHOUSE `" - | #suspend_warehouse: "`SUSPEND WAREHOUSE `" - | #inspect_warehouse: "`INSPECT WAREHOUSE `" - | #add_warehouse_cluster: "`ALTER WAREHOUSE ADD CLUSTER [(ASSIGN NODES [FROM ] [, ...])] WITH [cluster_size = ]`" - | #drop_warehouse_cluster: "`ALTER WAREHOUSE DROP CLUSTER `" - | #rename_warehouse_cluster: "`ALTER WAREHOUSE RENAME CLUSTER TO `" - | #assign_warehouse_nodes: "`ALTER WAREHOUSE ASSIGN NODES ( ASSIGN NODES [FROM ] FOR [, ...] )`" - | #unassign_warehouse_nodes: "`ALTER WAREHOUSE UNASSIGN NODES ( UNASSIGN NODES [FROM ] FOR [, ...] )`" - ), - // workload group - rule!( - #show_workload_groups: "`SHOW WORKLOAD GROUPS`" - | #create_workload_group: "`CREATE WORKLOAD GROUP [IF NOT EXISTS] WITH []`" - | #drop_workload_group: "`DROP WORKLOAD GROUP [IF EXISTS] `" - | #rename_workload_group: "`RENAME WORKLOAD GROUP TO `" - | #set_workload_group_quotas: "`ALTER WORKLOAD GROUP SET []`" - | #unset_workload_group_quotas: "`ALTER WORKLOAD GROUP UNSET { | (, ...)}`" - ), - // database - rule!( - #show_databases : "`SHOW [FULL] DATABASES [(FROM | IN) ] []`" - | #undrop_database : "`UNDROP DATABASE `" - | #show_create_database : "`SHOW CREATE DATABASE `" - | #show_drop_databases : "`SHOW DROP DATABASES [FROM ] []`" - | #create_database : "`CREATE [OR REPLACE] DATABASE [IF NOT EXISTS] [ENGINE = ]`" - | #drop_database : "`DROP DATABASE [IF EXISTS] `" - | #alter_database : "`ALTER DATABASE [IF EXISTS] `" - ), - // network policy / password policy - rule!( - #create_network_policy: "`CREATE NETWORK POLICY [IF NOT EXISTS] name ALLOWED_IP_LIST = ('ip1' [, 'ip2']) [BLOCKED_IP_LIST = ('ip1' [, 'ip2'])] [COMMENT = '']`" - | #alter_network_policy: "`ALTER NETWORK POLICY [IF EXISTS] name SET [ALLOWED_IP_LIST = ('ip1' [, 'ip2'])] [BLOCKED_IP_LIST = ('ip1' [, 'ip2'])] [COMMENT = '']`" - | #drop_network_policy: "`DROP NETWORK POLICY [IF EXISTS] name`" - | #describe_network_policy: "`DESC NETWORK POLICY name`" - | #show_network_policies: "`SHOW NETWORK POLICIES`" - | #create_password_policy: "`CREATE PASSWORD POLICY [IF NOT EXISTS] name [PASSWORD_MIN_LENGTH = ] ... [COMMENT = '']`" - | #alter_password_policy: "`ALTER PASSWORD POLICY [IF EXISTS] name SET [PASSWORD_MIN_LENGTH = ] ... [COMMENT = '']`" - | #drop_password_policy: "`DROP PASSWORD POLICY [IF EXISTS] name`" - | #describe_password_policy: "`DESC PASSWORD POLICY name`" - | #show_password_policies: "`SHOW PASSWORD POLICIES []`" - ), - rule!( - #conditional_multi_table_insert() : "`INSERT [OVERWRITE] {FIRST|ALL} { WHEN THEN intoClause [ ... ] } [ ... ] [ ELSE intoClause ] `" - | #unconditional_multi_table_insert() : "`INSERT [OVERWRITE] ALL intoClause [ ... ] `" - | #insert_stmt(false, false) : "`INSERT INTO [TABLE]
[(, ...)] (VALUES | )`" - | #replace_stmt(false) : "`REPLACE INTO [TABLE]
[(, ...)] (FORMAT | VALUES | )`" - | #merge : "`MERGE INTO USING ON { matchedClause | notMatchedClause } [ ... ]`" - | #delete : "`DELETE FROM
[WHERE ...]`" - | #update : "`UPDATE
SET = [, = , ... ] [WHERE ...]`" - | #begin - | #commit - | #abort - ), - rule!( - #show_users : "`SHOW USERS`" - | #describe_user: "`DESCRIBE USER `" - | #create_user : "`CREATE [OR REPLACE] USER [IF NOT EXISTS] '' IDENTIFIED [WITH ] [BY ] [WITH , ...]`" - | #alter_user : "`ALTER USER ('' | USER()) [IDENTIFIED [WITH ] [BY ]] [WITH , ...]`" - | #drop_user : "`DROP USER [IF EXISTS] ''`" - | #show_roles : "`SHOW ROLES`" - | #create_role : "`CREATE ROLE [IF NOT EXISTS] [COMMENT ='']`" - | #alter_role : "`ALTER ROLE [IF EXISTS] SET COMMENT = '' | UNSET COMMENT`" - | #drop_role : "`DROP ROLE [IF EXISTS] `" - | #create_udf : "`CREATE [OR REPLACE] FUNCTION [IF NOT EXISTS] [DESC = ]`" - | #drop_udf : "`DROP FUNCTION [IF EXISTS] `" - | #alter_udf : "`ALTER FUNCTION [DESC = ]`" + ).parse(i), + KILL => rule!(#kill_stmt : "`KILL (QUERY | CONNECTION) `").parse(i), + SET => rule!( + #set_priority: "`SET PRIORITY (HIGH | MEDIUM | LOW) `" | #set_role: "`SET [DEFAULT] ROLE `" | #set_secondary_roles: "`SET SECONDARY ROLES (ALL | NONE)`" | #set_secondary_specify_roles: "`SET SECONDARY ROLES [role_name,...]`" - | #show_user_functions : "`SHOW USER FUNCTIONS []`" - ), - rule!( - #show_tables : "`SHOW [FULL] TABLES [FROM ] []`" - | #show_columns : "`SHOW [FULL] COLUMNS FROM
[FROM|IN .] []`" - | #show_create_table : "`SHOW CREATE TABLE [.]
`" - | #describe_view : "`DESCRIBE VIEW [.]`" - | #describe_table : "`DESCRIBE [.]
`" - | #show_fields : "`SHOW FIELDS FROM [.]
`" - | #show_tables_status : "`SHOW TABLES STATUS [FROM ] []`" - | #show_drop_tables_status : "`SHOW DROP TABLES [FROM ]`" - | #attach_table : "`ATTACH TABLE [.]
`" - | #create_table : "`CREATE [OR REPLACE] TABLE [IF NOT EXISTS] [.]
[] []`" - | #drop_table : "`DROP TABLE [IF EXISTS] [.]
`" - | #undrop_table : "`UNDROP TABLE [.]
`" - | #alter_table : "`ALTER TABLE [.]
`" - | #rename_table : "`RENAME TABLE [.]
TO `" - | #truncate_table : "`TRUNCATE TABLE [.]
`" - | #optimize_table : "`OPTIMIZE TABLE [.]
(ALL | PURGE | COMPACT [SEGMENT])`" + | #set_stmt : "`SET [variable] { = | (, ...) = (, ...)}`" + | #use_catalog: "`USE CATALOG `" + ).parse(i), + UNSET => rule!(#unset_stmt : "`UNSET [variable] { | (, ...)}`").parse(i), + SYSTEM => rule!(#system_action: "`SYSTEM (ENABLE | DISABLE) EXCEPTION_BACKTRACE`" + ).parse(i), + MERGE => rule!(#merge : "`MERGE INTO USING ON { matchedClause | notMatchedClause } [ ... ]`" + ).parse(i), + DELETE => rule!(#delete : "`DELETE FROM
[WHERE ...]`" + ).parse(i), + UPDATE => rule!(#update : "`UPDATE
SET = [, = , ... ] [WHERE ...]`" + ).parse(i), + INSERT => rule!( + #conditional_multi_table_insert() : "`INSERT [OVERWRITE] {FIRST|ALL} { WHEN THEN intoClause [ ... ] } [ ... ] [ ELSE intoClause ] `" + | #unconditional_multi_table_insert() : "`INSERT [OVERWRITE] ALL intoClause [ ... ] `" + | #insert_stmt(false, false) : "`INSERT INTO [TABLE]
[(, ...)] (VALUES | )`" + ).parse(i), + REPLACE => rule!(#replace_stmt(false) : "`REPLACE INTO [TABLE]
[(, ...)] (FORMAT | VALUES | )`" + ).parse(i), + COPY => rule!(#copy_into).parse(i), + BEGIN => rule!(#begin).parse(i), + COMMIT => rule!(#commit).parse(i), + ABORT | ROLLBACK => rule!(#abort).parse(i), + TRUNCATE => rule!(#truncate_table : "`TRUNCATE TABLE [.]
`" + ).parse(i), + OPTIMIZE => rule!(#optimize_table : "`OPTIMIZE TABLE [.]
(ALL | PURGE | COMPACT [SEGMENT])`" + ).parse(i), + VACUUM => rule!( + #vacuum_temp_files : "VACUUM TEMPORARY FILES [RETAIN number SECONDS|DAYS] [LIMIT number]" | #vacuum_table : "`VACUUM TABLE [.]
[RETAIN number HOURS] [DRY RUN | DRY RUN SUMMARY]`" | #vacuum_drop_table : "`VACUUM DROP TABLE [FROM [.]] [RETAIN number HOURS] [DRY RUN | DRY RUN SUMMARY]`" - | #analyze_table : "`ANALYZE TABLE [.]
`" - | #exists_table : "`EXISTS TABLE [.]
`" - | #show_table_functions : "`SHOW TABLE_FUNCTIONS []`" - ), - // dictionary - rule!( - #create_dictionary : "`CREATE [OR REPLACE] DICTIONARY [IF NOT EXISTS] [(, ...)] PRIMARY KEY [, ...] SOURCE ( ([])) [COMMENT ] `" - | #drop_dictionary : "`DROP DICTIONARY [IF EXISTS] `" - | #show_create_dictionary : "`SHOW CREATE DICTIONARY `" - | #show_dictionaries : "`SHOW DICTIONARIES [, ...]`" - | #rename_dictionary: "`RENAME DICTIONARY [.] TO `" - ), - // view,index - rule!( - #create_view : "`CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [.] [(, ...)] AS SELECT ...`" - | #drop_view : "`DROP VIEW [IF EXISTS] [.]`" - | #alter_view : "`ALTER VIEW [.] [(, ...)] AS SELECT ...`" - | #show_views : "`SHOW [FULL] VIEWS [FROM ] []`" - | #create_index: "`CREATE [OR REPLACE] AGGREGATING INDEX [IF NOT EXISTS] AS SELECT ...`" - | #drop_index: "`DROP INDEX [IF EXISTS] `" - | #refresh_index: "`REFRESH INDEX [LIMIT ]`" - | #create_table_index: "`CREATE [OR REPLACE] INDEX [IF NOT EXISTS] ON [.]
(, ...)`" - | #drop_table_index: "`DROP INDEX [IF EXISTS] ON [.]
`" + | #vacuum_temporary_tables + ).parse(i), + ANALYZE => rule!(#analyze_table : "`ANALYZE TABLE [.]
`" + ).parse(i), + EXISTS => rule!(#exists_table : "`EXISTS TABLE [.]
`" + ).parse(i), + UNDROP => rule!( + #undrop_database : "`UNDROP DATABASE `" + | #undrop_table : "`UNDROP TABLE [.]
`" + ).parse(i), + ATTACH => rule!(#attach_table : "`ATTACH TABLE [.]
`" + ).parse(i), + REFRESH => rule!( + #refresh_index: "`REFRESH INDEX [LIMIT ]`" | #refresh_table_index: "`REFRESH INDEX ON [.]
[LIMIT ]`" | #refresh_virtual_column: "`REFRESH VIRTUAL COLUMN FOR [.]
`" - | #show_virtual_columns : "`SHOW VIRTUAL COLUMNS FROM
[FROM|IN .] []`" - | #show_statistics: "`SHOW STATISTICS [FROM DATABASE [.] | FROM TABLE [.].
]`" - | #sequence - ), - rule!( - #create_stage: "`CREATE [OR REPLACE] STAGE [ IF NOT EXISTS ] - [ FILE_FORMAT = ( { TYPE = { CSV | PARQUET } [ formatTypeOptions ] ) } ] - [ COPY_OPTIONS = ( copyOptions ) ] - [ COMMENT = '' ]`" - | #desc_stage: "`DESC STAGE `" - | #list_stage: "`LIST @ [pattern = '']`" - | #remove_stage: "`REMOVE @ [pattern = '']`" - | #drop_stage: "`DROP STAGE `" - | #create_file_format: "`CREATE FILE FORMAT [ IF NOT EXISTS ] formatTypeOptions`" - | #show_file_formats: "`SHOW FILE FORMATS`" - | #drop_file_format: "`DROP FILE FORMAT [ IF EXISTS ] `" - | #copy_into - | #call: "`CALL (, ...)`" - | #grant : "`GRANT { ROLE | schemaObjectPrivileges | ALL [ PRIVILEGES ] ON } TO { [ROLE ] | [USER] }`" - | #show_grants : "`SHOW GRANTS {FOR { ROLE | USER }] | ON {DATABASE | TABLE .} }`" - | #revoke : "`REVOKE { ROLE | schemaObjectPrivileges | ALL [ PRIVILEGES ] ON } FROM { [ROLE ] | [USER] }`" + ).parse(i), + LIST => rule!(#list_stage: "`LIST @ [pattern = '']`" + ).parse(i), + REMOVE => rule!(#remove_stage: "`REMOVE @ [pattern = '']`" + ).parse(i), + PRESIGN => rule!(#presign: "`PRESIGN [{DOWNLOAD | UPLOAD}] [EXPIRE = 3600]`" + ).parse(i), + CALL => rule!( + #call: "`CALL (, ...)`" + | #call_procedure : "`CALL PROCEDURE ()`" + ).parse(i), + EXECUTE => rule!( + #execute_task: "`EXECUTE TASK `" + | #execute_immediate : "`EXECUTE IMMEDIATE $$