diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 84dc74d96..be83089ad 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -44,7 +44,7 @@ use crate::ast::{ }; use crate::display_utils::{DisplayCommaSeparated, Indent, NewLine, SpaceOrNewline}; use crate::keywords::Keyword; -use crate::tokenizer::{Span, Token}; +use crate::tokenizer::{Comment, Span, Token}; /// Index column type. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -1202,10 +1202,15 @@ pub struct ColumnDef { pub name: Ident, pub data_type: DataType, pub options: Vec<ColumnOptionDef>, + /// Leading comment for the column. + pub leading_comment: Option<Comment>, } impl fmt::Display for ColumnDef { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if let Some(leading_comment) = &self.leading_comment { + write!(f, "{leading_comment}")?; + } if self.data_type == DataType::Unspecified { write!(f, "{}", self.name)?; } else { @@ -2286,6 +2291,8 @@ pub struct CreateTable { /// Snowflake "REQUIRE USER" clause for dybamic tables /// pub require_user: bool, + /// Leading comment for the table. 
+ pub leading_comment: Option, } impl fmt::Display for CreateTable { @@ -2297,6 +2304,9 @@ impl fmt::Display for CreateTable { // `CREATE TABLE t AS SELECT a from t2` // Columns provided for CREATE TABLE AS: // `CREATE TABLE t (a INT) AS SELECT a from t2` + if let Some(leading_comment) = &self.leading_comment { + write!(f, "{leading_comment}")?; + } write!( f, "CREATE {or_replace}{external}{global}{temporary}{transient}{volatile}{dynamic}{iceberg}TABLE {if_not_exists}{name}", @@ -3515,10 +3525,16 @@ pub struct AlterTable { pub iceberg: bool, /// Token that represents the end of the statement (semicolon or EOF) pub end_token: AttachedToken, + /// Leading comment which appears before the `ALTER` keyword + pub leading_comment: Option, } impl fmt::Display for AlterTable { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if let Some(comment) = &self.leading_comment { + write!(f, "{comment}")?; + } + if self.iceberg { write!(f, "ALTER ICEBERG TABLE ")?; } else { diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index fe950c909..ec0c63a11 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -32,6 +32,7 @@ use crate::ast::{ }; use crate::parser::ParserError; +use crate::tokenizer::Comment; /// Builder for create table statement variant ([1]). 
/// @@ -49,6 +50,7 @@ use crate::parser::ParserError; /// name: Ident::new("c1"), /// data_type: DataType::Int(None), /// options: vec![], +/// leading_comment: None, /// }]); /// // You can access internal elements with ease /// assert!(builder.if_not_exists); @@ -115,6 +117,7 @@ pub struct CreateTableBuilder { pub refresh_mode: Option, pub initialize: Option, pub require_user: bool, + pub leading_comment: Option, } impl CreateTableBuilder { @@ -171,6 +174,7 @@ impl CreateTableBuilder { refresh_mode: None, initialize: None, require_user: false, + leading_comment: None, } } pub fn or_replace(mut self, or_replace: bool) -> Self { @@ -431,6 +435,11 @@ impl CreateTableBuilder { self } + pub fn leading_comment(mut self, leading_comment: Option) -> Self { + self.leading_comment = leading_comment; + self + } + pub fn build(self) -> Statement { CreateTable { or_replace: self.or_replace, @@ -484,6 +493,7 @@ impl CreateTableBuilder { refresh_mode: self.refresh_mode, initialize: self.initialize, require_user: self.require_user, + leading_comment: self.leading_comment, } .into() } @@ -548,6 +558,7 @@ impl TryFrom for CreateTableBuilder { refresh_mode, initialize, require_user, + leading_comment, }) => Ok(Self { or_replace, temporary, @@ -600,6 +611,7 @@ impl TryFrom for CreateTableBuilder { refresh_mode, initialize, require_user, + leading_comment, }), _ => Err(ParserError::ParserError(format!( "Expected create table statement, but received: {stmt}" diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 5d82c7339..0a140701e 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -553,6 +553,7 @@ impl Spanned for CreateTable { refresh_mode: _, initialize: _, require_user: _, + leading_comment: _, // Option } = self; union_spans( @@ -572,6 +573,7 @@ impl Spanned for ColumnDef { name, data_type: _, // enum options, + leading_comment: _, } = self; union_spans(core::iter::once(name.span).chain(options.iter().map(|i| i.span()))) diff --git a/src/dialect/snowflake.rs 
b/src/dialect/snowflake.rs index 825fd45f0..8fb7644b1 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -35,7 +35,7 @@ use crate::ast::{ use crate::dialect::{Dialect, Precedence}; use crate::keywords::Keyword; use crate::parser::{IsOptional, Parser, ParserError}; -use crate::tokenizer::Token; +use crate::tokenizer::{Comment, Token}; #[cfg(not(feature = "std"))] use alloc::boxed::Box; #[cfg(not(feature = "std"))] @@ -210,6 +210,7 @@ impl Dialect for SnowflakeDialect { } fn parse_statement(&self, parser: &mut Parser) -> Option> { + let leading_comment = parser.parse_leading_comment(); if parser.parse_keyword(Keyword::BEGIN) { return Some(parser.parse_begin_exception_end()); } @@ -261,7 +262,15 @@ impl Dialect for SnowflakeDialect { return Some(parse_create_stage(or_replace, temporary, parser)); } else if parser.parse_keyword(Keyword::TABLE) { return Some(parse_create_table( - or_replace, global, temporary, volatile, transient, iceberg, dynamic, parser, + or_replace, + global, + temporary, + volatile, + transient, + iceberg, + dynamic, + parser, + leading_comment, )); } else if parser.parse_keyword(Keyword::DATABASE) { return Some(parse_create_database(or_replace, transient, parser)); @@ -305,7 +314,9 @@ impl Dialect for SnowflakeDialect { //Give back Keyword::SHOW parser.prev_token(); } - + if leading_comment.is_some() { + parser.prev_token(); + } None } @@ -630,6 +641,7 @@ pub fn parse_create_table( iceberg: bool, dynamic: bool, parser: &mut Parser, + leading_comment: Option, ) -> Result { let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = parser.parse_object_name(false)?; @@ -643,6 +655,7 @@ pub fn parse_create_table( .iceberg(iceberg) .global(global) .dynamic(dynamic) + .leading_comment(leading_comment) .hive_formats(Some(Default::default())); // Snowflake does not enforce order of the parameters in the statement. 
The parser needs to diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ef31c41f2..99b9dfec7 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -529,6 +529,17 @@ impl<'a> Parser<'a> { Parser::new(dialect).try_with_sql(sql)?.parse_statements() } + /// Parses a single leading comment (if any) + pub fn parse_leading_comment(&mut self) -> Option<Comment> { + if let Token::LeadingComment(ref comment) = self.peek_token_ref().token { + let comment = comment.clone(); + self.advance_token(); + Some(comment) + } else { + None + } + } + /// Parse a single top-level statement (such as SELECT, INSERT, CREATE, etc.), /// stopping before the statement separator, if any. pub fn parse_statement(&mut self) -> Result<Statement, ParserError> { @@ -539,7 +550,10 @@ impl<'a> Parser<'a> { return statement; } + let leading_comment: Option<Comment> = self.parse_leading_comment(); + let next_token = self.next_token(); + match &next_token.token { Token::Word(w) => match w.keyword { Keyword::KILL => self.parse_kill(), @@ -580,7 +594,7 @@ impl<'a> Parser<'a> { self.parse_detach_duckdb_database() } Keyword::MSCK => self.parse_msck(), - Keyword::CREATE => self.parse_create(), + Keyword::CREATE => self.parse_create(leading_comment), Keyword::CACHE => self.parse_cache_table(), Keyword::DROP => self.parse_drop(), Keyword::DISCARD => self.parse_discard(), @@ -591,7 +605,7 @@ impl<'a> Parser<'a> { Keyword::REPLACE => self.parse_replace(), Keyword::UNCACHE => self.parse_uncache_table(), Keyword::UPDATE => self.parse_update(), - Keyword::ALTER => self.parse_alter(), + Keyword::ALTER => self.parse_alter(leading_comment), Keyword::CALL => self.parse_call(), Keyword::COPY => self.parse_copy(), Keyword::OPEN => { @@ -4147,16 +4161,7 @@ impl<'a> Parser<'a> { /// /// See [`Self::get_current_token`] to get the current token after advancing pub fn advance_token(&mut self) { - loop { - self.index += 1; - match self.tokens.get(self.index - 1) { - Some(TokenWithSpan { - token: Token::Whitespace(_), - span: _, - }) => continue, - _ => break, - 
} - } + self.index += 1; } /// Returns a reference to the current token @@ -4187,18 +4192,8 @@ impl<'a> Parser<'a> { /// // TODO rename to backup_token and deprecate prev_token? pub fn prev_token(&mut self) { - loop { - assert!(self.index > 0); - self.index -= 1; - if let Some(TokenWithSpan { - token: Token::Whitespace(_), - span: _, - }) = self.tokens.get(self.index) - { - continue; - } - return; - } + assert!(self.index > 0); + self.index -= 1; } /// Report `found` was encountered instead of `expected` @@ -4717,7 +4712,10 @@ impl<'a> Parser<'a> { } /// Parse a SQL CREATE statement - pub fn parse_create(&mut self) -> Result { + pub fn parse_create( + &mut self, + leading_comment: Option, + ) -> Result { let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); let or_alter = self.parse_keywords(&[Keyword::OR, Keyword::ALTER]); let local = self.parse_one_of_keywords(&[Keyword::LOCAL]).is_some(); @@ -4737,7 +4735,7 @@ impl<'a> Parser<'a> { && self.parse_one_of_keywords(&[Keyword::PERSISTENT]).is_some(); let create_view_params = self.parse_create_view_params()?; if self.parse_keyword(Keyword::TABLE) { - self.parse_create_table(or_replace, temporary, global, transient) + self.parse_create_table(or_replace, temporary, global, transient, leading_comment) } else if self.peek_keyword(Keyword::MATERIALIZED) || self.peek_keyword(Keyword::VIEW) || self.peek_keywords(&[Keyword::SECURE, Keyword::MATERIALIZED, Keyword::VIEW]) @@ -4747,7 +4745,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::POLICY) { self.parse_create_policy() } else if self.parse_keyword(Keyword::EXTERNAL) { - self.parse_create_external_table(or_replace) + self.parse_create_external_table(or_replace, leading_comment) } else if self.parse_keyword(Keyword::FUNCTION) { self.parse_create_function(or_alter, or_replace, temporary) } else if self.parse_keyword(Keyword::DOMAIN) { @@ -5796,6 +5794,7 @@ impl<'a> Parser<'a> { pub fn parse_create_external_table( &mut self, or_replace: bool, + 
leading_comment: Option, ) -> Result { self.expect_keyword_is(Keyword::TABLE)?; let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); @@ -5831,6 +5830,7 @@ impl<'a> Parser<'a> { .external(true) .file_format(file_format) .location(location) + .leading_comment(leading_comment) .build()) } @@ -7227,10 +7227,9 @@ impl<'a> Parser<'a> { //TODO: Implement parsing for Skewed pub fn parse_hive_distribution(&mut self) -> Result { if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_column_def)?; - self.expect_token(&Token::RParen)?; - Ok(HiveDistributionStyle::PARTITIONED { columns }) + Ok(HiveDistributionStyle::PARTITIONED { + columns: self.parse_returns_table_columns()?, + }) } else { Ok(HiveDistributionStyle::NONE) } @@ -7394,6 +7393,7 @@ impl<'a> Parser<'a> { temporary: bool, global: Option, transient: bool, + leading_comment: Option, ) -> Result { let allow_unquoted_hyphen = dialect_of!(self is BigQueryDialect); let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); @@ -7503,6 +7503,7 @@ impl<'a> Parser<'a> { .table_options(create_table_config.table_options) .primary_key(primary_key) .strict(strict) + .leading_comment(leading_comment) .build()) } @@ -7885,10 +7886,11 @@ impl<'a> Parser<'a> { } loop { + let leading_comment: Option = self.parse_leading_comment(); if let Some(constraint) = self.parse_optional_table_constraint()? 
{ constraints.push(constraint); } else if let Token::Word(_) = self.peek_token().token { - columns.push(self.parse_column_def()?); + columns.push(self.parse_column_def(leading_comment)?); } else { return self.expected("column name or constraint definition", self.peek_token()); } @@ -7939,7 +7941,10 @@ impl<'a> Parser<'a> { }) } - pub fn parse_column_def(&mut self) -> Result { + pub fn parse_column_def( + &mut self, + leading_comment: Option, + ) -> Result { let col_name = self.parse_identifier()?; let data_type = if self.is_column_type_sqlite_unspecified() { DataType::Unspecified @@ -7947,6 +7952,7 @@ impl<'a> Parser<'a> { self.parse_data_type()? }; let mut options = vec![]; + loop { if self.parse_keyword(Keyword::CONSTRAINT) { let name = Some(self.parse_identifier()?); @@ -7968,6 +7974,7 @@ impl<'a> Parser<'a> { name: col_name, data_type, options, + leading_comment, }) } @@ -8912,7 +8919,7 @@ impl<'a> Parser<'a> { false }; - let column_def = self.parse_column_def()?; + let column_def = self.parse_column_def(None)?; let column_position = self.parse_column_position()?; @@ -9356,7 +9363,10 @@ impl<'a> Parser<'a> { } } - pub fn parse_alter(&mut self) -> Result { + pub fn parse_alter( + &mut self, + leading_comment: Option, + ) -> Result { let object_type = self.expect_one_of_keywords(&[ Keyword::VIEW, Keyword::TYPE, @@ -9377,10 +9387,10 @@ impl<'a> Parser<'a> { } Keyword::VIEW => self.parse_alter_view(), Keyword::TYPE => self.parse_alter_type(), - Keyword::TABLE => self.parse_alter_table(false), + Keyword::TABLE => self.parse_alter_table(leading_comment, false), Keyword::ICEBERG => { self.expect_keyword(Keyword::TABLE)?; - self.parse_alter_table(true) + self.parse_alter_table(leading_comment, true) } Keyword::INDEX => { let index_name = self.parse_object_name(false)?; @@ -9410,7 +9420,11 @@ impl<'a> Parser<'a> { } /// Parse a [Statement::AlterTable] - pub fn parse_alter_table(&mut self, iceberg: bool) -> Result { + pub fn parse_alter_table( + &mut self, + 
leading_comment: Option, + iceberg: bool, + ) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let only = self.parse_keyword(Keyword::ONLY); // [ ONLY ] let table_name = self.parse_object_name(false)?; @@ -9445,6 +9459,7 @@ impl<'a> Parser<'a> { on_cluster, iceberg, end_token: AttachedToken(end_token), + leading_comment, } .into()) } @@ -10622,10 +10637,7 @@ impl<'a> Parser<'a> { )) } Keyword::NESTED if dialect_is!(dialect is ClickHouseDialect | GenericDialect) => { - self.expect_token(&Token::LParen)?; - let field_defs = self.parse_comma_separated(Parser::parse_column_def)?; - self.expect_token(&Token::RParen)?; - Ok(DataType::Nested(field_defs)) + Ok(DataType::Nested(self.parse_returns_table_columns()?)) } Keyword::TUPLE if dialect_is!(dialect is ClickHouseDialect | GenericDialect) => { self.prev_token(); @@ -10692,7 +10704,8 @@ impl<'a> Parser<'a> { } fn parse_returns_table_column(&mut self) -> Result { - self.parse_column_def() + let leading_comment: Option = self.parse_leading_comment(); + self.parse_column_def(leading_comment) } fn parse_returns_table_columns(&mut self) -> Result, ParserError> { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 54a158c1f..b96ce9019 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -106,6 +106,8 @@ pub enum Token { HexStringLiteral(String), /// Comma Comma, + /// Comment (single line or multi line) that are associated with a statement or relevant sub-portion of a statement + LeadingComment(Comment), /// Whitespace (space, tab, etc) Whitespace(Whitespace), /// Double equals sign `==` @@ -279,6 +281,23 @@ pub enum Token { CustomBinaryOperator(String), } +/// Decide whether a comment is a LeadingComment or an InterstitialComment based on the previous token. 
+fn dispatch_comment_kind( + prev_token: Option<&Token>, + comment: Comment, + peeked_char: Option<&char>, +) -> Token { + if peeked_char.is_none() { + return Token::Whitespace(comment.into()); + } + match prev_token { + None | Some(Token::LParen) | Some(Token::Comma) | Some(Token::SemiColon) => { + Token::LeadingComment(comment) + } + _ => Token::Whitespace(comment.into()), + } +} + impl fmt::Display for Token { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -304,6 +323,7 @@ impl fmt::Display for Token { Token::TripleSingleQuotedRawStringLiteral(ref s) => write!(f, "R'''{s}'''"), Token::TripleDoubleQuotedRawStringLiteral(ref s) => write!(f, "R\"\"\"{s}\"\"\""), Token::Comma => f.write_str(","), + Token::LeadingComment(c) => write!(f, "{c}"), Token::Whitespace(ws) => write!(f, "{ws}"), Token::DoubleEq => f.write_str("=="), Token::Spaceship => f.write_str("<=>"), @@ -449,6 +469,29 @@ impl Word { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum Comment { + SingleLineComment { comment: String, prefix: String }, + MultiLineComment(String), +} + +impl fmt::Display for Comment { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Comment::SingleLineComment { comment, prefix } => write!(f, "{}{}", prefix, comment), + Comment::MultiLineComment(comment) => write!(f, "/*{}*/", comment), + } + } +} + +impl From<Comment> for Whitespace { + fn from(comment: Comment) -> Self { + Whitespace::InterstitialComment(comment) + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -456,8 +499,20 @@ pub enum Whitespace { Space, Newline, Tab, - SingleLineComment { comment: String, prefix: String }, - MultiLineComment(String), + /// A comment which is not positioned 
before a statement or relevant sub-portion + /// of a statement, but rather appears between other tokens. + /// + /// For example, in the following SQL: + /// + /// ```sql + /// CREATE + /// -- This is an interstitial comment + /// TABLE + /// -- Another + /// -- interstitial comment + /// my_table (id INT); + /// ``` + InterstitialComment(Comment), } impl fmt::Display for Whitespace { @@ -466,8 +521,7 @@ impl fmt::Display for Whitespace { Whitespace::Space => f.write_str(" "), Whitespace::Newline => f.write_str("\n"), Whitespace::Tab => f.write_str("\t"), - Whitespace::SingleLineComment { prefix, comment } => write!(f, "{prefix}{comment}"), - Whitespace::MultiLineComment(s) => write!(f, "/*{s}*/"), + Whitespace::InterstitialComment(comment) => write!(f, "{}", comment), } } } @@ -826,7 +880,6 @@ impl<'a> Tokenizer<'a> { /// /// assert_eq!(tokens, vec![ /// Token::make_word("SELECT", None), - /// Token::Whitespace(Whitespace::Space), /// Token::SingleQuotedString("foo".to_string()), /// ]); pub fn new(dialect: &'a dyn Dialect, query: &'a str) -> Self { @@ -896,13 +949,23 @@ impl<'a> Tokenizer<'a> { line: 1, col: 1, }; - + let mut last_char_was_word: bool = false; let mut location = state.location(); - while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token))? { + while let Some(token) = + self.next_token(&mut state, buf.last().map(|t| &t.token), last_char_was_word)? 
+ { + last_char_was_word = matches!(token, Token::Word(_)); let span = location.span_to(state.location()); - - buf.push(TokenWithSpan { token, span }); - + if matches!(token, Token::Whitespace(_)) { + if matches!(buf.last().map(|t| &t.token), Some(Token::Colon)) { + return self.tokenizer_error( + state.location(), + "Unexpected whitespace after ':'".to_string(), + ); + } + } else { + buf.push(TokenWithSpan { token, span }); + } location = state.location(); } Ok(()) @@ -939,6 +1002,7 @@ impl<'a> Tokenizer<'a> { &self, chars: &mut State, prev_token: Option<&Token>, + last_char_was_word: bool, ) -> Result, TokenizerError> { match chars.peek() { Some(&ch) => match ch { @@ -1196,7 +1260,7 @@ impl<'a> Tokenizer<'a> { // if the prev token is not a word, then this is not a valid sql // word or number. if ch == '.' && chars.peekable.clone().nth(1) == Some('_') { - if let Some(Token::Word(_)) = prev_token { + if last_char_was_word { chars.next(); return Ok(Some(Token::Period)); } @@ -1240,7 +1304,7 @@ impl<'a> Tokenizer<'a> { // we should yield the dot as a dedicated token so compound identifiers // starting with digits can be parsed correctly. if s == "." 
&& self.dialect.supports_numeric_prefix() { - if let Some(Token::Word(_)) = prev_token { + if last_char_was_word { return Ok(Some(Token::Period)); } } @@ -1332,11 +1396,13 @@ impl<'a> Tokenizer<'a> { if is_comment { chars.next(); // consume second '-' let comment = self.tokenize_single_line_comment(chars); - return Ok(Some(Token::Whitespace( - Whitespace::SingleLineComment { + return Ok(Some(dispatch_comment_kind( + prev_token, + Comment::SingleLineComment { prefix: "--".to_owned(), comment, }, + chars.peek(), ))); } @@ -1358,15 +1424,21 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some('*') => { chars.next(); // consume the '*', starting a multi-line comment - self.tokenize_multiline_comment(chars) + Ok(self.tokenize_multiline_comment(chars)?.map(|comment| { + dispatch_comment_kind(prev_token, comment, chars.peek()) + })) } Some('/') if dialect_of!(self is SnowflakeDialect) => { chars.next(); // consume the second '/', starting a snowflake single-line comment let comment = self.tokenize_single_line_comment(chars); - Ok(Some(Token::Whitespace(Whitespace::SingleLineComment { - prefix: "//".to_owned(), - comment, - }))) + Ok(Some(dispatch_comment_kind( + prev_token, + Comment::SingleLineComment { + prefix: "//".to_owned(), + comment, + }, + chars.peek(), + ))) } Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => { self.consume_and_return(chars, Token::DuckIntDiv) @@ -1568,10 +1640,15 @@ impl<'a> Tokenizer<'a> { { chars.next(); // consume the '#', starting a snowflake single-line comment let comment = self.tokenize_single_line_comment(chars); - Ok(Some(Token::Whitespace(Whitespace::SingleLineComment { - prefix: "#".to_owned(), - comment, - }))) + + Ok(Some(dispatch_comment_kind( + prev_token, + Comment::SingleLineComment { + prefix: "#".to_owned(), + comment, + }, + chars.peek(), + ))) } '~' => { chars.next(); // consume @@ -2104,7 +2181,7 @@ impl<'a> Tokenizer<'a> { fn tokenize_multiline_comment( &self, chars: &mut State, - ) -> Result, 
TokenizerError> { + ) -> Result, TokenizerError> { let mut s = String::new(); let mut nested = 1; let supports_nested_comments = self.dialect.supports_nested_comments(); @@ -2121,7 +2198,7 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume the '/' nested -= 1; if nested == 0 { - break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s)))); + break Ok(Some(Comment::MultiLineComment(s).into())); } s.push('*'); s.push('/'); @@ -2444,7 +2521,6 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), ]; @@ -2459,7 +2535,6 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from(".1"), false), ]; @@ -2475,7 +2550,6 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Word(Word { value: "foo".to_string(), quote_style: None, @@ -2496,7 +2570,6 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number("10".to_string(), false), Token::make_word("_000", None), ]; @@ -2506,17 +2579,13 @@ mod tests { "SELECT 10_000, _10_000, 10_00_, 10___0", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number("10_000".to_string(), false), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("_10_000", None), // leading underscore tokenizes as a word (parsed as column identifier) Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number("10_00".to_string(), false), Token::make_word("_", None), // trailing underscores tokenizes as a word (syntax error in some dialects) Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number("10".to_string(), false), Token::make_word("___0", None), // multiple underscores tokenizes as a word (syntax error in some dialects) ], @@ -2531,24 +2600,18 @@ 
mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1e10"), false), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1e-10"), false), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1e+10"), false), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), Token::make_word("ea", None), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1e-10"), false), Token::make_word("a", None), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1e-10"), false), Token::Minus, Token::Number(String::from("10"), false), @@ -2565,7 +2628,6 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_word("sqrt", None), Token::LParen, Token::Number(String::from("1"), false), @@ -2583,11 +2645,8 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString(String::from("a")), - Token::Whitespace(Whitespace::Space), Token::StringConcat, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString(String::from("b")), ]; @@ -2601,15 +2660,10 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_word("one", None), - Token::Whitespace(Whitespace::Space), Token::Pipe, - Token::Whitespace(Whitespace::Space), Token::make_word("two", None), - Token::Whitespace(Whitespace::Space), Token::Caret, - Token::Whitespace(Whitespace::Space), Token::make_word("three", None), ]; compare(expected, tokens); @@ -2624,32 +2678,20 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("true"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("XOR"), - Token::Whitespace(Whitespace::Space), 
Token::make_keyword("true"), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_keyword("false"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("XOR"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("false"), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_keyword("true"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("XOR"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("false"), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_keyword("false"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("XOR"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("true"), ]; compare(expected, tokens); @@ -2663,23 +2705,14 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("customer", None), - Token::Whitespace(Whitespace::Space), Token::make_keyword("WHERE"), - Token::Whitespace(Whitespace::Space), Token::make_word("id", None), - Token::Whitespace(Whitespace::Space), Token::Eq, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), - Token::Whitespace(Whitespace::Space), Token::make_keyword("LIMIT"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from("5"), false), ]; @@ -2694,21 +2727,13 @@ mod tests { let expected = vec![ Token::make_keyword("EXPLAIN"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("customer", None), - Token::Whitespace(Whitespace::Space), Token::make_keyword("WHERE"), - Token::Whitespace(Whitespace::Space), Token::make_word("id", None), - Token::Whitespace(Whitespace::Space), Token::Eq, - 
Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), ]; @@ -2723,23 +2748,14 @@ mod tests { let expected = vec![ Token::make_keyword("EXPLAIN"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("ANALYZE"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("customer", None), - Token::Whitespace(Whitespace::Space), Token::make_keyword("WHERE"), - Token::Whitespace(Whitespace::Space), Token::make_word("id", None), - Token::Whitespace(Whitespace::Space), Token::Eq, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), ]; @@ -2754,19 +2770,12 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("customer", None), - Token::Whitespace(Whitespace::Space), Token::make_keyword("WHERE"), - Token::Whitespace(Whitespace::Space), Token::make_word("salary", None), - Token::Whitespace(Whitespace::Space), Token::Neq, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString(String::from("Not Provided")), ]; @@ -2780,11 +2789,7 @@ mod tests { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); // println!("tokens: {:#?}", tokens); - let expected = vec![ - Token::Whitespace(Whitespace::Newline), - Token::Char('💝'), - Token::make_word("مصطفىh", None), - ]; + let expected = vec![Token::Char('💝'), Token::make_word("مصطفىh", None)]; compare(expected, tokens); } @@ -2839,16 +2844,10 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); // println!("tokens: {:#?}", tokens); let expected = vec![ - Token::Whitespace(Whitespace::Newline), - 
Token::Whitespace(Whitespace::Newline), Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("table"), - Token::Whitespace(Whitespace::Tab), Token::Char('💝'), Token::make_word("مصطفىh", None), ]; @@ -2862,7 +2861,6 @@ mod tests { String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$"), vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(), tag: Some("tag".into()), @@ -2873,7 +2871,6 @@ mod tests { String::from("SELECT $abc$x$ab$abc$"), vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "x$ab".into(), tag: Some("abc".into()), @@ -2884,7 +2881,6 @@ mod tests { String::from("SELECT $abc$$abc$"), vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "".into(), tag: Some("abc".into()), @@ -2961,16 +2957,12 @@ mod tests { tokens, vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Placeholder("$$".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Placeholder("$$ABC$$".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Placeholder("$ABC$".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Placeholder("$ABC".into()), ] ); @@ -2983,7 +2975,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "dollar $nested$ string".into(), tag: Some("tag".into()), @@ -2999,7 +2990,6 @@ mod tests { let tokens = 
Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "".into(), tag: None, @@ -3016,7 +3006,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "within dollar '$' quoted strings have $tags like this$ ".into(), tag: None, @@ -3067,9 +3056,7 @@ mod tests { let expected = vec![ Token::make_word("a", None), - Token::Whitespace(Whitespace::Space), Token::make_keyword("IS"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("NULL"), ]; @@ -3083,31 +3070,17 @@ mod tests { String::from("0--this is a comment\n1"), vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "this is a comment\n".to_string(), - }), Token::Number("1".to_string(), false), ], ), ( String::from("0--this is a comment\r1"), - vec![ - Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "this is a comment\r1".to_string(), - }), - ], + vec![Token::Number("0".to_string(), false)], ), ( String::from("0--this is a comment\r\n1"), vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "this is a comment\r\n".to_string(), - }), Token::Number("1".to_string(), false), ], ), @@ -3129,26 +3102,27 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("1".to_string(), false), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "\r".to_string(), - }), Token::Number("0".to_string(), false), ]; compare(expected, tokens); } #[test] - fn tokenize_comment_at_eof() 
{ + fn tokenize_leading_inline_comment_at_eof() { let sql = String::from("--this is a comment"); let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); - let expected = vec![Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "this is a comment".to_string(), - })]; - compare(expected, tokens); + assert!(tokens.is_empty()); + } + + #[test] + fn tokenize_leading_multiline_comment_at_eof() { + let sql = String::from("/* this is a comment */"); + + let dialect = GenericDialect {}; + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); + assert!(tokens.is_empty()); } #[test] @@ -3159,9 +3133,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment( - "multi-line\n* /comment".to_string(), - )), Token::Number("1".to_string(), false), ]; compare(expected, tokens); @@ -3173,10 +3144,6 @@ mod tests { "0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1", vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment( - "multi-line\n* \n/* comment \n /*comment*/*/ ".into(), - )), - Token::Whitespace(Whitespace::Space), Token::Div, Token::Word(Word { value: "comment".to_string(), @@ -3193,9 +3160,6 @@ mod tests { "0/*multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/*/1", vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment( - "multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/".into(), - )), Token::Number("1".to_string(), false), ], ); @@ -3204,9 +3168,7 @@ mod tests { "SELECT 1/* a /* b */ c */0", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number("1".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment(" a /* b */ c ".to_string())), Token::Number("0".to_string(), false), ], ); @@ -3218,9 +3180,7 @@ mod 
tests { "select 1/*/**/*/0", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::Number("1".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string())), Token::Number("0".to_string(), false), ], ); @@ -3232,11 +3192,7 @@ mod tests { "SELECT 1/*/* nested comment */*/0", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number("1".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment( - "/* nested comment ".to_string(), - )), Token::Mul, Token::Div, Token::Number("0".to_string(), false), @@ -3250,11 +3206,9 @@ mod tests { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); - let expected = vec![ - Token::Whitespace(Whitespace::Newline), - Token::Whitespace(Whitespace::MultiLineComment("* Comment *".to_string())), - Token::Whitespace(Whitespace::Newline), - ]; + let expected = vec![Token::LeadingComment( + Comment::MultiLineComment("* Comment *".to_string()).into(), + )]; compare(expected, tokens); } @@ -3264,12 +3218,7 @@ mod tests { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); - let expected = vec![ - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::Newline), - ]; - compare(expected, tokens); + assert!(tokens.is_empty()); } #[test] @@ -3295,13 +3244,9 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_word("line1", None), - Token::Whitespace(Whitespace::Newline), Token::make_word("line2", None), - Token::Whitespace(Whitespace::Newline), Token::make_word("line3", None), - Token::Whitespace(Whitespace::Newline), Token::make_word("line4", None), - Token::Whitespace(Whitespace::Newline), ]; compare(expected, tokens); } @@ -3313,15 +3258,10 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ 
Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("TOP"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from("5"), false), - Token::Whitespace(Whitespace::Space), Token::make_word("bar", Some('[')), - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("foo", None), ]; compare(expected, tokens); @@ -3334,32 +3274,20 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::Tilde, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("^a".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::TildeAsterisk, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("^a".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::ExclamationMarkTilde, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("^a".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::ExclamationMarkTildeAsterisk, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("^a".into()), ]; compare(expected, tokens); @@ -3372,32 +3300,20 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::DoubleTilde, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("_a%".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - 
Token::Whitespace(Whitespace::Space), Token::DoubleTildeAsterisk, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("_a%".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::ExclamationMarkDoubleTilde, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("_a%".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::ExclamationMarkDoubleTildeAsterisk, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("_a%".into()), ]; compare(expected, tokens); @@ -3409,13 +3325,9 @@ mod tests { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ - Token::Whitespace(Whitespace::Space), Token::make_word(r#"a " b"#, Some('"')), - Token::Whitespace(Whitespace::Space), Token::make_word(r#"a ""#, Some('"')), - Token::Whitespace(Whitespace::Space), Token::make_word(r#"c """#, Some('"')), - Token::Whitespace(Whitespace::Space), ]; compare(expected, tokens); } @@ -3442,13 +3354,9 @@ mod tests { .tokenize() .unwrap(); let expected = vec![ - Token::Whitespace(Whitespace::Space), Token::make_word(r#"a "" b"#, Some('"')), - Token::Whitespace(Whitespace::Space), Token::make_word(r#"a """#, Some('"')), - Token::Whitespace(Whitespace::Space), Token::make_word(r#"c """""#, Some('"')), - Token::Whitespace(Whitespace::Space), ]; compare(expected, tokens); } @@ -3462,23 +3370,8 @@ mod tests { .unwrap(); let expected = vec![ TokenWithSpan::at(Token::make_keyword("SELECT"), (1, 1).into(), (1, 7).into()), - TokenWithSpan::at( - Token::Whitespace(Whitespace::Space), - (1, 7).into(), - (1, 8).into(), - ), TokenWithSpan::at(Token::make_word("a", None), (1, 8).into(), (1, 9).into()), TokenWithSpan::at(Token::Comma, (1, 9).into(), (1, 10).into()), - TokenWithSpan::at( - Token::Whitespace(Whitespace::Newline), - (1, 10).into(), - 
(2, 1).into(), - ), - TokenWithSpan::at( - Token::Whitespace(Whitespace::Space), - (2, 1).into(), - (2, 2).into(), - ), TokenWithSpan::at(Token::make_word("b", None), (2, 2).into(), (2, 3).into()), ]; compare(expected, tokens); @@ -3600,11 +3493,8 @@ mod tests { let tokens = Tokenizer::new(dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), ]; compare(expected, tokens); @@ -3802,9 +3692,7 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("CREATE"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("USER"), - Token::Whitespace(Whitespace::Space), Token::make_word("root", Some('`')), Token::AtSign, Token::make_word("%", Some('`')), @@ -3820,7 +3708,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::AtSign, Token::SingleQuotedString("1".to_string()), ]; @@ -3835,12 +3722,9 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::AtSign, Token::DoubleQuotedString("bar".to_string()), - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("foo", None), ]; compare(expected, tokens); @@ -3853,7 +3737,6 @@ mod tests { "select n'''''\\'", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::NationalStringLiteral("''\\".to_string()), ], ); @@ -3866,7 +3749,6 @@ mod tests { "select n'''''\\''", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::NationalStringLiteral("'''".to_string()), ], 
); @@ -3878,7 +3760,6 @@ mod tests { "select e'...'", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::make_word("e", None), Token::SingleQuotedString("...".to_string()), ], @@ -3888,7 +3769,6 @@ mod tests { "select E'...'", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::make_word("E", None), Token::SingleQuotedString("...".to_string()), ], @@ -3901,7 +3781,6 @@ mod tests { "select e'\\''", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::EscapedStringLiteral("'".to_string()), ], ); @@ -3910,7 +3789,6 @@ mod tests { "select E'\\''", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::EscapedStringLiteral("'".to_string()), ], ); @@ -3923,7 +3801,6 @@ mod tests { "SELECT --'abc'", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Minus, Token::Minus, Token::SingleQuotedString("abc".to_string()), @@ -3931,70 +3808,25 @@ mod tests { ); all_dialects_where(|dialect| dialect.requires_single_line_comment_whitespace()) - .tokenizes_to( - "SELECT -- 'abc'", - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: " 'abc'".to_string(), - }), - ], - ); + .tokenizes_to("SELECT -- 'abc'", vec![Token::make_keyword("SELECT")]); all_dialects_where(|dialect| dialect.requires_single_line_comment_whitespace()) .tokenizes_to( "SELECT --", - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Minus, - Token::Minus, - ], + vec![Token::make_keyword("SELECT"), Token::Minus, Token::Minus], ); } #[test] fn test_whitespace_not_required_after_single_line_comment() { all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace()) - .tokenizes_to( - "SELECT --'abc'", - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - 
Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "'abc'".to_string(), - }), - ], - ); + .tokenizes_to("SELECT --'abc'", vec![Token::make_keyword("SELECT")]); all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace()) - .tokenizes_to( - "SELECT -- 'abc'", - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: " 'abc'".to_string(), - }), - ], - ); + .tokenizes_to("SELECT -- 'abc'", vec![Token::make_keyword("SELECT")]); all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace()) - .tokenizes_to( - "SELECT --", - vec![ - Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "".to_string(), - }), - ], - ); + .tokenizes_to("SELECT --", vec![Token::make_keyword("SELECT")]); } #[test] @@ -4033,7 +3865,6 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Word(Word { value: "table".to_string(), quote_style: None, diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 03a0ac813..6c9b3f886 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -466,7 +466,8 @@ fn parse_create_table_with_unquoted_hyphen() { vec![ColumnDef { name: Ident::new("x"), data_type: DataType::Int64, - options: vec![] + options: vec![], + leading_comment: None, },], columns ); @@ -519,7 +520,8 @@ fn parse_create_table_with_options() { ), },]) }, - ] + ], + leading_comment: None, }, ColumnDef { name: Ident::new("y"), @@ -534,7 +536,8 @@ fn parse_create_table_with_options() { ) ), },]) - }] + }], + leading_comment: None, }, ], columns @@ -620,6 +623,7 @@ fn parse_nested_data_types() { StructBracketKind::AngleBrackets ), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("y"), 
@@ -634,6 +638,7 @@ fn parse_nested_data_types() { ), ))), options: vec![], + leading_comment: None, }, ] ); @@ -1567,6 +1572,7 @@ fn parse_table_identifiers() { fn test_table_ident_err(ident: &str) { let sql = format!("SELECT 1 FROM {ident}"); + dbg!(bigquery().parse_sql_statements(&sql)); assert!(bigquery().parse_sql_statements(&sql).is_err()); } diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 44bfcda42..c6fdb105b 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -534,6 +534,7 @@ fn column_def(name: Ident, data_type: DataType) -> ColumnDef { name, data_type, options: vec![], + leading_comment: None, } } @@ -625,6 +626,7 @@ fn parse_create_table_with_nullable() { name: None, option: ColumnOption::Null }], + leading_comment: None, } ] ); @@ -665,6 +667,7 @@ fn parse_create_table_with_nested_data_types() { ) ]), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("k"), @@ -683,6 +686,7 @@ fn parse_create_table_with_nested_data_types() { ]) ))), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("l"), @@ -701,6 +705,7 @@ fn parse_create_table_with_nested_data_types() { }, ]), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("m"), @@ -709,6 +714,7 @@ fn parse_create_table_with_nested_data_types() { Box::new(DataType::UInt16) ), options: vec![], + leading_comment: None, }, ] ); @@ -740,11 +746,13 @@ fn parse_create_table_with_primary_key() { name: Ident::with_quote('`', "i"), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::with_quote('`', "k"), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, ], columns @@ -839,6 +847,7 @@ fn parse_create_table_with_variant_default_expressions() { within_group: vec![], })) }], + leading_comment: None, }, ColumnDef { name: Ident::new("b"), @@ -860,6 +869,7 @@ fn parse_create_table_with_variant_default_expressions() { 
within_group: vec![], }))) }], + leading_comment: None, }, ColumnDef { name: Ident::new("c"), @@ -868,6 +878,7 @@ fn parse_create_table_with_variant_default_expressions() { name: None, option: ColumnOption::Ephemeral(None) }], + leading_comment: None, }, ColumnDef { name: Ident::new("d"), @@ -891,6 +902,7 @@ fn parse_create_table_with_variant_default_expressions() { within_group: vec![], })) }], + leading_comment: None, } ] ) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 9f807ecfe..d186876c1 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -39,7 +39,7 @@ use sqlparser::dialect::{ }; use sqlparser::keywords::{Keyword, ALL_KEYWORDS}; use sqlparser::parser::{Parser, ParserError, ParserOptions}; -use sqlparser::tokenizer::Tokenizer; +use sqlparser::tokenizer::{Comment, Tokenizer}; use sqlparser::tokenizer::{Location, Span}; use test_utils::{ all_dialects, all_dialects_where, all_dialects_with_options, alter_table_op, assert_eq_vec, @@ -52,7 +52,6 @@ mod test_utils; #[cfg(test)] use pretty_assertions::assert_eq; -use sqlparser::ast::ColumnOption::Comment; use sqlparser::ast::DateTimeField::Seconds; use sqlparser::ast::Expr::{Identifier, UnaryOp}; use sqlparser::ast::Value::Number; @@ -3739,6 +3738,7 @@ fn parse_create_table() { name: None, option: ColumnOption::NotNull, }], + leading_comment: None, }, ColumnDef { name: "lat".into(), @@ -3747,11 +3747,13 @@ fn parse_create_table() { name: None, option: ColumnOption::Null, }], + leading_comment: None, }, ColumnDef { name: "lng".into(), data_type: DataType::Double(ExactNumberInfo::None), options: vec![], + leading_comment: None, }, ColumnDef { name: "constrained".into(), @@ -3788,6 +3790,7 @@ fn parse_create_table() { }), }, ], + leading_comment: None, }, ColumnDef { name: "ref".into(), @@ -3806,6 +3809,7 @@ fn parse_create_table() { characteristics: None, }), }], + leading_comment: None, }, ColumnDef { name: "ref2".into(), @@ -3824,6 +3828,7 @@ fn parse_create_table() { 
characteristics: None, }), },], + leading_comment: None, }, ] ); @@ -3946,6 +3951,7 @@ fn parse_create_table_with_constraint_characteristics() { name: None, option: ColumnOption::NotNull, }], + leading_comment: None, }, ColumnDef { name: "lat".into(), @@ -3954,11 +3960,13 @@ fn parse_create_table_with_constraint_characteristics() { name: None, option: ColumnOption::Null, }], + leading_comment: None, }, ColumnDef { name: "lng".into(), data_type: DataType::Double(ExactNumberInfo::None), options: vec![], + leading_comment: None, }, ] ); @@ -4110,7 +4118,8 @@ fn parse_create_table_column_constraint_characteristics() { is_primary: false, characteristics: expected_value } - }] + }], + leading_comment: None, }], "{message}" ) @@ -4219,11 +4228,13 @@ fn parse_create_table_hive_array() { name: Ident::new("name"), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("val"), data_type: DataType::Array(expected), options: vec![], + leading_comment: None, }, ], ) @@ -4270,6 +4281,88 @@ fn parse_create_table_with_multiple_on_delete_fails() { .expect_err("should have failed"); } +#[test] +fn parse_create_table_with_leading_comment() { + let single_line_sql = r#"-- a single line leading comment +CREATE TABLE user (-- a column single line comment +id INT PRIMARY KEY)"#; + let single_line_ast = verified_stmt(single_line_sql); + match single_line_ast { + Statement::CreateTable(CreateTable { + leading_comment: Some(Comment::SingleLineComment { comment, prefix }), + columns, + .. + }) => { + assert_eq!(comment, " a single line leading comment\n"); + assert_eq!(prefix, "--"); + let [ColumnDef { + leading_comment: Some(Comment::SingleLineComment { comment, prefix }), + .. 
+ }] = columns.as_slice() + else { + unreachable!("unexpected column array: {columns:?}") + }; + assert_eq!(comment, " a column single line comment\n"); + assert_eq!(prefix, "--"); + } + _ => unreachable!(), + }; + let multi_line_sql = r#"/* a multi line +leading comment */CREATE TABLE user (/* a column multiline +comment */id INT PRIMARY KEY)"#; + let multi_line_ast = verified_stmt(multi_line_sql); + match multi_line_ast { + Statement::CreateTable(CreateTable { + leading_comment: Some(Comment::MultiLineComment(comment)), + columns, + .. + }) => { + assert_eq!(comment, " a multi line\nleading comment "); + let [ColumnDef { + leading_comment: Some(Comment::MultiLineComment(comment)), + .. + }] = columns.as_slice() + else { + unreachable!("unexpected column array: {columns:?}") + }; + assert_eq!(comment, " a column multiline\ncomment "); + } + _ => unreachable!(), + }; +} + +#[test] +fn parse_alter_table_with_leading_comment() { + let single_line_sql = r#"-- a single line leading comment +ALTER TABLE user ADD COLUMN id INT PRIMARY KEY"#; + let single_line_ast = verified_stmt(single_line_sql); + match single_line_ast { + Statement::AlterTable(AlterTable { + leading_comment: Some(Comment::SingleLineComment { comment, prefix }), + .. + }) => { + assert_eq!(comment, " a single line leading comment\n"); + assert_eq!(prefix, "--"); + } + _ => unreachable!(), + }; + let multi_line_sql = r#"/* a multi line +leading comment */ALTER TABLE user ADD COLUMN id INT PRIMARY KEY"#; + let multi_line_ast = verified_stmt(multi_line_sql); + match multi_line_ast { + Statement::AlterTable( + AlterTable { + leading_comment: Some(Comment::MultiLineComment(comment)), + .. 
+ }, + .., + ) => { + assert_eq!(comment, " a multi line\nleading comment "); + } + _ => unreachable!(), + }; +} + #[test] fn parse_assert() { let sql = "ASSERT (SELECT COUNT(*) FROM my_table) > 0"; @@ -4595,6 +4688,7 @@ fn parse_create_external_table() { name: None, option: ColumnOption::NotNull, }], + leading_comment: None, }, ColumnDef { name: "lat".into(), @@ -4603,11 +4697,13 @@ fn parse_create_external_table() { name: None, option: ColumnOption::Null, }], + leading_comment: None, }, ColumnDef { name: "lng".into(), data_type: DataType::Double(ExactNumberInfo::None), options: vec![], + leading_comment: None, }, ] ); @@ -4663,6 +4759,7 @@ fn parse_create_or_replace_external_table() { name: None, option: ColumnOption::NotNull, }], + leading_comment: None, },] ); assert!(constraints.is_empty()); @@ -12177,8 +12274,9 @@ fn test_parse_inline_comment() { data_type: DataType::Int(None), options: vec![ColumnOptionDef { name: None, - option: Comment("comment without equal".to_string()), - }] + option: ColumnOption::Comment("comment without equal".to_string()), + }], + leading_comment: None, }] ); assert_eq!( @@ -14868,6 +14966,7 @@ fn parse_create_table_with_enum_types() { Some(8) ), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar"), @@ -14889,6 +14988,7 @@ fn parse_create_table_with_enum_types() { Some(16) ), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("baz"), @@ -14900,6 +15000,7 @@ fn parse_create_table_with_enum_types() { None ), options: vec![], + leading_comment: None, } ], columns @@ -17282,7 +17383,8 @@ fn parse_invisible_column() { ColumnDef { name: "foo".into(), data_type: DataType::Int(None), - options: vec![] + options: vec![], + leading_comment: None, }, ColumnDef { name: "bar".into(), @@ -17290,7 +17392,8 @@ fn parse_invisible_column() { options: vec![ColumnOptionDef { name: None, option: ColumnOption::Invisible - }] + }], + leading_comment: None, } ] ); @@ -17313,7 +17416,8 @@ fn 
parse_invisible_column() { options: vec![ColumnOptionDef { name: None, option: ColumnOption::Invisible - }] + }], + leading_comment: None, }, column_position: None }] diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index e01611b6f..b46db66c4 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -359,6 +359,7 @@ fn data_type_timestamp_ntz() { name: "x".into(), data_type: DataType::TimestampNtz, options: vec![], + leading_comment: None, }] ); } diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 0f8051955..fc7f45607 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -64,6 +64,7 @@ fn test_struct() { name: "s".into(), data_type: struct_type1.clone(), options: vec![], + leading_comment: None, }] ); @@ -78,6 +79,7 @@ fn test_struct() { None )), options: vec![], + leading_comment: None, }] ); @@ -126,6 +128,7 @@ fn test_struct() { None )), options: vec![], + leading_comment: None, }] ); @@ -709,7 +712,8 @@ fn test_duckdb_union_datatype() { field_name: "a".into(), field_type: DataType::Int(None) }]), - options: Default::default() + options: Default::default(), + leading_comment: None, }, ColumnDef { name: "two".into(), @@ -723,7 +727,8 @@ fn test_duckdb_union_datatype() { field_type: DataType::Int(None) } ]), - options: Default::default() + options: Default::default(), + leading_comment: None, }, ColumnDef { name: "nested".into(), @@ -734,7 +739,8 @@ fn test_duckdb_union_datatype() { field_type: DataType::Int(None) }]) }]), - options: Default::default() + options: Default::default(), + leading_comment: None, } ], constraints: Default::default(), @@ -782,6 +788,7 @@ fn test_duckdb_union_datatype() { refresh_mode: None, initialize: None, require_user: Default::default(), + leading_comment: None, }), stmt ); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index e11c79f01..690ee3007 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -1868,6 
+1868,7 @@ fn parse_create_table_with_valid_options() { }, data_type: Int(None,), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident { @@ -1877,6 +1878,7 @@ fn parse_create_table_with_valid_options() { }, data_type: Int(None,), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident { @@ -1886,6 +1888,7 @@ fn parse_create_table_with_valid_options() { }, data_type: Int(None,), options: vec![], + leading_comment: None, }, ], constraints: vec![], @@ -1934,6 +1937,7 @@ fn parse_create_table_with_valid_options() { refresh_mode: None, initialize: None, require_user: false, + leading_comment: None, }) ); } @@ -2059,6 +2063,7 @@ fn parse_create_table_with_identity_column() { data_type: Int(None,), options: column_options, + leading_comment: None, },], constraints: vec![], hive_distribution: HiveDistributionStyle::NONE, @@ -2105,6 +2110,7 @@ fn parse_create_table_with_identity_column() { refresh_mode: None, initialize: None, require_user: false, + leading_comment: None, }), ); } @@ -2301,7 +2307,8 @@ fn parse_mssql_varbinary_max_length() { name: Ident::new("var_binary_col"), data_type: Varbinary(Some(BinaryLength::Max)), - options: vec![] + options: vec![], + leading_comment: None, },], ); } @@ -2326,7 +2333,8 @@ fn parse_mssql_varbinary_max_length() { name: Ident::new("var_binary_col"), data_type: Varbinary(Some(BinaryLength::IntegerLength { length: 50 })), - options: vec![] + options: vec![], + leading_comment: None, },], ); } diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index e0ddecf32..543a494fd 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -650,6 +650,7 @@ fn parse_create_table_auto_increment() { )]), }, ], + leading_comment: None, }], columns ); @@ -755,6 +756,7 @@ fn parse_create_table_primary_and_unique_key() { ]), }, ], + leading_comment: None, }, ColumnDef { name: Ident::new("bar"), @@ -763,6 +765,7 @@ fn parse_create_table_primary_and_unique_key() { name: None, option: 
ColumnOption::NotNull, },], + leading_comment: None, }, ], columns @@ -1181,6 +1184,7 @@ fn parse_create_table_set_enum() { name: Ident::new("bar"), data_type: DataType::Set(vec!["a".to_string(), "b".to_string()]), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("baz"), @@ -1192,6 +1196,7 @@ fn parse_create_table_set_enum() { None ), options: vec![], + leading_comment: None, } ], columns @@ -1217,6 +1222,7 @@ fn parse_create_table_engine_default_charset() { name: Ident::new("id"), data_type: DataType::Int(Some(11)), options: vec![], + leading_comment: None, },], columns ); @@ -1259,6 +1265,7 @@ fn parse_create_table_collate() { name: Ident::new("id"), data_type: DataType::Int(Some(11)), options: vec![], + leading_comment: None, },], columns ); @@ -1339,6 +1346,7 @@ fn parse_create_table_comment_character_set() { option: ColumnOption::Comment("comment".to_string()) } ], + leading_comment: None, },], columns ); @@ -1387,6 +1395,7 @@ fn parse_quote_identifiers() { characteristics: None }, }], + leading_comment: None, }], columns ); @@ -1644,26 +1653,31 @@ fn parse_create_table_with_minimum_display_width() { name: Ident::new("bar_tinyint"), data_type: DataType::TinyInt(Some(3)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_smallint"), data_type: DataType::SmallInt(Some(5)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_mediumint"), data_type: DataType::MediumInt(Some(6)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_int"), data_type: DataType::Int(Some(11)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_bigint"), data_type: DataType::BigInt(Some(20)), options: vec![], + leading_comment: None, } ], columns @@ -1685,26 +1699,31 @@ fn parse_create_table_unsigned() { name: Ident::new("bar_tinyint"), data_type: DataType::TinyIntUnsigned(Some(3)), options: vec![], + leading_comment: None, }, ColumnDef { name: 
Ident::new("bar_smallint"), data_type: DataType::SmallIntUnsigned(Some(5)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_mediumint"), data_type: DataType::MediumIntUnsigned(Some(13)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_int"), data_type: DataType::IntUnsigned(Some(11)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_bigint"), data_type: DataType::BigIntUnsigned(Some(20)), options: vec![], + leading_comment: None, }, ], columns @@ -1727,26 +1746,31 @@ fn parse_signed_data_types() { name: Ident::new("bar_tinyint"), data_type: DataType::TinyInt(Some(3)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_smallint"), data_type: DataType::SmallInt(Some(5)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_mediumint"), data_type: DataType::MediumInt(Some(13)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_int"), data_type: DataType::Int(Some(11)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_bigint"), data_type: DataType::BigInt(Some(20)), options: vec![], + leading_comment: None, }, ], columns @@ -1771,11 +1795,13 @@ fn parse_deprecated_mysql_unsigned_data_types() { name: Ident::new("bar_decimal"), data_type: DataType::DecimalUnsigned(ExactNumberInfo::None), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_decimal_prec"), data_type: DataType::DecimalUnsigned(ExactNumberInfo::Precision(10)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_decimal_scale"), @@ -1783,31 +1809,37 @@ fn parse_deprecated_mysql_unsigned_data_types() { 10, 2 )), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_dec"), data_type: DataType::DecUnsigned(ExactNumberInfo::None), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_dec_prec"), data_type: 
DataType::DecUnsigned(ExactNumberInfo::Precision(10)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_dec_scale"), data_type: DataType::DecUnsigned(ExactNumberInfo::PrecisionAndScale(10, 2)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_float"), data_type: DataType::FloatUnsigned(ExactNumberInfo::None), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_float_prec"), data_type: DataType::FloatUnsigned(ExactNumberInfo::Precision(10)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_float_scale"), @@ -1815,16 +1847,19 @@ fn parse_deprecated_mysql_unsigned_data_types() { 10, 2 )), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_double"), data_type: DataType::DoubleUnsigned(ExactNumberInfo::None), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_double_prec"), data_type: DataType::DoubleUnsigned(ExactNumberInfo::Precision(10)), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_double_scale"), @@ -1832,16 +1867,19 @@ fn parse_deprecated_mysql_unsigned_data_types() { 10, 2 )), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_real"), data_type: DataType::RealUnsigned, options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::new("bar_double_precision"), data_type: DataType::DoublePrecisionUnsigned, options: vec![], + leading_comment: None, }, ], columns @@ -2738,6 +2776,7 @@ fn parse_alter_table_add_column() { location: _, on_cluster: _, end_token: _, + leading_comment: _, }) => { assert_eq!(name.to_string(), "tab"); assert!(!if_exists); @@ -2752,6 +2791,7 @@ fn parse_alter_table_add_column() { name: "b".into(), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, column_position: Some(MySQLColumnPosition::First), },] @@ -2780,6 +2820,7 @@ fn parse_alter_table_add_column() { name: "b".into(), 
data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, column_position: Some(MySQLColumnPosition::After(Ident { value: String::from("foo"), @@ -2818,6 +2859,7 @@ fn parse_alter_table_add_columns() { name: "a".into(), data_type: DataType::Text, options: vec![], + leading_comment: None, }, column_position: Some(MySQLColumnPosition::First), }, @@ -2828,6 +2870,7 @@ fn parse_alter_table_add_columns() { name: "b".into(), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, column_position: Some(MySQLColumnPosition::After(Ident { value: String::from("foo"), @@ -3320,6 +3363,7 @@ fn parse_table_column_option_on_update() { name: None, option: ColumnOption::OnUpdate(call("CURRENT_TIMESTAMP", [])), },], + leading_comment: None, }], columns ); @@ -3622,6 +3666,7 @@ fn parse_create_table_with_column_collate() { )])) } ], + leading_comment: None, },], columns ); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 9d08540ad..b3c5a084d 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -370,6 +370,7 @@ fn parse_create_table_with_defaults() { pg().verified_expr("nextval(public.customer_customer_id_seq)") ) }], + leading_comment: None, }, ColumnDef { name: "store_id".into(), @@ -378,6 +379,7 @@ fn parse_create_table_with_defaults() { name: None, option: ColumnOption::NotNull, }], + leading_comment: None, }, ColumnDef { name: "first_name".into(), @@ -391,6 +393,7 @@ fn parse_create_table_with_defaults() { name: None, option: ColumnOption::NotNull, }], + leading_comment: None, }, ColumnDef { name: "last_name".into(), @@ -412,6 +415,7 @@ fn parse_create_table_with_defaults() { option: ColumnOption::NotNull, } ], + leading_comment: None, }, ColumnDef { name: "email".into(), @@ -422,6 +426,7 @@ fn parse_create_table_with_defaults() { } )), options: vec![], + leading_comment: None, }, ColumnDef { name: "address_id".into(), @@ -430,6 +435,7 @@ fn parse_create_table_with_defaults() { name: 
None, option: ColumnOption::NotNull }], + leading_comment: None, }, ColumnDef { name: "activebool".into(), @@ -446,6 +452,7 @@ fn parse_create_table_with_defaults() { option: ColumnOption::NotNull, } ], + leading_comment: None, }, ColumnDef { name: "create_date".into(), @@ -460,6 +467,7 @@ fn parse_create_table_with_defaults() { option: ColumnOption::NotNull, } ], + leading_comment: None, }, ColumnDef { name: "last_update".into(), @@ -474,6 +482,7 @@ fn parse_create_table_with_defaults() { option: ColumnOption::NotNull, } ], + leading_comment: None, }, ColumnDef { name: "active".into(), @@ -482,6 +491,7 @@ fn parse_create_table_with_defaults() { name: None, option: ColumnOption::NotNull }], + leading_comment: None, }, ] ); @@ -849,6 +859,7 @@ fn parse_alter_table_add_columns() { name: "a".into(), data_type: DataType::Text, options: vec![], + leading_comment: None, }, column_position: None, }, @@ -859,6 +870,7 @@ fn parse_alter_table_add_columns() { name: "b".into(), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, column_position: None, }, @@ -5019,32 +5031,38 @@ fn parse_create_table_with_alias() { ColumnDef { name: "int8_col".into(), data_type: DataType::Int8(None), - options: vec![] + options: vec![], + leading_comment: None, }, ColumnDef { name: "int4_col".into(), data_type: DataType::Int4(None), - options: vec![] + options: vec![], + leading_comment: None, }, ColumnDef { name: "int2_col".into(), data_type: DataType::Int2(None), - options: vec![] + options: vec![], + leading_comment: None, }, ColumnDef { name: "float8_col".into(), data_type: DataType::Float8, - options: vec![] + options: vec![], + leading_comment: None, }, ColumnDef { name: "float4_col".into(), data_type: DataType::Float4, - options: vec![] + options: vec![], + leading_comment: None, }, ColumnDef { name: "bool_col".into(), data_type: DataType::Bool, - options: vec![] + options: vec![], + leading_comment: None, }, ] ); @@ -5065,12 +5083,14 @@ fn 
parse_create_table_with_partition_by() { ColumnDef { name: "a".into(), data_type: DataType::Int(None), - options: vec![] + options: vec![], + leading_comment: None, }, ColumnDef { name: "b".into(), data_type: DataType::Text, - options: vec![] + options: vec![], + leading_comment: None, } ], create_table.columns @@ -5987,21 +6007,25 @@ fn parse_trigger_related_functions() { name: "empname".into(), data_type: DataType::Text, options: vec![], + leading_comment: None, }, ColumnDef { name: "salary".into(), data_type: DataType::Integer(None), options: vec![], + leading_comment: None, }, ColumnDef { name: "last_date".into(), data_type: DataType::Timestamp(None, TimezoneInfo::None), options: vec![], + leading_comment: None, }, ColumnDef { name: "last_user".into(), data_type: DataType::Text, options: vec![], + leading_comment: None, }, ], constraints: vec![], @@ -6049,6 +6073,7 @@ fn parse_trigger_related_functions() { refresh_mode: None, initialize: None, require_user: false, + leading_comment: None, } ); @@ -6350,11 +6375,13 @@ fn parse_varbit_datatype() { name: "x".into(), data_type: DataType::VarBit(None), options: vec![], + leading_comment: None, }, ColumnDef { name: "y".into(), data_type: DataType::VarBit(Some(42)), options: vec![], + leading_comment: None, } ] ); @@ -6400,6 +6427,7 @@ fn parse_ts_datatypes() { name: "x".into(), data_type: DataType::TsVector, options: vec![], + leading_comment: None, }] ); } @@ -6414,6 +6442,7 @@ fn parse_ts_datatypes() { name: "x".into(), data_type: DataType::TsQuery, options: vec![], + leading_comment: None, }] ); } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index e7a128343..a64394760 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -372,6 +372,7 @@ fn test_snowflake_create_table_column_comment() { name: None, option: ColumnOption::Comment("some comment".to_string()) }], + leading_comment: None, }], columns ) @@ -565,10 +566,13 @@ fn test_snowflake_single_line_tokenize() { 
Token::make_keyword("CREATE"), Token::Whitespace(Whitespace::Space), Token::make_keyword("TABLE"), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "#".to_string(), - comment: " this is a comment \n".to_string(), - }), + Token::Whitespace( + Comment::SingleLineComment { + prefix: "#".to_string(), + comment: " this is a comment \n".to_string(), + } + .into(), + ), Token::make_word("table_1", None), ]; @@ -582,10 +586,13 @@ fn test_snowflake_single_line_tokenize() { Token::Whitespace(Whitespace::Space), Token::make_keyword("TABLE"), Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "//".to_string(), - comment: " this is a comment \n".to_string(), - }), + Token::Whitespace( + Comment::SingleLineComment { + prefix: "//".to_string(), + comment: " this is a comment \n".to_string(), + } + .into(), + ), Token::make_word("table_1", None), ]; @@ -619,7 +626,8 @@ fn test_snowflake_create_table_with_autoincrement_columns() { order: Some(IdentityPropertyOrder::Order), } )) - }] + }], + leading_comment: None, }, ColumnDef { name: "b".into(), @@ -637,7 +645,8 @@ fn test_snowflake_create_table_with_autoincrement_columns() { order: Some(IdentityPropertyOrder::NoOrder), } )) - }] + }], + leading_comment: None, }, ColumnDef { name: "c".into(), @@ -650,7 +659,8 @@ fn test_snowflake_create_table_with_autoincrement_columns() { order: None, } )) - }] + }], + leading_comment: None, }, ColumnDef { name: "d".into(), @@ -674,7 +684,8 @@ fn test_snowflake_create_table_with_autoincrement_columns() { order: Some(IdentityPropertyOrder::Order), } )) - }] + }], + leading_comment: None, }, ] ); @@ -697,7 +708,8 @@ fn test_snowflake_create_table_with_collated_column() { option: ColumnOption::Collation(ObjectName::from(vec![Ident::with_quote( '\'', "de_DE" )])), - }] + }], + leading_comment: None, },] ); } @@ -746,6 +758,7 @@ fn test_snowflake_create_table_with_columns_masking_policy() { } )) }], + leading_comment: None, },] ); } @@ 
-780,6 +793,7 @@ fn test_snowflake_create_table_with_columns_projection_policy() { } )) }], + leading_comment: None, },] ); } @@ -823,6 +837,7 @@ fn test_snowflake_create_table_with_columns_tags() { ] }), }], + leading_comment: None, },] ); } @@ -884,6 +899,7 @@ fn test_snowflake_create_table_with_several_column_options() { }), } ], + leading_comment: None, }, ColumnDef { name: "b".into(), @@ -922,6 +938,7 @@ fn test_snowflake_create_table_with_several_column_options() { }), } ], + leading_comment: None, }, ] ); diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index f0d6d9b72..59ac509ab 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -229,6 +229,7 @@ fn parse_create_table_auto_increment() { )]), }, ], + leading_comment: None, }], columns ); @@ -255,6 +256,7 @@ fn parse_create_table_primary_key_asc_desc() { option: ColumnOption::DialectSpecific(vec![Token::make_keyword(kind)]), }, ], + leading_comment: None, }; let sql = "CREATE TABLE foo (bar INT PRIMARY KEY ASC)"; @@ -285,11 +287,13 @@ fn parse_create_sqlite_quote() { name: Ident::with_quote('"', "KEY"), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, ColumnDef { name: Ident::with_quote('[', "INDEX"), data_type: DataType::Int(None), options: vec![], + leading_comment: None, }, ], columns