From 56b4dd5cb299aabc4d61151b560cd80597c6fe73 Mon Sep 17 00:00:00 2001 From: Sean Gillespie Date: Mon, 29 Sep 2025 21:09:42 -0700 Subject: [PATCH 1/3] Parse compression codecs with no parameters Compression codecs such as `DoubleDelta` do not require any additional positional parameters and infer defaults, such as `CODEC(DoubleDelta)`. This commit doesn't continue parsing a codec if there's not a comma indicating additional parameters to the codec. --- parser/parser_column.go | 16 ++-- .../ddl/create_table_codec_no_args.sql | 3 + .../ddl/format/create_table_codec_no_args.sql | 7 ++ ...create_table_codec_no_args.sql.golden.json | 78 +++++++++++++++++++ 4 files changed, 97 insertions(+), 7 deletions(-) create mode 100644 parser/testdata/ddl/create_table_codec_no_args.sql create mode 100644 parser/testdata/ddl/format/create_table_codec_no_args.sql create mode 100644 parser/testdata/ddl/output/create_table_codec_no_args.sql.golden.json diff --git a/parser/parser_column.go b/parser/parser_column.go index 5312319..e9d2978 100644 --- a/parser/parser_column.go +++ b/parser/parser_column.go @@ -1142,13 +1142,15 @@ func (p *Parser) tryParseCompressionCodecs(pos Pos) (*CompressionCodec, error) { if err != nil { return nil, err } - // consume comma - if err := p.expectTokenKind(TokenKindComma); err != nil { - return nil, err - } - name, err = p.parseIdent() - if err != nil { - return nil, err + + if p.matchTokenKind(TokenKindComma) { + if err := p.expectTokenKind(TokenKindComma); err != nil { + return nil, err + } + name, err = p.parseIdent() + if err != nil { + return nil, err + } } } diff --git a/parser/testdata/ddl/create_table_codec_no_args.sql b/parser/testdata/ddl/create_table_codec_no_args.sql new file mode 100644 index 0000000..6bee985 --- /dev/null +++ b/parser/testdata/ddl/create_table_codec_no_args.sql @@ -0,0 +1,3 @@ +CREATE TABLE shark_attacks ( + timestamp DateTime CODEC(DoubleDelta), +); \ No newline at end of file diff --git a/parser/testdata/ddl/format/create_table_codec_no_args.sql b/parser/testdata/ddl/format/create_table_codec_no_args.sql new file mode 100644 index 0000000..558d2d3 --- /dev/null +++ b/parser/testdata/ddl/format/create_table_codec_no_args.sql @@ -0,0 +1,7 @@ +-- Origin SQL: +CREATE TABLE shark_attacks ( + timestamp DateTime CODEC(DoubleDelta), +); + +-- Format SQL: +CREATE TABLE shark_attacks (timestamp DateTime CODEC(DoubleDelta, DoubleDelta)); diff --git a/parser/testdata/ddl/output/create_table_codec_no_args.sql.golden.json b/parser/testdata/ddl/output/create_table_codec_no_args.sql.golden.json new file mode 100644 index 0000000..87f2fb9 --- /dev/null +++ b/parser/testdata/ddl/output/create_table_codec_no_args.sql.golden.json @@ -0,0 +1,78 @@ +[ + { + "CreatePos": 0, + "StatementEnd": 0, + "OrReplace": false, + "Name": { + "Database": null, + "Table": { + "Name": "shark_attacks", + "QuoteType": 1, + "NamePos": 13, + "NameEnd": 26 + } + }, + "IfNotExists": false, + "UUID": null, + "OnCluster": null, + "TableSchema": { + "SchemaPos": 27, + "SchemaEnd": 72, + "Columns": [ + { + "NamePos": 33, + "ColumnEnd": 70, + "Name": { + "Ident": { + "Name": "timestamp", + "QuoteType": 1, + "NamePos": 33, + "NameEnd": 42 + }, + "DotIdent": null + }, + "Type": { + "Name": { + "Name": "DateTime", + "QuoteType": 1, + "NamePos": 43, + "NameEnd": 51 + } + }, + "NotNull": null, + "Nullable": null, + "DefaultExpr": null, + "MaterializedExpr": null, + "AliasExpr": null, + "Codec": { + "CodecPos": 52, + "RightParenPos": 70, + "Type": { + "Name": "DoubleDelta", + "QuoteType": 1, + "NamePos": 58, + "NameEnd": 69 + }, + "TypeLevel": null, + "Name": { + "Name": "DoubleDelta", + "QuoteType": 1, + "NamePos": 58, + "NameEnd": 69 + }, + "Level": null + }, + "TTL": null, + "Comment": null, + "CompressionCodec": null + } + ], + "AliasTable": null, + "TableFunction": null + }, + "Engine": null, + "SubQuery": null, + "HasTemporary": false, + "Comment": null + } +] \ No newline at end of file From 5a288d7c1512b0b4199e48817bc596cf85854b14 Mon Sep 17 00:00:00 2001 From: Sean Gillespie Date: Wed, 12 Nov 2025 21:48:05 -0800 Subject: [PATCH 2/3] Fix rendering of codec name and type per CR feedback --- parser/ast.go | 24 +++++++---- parser/parser_column.go | 41 +++++++++++++++---- .../ddl/format/create_table_codec_no_args.sql | 2 +- ...create_table_codec_no_args.sql.golden.json | 7 +--- 4 files changed, 49 insertions(+), 25 deletions(-) diff --git a/parser/ast.go b/parser/ast.go index cd2eb11..8092b29 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -4512,11 +4512,13 @@ func (c *CompressionCodec) String() string { builder.WriteByte(',') builder.WriteByte(' ') } - builder.WriteString(c.Name.String()) - if c.Level != nil { - builder.WriteByte('(') - builder.WriteString(c.Level.String()) - builder.WriteByte(')') + if c.Name != nil { + builder.WriteString(c.Name.String()) + if c.Level != nil { + builder.WriteByte('(') + builder.WriteString(c.Level.String()) + builder.WriteByte(')') + } } builder.WriteByte(')') return builder.String() @@ -4525,16 +4527,20 @@ func (c *CompressionCodec) String() string { func (c *CompressionCodec) Accept(visitor ASTVisitor) error { visitor.Enter(c) defer visitor.Leave(c) - if err := c.Type.Accept(visitor); err != nil { - return err + if c.Type != nil { + if err := c.Type.Accept(visitor); err != nil { + return err + } } if c.TypeLevel != nil { if err := c.TypeLevel.Accept(visitor); err != nil { return err } } - if err := c.Name.Accept(visitor); err != nil { - return err + if c.Name != nil { + if err := c.Name.Accept(visitor); err != nil { + return err + } } if c.Level != nil { if err := c.Level.Accept(visitor); err != nil { diff --git a/parser/parser_column.go b/parser/parser_column.go index e9d2978..7f64144 100644 --- a/parser/parser_column.go +++ b/parser/parser_column.go @@ -1126,17 +1126,17 @@ func (p *Parser) tryParseCompressionCodecs(pos Pos) (*CompressionCodec, error) { } // parse codec name - name, err := p.parseIdent() + codecType, err := p.parseIdent() if err != nil { return nil, err } // parse DELTA if CODEC(Delta, ZSTD(1)) // or CODEC(Delta(9), ZSTD(1)) or CODEC(T64, ZSTD(1)) - var codecType *Ident + //var codecType *Ident + var name *Ident var typeLevel *NumberLiteral - switch strings.ToUpper(name.Name) { + switch strings.ToUpper(codecType.Name) { case "DELTA", "DOUBLEDELTA", "T64", "GORILLA": - codecType = name // try parse delta level typeLevel, err = p.tryParseCompressionLevel(p.Pos()) if err != nil { @@ -1152,15 +1152,23 @@ func (p *Parser) tryParseCompressionCodecs(pos Pos) (*CompressionCodec, error) { return nil, err } } + case "ZSTD", "LZ4HC", "LH4": + // For compression codecs, try to parse level + typeLevel, err = p.tryParseCompressionLevel(p.Pos()) + if err != nil { + return nil, err + } } var level *NumberLiteral // TODO: check if the codec name is valid - switch strings.ToUpper(name.Name) { - case "ZSTD", "LZ4HC", "LH4": - level, err = p.tryParseCompressionLevel(p.Pos()) - if err != nil { - return nil, err + if name != nil { + switch strings.ToUpper(name.Name) { + case "ZSTD", "LZ4HC", "LH4": + level, err = p.tryParseCompressionLevel(p.Pos()) + if err != nil { + return nil, err + } } } @@ -1169,6 +1177,21 @@ func (p *Parser) tryParseCompressionCodecs(pos Pos) (*CompressionCodec, error) { return nil, err } + // If there's only one codec (no second name), store it in Name field instead of Type + // This handles cases like CODEC(DoubleDelta) or CODEC(ZSTD(1)) + if name == nil { + return &CompressionCodec{ + CodecPos: pos, + RightParenPos: rightParenPos, + Type: nil, + TypeLevel: nil, + Name: codecType, + Level: typeLevel, + }, nil + } + + // Two codecs: Type is the preprocessing codec, Name is the compression codec + // e.g., CODEC(DoubleDelta, ZSTD(1)) return &CompressionCodec{ CodecPos: pos, RightParenPos: rightParenPos, diff --git a/parser/testdata/ddl/format/create_table_codec_no_args.sql b/parser/testdata/ddl/format/create_table_codec_no_args.sql index 558d2d3..4d031cf 100644 --- a/parser/testdata/ddl/format/create_table_codec_no_args.sql +++ b/parser/testdata/ddl/format/create_table_codec_no_args.sql @@ -4,4 +4,4 @@ CREATE TABLE shark_attacks ( ); -- Format SQL: -CREATE TABLE shark_attacks (timestamp DateTime CODEC(DoubleDelta, DoubleDelta)); +CREATE TABLE shark_attacks (timestamp DateTime CODEC(DoubleDelta)); diff --git a/parser/testdata/ddl/output/create_table_codec_no_args.sql.golden.json b/parser/testdata/ddl/output/create_table_codec_no_args.sql.golden.json index 87f2fb9..56f8c63 100644 --- a/parser/testdata/ddl/output/create_table_codec_no_args.sql.golden.json +++ b/parser/testdata/ddl/output/create_table_codec_no_args.sql.golden.json @@ -47,12 +47,7 @@ "Codec": { "CodecPos": 52, "RightParenPos": 70, - "Type": { - "Name": "DoubleDelta", - "QuoteType": 1, - "NamePos": 58, - "NameEnd": 69 - }, + "Type": null, "TypeLevel": null, "Name": { "Name": "DoubleDelta", From 53a7287171a0fea770aad17caf1080a3b29b92d4 Mon Sep 17 00:00:00 2001 From: Sean Gillespie Date: Wed, 12 Nov 2025 21:48:51 -0800 Subject: [PATCH 3/3] remove commented out code --- parser/parser_column.go | 1 - 1 file changed, 1 deletion(-) diff --git a/parser/parser_column.go b/parser/parser_column.go index 7f64144..557bfbf 100644 --- a/parser/parser_column.go +++ b/parser/parser_column.go @@ -1132,7 +1132,6 @@ func (p *Parser) tryParseCompressionCodecs(pos Pos) (*CompressionCodec, error) { } // parse DELTA if CODEC(Delta, ZSTD(1)) // or CODEC(Delta(9), ZSTD(1)) or CODEC(T64, ZSTD(1)) - //var codecType *Ident var name *Ident var typeLevel *NumberLiteral switch strings.ToUpper(codecType.Name) {