Skip to content

Commit e792a89

Browse files
committed
Initial set of changes for Dremio SQL
1 parent e113bb9 commit e792a89

File tree

2 files changed

+677
-540
lines changed

2 files changed

+677
-540
lines changed

src/sqlParser.jison

Lines changed: 103 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,24 @@
1-
/* description: Parses SQL */
1+
/* description: Parses Dremio SQL */
22
/* :tabSize=4:indentSize=4:noTabs=true: */
3+
/* Based on https://github.com/christyharagan/dremio-node-api */
4+
/* Based on https://github.com/JavaScriptor/js-sql-parser */
5+
/* Original License: ISC, albin zeng, 9 Feb 2018 */
36
%lex
47

58
%options case-insensitive
69

710
%%
811

912
[/][*](.|\n)*?[*][/] /* skip comments */
10-
[-][-]\s.*\n /* skip sql comments */
13+
[-][-].*\n? /* skip sql comments */
1114
/* '#' comments run to end of line; the newline is optional so a comment on the last line of input is skipped too. */
[#]\s.*\n? /* skip sql comments */
1215
\s+ /* skip whitespace */
1316

1417
[`][a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5]*[`] return 'IDENTIFIER'
1518
[\w]+[\u4e00-\u9fa5]+[0-9a-zA-Z_\u4e00-\u9fa5]* return 'IDENTIFIER'
19+
\.\* return 'IDENTIFIER'
20+
["](?:[^"\\]|\\.)*["](?:\[\d+\])? return 'IDENTIFIER'
21+
[`](?:[^`\\]|\\.)*[`](?:\[\d+\])? return 'IDENTIFIER'
1622
[\u4e00-\u9fa5][0-9a-zA-Z_\u4e00-\u9fa5]* return 'IDENTIFIER'
1723
SELECT return 'SELECT'
1824
ALL return 'ALL'
@@ -50,25 +56,29 @@ IN return 'IN'
5056
SOUNDS return 'SOUNDS'
5157
LIKE return 'LIKE'
5258
ESCAPE return 'ESCAPE'
59+
CAST return 'CAST'
60+
OVER return 'OVER'
5361
REGEXP return 'REGEXP'
5462
IS return 'IS'
5563
UNKNOWN return 'UNKNOWN'
5664
AND return 'AND'
5765
OR return 'OR'
66+
INTERVAL return 'INTERVAL'
5867
XOR return 'XOR'
5968
FROM return 'FROM'
60-
PARTITION return 'PARTITION'
69+
/* Plain PARTITION keyword. The lookahead rejects any "PARTITION <whitespace> BY" (any amount of whitespace) so that the PARTITION_BY rule can claim it; no whitespace is consumed into the token, and the explicit \b keeps words that merely start with PARTITION from matching. */
PARTITION\b(?!\s+BY\b) return 'PARTITION'
6170
USE return 'USE'
6271
INDEX return 'INDEX'
63-
KEY return 'KEY'
6472
FOR return 'FOR'
6573
JOIN return 'JOIN'
74+
PARTITION\s+BY return 'PARTITION_BY'
6675
ORDER\s+BY return 'ORDER_BY'
6776
GROUP\s+BY return 'GROUP_BY'
6877
IGNORE return 'IGNORE'
6978
FORCE return 'FORCE'
7079
INNER return 'INNER'
7180
CROSS return 'CROSS'
81+
FULL return 'FULL'
7282
ON return 'ON'
7383
USING return 'USING'
7484
LEFT return 'LEFT'
@@ -91,6 +101,29 @@ OJ return 'OJ'
91101
LIMIT return 'LIMIT'
92102
UNION return 'UNION'
93103

104+
MICROSECOND return 'MICROSECOND'
105+
SECOND return 'SECOND'
106+
MINUTE return 'MINUTE'
107+
HOUR return 'HOUR'
108+
DAY return 'DAY'
109+
WEEK return 'WEEK'
110+
MONTH return 'MONTH'
111+
/* TODO: make this keyword coexist with fields that are named "quarter"; while it stays commented out the lexer never emits a QUARTER token.
112+
QUARTER return 'QUARTER'*/
113+
YEAR return 'YEAR'
114+
SECOND_MICROSECOND return 'SECOND_MICROSECOND'
115+
MINUTE_MICROSECOND return 'MINUTE_MICROSECOND'
116+
MINUTE_SECOND return 'MINUTE_SECOND'
117+
HOUR_MICROSECOND return 'HOUR_MICROSECOND'
118+
HOUR_SECOND return 'HOUR_SECOND'
119+
HOUR_MINUTE return 'HOUR_MINUTE'
120+
DAY_MICROSECOND return 'DAY_MICROSECOND'
121+
DAY_SECOND return 'DAY_SECOND'
122+
DAY_MINUTE return 'DAY_MINUTE'
123+
DAY_HOUR return 'DAY_HOUR'
124+
YEAR_MONTH return 'YEAR_MONTH'
125+
126+
/* "'" return 'SINGLE_QUOTE' */
94127
"," return ','
95128
"=" return '='
96129
"(" return '('
@@ -124,11 +157,10 @@ UNION return 'UNION'
124157
/* EXPONENT_NUMERIC has to come before NUMERIC: without the flex option the lexer takes the first rule that matches, so the shorter NUMERIC pattern would otherwise always win and split "1e-5" into several tokens. The exponent sign is optional and may be + or -. */
[-]?[0-9]+(\.[0-9]+)?[eE][-+]?[0-9]+(\.[0-9]+)? return 'EXPONENT_NUMERIC'
125158
[-]?[0-9]+(\.[0-9]+)? return 'NUMERIC'
126159

127-
[a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5]* return 'IDENTIFIER'
160+
[\$a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5:]*(?:\[\d+\])? return 'IDENTIFIER'
128161
\. return 'DOT'
129-
['"][a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5]*["'] return 'QUOTED_IDENTIFIER'
130-
[`].+[`] return 'QUOTED_IDENTIFIER'
131-
162+
['"][@a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5]*["'] return 'QUOTED_IDENTIFIER'
163+
132164
<<EOF>> return 'EOF'
133165
. return 'INVALID'
134166

@@ -139,8 +171,8 @@ UNION return 'UNION'
139171
%nonassoc PARTITION
140172
%left INDEX_HINT_LIST
141173
%left INDEX_HINT_COMMA
142-
%left INNER_CROSS_JOIN_NULL LEFT_RIGHT_JOIN
143-
%left INNER_CROSS_JOIN
174+
%left INNER_CROSS_JOIN_NULL_FULL LEFT_RIGHT_JOIN
175+
%left INNER_CROSS_JOIN_FULL
144176
%right USING
145177
%right ON
146178
%left OR XOR '||'
@@ -228,6 +260,40 @@ selectClause
228260
}
229261
;
230262

263+
/* INTERVAL <amount> <unit>.
   Produces { type: 'Interval', duration, period } where `duration` is the value of
   intervalNumeric and `period` is the value of timePeriod. */
intervalExpr
264+
: INTERVAL intervalNumeric timePeriod { $$ = { type: 'Interval', duration: $2, period: $3 } }
265+
;
266+
267+
/* Amount part of an INTERVAL expression.
   Accepts a NUMERIC, EXPONENT_NUMERIC, HEX_NUMERIC or STRING token and passes the
   token value through unchanged (no conversion is done here). */
intervalNumeric
268+
: NUMERIC { $$ = $1 }
269+
| EXPONENT_NUMERIC { $$ = $1 }
270+
| HEX_NUMERIC { $$ = $1 }
271+
| STRING { $$ = $1 }
272+
;
273+
274+
/* Unit keyword of an interval (also usable as a function-call parameter and as a CAST target).
   Every alternative passes the matched token value through as the rule value. */
timePeriod
275+
: MICROSECOND { $$ = $1 }
276+
| SECOND { $$ = $1 }
277+
| MINUTE { $$ = $1 }
278+
| HOUR { $$ = $1 }
279+
| DAY { $$ = $1 }
280+
| WEEK { $$ = $1 }
281+
| MONTH { $$ = $1 }
282+
/* NOTE(review): the QUARTER lexer rule is commented out in the lexer section, so this
   alternative looks unreachable for now - confirm no other rule emits QUARTER. */
| QUARTER { $$ = $1 }
283+
| YEAR { $$ = $1 }
284+
| SECOND_MICROSECOND { $$ = $1 }
285+
| MINUTE_MICROSECOND { $$ = $1 }
286+
| MINUTE_SECOND { $$ = $1 }
287+
| HOUR_MICROSECOND { $$ = $1 }
288+
| HOUR_SECOND { $$ = $1 }
289+
| HOUR_MINUTE { $$ = $1 }
290+
| DAY_MICROSECOND { $$ = $1 }
291+
| DAY_SECOND { $$ = $1 }
292+
| DAY_MINUTE { $$ = $1 }
293+
| DAY_HOUR { $$ = $1 }
294+
| YEAR_MONTH { $$ = $1 }
295+
;
296+
231297
distinctOpt
232298
: ALL { $$ = $1 }
233299
| DISTINCT { $$ = $1 }
@@ -280,8 +346,6 @@ selectExprAliasOpt
280346
: { $$ = {alias: null, hasAs: null} }
281347
| AS IDENTIFIER { $$ = {alias: $2, hasAs: true} }
282348
| IDENTIFIER { $$ = {alias: $1, hasAs: false} }
283-
| AS QUOTED_IDENTIFIER { $$ = {alias: $2, hasAs: true} }
284-
| QUOTED_IDENTIFIER { $$ = {alias: $1, hasAs: false} }
285349
;
286350

287351
string
@@ -307,7 +371,7 @@ literal
307371
| null { $$ = $1 }
308372
;
309373
function_call
310-
: IDENTIFIER '(' function_call_param_list ')' { $$ = {type: 'FunctionCall', name: $1, params: $3} }
374+
: IDENTIFIER '(' function_call_param_list ')' over_opt { $$ = {type: 'FunctionCall', name: $1, params: $3, overOpt: $5} }
311375
;
312376
function_call_param_list
313377
: function_call_param_list ',' function_call_param { $1.push($3); $$ = $1; }
@@ -318,7 +382,11 @@ function_call_param
318382
| '*' { $$ = $1 }
319383
| SELECT_EXPR_STAR { $$ = $1 }
320384
| DISTINCT expr { $$ = { type: 'FunctionCallParam', distinctOpt: $1, value: $2 } }
385+
| expr FROM expr { $$ = { type: 'FromCallParam', left: $1, right: $3 } }
386+
| timePeriod FROM expr { $$ = { type: 'TimePeriodFromCallParam', left: $1, right: $3 } }
321387
| expr { $$ = $1 }
388+
| expr AS expr { $$ = { type: 'ASExpression', left: $1, right: $3 } }
389+
| timePeriod
322390
;
323391
identifier
324392
: IDENTIFIER { $$ = { type: 'Identifier', value: $1 } }
@@ -343,6 +411,15 @@ case_when_else
343411
case_when
344412
: CASE case_expr_opt when_then_list case_when_else END { $$ = { type: 'CaseWhen', caseExprOpt: $2, whenThenList: $3, else: $4 } }
345413
;
414+
/* CAST ( expr AS <target> ). Three target forms, each with its own node type:
     - IDENTIFIER            -> { type: 'Cast',         expr, castTo: <identifier text> }
     - INTERVAL timePeriod   -> { type: 'CastInterval', expr, castTo: <timePeriod value> }
     - function_call         -> { type: 'CastFunction', expr, castTo: <FunctionCall node> }
   The function_call form covers targets written with parentheses,
   presumably parameterised types such as NAME(n) - TODO confirm intended use. */
cast
415+
: CAST '(' expr AS IDENTIFIER ')' { $$ = { type: 'Cast', expr: $3, castTo: $5 } }
416+
| CAST '(' expr AS INTERVAL timePeriod ')' { $$ = { type: 'CastInterval', expr: $3, castTo: $6 } }
417+
| CAST '(' expr AS function_call ')' { $$ = { type: 'CastFunction', expr: $3, castTo: $5 } }
418+
;
419+
/* Optional OVER ( [PARTITION BY ...] [ORDER BY ...] ) clause that may follow a function call.
   Value is null when the clause is absent, otherwise
   { type: 'Over', partitionBy, orderBy } where either member may itself be null. */
over_opt
420+
: { $$ = null }
421+
| OVER '(' partition_by_opt order_by_opt ')' { $$ = { type: 'Over', partitionBy: $3, orderBy: $4 } }
422+
;
346423
simple_expr_prefix
347424
: '+' simple_expr %prec UPLUS { $$ = { type: 'Prefix', prefix: $1, value: $2 } }
348425
| '-' simple_expr %prec UMINUS { $$ = { type: 'Prefix', prefix: $1, value: $2 } }
@@ -352,6 +429,7 @@ simple_expr_prefix
352429
;
353430
simple_expr
354431
: literal { $$ = $1 }
432+
| intervalExpr { $$ = $1 }
355433
| identifier { $$ = $1 }
356434
| function_call { $$ = $1 }
357435
| simple_expr_prefix { $$ = $1 }
@@ -361,6 +439,7 @@ simple_expr
361439
| EXISTS '(' selectClause ')' { $$ = { type: 'SubQuery', value: $3, hasExists: true } }
362440
| '{' identifier expr '}' { $$ = { type: 'IdentifierExpr', identifier: $2, value: $3 } }
363441
| case_when { $$ = $1 }
442+
| cast { $$ = $1 }
364443
;
365444
bit_expr
366445
: simple_expr { $$ = $1 }
@@ -447,6 +526,13 @@ roll_up_opt
447526
group_by
448527
: GROUP_BY group_by_order_by_item_list roll_up_opt { $$ = { type: 'GroupBy', value: $2, rollUp: $3 } }
449528
;
529+
/* Optional PARTITION BY clause: null when absent, otherwise the partition_by node. */
partition_by_opt
530+
: { $$ = null }
531+
| partition_by { $$ = $1 }
532+
;
533+
/* PARTITION BY <item list>. Reuses group_by_order_by_item_list for the items and
   yields { type: 'PartitionBy', value: <item list> }. */
partition_by
534+
: PARTITION_BY group_by_order_by_item_list { $$ = { type: 'PartitionBy', value: $2 } }
535+
;
450536
order_by_opt
451537
: { $$ = null }
452538
| order_by { $$ = $1 }
@@ -503,11 +589,13 @@ table_references
503589
/* One entry of a table reference list. Always yields a 'TableReference' node:
     - a plain table_reference,
     - the { OJ table_reference } escape form (hasOj: true),
     - a bare STRING token, whose token value is stored directly as `value`
       (unlike the other forms, `value` is then not a table_reference node). */
escaped_table_reference
504590
: table_reference { $$ = { type: 'TableReference', value: $1 } }
505591
| '{' OJ table_reference '}' { $$ = { type: 'TableReference', hasOj: true, value: $3 } }
592+
| STRING { $$ = { type: 'TableReference', value: $1 } }
506593
;
507594
/* Optional qualifier in front of JOIN: null when omitted, otherwise the INNER, CROSS or
   FULL token value. join_table stores it as `innerCrossOpt` on an 'InnerCrossJoinTable'
   node, so FULL joins are reported under that node type as well. */
join_inner_cross
508595
: { $$ = null }
509596
| INNER { $$ = $1 }
510597
| CROSS { $$ = $1 }
598+
| FULL { $$ = $1 }
511599
;
512600
left_right
513601
: LEFT { $$ = $1 }
@@ -522,8 +610,8 @@ left_right_out_opt
522610
| left_right out_opt { $$ = { leftRight: $1, outOpt: $2 } }
523611
;
524612
join_table
525-
: table_reference join_inner_cross JOIN table_factor %prec INNER_CROSS_JOIN_NULL { $$ = { type: 'InnerCrossJoinTable', innerCrossOpt: $2, left: $1, right: $4, condition: null } }
526-
| table_reference join_inner_cross JOIN table_factor join_condition %prec INNER_CROSS_JOIN { $$ = { type: 'InnerCrossJoinTable', innerCrossOpt: $2, left: $1, right: $4, condition: $5 } }
613+
: table_reference join_inner_cross JOIN table_factor %prec INNER_CROSS_JOIN_NULL_FULL { $$ = { type: 'InnerCrossJoinTable', innerCrossOpt: $2, left: $1, right: $4, condition: null } }
614+
| table_reference join_inner_cross JOIN table_factor join_condition %prec INNER_CROSS_JOIN_FULL { $$ = { type: 'InnerCrossJoinTable', innerCrossOpt: $2, left: $1, right: $4, condition: $5 } }
527615
| table_reference STRAIGHT_JOIN table_factor on_join_condition { $$ = { type: 'StraightJoinTable', left: $1, right: $3, condition: $4 } }
528616
| table_reference left_right out_opt JOIN table_reference join_condition %prec LEFT_RIGHT_JOIN { $$ = { type: 'LeftRightJoinTable', leftRight: $2, outOpt: $3, left: $1, right: $5, condition: $6 } }
529617
| table_reference NATURAL left_right_out_opt JOIN table_factor { $$ = { type: 'NaturalJoinTable', leftRight: $3.leftRight, outOpt: $3.outOpt, left: $1, right: $5 } }

0 commit comments

Comments
 (0)