1- /* description: Parses SQL */
1+ /* description: Parses Dremio SQL */
22/* :tabSize=4:indentSize=4:noTabs=true: */
3+ /* Based on https://github.com/christyharagan/dremio-node-api */
4+ /* Based on https://github.com/JavaScriptor/js-sql-parser */
5+ /* Original License: ISC, albin zeng, 9 Feb 2018 */
36%lex
47
58%options case-insensitive
69
710%%
811
912[/][*] (. | \n )*? [*][/] /* skip comments */
10- [-][-] \s . * \n /* skip sql comments */
13+ [-][-] . * \n ? /* skip sql comments */
1114[#] \s . * \n /* skip sql comments */
1215\s + /* skip whitespace */
1316
1417[`][a-zA-Z_\u4e00 -\u9fa5 ][a-zA-Z0-9_\u4e00 -\u9fa5 ] * [`] return ' IDENTIFIER'
1518[\w ] + [\u4e00 -\u9fa5 ] + [0-9a-zA-Z_\u4e00 -\u9fa5 ] * return ' IDENTIFIER'
19+ \.\* return ' IDENTIFIER'
20+ ["] (?:[^ "\\ ] | \\ . )* ["] (?:\[ \d + \] )? return ' IDENTIFIER'
21+ [`] (?:[^ `\\ ] | \\ . )* [`] (?:\[ \d + \] )? return ' IDENTIFIER'
1622[\u4e00 -\u9fa5 ][0-9a-zA-Z_\u4e00 -\u9fa5 ] * return ' IDENTIFIER'
1723SELECT return ' SELECT'
1824ALL return ' ALL'
@@ -50,25 +56,29 @@ IN return 'IN'
5056SOUNDS return ' SOUNDS'
5157LIKE return ' LIKE'
5258ESCAPE return ' ESCAPE'
59+ CAST return ' CAST'
60+ OVER return ' OVER'
5361REGEXP return ' REGEXP'
5462IS return ' IS'
5563UNKNOWN return ' UNKNOWN'
5664AND return ' AND'
5765OR return ' OR'
66+ INTERVAL return ' INTERVAL'
5867XOR return ' XOR'
5968FROM return ' FROM'
60- PARTITION return ' PARTITION'
/* PARTITION keyword on its own (e.g. PARTITION (p0, p1)).
   The lexer takes the first rule that matches, and this rule precedes PARTITION\s+BY,
   so the lookahead must skip ALL whitespace before testing for BY; otherwise
   "PARTITION  BY" or "PARTITION<newline>BY" would be tokenised as PARTITION.
   No trailing whitespace is consumed, so "PARTITION(" is still recognised,
   and the explicit \b keeps longer words such as PARTITIONS out of this rule. */
69+ PARTITION\b(?!\s+BY\b) return 'PARTITION'
6170USE return ' USE'
6271INDEX return ' INDEX'
63- KEY return ' KEY'
6472FOR return ' FOR'
6573JOIN return ' JOIN'
74+ PARTITION\s + BY return ' PARTITION_BY'
6675ORDER\s + BY return ' ORDER_BY'
6776GROUP\s + BY return ' GROUP_BY'
6877IGNORE return ' IGNORE'
6978FORCE return ' FORCE'
7079INNER return ' INNER'
7180CROSS return ' CROSS'
81+ FULL return ' FULL'
7282ON return ' ON'
7383USING return ' USING'
7484LEFT return ' LEFT'
@@ -91,6 +101,29 @@ OJ return 'OJ'
91101LIMIT return ' LIMIT'
92102UNION return ' UNION'
93103
104+ MICROSECOND return ' MICROSECOND'
105+ SECOND return ' SECOND'
106+ MINUTE return ' MINUTE'
107+ HOUR return ' HOUR'
108+ DAY return ' DAY'
109+ WEEK return ' WEEK'
110+ MONTH return ' MONTH'
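111+ /* TODO: QUARTER is disabled as a keyword; re-enable it once fields named "quarter" can still be parsed as identifiers. While it is disabled, no rule in this section emits the QUARTER token that timePeriod refers to.
112+ QUARTER return 'QUARTER'*/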
113+ YEAR return ' YEAR'
114+ SECOND_MICROSECOND return ' SECOND_MICROSECOND'
115+ MINUTE_MICROSECOND return ' MINUTE_MICROSECOND'
116+ MINUTE_SECOND return ' MINUTE_SECOND'
117+ HOUR_MICROSECOND return ' HOUR_MICROSECOND'
118+ HOUR_SECOND return ' HOUR_SECOND'
119+ HOUR_MINUTE return ' HOUR_MINUTE'
120+ DAY_MICROSECOND return ' DAY_MICROSECOND'
121+ DAY_SECOND return ' DAY_SECOND'
122+ DAY_MINUTE return ' DAY_MINUTE'
123+ DAY_HOUR return ' DAY_HOUR'
124+ YEAR_MONTH return ' YEAR_MONTH'
125+
126+ /* "'" return 'SINGLE_QUOTE' */
94127"," return ' ,'
95128"=" return ' ='
96129"(" return ' ('
@@ -124,11 +157,10 @@ UNION return 'UNION'
124157[-] ? [0-9] + (\. [0-9] + )? return ' NUMERIC'
125158[-] ? [0-9] + (\. [0-9] + )? [eE][-][0-9] + (\. [0-9] + )? return ' EXPONENT_NUMERIC'
126159
127- [a-zA-Z_\u4e00 -\u9fa5 ][a-zA-Z0-9_\u4e00 -\u9fa5 ] * return ' IDENTIFIER'
160+ [\$ a-zA-Z_\u4e00 -\u9fa5 ][a-zA-Z0-9_\u4e00 -\u9fa5 :] * (?: \[ \d + \] ) ? return ' IDENTIFIER'
128161\. return ' DOT'
129- ['"][a-zA-Z_\u4e00 -\u9fa5 ][a-zA-Z0-9_\u4e00 -\u9fa5 ] * ["'] return ' QUOTED_IDENTIFIER'
130- [`] . + [`] return ' QUOTED_IDENTIFIER'
131-
162+ ['"][@a-zA-Z_\u4e00 -\u9fa5 ][a-zA-Z0-9_\u4e00 -\u9fa5 ] * ["'] return ' QUOTED_IDENTIFIER'
163+
132164<<EOF>> return ' EOF'
133165. return ' INVALID'
134166
@@ -139,8 +171,8 @@ UNION return 'UNION'
139171%nonassoc PARTITION
140172%left INDEX_HINT_LIST
141173%left INDEX_HINT_COMMA
142- %left INNER_CROSS_JOIN_NULL LEFT_RIGHT_JOIN
143- %left INNER_CROSS_JOIN
174+ %left INNER_CROSS_JOIN_NULL_FULL LEFT_RIGHT_JOIN
175+ %left INNER_CROSS_JOIN_FULL
144176%right USING
145177%right ON
146178%left OR XOR '||'
@@ -228,6 +260,40 @@ selectClause
228260 }
229261 ;
230262
263+ intervalExpr /* Interval literal: the INTERVAL keyword, then a duration, then a time unit. */
264+ : INTERVAL intervalNumeric timePeriod { $$ = { type: ' Interval' , duration: $2 , period: $3 } } /* duration and period carry the values passed up by intervalNumeric and timePeriod */
265+ ;
266+
267+ intervalNumeric /* Duration operand of an interval; every alternative passes the matched token through unchanged. */
268+ : NUMERIC { $$ = $1 }
269+ | EXPONENT_NUMERIC { $$ = $1 }
270+ | HEX_NUMERIC { $$ = $1 } /* NOTE(review): the HEX_NUMERIC lexer rule is not visible in this hunk -- confirm it exists */
271+ | STRING { $$ = $1 } /* allows a quoted duration; the STRING lexer rule is outside this hunk */
272+ ;
273+
274+ timePeriod /* Time-unit keyword; each alternative yields the matched keyword token as-is. */
275+ : MICROSECOND { $$ = $1 }
276+ | SECOND { $$ = $1 }
277+ | MINUTE { $$ = $1 }
278+ | HOUR { $$ = $1 }
279+ | DAY { $$ = $1 }
280+ | WEEK { $$ = $1 }
281+ | MONTH { $$ = $1 }
282+ | QUARTER { $$ = $1 } /* NOTE(review): the QUARTER lexer rule is commented out in the lexer section, so this alternative looks unreachable -- confirm */
283+ | YEAR { $$ = $1 }
284+ | SECOND_MICROSECOND { $$ = $1 } /* compound units follow: each is a single token written with an underscore */
285+ | MINUTE_MICROSECOND { $$ = $1 }
286+ | MINUTE_SECOND { $$ = $1 }
287+ | HOUR_MICROSECOND { $$ = $1 }
288+ | HOUR_SECOND { $$ = $1 }
289+ | HOUR_MINUTE { $$ = $1 }
290+ | DAY_MICROSECOND { $$ = $1 }
291+ | DAY_SECOND { $$ = $1 }
292+ | DAY_MINUTE { $$ = $1 }
293+ | DAY_HOUR { $$ = $1 }
294+ | YEAR_MONTH { $$ = $1 }
295+ ;
296+
231297distinctOpt
232298 : ALL { $$ = $1 }
233299 | DISTINCT { $$ = $1 }
@@ -280,8 +346,6 @@ selectExprAliasOpt
280346 : { $$ = {alias: null , hasAs: null } }
281347 | AS IDENTIFIER { $$ = {alias: $2 , hasAs: true } }
282348 | IDENTIFIER { $$ = {alias: $1 , hasAs: false } }
283- | AS QUOTED_IDENTIFIER { $$ = {alias: $2 , hasAs: true } }
284- | QUOTED_IDENTIFIER { $$ = {alias: $1 , hasAs: false } }
285349 ;
286350
287351string
@@ -307,7 +371,7 @@ literal
307371 | null { $$ = $1 }
308372 ;
309373function_call /* Function invocation: name, parenthesised parameter list, and an optional OVER clause. */
310- : IDENTIFIER '(' function_call_param_list ')' { $$ = {type: ' FunctionCall' , name: $1 , params: $3 } }
374+ : IDENTIFIER '(' function_call_param_list ')' over_opt { $$ = {type: ' FunctionCall' , name: $1 , params: $3 , overOpt : $5 } } /* overOpt is null when no OVER clause follows (see over_opt) */
311375 ;
312376function_call_param_list
313377 : function_call_param_list ',' function_call_param { $1 .push ($3 ); $$ = $1 ; }
@@ -318,7 +382,11 @@ function_call_param
318382 | '*' { $$ = $1 }
319383 | SELECT_EXPR_STAR { $$ = $1 }
320384 | DISTINCT expr { $$ = { type: ' FunctionCallParam' , distinctOpt: $1 , value: $2 } }
385+ | expr FROM expr { $$ = { type: ' FromCallParam' , left: $1 , right: $3 } }
386+ | timePeriod FROM expr { $$ = { type: ' TimePeriodFromCallParam' , left: $1 , right: $3 } }
321387 | expr { $$ = $1 }
388+ | expr AS expr { $$ = { type: ' ASExpression' , left: $1 , right: $3 } }
389+ | timePeriod
322390 ;
323391identifier
324392 : IDENTIFIER { $$ = { type: ' Identifier' , value: $1 } }
@@ -343,6 +411,15 @@ case_when_else
343411case_when
344412 : CASE case_expr_opt when_then_list case_when_else END { $$ = { type: ' CaseWhen' , caseExprOpt: $2 , whenThenList: $3 , else: $4 } }
345413 ;
414+ cast /* CAST(expr AS target); the node type depends on the form of the target. */
415+ : CAST '(' expr AS IDENTIFIER ')' { $$ = { type: ' Cast' , expr: $3 , castTo: $5 } } /* target is a bare identifier token */
416+ | CAST '(' expr AS INTERVAL timePeriod ')' { $$ = { type: ' CastInterval' , expr: $3 , castTo: $6 } } /* castTo holds only the time unit; the INTERVAL keyword itself is dropped */
417+ | CAST '(' expr AS function_call ')' { $$ = { type: ' CastFunction' , expr: $3 , castTo: $5 } } /* target written in call syntax; presumably parameterised types such as VARCHAR(10) -- confirm */
418+ ;
419+ over_opt /* Optional OVER clause that may follow a function call. */
420+ : { $$ = null } /* no OVER clause present */
421+ | OVER '(' partition_by_opt order_by_opt ')' { $$ = { type: ' Over' , partitionBy: $3 , orderBy: $4 } } /* either part may be null because both sub-rules are optional */
422+ ;
346423simple_expr_prefix
347424 : '+' simple_expr %prec UPLUS { $$ = { type: ' Prefix' , prefix: $1 , value: $2 } }
348425 | '-' simple_expr %prec UMINUS { $$ = { type: ' Prefix' , prefix: $1 , value: $2 } }
@@ -352,6 +429,7 @@ simple_expr_prefix
352429 ;
353430simple_expr
354431 : literal { $$ = $1 }
432+ | intervalExpr { $$ = $1 }
355433 | identifier { $$ = $1 }
356434 | function_call { $$ = $1 }
357435 | simple_expr_prefix { $$ = $1 }
@@ -361,6 +439,7 @@ simple_expr
361439 | EXISTS '(' selectClause ')' { $$ = { type: ' SubQuery' , value: $3 , hasExists: true } }
362440 | '{' identifier expr '}' { $$ = { type: ' IdentifierExpr' , identifier: $2 , value: $3 } }
363441 | case_when { $$ = $1 }
442+ | cast { $$ = $1 }
364443 ;
365444bit_expr
366445 : simple_expr { $$ = $1 }
@@ -447,6 +526,13 @@ roll_up_opt
447526group_by
448527 : GROUP_BY group_by_order_by_item_list roll_up_opt { $$ = { type: ' GroupBy' , value: $2 , rollUp: $3 } }
449528 ;
529+ partition_by_opt /* Optional PARTITION BY clause; used inside over_opt. */
530+ : { $$ = null } /* clause omitted */
531+ | partition_by { $$ = $1 }
532+ ;
533+ partition_by /* PARTITION BY clause: the single PARTITION_BY token followed by an item list. */
534+ : PARTITION_BY group_by_order_by_item_list { $$ = { type: ' PartitionBy' , value: $2 } } /* uses the same item-list rule as group_by */
535+ ;
450536order_by_opt
451537 : { $$ = null }
452538 | order_by { $$ = $1 }
@@ -503,11 +589,13 @@ table_references
503589escaped_table_reference /* One table reference: plain, wrapped in the { OJ ... } escape form, or given as a string token. */
504590 : table_reference { $$ = { type: ' TableReference' , value: $1 } }
505591 | '{' OJ table_reference '}' { $$ = { type: ' TableReference' , hasOj: true , value: $3 } }
592+ | STRING { $$ = { type: ' TableReference' , value: $1 } } /* here value is the raw STRING token, not a table_reference node */
506593 ;
507594join_inner_cross /* Optional modifier in front of JOIN; yields the keyword token, or null when absent. */
508595 : { $$ = null }
509596 | INNER { $$ = $1 }
510597 | CROSS { $$ = $1 }
598+ | FULL { $$ = $1 } /* FULL joins therefore go through the same join_table alternatives as INNER and CROSS */
511599 ;
512600left_right
513601 : LEFT { $$ = $1 }
@@ -522,8 +610,8 @@ left_right_out_opt
522610 | left_right out_opt { $$ = { leftRight: $1 , outOpt: $2 } }
523611 ;
524612join_table
525- : table_reference join_inner_cross JOIN table_factor %prec INNER_CROSS_JOIN_NULL { $$ = { type: ' InnerCrossJoinTable' , innerCrossOpt: $2 , left: $1 , right: $4 , condition: null } }
526- | table_reference join_inner_cross JOIN table_factor join_condition %prec INNER_CROSS_JOIN { $$ = { type: ' InnerCrossJoinTable' , innerCrossOpt: $2 , left: $1 , right: $4 , condition: $5 } }
613+ : table_reference join_inner_cross JOIN table_factor %prec INNER_CROSS_JOIN_NULL_FULL { $$ = { type: ' InnerCrossJoinTable' , innerCrossOpt: $2 , left: $1 , right: $4 , condition: null } }
614+ | table_reference join_inner_cross JOIN table_factor join_condition %prec INNER_CROSS_JOIN_FULL { $$ = { type: ' InnerCrossJoinTable' , innerCrossOpt: $2 , left: $1 , right: $4 , condition: $5 } }
527615 | table_reference STRAIGHT_JOIN table_factor on_join_condition { $$ = { type: ' StraightJoinTable' , left: $1 , right: $3 , condition: $4 } }
528616 | table_reference left_right out_opt JOIN table_reference join_condition %prec LEFT_RIGHT_JOIN { $$ = { type: ' LeftRightJoinTable' , leftRight: $2 , outOpt: $3 , left: $1 , right: $5 , condition: $6 } }
529617 | table_reference NATURAL left_right_out_opt JOIN table_factor { $$ = { type: ' NaturalJoinTable' , leftRight: $3 .leftRight , outOpt: $3 .outOpt , left: $1 , right: $5 } }
0 commit comments