1- from typing import List , Union
1+ import re
2+ from typing import Any , List , Union
23from ..abcs .database_types import (
4+ ColType ,
5+ Array ,
6+ JSON ,
7+ Struct ,
38 Timestamp ,
49 Datetime ,
510 Integer ,
1015 FractionalType ,
1116 TemporalType ,
1217 Boolean ,
18+ UnknownColType ,
1319)
1420from ..abcs .mixins import (
1521 AbstractMixin_MD5 ,
@@ -36,6 +42,7 @@ def md5_as_int(self, s: str) -> str:
3642
3743
3844class Mixin_NormalizeValue (AbstractMixin_NormalizeValue ):
45+
3946 def normalize_timestamp (self , value : str , coltype : TemporalType ) -> str :
4047 if coltype .rounds :
4148 timestamp = f"timestamp_micros(cast(round(unix_micros(cast({ value } as timestamp))/1000000, { coltype .precision } )*1000000 as int))"
@@ -57,6 +64,27 @@ def normalize_number(self, value: str, coltype: FractionalType) -> str:
def normalize_boolean(self, value: str, _coltype: Boolean) -> str:
    """Return SQL that casts ``value`` to an int and converts it via ``to_string``.

    ``_coltype`` is accepted for signature parity but is not consulted.
    """
    as_int_expr = f"cast({value} as int)"
    return self.to_string(as_int_expr)
5966
def normalize_json(self, value: str, _coltype: JSON) -> str:
    """Wrap ``value`` in ``to_json_string(...)`` so it is compared as text.

    BigQuery is unable to compare arrays & structs with ==/!=/distinct from,
    e.g. it fails with:
        400 Grouping is not defined for arguments of type ARRAY<INT64> at …
    As a best effort the value is compared in its string form instead, hoping
    that the JSON renderings match on both sides (properly ordered keys, same
    spacing, same quotes, etc.).

    ``_coltype`` is not consulted.
    """
    json_text_expr = f"to_json_string({value} )"
    return json_text_expr
73+
def normalize_array(self, value: str, _coltype: Array) -> str:
    """Wrap ``value`` in ``to_json_string(...)`` so it is compared as text.

    BigQuery is unable to compare arrays & structs with ==/!=/distinct from,
    e.g. it fails with:
        400 Grouping is not defined for arguments of type ARRAY<INT64> at …
    As a best effort the value is compared in its string form instead, hoping
    that the JSON renderings match on both sides (properly ordered keys, same
    spacing, same quotes, etc.).

    ``_coltype`` is not consulted.
    """
    json_text_expr = f"to_json_string({value} )"
    return json_text_expr
80+
def normalize_struct(self, value: str, _coltype: Struct) -> str:
    """Wrap ``value`` in ``to_json_string(...)`` so it is compared as text.

    BigQuery is unable to compare arrays & structs with ==/!=/distinct from,
    e.g. it fails with:
        400 Grouping is not defined for arguments of type ARRAY<INT64> at …
    As a best effort the value is compared in its string form instead, hoping
    that the JSON renderings match on both sides (properly ordered keys, same
    spacing, same quotes, etc.).

    ``_coltype`` is not consulted.
    """
    json_text_expr = f"to_json_string({value} )"
    return json_text_expr
87+
6088
6189class Mixin_Schema (AbstractMixin_Schema ):
6290 def list_tables (self , table_schema : str , like : Compilable = None ) -> Compilable :
@@ -112,11 +140,12 @@ class Dialect(BaseDialect, Mixin_Schema):
112140 "BIGNUMERIC" : Decimal ,
113141 "FLOAT64" : Float ,
114142 "FLOAT32" : Float ,
115- # Text
116143 "STRING" : Text ,
117- # Boolean
118144 "BOOL" : Boolean ,
145+ "JSON" : JSON ,
119146 }
# Patterns for parameterized type representations. ``parse_type`` applies
# them with ``fullmatch``, so the whole type string must have the given shape.
# Group 1 captures everything between the outermost angle brackets; the greedy
# ``.+`` keeps nested brackets (e.g. ``ARRAY<STRUCT<...>>``) inside the group.
TYPE_ARRAY_RE = re.compile(r'ARRAY<(.+)>')
TYPE_STRUCT_RE = re.compile(r'STRUCT<(.+)>')
120149 MIXINS = {Mixin_Schema , Mixin_MD5 , Mixin_NormalizeValue , Mixin_TimeTravel , Mixin_RandomSample }
121150
122151 def random (self ) -> str :
@@ -134,6 +163,40 @@ def type_repr(self, t) -> str:
134163 except KeyError :
135164 return super ().type_repr (t )
136165
def parse_type(
    self,
    table_path: DbPath,
    col_name: str,
    type_repr: str,
    *args: Any,  # pass-through args
    **kwargs: Any,  # pass-through args
) -> ColType:
    """Resolve ``type_repr`` to a column type, adding ARRAY/STRUCT support.

    The parent implementation is tried first. Only when it yields an
    ``UnknownColType`` are the ``ARRAY<...>`` and ``STRUCT<...>`` patterns
    checked against the full type string:

    - ``ARRAY<T>``: ``T`` is parsed recursively (with the same pass-through
      arguments) and wrapped as ``Array(item_type=...)``.
    - ``STRUCT<...>``: returned as a bare ``Struct()``; the fields are not
      parsed for now. Shapes that could be parsed later include:
        - STRUCT<INT64, STRING(10)> (unnamed)
        - STRUCT<foo INT64, bar STRING(10)> (named)
        - STRUCT<foo INT64, bar ARRAY<INT64>> (with complex fields)
        - STRUCT<foo INT64, bar STRUCT<a INT64, b INT64>> (nested)

    If neither pattern matches, the parent's unknown type is returned as is.
    """
    parsed = super().parse_type(table_path, col_name, type_repr, *args, **kwargs)
    if not isinstance(parsed, UnknownColType):
        return parsed

    array_match = self.TYPE_ARRAY_RE.fullmatch(type_repr)
    if array_match is not None:
        element_repr = array_match.group(1)
        element_type = self.parse_type(table_path, col_name, element_repr, *args, **kwargs)
        return Array(item_type=element_type)

    if self.TYPE_STRUCT_RE.fullmatch(type_repr) is not None:
        return Struct()

    return parsed
192+
def to_comparable(self, value: str, coltype: ColType) -> str:
    """Make the expression usable as an operand of ``IS DISTINCT FROM``.

    JSON, array and struct columns go through ``normalize_value_by_type``;
    every other column type is delegated to the parent implementation.
    """
    if not isinstance(coltype, (JSON, Array, Struct)):
        return super().to_comparable(value, coltype)
    return self.normalize_value_by_type(value, coltype)
199+
def set_timezone_to_utc(self) -> str:
    """Unimplemented here: calling it always raises ``NotImplementedError``."""
    raise NotImplementedError
139202
0 commit comments