1+ import re
2+
13from cube_dbt .dump import dump
24
# As of 2024-10-17, the valid "Dimension Types" listed on
# https://cube.dev/docs/reference/data-model/types-and-formats#dimension-types
# are: time, string, number, boolean, and geo.
#
# Any type returned by Column.type must be one of these values.
VALID_DIMENSION_TYPES = [
    "boolean",
    "geo",
    "number",
    "string",
    "time",
]
# Other System's Type => Cube Type
#
# Keys are lowercase source type names; values are Cube dimension types.
# See https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
BIGQUERY_TYPE_MAPPINGS = {
    "array": "string",
    "bool": "boolean",
    "bytes": "string",
    "date": "time",
    "datetime": "time",
    "geography": "geo",
    "interval": "string",
    "json": "string",

    # numeric types
    "int64": "number",
    "int": "number",
    "smallint": "number",
    "integer": "number",
    "bigint": "number",
    "tinyint": "number",
    "byteint": "number",
    "numeric": "number",
    "decimal": "number",
    "bignumeric": "number",
    "bigdecimal": "number",
    "float64": "number",

    "range": "string",
    # string does not need to be mapped
    "struct": "string",
    # time does not need to be mapped
    "timestamp": "time",
}
# Snowflake type name (lowercase) => Cube dimension type.
# See https://docs.snowflake.com/en/sql-reference-data-types
SNOWFLAKE_TYPE_MAPPINGS = {
    # Numeric data types
    # number does not need to be mapped
    "decimal": "number",
    "dec": "number",
    "numeric": "number",
    "int": "number",
    "integer": "number",
    "bigint": "number",
    "smallint": "number",
    "tinyint": "number",
    "byteint": "number",
    "float": "number",
    "float4": "number",
    "float8": "number",
    "double": "number",
    "double precision": "number",
    "real": "number",

    # String & binary data types
    "varchar": "string",
    "char": "string",
    "character": "string",
    "nchar": "string",
    # string does not need to be mapped
    "text": "string",
    "nvarchar": "string",
    "nvarchar2": "string",
    "char varying": "string",
    "nchar varying": "string",
    "binary": "string",
    "varbinary": "string",

    # Logical data types
    # boolean does not need to be mapped

    # Date & time data types
    "date": "time",
    "datetime": "time",
    # time does not need to be mapped
    "timestamp": "time",
    "timestamp_ltz": "time",
    "timestamp_ntz": "time",
    "timestamp_tz": "time",

    # Semi-structured data types
    "variant": "string",
    "object": "string",
    "array": "string",

    # Geospatial data types
    "geography": "geo",
    "geometry": "string",

    # Vector data types
    "vector": "string",
}
# Redshift type name (lowercase) => Cube dimension type.
# See https://docs.aws.amazon.com/redshift/latest/dg/c_Supported_data_types.html
REDSHIFT_TYPE_MAPPINGS = {
    # Signed two-byte integer
    "smallint": "number",
    "int2": "number",

    # Signed four-byte integer
    "integer": "number",
    "int": "number",
    "int4": "number",

    # Signed eight-byte integer
    "bigint": "number",
    "int8": "number",

    # Exact numeric of selectable precision
    "decimal": "number",
    "numeric": "number",

    # Single precision floating-point number
    "real": "number",
    "float4": "number",

    # Double precision floating-point number
    "double precision": "number",
    "float8": "number",
    "float": "number",

    # Fixed-length character string
    "char": "string",
    "character": "string",
    "nchar": "string",
    "bpchar": "string",

    # Variable-length character string with a user-defined limit
    "varchar": "string",
    "character varying": "string",
    "nvarchar": "string",
    "text": "string",

    # Calendar date (year, month, day)
    "date": "time",

    # Time of day
    "time": "time",
    "time without time zone": "time",

    # Time of day with time zone
    "timetz": "time",
    "time with time zone": "time",

    # Date and time (without time zone)
    "timestamp": "time",
    "timestamp without time zone": "time",

    # Date and time (with time zone)
    "timestamptz": "time",
    "timestamp with time zone": "time",

    # Time duration in year to month order
    "interval year to month": "string",

    # Time duration in day to second order
    "interval day to second": "string",

    # Logical Boolean (true/false)
    # boolean does not need to be mapped
    "bool": "boolean",

    # Type used with HyperLogLog sketches
    "hllsketch": "string",

    # A superset data type that encompasses all scalar types of Amazon Redshift
    # including complex types such as ARRAY and STRUCTS
    "super": "string",

    # Variable-length binary value
    "varbyte": "string",
    "varbinary": "string",
    "binary varying": "string",

    # Spatial data
    "geometry": "geo",
    "geography": "string",
}
# Combined lookup table across all supported warehouses.
#
# The dictionaries are unpacked in order, so when the same key appears in more
# than one of them the value from the LAST one listed wins.
# NOTE(review): the Redshift and Snowflake tables disagree on "geometry" and
# "geography"; with this ordering the Snowflake values take effect. Confirm
# that this is intended.
TYPE_MAPPINGS = {
    **BIGQUERY_TYPE_MAPPINGS,
    **REDSHIFT_TYPE_MAPPINGS,
    **SNOWFLAKE_TYPE_MAPPINGS,
}
194+
195+
3196class Column :
4197 def __init__ (self , model_name : str , column_dict : dict ) -> None :
5198 self ._model_name = model_name
@@ -25,29 +218,20 @@ def sql(self) -> str:
def type(self) -> str:
    """
    Return the Cube dimension type for this column.

    The column's `data_type` is normalized (lowercased, parenthesized
    arguments such as `(10, 2)` and angle-bracket parameters such as
    `<string>` removed, surrounding whitespace trimmed) and then looked up
    in TYPE_MAPPINGS. A normalized type with no mapping is used as-is, so
    native Cube type names pass straight through.

    Returns 'string' when `data_type` is missing or None.

    Raises RuntimeError when the resulting type is not one of
    VALID_DIMENSION_TYPES.
    """
    raw_data_type = self._column_dict.get('data_type')
    if raw_data_type is None:
        return 'string'

    # Normalize the data_type value, downcasing it, and removing extra information.
    without_args = re.sub(r"\([^\)]*\)", "", raw_data_type.lower())
    without_params = re.sub(r"<.*>", "", without_args)
    # Removing "(...)" / "<...>" can leave stray whitespace behind
    # (e.g. "varchar (10)" -> "varchar "), which would defeat the lookup.
    source_data_type = without_params.strip()

    # Fall back to the normalized name itself when there is no explicit mapping.
    cube_data_type = TYPE_MAPPINGS.get(source_data_type, source_data_type)

    if cube_data_type not in VALID_DIMENSION_TYPES:
        raise RuntimeError(f"Unknown column type of {self._model_name}.{self.name}: {self._column_dict['data_type']}")

    return cube_data_type
234+
@property
def meta(self) -> dict:
    """
    Return the value stored under the 'meta' key of the column dictionary.

    Raises KeyError when the column dictionary has no 'meta' key.
    """
    return self._column_dict['meta']
@@ -78,4 +262,4 @@ def as_dimension(self) -> str:
78262 For use in Jinja:
79263 {{ dbt.model('name').column('name').as_dimension() }}
80264 """
81- return dump (self ._as_dimension (), indent = 8 )
265+ return dump (self ._as_dimension (), indent = 8 )
0 commit comments