|
| 1 | +import os |
| 2 | +import sys |
| 3 | +import typing |
| 4 | +from datetime import date, datetime, time, timezone |
| 5 | +from decimal import Decimal |
| 6 | +from enum import Enum, auto |
| 7 | + |
| 8 | +if typing.TYPE_CHECKING: |
| 9 | + from redshift_connector import Connection |
| 10 | + |
# This module generates the SQL files that create (and tear down) the db
# resources used for testing datatype support. The generated files are run
# directly with psql to bypass redshift_connector.

# Schema that holds every generated test table.
SCHEMA_NAME: str = "datatype_integration"
# Parent of the directory that contains this file.
root_path: str = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# SQL file with the schema/table creation and insert statements.
CREATE_FILE_PATH: str = "{root}/datatype/datatype_test_stmts.sql".format(root=root_path)
# SQL file with the statement that drops the test schema.
TEARDOWN_FILE_PATH: str = "{root}/datatype/datatype_teardown_stmts.sql".format(root=root_path)
| 19 | + |
| 20 | + |
class Datatypes(Enum):
    """
    All supported datatypes are defined here, so a test table can be created for each.

    Each member's ``name`` is used verbatim as the SQL column type and as the
    suffix of the generated test table's name.
    """

    int2 = auto()
    int4 = auto()
    int8 = auto()
    numeric = auto()
    float4 = auto()
    float8 = auto()
    bool = auto()
    char = auto()
    varchar = auto()
    date = auto()
    timestamp = auto()
    timestamptz = auto()
    time = auto()
    timetz = auto()

    @classmethod
    def list(cls) -> typing.List["Datatypes"]:
        """Return every member of the enum, in definition order."""
        # Inside the method body ``list`` resolves to the builtin, not to this
        # classmethod, because class scope is not visible from function bodies.
        return list(cls)
| 44 | + |
| 45 | + |
# The floating-point members of Datatypes.
# NOTE(review): presumably consumed by the tests to compare these values
# approximately rather than exactly -- confirm against the callers.
FLOAT_DATATYPES: typing.Tuple[Datatypes, ...] = (Datatypes.float4, Datatypes.float8)

# The timezone-aware time/timestamp members of Datatypes.
# NOTE(review): "MS" presumably refers to the sub-second component of these
# values -- confirm against the callers.
DATATYPES_WITH_MS: typing.Tuple[Datatypes, ...] = (Datatypes.timetz, Datatypes.timestamptz)
| 49 | + |
# test_data maps a datatype name (``Datatypes.<member>.name``) to its test rows.
# Each row is a tuple structured as follows.
# 1) a description of the test row (inserted into column c1).
# 2) the test value (inserted into column c2).
# 3) (Optional) the Python value we expect to receive. If this field is missing,
#    we expect to receive the test value back directly.

test_data: typing.Dict[str, typing.Tuple[typing.Tuple[typing.Any, ...], ...]] = {
    Datatypes.int2.name: (  # smallint
        ("-32768", -32768),  # signed 2 byte int min
        ("-128", -128),
        ("-1", -1),
        ("0", 0),
        ("1", 1),
        ("2", 2),
        ("123", 123),
        ("127", 127),
        ("32767", 32767),  # signed 2 byte int max
    ),
    Datatypes.int4.name: (  # integer
        ("-2147483648", -2147483648),  # signed 4 byte int min
        ("-32768", -32768),  # signed 2 byte int min
        ("-128", -128),
        ("-1", -1),
        ("0", 0),
        ("1", 1),
        ("2", 2),
        ("123", 123),
        ("127", 127),
        ("32767", 32767),  # signed 2 byte int max
        ("2147483647", 2147483647),  # signed 4 byte int max
    ),
    Datatypes.int8.name: (  # bigint
        ("-9223372036854775808", -9223372036854775808),  # signed 8 byte int min
        ("-2147483648", -2147483648),  # signed 4 byte int min
        ("-32768", -32768),  # signed 2 byte int min
        ("-128", -128),
        ("-1", -1),
        ("0", 0),
        ("1", 1),
        ("2", 2),
        ("123", 123),
        ("127", 127),
        ("32767", 32767),  # signed 2 byte int max
        ("2147483647", 2147483647),  # signed 4 byte int max
        ("9223372036854775807", 9223372036854775807),  # signed 8 byte int max
    ),
    Datatypes.numeric.name: (
        ("-2147483648", -2147483648, Decimal(-2147483648)),  # signed 4 byte int min
        ("-32768", -32768, Decimal(-32768)),  # signed 2 byte int min
        ("-128", -128, Decimal(-128)),
        ("-1", -1, Decimal(-1)),
        ("0", 0, Decimal(0)),
        ("1", 1, Decimal(1)),
        ("2", 2, Decimal(2)),
        ("123", 123, Decimal(123)),
        ("127", 127, Decimal(127)),
        ("32767", 32767, Decimal(32767)),  # signed 2 byte int max
        ("2147483647", 2147483647, Decimal(2147483647)),  # signed 4 byte int max
    ),
    Datatypes.float4.name: (  # real
        ("-2147483648.0001", -2147483648.0001),
        ("-2147483648", -2147483648),  # signed 4 byte int min
        ("-32768", -32768),  # signed 2 byte int min
        ("-32767.0", -32767.0),
        ("-128.497839", -128.497839),
        ("-128", -128),
        ("-1.000000000001", -1.000000000001),
        ("-1", -1),
        ("-0.465890", -0.465890),
        ("9e-6", 9e-6),
        ("0", 0),
        ("1", 1),
        ("1.9", 1.9),
        ("1.0", 1.0),
        ("2", 2),
        ("123", 123),
        ("123.456", 123.456),
        ("127", 127),
        ("127.890", 127.890),
        ("32767", 32767),  # signed 2 byte int max
        ("12345678.901234", 12345678.901234),
        ("2147483647", 2147483647),  # signed 4 byte int max,
    ),
    Datatypes.float8.name: (  # double precision
        ("-2147483648.0001", -2147483648.0001),
        ("-2147483648", -2147483648),  # signed 4 byte int min
        # value now matches its description (sign and digit order were off)
        ("-12345678.123456789123456", -12345678.123456789123456),
        ("-32768", -32768),  # signed 2 byte int min
        ("-32767.0", -32767.0),
        ("-128.497839", -128.497839),
        ("-128", -128),
        ("-1.000000000001", -1.000000000001),
        ("-1", -1),
        ("-0.465890", -0.465890),
        ("9e-6", 9e-6),
        ("0", 0),
        ("0.00000006733", 0.00000006733),
        ("1", 1),
        ("1.9", 1.9),
        ("1.0", 1.0),
        ("2", 2),
        ("123", 123),
        ("123.456", 123.456),
        ("127", 127),
        ("127.890", 127.890),
        ("32767", 32767),  # signed 2 byte int max
        # value now matches its description (digit order was off)
        ("12345678.123456789123456", 12345678.123456789123456),
        ("12345678.901234", 12345678.901234),
        ("2147483647", 2147483647),  # signed 4 byte int max,
    ),
    Datatypes.bool.name: (
        ("TRUE", "TRUE", True),
        ("t", "t", True),
        ("true", "true", True),
        ("y", "y", True),
        ("yes", "yes", True),
        ("1", "1", True),
        ("FALSE", "FALSE", False),
        ("f", "f", False),
        ("false", "false", False),
        ("n", "n", False),
        ("no", "no", False),
        ("0", "0", False),
    ),
    Datatypes.char.name: tuple(
        ("chr({})".format(i), chr(i))
        for i in list(range(32, 39)) + list(range(40, 92)) + list(range(93, 128))
        # skip ' \ some control chars
        # ref: https://www.utf8-chartable.de/unicode-utf8-table.pl?utf8=dec
    ),
    Datatypes.varchar.name: (
        ("empty", ""),
        ("negative one", "-1"),
        ("zero", "0"),
        ("one", "1"),
        ("special characters", "~!@#$%^&*()_+{}|:<>?"),
        ("uuid", "123e4567-e89b-12d3-a456-426614174000"),
        (
            "bin",
            "01100100 01101111 01101111 01100110 00100000 01100100 01101111 01101111 01100110 00100000 01110101 01101110 01110100 01111010 00100000 01110101 01101110 01110100 01111010 ",
        ),
        ("hex", "646f6f6620646f6f6620756e747a20756e747a"),
        ("oct", "144 157 157 146 040 144 157 157 146 040 165 156 164 172 040 165 156 164 172"),
        ("ascii", "[)00|= [)00|= (_)|V72 (_)|V72"),
        ("euro", "€€€€"),
        ("string", "The quick brown fox jumps over the lazy dog"),
        (
            "string with trailing spaces",
            "The quick brown fox jumps over the lazy dog ",
        ),
    ),
    Datatypes.date.name: (
        ("julian date", "4713-01-12", date(year=4713, month=1, day=12)),
        ("mm/dd/yyy", "01-06-2020", date(year=2020, month=1, day=6)),
        ("yyyy-mm-dd", "2020-01-06", date(year=2020, month=1, day=6)),
        ("mm.dd.yyyy", "01.20.2020", date(year=2020, month=1, day=20)),
        ("some day", "01-01-1900", date(year=1900, month=1, day=1)),
        ("feb 29 2020", "02-29-2020", date(year=2020, month=2, day=29)),
    ),
    Datatypes.timestamp.name: (
        ("julian date", "4713-01-12 00:00:00", datetime(year=4713, month=1, day=12, hour=0, minute=0, second=0)),
        ("jun 1 2008", "Jun 1,2008 09:59:59", datetime(year=2008, month=6, day=1, hour=9, minute=59, second=59)),
        ("dec 31 2008", "Dec 31,2008 18:20", datetime(year=2008, month=12, day=31, hour=18, minute=20, second=0)),
        ("feb 29, 2020", "02-29-2020 00:00:00", datetime(year=2020, month=2, day=29, hour=0, minute=0, second=0)),
    ),
    Datatypes.timestamptz.name: (
        (
            "julian date",
            "4713-01-12 00:00:00 UTC",
            datetime(year=4713, month=1, day=12, hour=0, minute=0, second=0, tzinfo=timezone.utc),
        ),
        (
            "jun 1 2008",
            "Jun 1,2008 09:59:59 EST",
            datetime(year=2008, month=6, day=1, hour=14, minute=59, second=59, tzinfo=timezone.utc),
        ),
        (
            "dec 31 2008",
            "Dec 31,2008 18:20 US/Pacific",
            datetime(year=2009, month=1, day=1, hour=2, minute=20, second=0, tzinfo=timezone.utc),
        ),
        (
            "feb 29, 2020",
            "02-29-2020 00:00:00 UTC",
            datetime(year=2020, month=2, day=29, hour=0, minute=0, second=0, tzinfo=timezone.utc),
        ),
    ),
    Datatypes.time.name: (
        ("early", "00:00:00", time(hour=0, minute=0, second=0)),
        ("noon", "12:30:10", time(hour=12, minute=30, second=10)),
        ("evening", "18:42:22", time(hour=18, minute=42, second=22)),
        ("night", "22:44:54", time(hour=22, minute=44, second=54)),
        ("end", "24:00:00", time(hour=0, minute=0)),
    ),
    Datatypes.timetz.name: (
        ("early", "00:00:00 EST", time(hour=5, minute=0, second=0, tzinfo=timezone.utc)),
        ("noon", "12:30:10 WDT", time(hour=3, minute=30, second=10, tzinfo=timezone.utc)),
        ("evening", "18:42:22 GMT", time(hour=18, minute=42, second=22, tzinfo=timezone.utc)),
        ("night", "22:44:54 CET", time(hour=21, minute=44, second=54, tzinfo=timezone.utc)),
        (
            "with micro1",
            "22:44:54.189717 CET",
            time(hour=21, minute=44, second=54, microsecond=189717, tzinfo=timezone.utc),
        ),
        # a fractional part of ".18" seconds is 180000 microseconds, not 18
        (
            "with micro2",
            "22:44:54.18 CET",
            time(hour=21, minute=44, second=54, microsecond=180000, tzinfo=timezone.utc),
        ),
        ("end", "24:00:00 WET", time(hour=0, minute=0, second=0, tzinfo=timezone.utc)),
    ),
}
| 258 | + |
| 259 | + |
def get_table_name(dt: Datatypes) -> str:
    """Return the schema-qualified name of the test table for datatype ``dt``."""
    return f"{SCHEMA_NAME}.test_{dt.name}"
| 262 | + |
| 263 | + |
def _make_data_str(dt: Datatypes) -> str:
    """
    Build the ``values`` list of the insert statement for datatype ``dt``.

    Every row of ``test_data[dt.name]`` becomes one ``('<description>', <value>)``
    group; the groups are joined with commas. An empty row collection yields
    an empty string.

    :param dt: the datatype whose test rows are rendered.
    :return: comma separated SQL value groups.
    """

    def _quote(text: str) -> str:
        # Double embedded single quotes so the text stays one SQL string literal.
        return "'{}'".format(text.replace("'", "''"))

    datas: typing.List[str] = []

    for row in test_data[dt.name]:
        # if the column storing test data is a string in test_data, insert it as a
        # string literal; any other value is inserted using its Python string form
        test_col: str = _quote(row[1]) if isinstance(row[1], str) else str(row[1])
        datas.append("({c1_val}, {c2_val})".format(c1_val=_quote(str(row[0])), c2_val=test_col))

    return ",".join(datas)
| 273 | + |
| 274 | + |
def _build_table_stmts(dt: Datatypes) -> None:
    """
    Append the drop / create / insert statements for ``dt``'s test table to the
    setup SQL file (``CREATE_FILE_PATH``).

    The table has two columns: ``c1`` (varchar, the row description) and ``c2``
    (the datatype under test, holding the test value).

    :param dt: the datatype a test table is generated for.
    """
    table_name: str = get_table_name(dt)
    statements: typing.Tuple[str, ...] = (
        "drop table if exists {table};".format(table=table_name),
        "create table {table} (c1 varchar, c2 {datatype});".format(table=table_name, datatype=dt.name),
        "insert into {table}(c1, c2) values{data};".format(table=table_name, data=_make_data_str(dt)),
    )

    # The test data contains non-ASCII text, so the encoding is pinned instead
    # of relying on the platform's locale default.
    with open(CREATE_FILE_PATH, "a", encoding="utf-8") as f:
        f.writelines(stmt + "\n" for stmt in statements)
| 288 | + |
| 289 | + |
def _build_schema_stmts() -> None:
    """Append the statements that drop and re-create the test schema to the setup SQL file."""
    schema_statements = (
        "drop schema if exists {name} cascade;".format(name=SCHEMA_NAME),
        "create schema {name};".format(name=SCHEMA_NAME),
    )

    with open(CREATE_FILE_PATH, "a+") as sql_file:
        # one statement per line, in the order listed above
        sql_file.writelines(statement + "\n" for statement in schema_statements)
| 297 | + |
| 298 | + |
def _run_psql_file(conf, sql_file: str) -> None:
    """
    Execute ``sql_file`` with the ``psql`` command line client.

    The command is passed as an argument list (no shell involved), so special
    characters in the configured values cannot alter it. The password is
    supplied through the ``PGPASSWORD`` environment variable of the child
    process. The exit status of psql is not inspected.

    :param conf: object exposing ``get(section, option)``; the ``database``
        section must provide ``password``, ``host``, ``user`` and ``database``.
        # NOTE(review): presumably a configparser.ConfigParser -- confirm against callers.
    :param sql_file: path of the SQL file to execute.
    :raises FileNotFoundError: if the ``psql`` executable cannot be found.
    """
    import subprocess  # local import keeps this block self-contained

    child_env = dict(os.environ, PGPASSWORD=str(conf.get("database", "password")))
    subprocess.run(
        [
            "psql",
            "--host={}".format(conf.get("database", "host")),
            "--port",
            "5439",
            "--user={}".format(conf.get("database", "user")),
            "--dbname={}".format(conf.get("database", "database")),
            "-f",
            sql_file,
        ],
        env=child_env,
        check=False,
    )


def datatype_test_setup(conf) -> None:
    """
    Generate the setup SQL file and run it with psql.

    The file drops and re-creates the test schema, then creates and populates
    one test table per member of ``Datatypes``.

    :param conf: configuration object, see ``_run_psql_file``.
    """
    try:  # remove test sql file if exists, the builders below append to it
        os.remove(CREATE_FILE_PATH)
    except OSError:
        pass

    # build test sql file
    _build_schema_stmts()
    for dt in Datatypes:
        _build_table_stmts(dt)

    # execute test sql file
    _run_psql_file(conf, CREATE_FILE_PATH)


def datatype_test_teardown(conf) -> None:
    """
    Generate the teardown SQL file, which drops the test schema, and run it with psql.

    :param conf: configuration object, see ``_run_psql_file``.
    """
    # "w" truncates any previous file, replacing the former remove-then-append
    with open(TEARDOWN_FILE_PATH, "w") as f:
        f.write("drop schema if exists {name} cascade;".format(name=SCHEMA_NAME))

    _run_psql_file(conf, TEARDOWN_FILE_PATH)
0 commit comments