1- from .compat import BaseModel , validator , root_validator , field_validator
2- from typing import Dict , Optional , Union , Any , Literal , List
3- import pandas as pd
4- from IPython .display import display , HTML
5-
6- DbldatagenBasicType = Literal [
7- "string" ,
8- "int" ,
9- "long" ,
10- "float" ,
11- "double" ,
12- "decimal" ,
13- "boolean" ,
14- "date" ,
15- "timestamp" ,
16- "short" ,
17- "byte" ,
18- "binary" ,
19- "integer" ,
20- "bigint" ,
21- "tinyint" ,
22- ]
23-
24- class ColumnDefinition (BaseModel ):
25- name : str
26- type : Optional [DbldatagenBasicType ] = None
27- primary : bool = False
28- options : Optional [Dict [str , Any ]] = {}
29- nullable : Optional [bool ] = False
30- omit : Optional [bool ] = False
31- baseColumn : Optional [str ] = "id"
32- baseColumnType : Optional [str ] = "auto"
33-
34- @root_validator (skip_on_failure = True )
35- def check_model_constraints (cls , values : Dict [str , Any ]) -> Dict [str , Any ]:
36- """
37- Validates constraints across the entire model after individual fields are processed.
38- """
39- is_primary = values .get ("primary" )
40- options = values .get ("options" , {})
41- name = values .get ("name" )
42- is_nullable = values .get ("nullable" )
43- column_type = values .get ("type" )
1+ from __future__ import annotations
442
45- if is_primary :
46- if "min" in options or "max" in options :
47- raise ValueError (f"Primary column '{ name } ' cannot have min/max options." )
3+ from typing import Any , Literal , Union
484
49- if is_nullable :
50- raise ValueError (f"Primary column '{ name } ' cannot be nullable." )
5+ import pandas as pd
6+ from IPython .display import HTML , display
7+
8+ from dbldatagen .spec .column_spec import ColumnDefinition
519
52- if column_type is None :
53- raise ValueError (f"Primary column '{ name } ' must have a type defined." )
54- return values
10+ from .compat import BaseModel , validator
5511
5612
5713class UCSchemaTarget (BaseModel ):
5814 catalog : str
5915 schema_ : str
6016 output_format : str = "delta" # Default to delta for UC Schema
6117
62- @field_validator ("catalog" , "schema_" , mode = "after " )
18+ @validator ("catalog" , "schema_" )
6319 def validate_identifiers (cls , v ): # noqa: N805, pylint: disable=no-self-argument
6420 if not v .strip ():
6521 raise ValueError ("Identifier must be non-empty." )
@@ -76,7 +32,7 @@ class FilePathTarget(BaseModel):
7632 base_path : str
7733 output_format : Literal ["csv" , "parquet" ] # No default, must be specified
7834
79- @field_validator ("base_path" , mode = "after " )
35+ @validator ("base_path" )
8036 def validate_base_path (cls , v ): # noqa: N805, pylint: disable=no-self-argument
8137 if not v .strip ():
8238 raise ValueError ("base_path must be non-empty." )
@@ -88,16 +44,16 @@ def __str__(self):
8844
8945class TableDefinition (BaseModel ):
9046 number_of_rows : int
91- partitions : Optional [ int ] = None
92- columns : List [ColumnDefinition ]
47+ partitions : int | None = None
48+ columns : list [ColumnDefinition ]
9349
9450
9551class ValidationResult :
9652 """Container for validation results with errors and warnings."""
9753
9854 def __init__ (self ) -> None :
99- self .errors : List [str ] = []
100- self .warnings : List [str ] = []
55+ self .errors : list [str ] = []
56+ self .warnings : list [str ] = []
10157
10258 def add_error (self , message : str ) -> None :
10359 """Add an error message."""
@@ -132,16 +88,16 @@ def __str__(self) -> str:
13288 return "\n " .join (lines )
13389
13490class DatagenSpec (BaseModel ):
135- tables : Dict [str , TableDefinition ]
136- output_destination : Optional [ Union [UCSchemaTarget , FilePathTarget ]] = None # there is a abstraction, may be we can use that? talk to Greg
137- generator_options : Optional [ Dict [ str , Any ]] = {}
138- intended_for_databricks : Optional [ bool ] = None # May be infered.
91+ tables : dict [str , TableDefinition ]
92+ output_destination : Union [UCSchemaTarget , FilePathTarget ] | None = None # there is an abstraction, maybe we can use that? talk to Greg
93+ generator_options : dict [ str , Any ] | None = {}
94+ intended_for_databricks : bool | None = None # May be inferred.
13995
14096 def _check_circular_dependencies (
14197 self ,
14298 table_name : str ,
143- columns : List [ColumnDefinition ]
144- ) -> List [str ]:
99+ columns : list [ColumnDefinition ]
100+ ) -> list [str ]:
145101 """
146102 Check for circular dependencies in baseColumn references.
147103 Returns a list of error messages if circular dependencies are found.
@@ -284,17 +240,15 @@ def validate(self, strict: bool = True) -> ValidationResult:
284240 "random" , "randomSeed" , "randomSeedMethod" , "verbose" ,
285241 "debug" , "seedColumnName"
286242 ]
287- for key in self .generator_options . keys () :
243+ for key in self .generator_options :
288244 if key not in known_options :
289245 result .add_warning (
290246 f"Unknown generator option: '{ key } '. "
291247 "This may be ignored during generation."
292248 )
293249
294250 # Now that all validations are complete, decide whether to raise
295- if strict and (result .errors or result .warnings ):
296- raise ValueError (str (result ))
297- elif not strict and result .errors :
251+ if (strict and (result .errors or result .warnings )) or (not strict and result .errors ):
298252 raise ValueError (str (result ))
299253
300254 return result
0 commit comments