Skip to content

Commit f1c95c7

Browse files
chore: Remove Pydantic V1 deprecation warnings (#15057)
Co-authored-by: Claude <noreply@anthropic.com>
1 parent d90fce5 commit f1c95c7

File tree

256 files changed

+1947
-1655
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

256 files changed

+1947
-1655
lines changed

metadata-ingestion/pyproject.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,5 +56,11 @@ max-complexity = 20
5656
[tool.ruff.lint.flake8-tidy-imports]
5757
ban-relative-imports = "all"
5858

59+
[tool.ruff.lint.flake8-tidy-imports.banned-api]
60+
# pydantic v2 deprecations
61+
"pydantic.validator" = { msg = "Use pydantic.field_validator instead of deprecated validator" }
62+
"pydantic.root_validator" = { msg = "Use pydantic.model_validator instead of deprecated root_validator" }
63+
5964
[tool.ruff.lint.per-file-ignores]
6065
"__init__.py" = ["F401"]
66+
"src/datahub/configuration/pydantic_migration_helpers.py" = ["TID251"] # Intentional V1 imports for backward compatibility

metadata-ingestion/src/datahub/api/entities/common/serialized_value.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def as_pydantic_object(
104104
assert self.schema_ref
105105
assert self.schema_ref == model_type.__name__
106106
object_dict = self.as_raw_json()
107-
return model_type.parse_obj(object_dict)
107+
return model_type.model_validate(object_dict)
108108

109109
@classmethod
110110
def from_resource_value(
@@ -131,7 +131,7 @@ def create(
131131
elif isinstance(object, BaseModel):
132132
return SerializedResourceValue(
133133
content_type=models.SerializedValueContentTypeClass.JSON,
134-
blob=json.dumps(object.dict(), sort_keys=True).encode("utf-8"),
134+
blob=json.dumps(object.model_dump(), sort_keys=True).encode("utf-8"),
135135
schema_type=models.SerializedValueSchemaTypeClass.JSON,
136136
schema_ref=object.__class__.__name__,
137137
)

metadata-ingestion/src/datahub/api/entities/corpgroup/corpgroup.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@
22

33
import logging
44
from dataclasses import dataclass
5-
from typing import Callable, Iterable, List, Optional, Union
5+
from typing import Any, Callable, Iterable, List, Optional, Union
66

7-
import pydantic
8-
from pydantic import BaseModel
7+
from pydantic import BaseModel, field_validator
98

109
import datahub.emitter.mce_builder as builder
1110
from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationConfig
@@ -70,9 +69,15 @@ class CorpGroup(BaseModel):
7069

7170
_rename_admins_to_owners = pydantic_renamed_field("admins", "owners")
7271

73-
@pydantic.validator("owners", "members", each_item=True)
74-
def make_urn_if_needed(cls, v):
75-
if isinstance(v, str):
72+
@field_validator("owners", "members", mode="before")
73+
@classmethod
74+
def make_urn_if_needed(cls, v: Any) -> Any:
75+
if isinstance(v, list):
76+
return [
77+
builder.make_user_urn(item) if isinstance(item, str) else item
78+
for item in v
79+
]
80+
elif isinstance(v, str):
7681
return builder.make_user_urn(v)
7782
return v
7883

metadata-ingestion/src/datahub/api/entities/corpuser/corpuser.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from dataclasses import dataclass
44
from typing import Callable, Iterable, List, Optional
55

6-
import pydantic
6+
from pydantic import model_validator
77

88
import datahub.emitter.mce_builder as builder
99
from datahub.configuration.common import ConfigModel
@@ -65,16 +65,16 @@ class CorpUser(ConfigModel):
6565
picture_link: Optional[str] = None
6666
phone: Optional[str] = None
6767

68-
@pydantic.validator("full_name", always=True)
69-
def full_name_can_be_built_from_first_name_last_name(v, values):
70-
if not v:
71-
if "first_name" in values or "last_name" in values:
72-
first_name = values.get("first_name") or ""
73-
last_name = values.get("last_name") or ""
74-
full_name = f"{first_name} {last_name}" if last_name else first_name
75-
return full_name
76-
else:
77-
return v
68+
@model_validator(mode="after")
69+
def full_name_can_be_built_from_first_name_last_name(self) -> "CorpUser":
70+
if not self.full_name:
71+
if self.first_name or self.last_name:
72+
first_name = self.first_name or ""
73+
last_name = self.last_name or ""
74+
self.full_name = (
75+
f"{first_name} {last_name}" if last_name else first_name
76+
)
77+
return self
7878

7979
@property
8080
def urn(self):

metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from pathlib import Path
55
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
66

7-
import pydantic
7+
from pydantic import field_validator, model_validator
88
from ruamel.yaml import YAML
99
from typing_extensions import assert_never
1010

@@ -71,7 +71,8 @@ class Ownership(ConfigModel):
7171
id: str
7272
type: str
7373

74-
@pydantic.validator("type")
74+
@field_validator("type", mode="after")
75+
@classmethod
7576
def ownership_type_must_be_mappable_or_custom(cls, v: str) -> str:
7677
_, _ = builder.validate_ownership_type(v)
7778
return v
@@ -116,30 +117,49 @@ class DataProduct(ConfigModel):
116117
output_ports: Optional[List[str]] = None
117118
_original_yaml_dict: Optional[dict] = None
118119

119-
@pydantic.validator("assets", each_item=True)
120-
def assets_must_be_urns(cls, v: str) -> str:
121-
try:
122-
Urn.from_string(v)
123-
except Exception as e:
124-
raise ValueError(f"asset {v} is not an urn: {e}") from e
125-
126-
return v
127-
128-
@pydantic.validator("output_ports", each_item=True)
129-
def output_ports_must_be_urns(cls, v: str) -> str:
130-
try:
131-
Urn.create_from_string(v)
132-
except Exception as e:
133-
raise ValueError(f"Output port {v} is not an urn: {e}") from e
120+
@field_validator("assets", mode="before")
121+
@classmethod
122+
def assets_must_be_urns(cls, v: Any) -> Any:
123+
if isinstance(v, list):
124+
for item in v:
125+
try:
126+
Urn.from_string(item)
127+
except Exception as e:
128+
raise ValueError(f"asset {item} is not an urn: {e}") from e
129+
return v
130+
else:
131+
try:
132+
Urn.from_string(v)
133+
except Exception as e:
134+
raise ValueError(f"asset {v} is not an urn: {e}") from e
135+
return v
134136

137+
@field_validator("output_ports", mode="before")
138+
@classmethod
139+
def output_ports_must_be_urns(cls, v: Any) -> Any:
140+
if v is not None:
141+
if isinstance(v, list):
142+
for item in v:
143+
try:
144+
Urn.create_from_string(item)
145+
except Exception as e:
146+
raise ValueError(
147+
f"Output port {item} is not an urn: {e}"
148+
) from e
149+
else:
150+
try:
151+
Urn.create_from_string(v)
152+
except Exception as e:
153+
raise ValueError(f"Output port {v} is not an urn: {e}") from e
135154
return v
136155

137-
@pydantic.validator("output_ports", each_item=True)
138-
def output_ports_must_be_from_asset_list(cls, v: str, values: dict) -> str:
139-
assets = values.get("assets", [])
140-
if v not in assets:
141-
raise ValueError(f"Output port {v} is not in asset list")
142-
return v
156+
@model_validator(mode="after")
157+
def output_ports_must_be_from_asset_list(self) -> "DataProduct":
158+
if self.output_ports and self.assets:
159+
for port in self.output_ports:
160+
if port not in self.assets:
161+
raise ValueError(f"Output port {port} is not in asset list")
162+
return self
143163

144164
@property
145165
def urn(self) -> str:
@@ -454,7 +474,7 @@ def _patch_ownership(
454474
patches_add.append(new_owner)
455475
else:
456476
patches_add.append(
457-
Ownership(id=new_owner, type=new_owner_type).dict()
477+
Ownership(id=new_owner, type=new_owner_type).model_dump()
458478
)
459479

460480
mutation_needed = bool(patches_replace or patches_drop or patches_add)
@@ -485,8 +505,8 @@ def patch_yaml(
485505
raise Exception("Original Data Product was not loaded from yaml")
486506

487507
orig_dictionary = original_dataproduct._original_yaml_dict
488-
original_dataproduct_dict = original_dataproduct.dict()
489-
this_dataproduct_dict = self.dict()
508+
original_dataproduct_dict = original_dataproduct.model_dump()
509+
this_dataproduct_dict = self.model_dump()
490510
for simple_field in ["display_name", "description", "external_url"]:
491511
if original_dataproduct_dict.get(simple_field) != this_dataproduct_dict.get(
492512
simple_field
@@ -566,7 +586,7 @@ def to_yaml(
566586
yaml = YAML(typ="rt") # default, if not specified, is 'rt' (round-trip)
567587
yaml.indent(mapping=2, sequence=4, offset=2)
568588
yaml.default_flow_style = False
569-
yaml.dump(self.dict(), fp)
589+
yaml.dump(self.model_dump(), fp)
570590

571591
@staticmethod
572592
def get_patch_builder(

metadata-ingestion/src/datahub/api/entities/dataset/dataset.py

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import time
44
from pathlib import Path
55
from typing import (
6+
Any,
67
Dict,
78
Iterable,
89
List,
@@ -19,8 +20,9 @@
1920
BaseModel,
2021
Field,
2122
StrictStr,
22-
root_validator,
23-
validator,
23+
ValidationInfo,
24+
field_validator,
25+
model_validator,
2426
)
2527
from ruamel.yaml import YAML
2628
from typing_extensions import TypeAlias
@@ -213,14 +215,15 @@ def from_schema_field(
213215
),
214216
)
215217

216-
@validator("urn", pre=True, always=True)
217-
def either_id_or_urn_must_be_filled_out(cls, v, values):
218-
if not v and not values.get("id"):
218+
@model_validator(mode="after")
219+
def either_id_or_urn_must_be_filled_out(self) -> "SchemaFieldSpecification":
220+
if not self.urn and not self.id:
219221
raise ValueError("Either id or urn must be present")
220-
return v
222+
return self
221223

222-
@root_validator(pre=True)
223-
def sync_doc_into_description(cls, values: Dict) -> Dict:
224+
@model_validator(mode="before")
225+
@classmethod
226+
def sync_doc_into_description(cls, values: Any) -> Any:
224227
"""Synchronize doc into description field if doc is provided."""
225228
description = values.get("description")
226229
doc = values.pop("doc", None)
@@ -348,8 +351,9 @@ class SchemaSpecification(BaseModel):
348351
fields: Optional[List[SchemaFieldSpecification]] = None
349352
raw_schema: Optional[str] = None
350353

351-
@validator("file")
352-
def file_must_be_avsc(cls, v):
354+
@field_validator("file", mode="after")
355+
@classmethod
356+
def file_must_be_avsc(cls, v: Optional[str]) -> Optional[str]:
353357
if v and not v.endswith(".avsc"):
354358
raise ValueError("file must be a .avsc file")
355359
return v
@@ -359,7 +363,8 @@ class Ownership(ConfigModel):
359363
id: str
360364
type: str
361365

362-
@validator("type")
366+
@field_validator("type", mode="after")
367+
@classmethod
363368
def ownership_type_must_be_mappable_or_custom(cls, v: str) -> str:
364369
_, _ = validate_ownership_type(v)
365370
return v
@@ -397,30 +402,36 @@ def platform_urn(self) -> str:
397402
dataset_urn = DatasetUrn.from_string(self.urn)
398403
return str(dataset_urn.get_data_platform_urn())
399404

400-
@validator("urn", pre=True, always=True)
401-
def urn_must_be_present(cls, v, values):
405+
@field_validator("urn", mode="before")
406+
@classmethod
407+
def urn_must_be_present(cls, v: Any, info: ValidationInfo) -> Any:
402408
if not v:
409+
values = info.data
403410
assert "id" in values, "id must be present if urn is not"
404411
assert "platform" in values, "platform must be present if urn is not"
405412
assert "env" in values, "env must be present if urn is not"
406413
return make_dataset_urn(values["platform"], values["id"], values["env"])
407414
return v
408415

409-
@validator("name", pre=True, always=True)
410-
def name_filled_with_id_if_not_present(cls, v, values):
416+
@field_validator("name", mode="before")
417+
@classmethod
418+
def name_filled_with_id_if_not_present(cls, v: Any, info: ValidationInfo) -> Any:
411419
if not v:
420+
values = info.data
412421
assert "id" in values, "id must be present if name is not"
413422
return values["id"]
414423
return v
415424

416-
@validator("platform")
417-
def platform_must_not_be_urn(cls, v):
418-
if v.startswith("urn:li:dataPlatform:"):
425+
@field_validator("platform", mode="after")
426+
@classmethod
427+
def platform_must_not_be_urn(cls, v: Optional[str]) -> Optional[str]:
428+
if v and v.startswith("urn:li:dataPlatform:"):
419429
return v[len("urn:li:dataPlatform:") :]
420430
return v
421431

422-
@validator("structured_properties")
423-
def simplify_structured_properties(cls, v):
432+
@field_validator("structured_properties", mode="after")
433+
@classmethod
434+
def simplify_structured_properties(cls, v: Any) -> Any:
424435
return StructuredPropertiesHelper.simplify_structured_properties_list(v)
425436

426437
def _mint_auditstamp(self, message: str) -> AuditStampClass:
@@ -461,7 +472,7 @@ def from_yaml(cls, file: str) -> Iterable["Dataset"]:
461472
if isinstance(datasets, dict):
462473
datasets = [datasets]
463474
for dataset_raw in datasets:
464-
dataset = Dataset.parse_obj(dataset_raw)
475+
dataset = Dataset.model_validate(dataset_raw)
465476
# dataset = Dataset.model_validate(dataset_raw, strict=True)
466477
yield dataset
467478

metadata-ingestion/src/datahub/api/entities/external/lake_formation_external_entites.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
1313
from typing import Any, Dict, Optional
1414

15-
from pydantic import validator
15+
from pydantic import field_validator
1616
from typing_extensions import ClassVar
1717

1818
from datahub.api.entities.external.external_tag import ExternalTag
@@ -50,11 +50,10 @@ class LakeFormationTag(ExternalTag):
5050
value: Optional[LakeFormationTagValueText] = None
5151
catalog: Optional[str] = None
5252

53-
# Pydantic v1 validators
54-
@validator("key", pre=True)
53+
@field_validator("key", mode="before")
5554
@classmethod
5655
def _validate_key(cls, v: Any) -> LakeFormationTagKeyText:
57-
"""Validate and convert key field for Pydantic v1."""
56+
"""Validate and convert key field."""
5857
if isinstance(v, LakeFormationTagKeyText):
5958
return v
6059

@@ -64,10 +63,10 @@ def _validate_key(cls, v: Any) -> LakeFormationTagKeyText:
6463

6564
return LakeFormationTagKeyText(raw_text=v)
6665

67-
@validator("value", pre=True)
66+
@field_validator("value", mode="before")
6867
@classmethod
6968
def _validate_value(cls, v: Any) -> Optional[LakeFormationTagValueText]:
70-
"""Validate and convert value field for Pydantic v1."""
69+
"""Validate and convert value field."""
7170
if v is None:
7271
return None
7372

0 commit comments

Comments
 (0)