Skip to content

Commit b81b1b4

Browse files
committed
✨ add data schema override to v2
1 parent 212a490 commit b81b1b4

File tree

4 files changed

+80
-41
lines changed

4 files changed

+80
-41
lines changed

mindee/input/inference_parameters.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,36 @@
1+
import json
12
from dataclasses import dataclass
2-
from typing import List, Optional
3+
from typing import List, Optional, Union
34

45
from mindee.input.polling_options import PollingOptions
56

67

8+
class DataSchema:
    """Optional data-schema override sent along with a V2 inference request.

    The override may be supplied either as an already-parsed ``dict`` or as a
    JSON string, which is decoded on assignment. An explicitly empty override
    (``{}``) is rejected.
    """

    _override: Optional[dict] = None

    def __init__(self, override: Optional[Union[dict, str]] = None):
        # Route construction through the property setter so that JSON-string
        # decoding and the empty-override validation apply at construction
        # time as well — the original assigned self._override directly, which
        # let DataSchema(override={}) bypass the ValueError check.
        self.override = override

    @property
    def override(self) -> Optional[dict]:
        """The override payload as a dict, or ``None`` when unset."""
        return self._override

    @override.setter
    def override(self, value: Optional[Union[dict, str]]) -> None:
        """Set the override from a dict or a JSON string.

        :param value: override payload; a ``str`` is decoded with
            ``json.loads`` (raising ``json.JSONDecodeError`` on bad input).
        :raises ValueError: if the resulting override is an empty dict.
        """
        if value is None:
            parsed = None
        elif isinstance(value, str):
            parsed = json.loads(value)
        else:
            parsed = value
        # None == {} is False, so this only triggers on an explicit empty
        # override (the original's extra `is not None` guard was redundant).
        if parsed == {}:
            raise ValueError("Empty override provided")
        self._override = parsed

    def __str__(self) -> str:
        # Wire format expected by the V2 API: {"override": ...}
        return json.dumps({"override": self.override})
33+
734
@dataclass
835
class InferenceParameters:
936
"""Inference parameters to set when sending a file."""
@@ -31,3 +58,4 @@ class InferenceParameters:
3158
"""Whether to close the file after parsing."""
3259
text_context: Optional[str] = None
3360
"""Additional text context used by the model during inference. Not recommended, for specific use only."""
61+
data_schema: Optional[DataSchema] = None

mindee/mindee_http/mindee_api_v2.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,10 @@ def req_post_inference_enqueue(
9494
data["webhook_ids"] = params.webhook_ids
9595
if params.alias and len(params.alias):
9696
data["alias"] = params.alias
97-
if params.text_context and (params.text_context):
97+
if params.text_context and len(params.text_context):
9898
data["text_context"] = params.text_context
99+
if params.data_schema is not None:
100+
data["data_schema"] = str(params.data_schema)
99101

100102
if isinstance(input_source, LocalInputSource):
101103
files = {"file": input_source.read_contents(params.close_file)}

tests/v2/test_client.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from mindee.error.mindee_error import MindeeApiV2Error, MindeeError
77
from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2
88
from mindee.input import LocalInputSource, PathInput
9+
from mindee.input.inference_parameters import DataSchema
910
from mindee.mindee_http.base_settings import USER_AGENT
1011
from mindee.parsing.v2.inference import Inference
1112
from mindee.parsing.v2.job import Job
@@ -130,7 +131,10 @@ def test_enqueue_and_parse_path_with_env_token(custom_base_url_client):
130131
with pytest.raises(MindeeHTTPErrorV2):
131132
custom_base_url_client.enqueue_and_get_inference(
132133
input_doc,
133-
InferenceParameters("dummy-model", text_context="ignore this message"),
134+
InferenceParameters(
135+
"dummy-model",
136+
text_context="ignore this message",
137+
data_schema=DataSchema(override={"test_field": {}})),
134138
)
135139

136140

tests/v2/test_client_integration.py

Lines changed: 43 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from mindee import ClientV2, InferenceParameters, PathInput, UrlInputSource
77
from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2
8+
from mindee.input.inference_parameters import DataSchema
89
from mindee.parsing.v2.inference_response import InferenceResponse
910
from tests.utils import FILE_TYPES_DIR, V2_PRODUCT_DATA_DIR
1011

@@ -24,6 +25,19 @@ def v2_client() -> ClientV2:
2425
api_key = os.getenv("MINDEE_V2_API_KEY")
2526
return ClientV2(api_key)
2627

28+
def _basic_assert_success(response: InferenceResponse, page_count: int) -> None:
    """Shared sanity checks for a successful V2 inference response.

    :param response: response returned by ``enqueue_and_get_inference``.
    :param page_count: expected number of pages in the processed file.
    """
    assert response is not None
    assert response.inference is not None

    assert response.inference.file is not None
    assert response.inference.file.page_count == page_count

    assert response.inference.model is not None
    # NOTE(review): `findoc_model_id` is not a parameter of this helper — it
    # resolves from module scope at call time. The tests also receive a pytest
    # fixture of the same name; confirm a module-level `findoc_model_id`
    # binding exists, otherwise this line raises NameError.
    assert response.inference.model.id == findoc_model_id

    assert response.inference.result is not None
    assert response.inference.active_options is not None
40+
2741

2842
@pytest.mark.integration
2943
@pytest.mark.v2
@@ -49,24 +63,15 @@ def test_parse_file_empty_multiple_pages_must_succeed(
4963
response: InferenceResponse = v2_client.enqueue_and_get_inference(
5064
input_source, params
5165
)
52-
assert response is not None
53-
assert response.inference is not None
66+
_basic_assert_success(response=response, page_count=2)
5467

55-
assert response.inference.file is not None
5668
assert response.inference.file.name == "multipage_cut-2.pdf"
57-
assert response.inference.file.page_count == 2
58-
59-
assert response.inference.model is not None
60-
assert response.inference.model.id == findoc_model_id
6169

62-
assert response.inference.active_options is not None
6370
assert response.inference.active_options.rag is False
6471
assert response.inference.active_options.raw_text is True
6572
assert response.inference.active_options.polygon is False
6673
assert response.inference.active_options.confidence is False
6774

68-
assert response.inference.result is not None
69-
7075
assert response.inference.result.raw_text is not None
7176
assert len(response.inference.result.raw_text.pages) == 2
7277

@@ -93,24 +98,15 @@ def test_parse_file_empty_single_page_options_must_succeed(
9398
response: InferenceResponse = v2_client.enqueue_and_get_inference(
9499
input_source, params
95100
)
96-
assert response is not None
97-
assert response.inference is not None
98-
99-
assert response.inference.model is not None
100-
assert response.inference.model.id == findoc_model_id
101+
_basic_assert_success(response=response, page_count=1)
101102

102-
assert response.inference.file is not None
103103
assert response.inference.file.name == "blank_1.pdf"
104-
assert response.inference.file.page_count == 1
105104

106-
assert response.inference.active_options is not None
107105
assert response.inference.active_options.rag is True
108106
assert response.inference.active_options.raw_text is True
109107
assert response.inference.active_options.polygon is True
110108
assert response.inference.active_options.confidence is True
111109

112-
assert response.inference.result is not None
113-
114110

115111
@pytest.mark.integration
116112
@pytest.mark.v2
@@ -137,26 +133,17 @@ def test_parse_file_filled_single_page_must_succeed(
137133
response: InferenceResponse = v2_client.enqueue_and_get_inference(
138134
input_source, params
139135
)
136+
_basic_assert_success(response=response, page_count=1)
140137

141-
assert response is not None
142-
assert response.inference is not None
143-
144-
assert response.inference.file is not None
145138
assert response.inference.file.name == "default_sample.jpg"
146-
assert response.inference.file.page_count == 1
147-
148-
assert response.inference.model is not None
149-
assert response.inference.model.id == findoc_model_id
150139

151-
assert response.inference.active_options is not None
152140
assert response.inference.active_options.rag is False
153141
assert response.inference.active_options.raw_text is False
154142
assert response.inference.active_options.polygon is False
155143
assert response.inference.active_options.confidence is False
156144

157145
assert response.inference.result.raw_text is None
158146

159-
assert response.inference.result is not None
160147
supplier_name = response.inference.result.fields["supplier_name"]
161148
assert supplier_name is not None
162149
assert supplier_name.value == "John Smith"
@@ -266,15 +253,33 @@ def test_blank_url_input_source_must_succeed(
266253
response: InferenceResponse = v2_client.enqueue_and_get_inference(
267254
input_source, params
268255
)
269-
assert response is not None
270-
assert response.inference is not None
256+
_basic_assert_success(response=response, page_count=1)
271257

272-
assert response.inference.file is not None
273-
assert response.inference.file.page_count == 1
274258

275-
assert response.inference.model is not None
276-
assert response.inference.model.id == findoc_model_id
259+
@pytest.mark.integration
@pytest.mark.v2
def test_data_schema_must_succeed(
    v2_client: ClientV2,
    findoc_model_id: str,
) -> None:
    """
    Send a local blank PDF with a data-schema override and make sure the
    inference call completes without raising any errors.
    """
    # (Docstring corrected: the previous text mentioned an HTTPS URL, but this
    # test loads a local file through PathInput.)
    input_path: Path = FILE_TYPES_DIR / "pdf" / "blank_1.pdf"

    input_source = PathInput(input_path)
    params = InferenceParameters(
        model_id=findoc_model_id,
        # All optional post-processing flags disabled: this test only
        # exercises the data_schema override round-trip.
        rag=False,
        raw_text=False,
        polygon=False,
        confidence=False,
        webhook_ids=[],
        data_schema=DataSchema(override={"test_field": {}}),
        alias="py_integration_data_schema_override",
    )
    response: InferenceResponse = v2_client.enqueue_and_get_inference(
        input_source, params
    )
    _basic_assert_success(response=response, page_count=1)
279285

280-
assert response.inference.active_options is not None

0 commit comments

Comments
 (0)