From 0e4c366199e484780ec3706191d9def17f5f28f2 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Mon, 8 Jun 2026 15:35:38 +0200 Subject: [PATCH 1/8] :sparkles: add support for custom httpx client for V1 and V2 --- mindee/input/url_input_source.py | 17 +++++++-- mindee/v1/client.py | 22 +++++++++--- mindee/v1/mindee_http/base_endpoint.py | 11 +++++- mindee/v1/mindee_http/endpoint.py | 41 +++++++++++++--------- mindee/v1/mindee_http/workflow_endpoint.py | 11 +++--- mindee/v2/client.py | 8 +++-- mindee/v2/mindee_http/mindee_api_v2.py | 22 +++++++----- tests/v2/test_client_integration.py | 33 +++++++++++++++++ 8 files changed, 126 insertions(+), 39 deletions(-) diff --git a/mindee/input/url_input_source.py b/mindee/input/url_input_source.py index 1f32baea..b3e33d14 100644 --- a/mindee/input/url_input_source.py +++ b/mindee/input/url_input_source.py @@ -173,7 +173,14 @@ def __fill_filename(self, filename=None) -> str: return filename @staticmethod - def __make_request(url, auth, headers, redirects, max_redirects) -> bytes: + def __make_request( + url, + auth, + headers, + redirects, + max_redirects, + http_client: httpx.Client | None = None, + ) -> bytes: """ Makes an HTTP request to the given URL, while following redirections. @@ -185,11 +192,15 @@ def __make_request(url, auth, headers, redirects, max_redirects) -> bytes: :return: The content of the response. :raises MindeeSourceError: If max redirects are exceeded or the request fails. """ - result = httpx.get(url, headers=headers, timeout=120, auth=auth) + http_client = http_client or httpx.Client() + result = http_client.get( + url, headers=headers, timeout=120, auth=auth, follow_redirects=True + ) if 299 < result.status_code < 400: if redirects == max_redirects: raise MindeeSourceError( - f"Can't reach URL after {redirects} out of {max_redirects} redirects, " + f"Can't reach URL after {redirects} out of {max_redirects} " + f"redirects, " f"aborting operation." ) return URLInputSource.__make_request( diff --git a/mindee/v1/client.py b/mindee/v1/client.py index cc500a85..d0cf3128 100644 --- a/mindee/v1/client.py +++ b/mindee/v1/client.py @@ -1,5 +1,7 @@ from time import sleep +import httpx + from mindee.client_mixin import ClientMixin from mindee.error.mindee_error import MindeeClientError, MindeeError from mindee.error.mindee_http_error import handle_error @@ -59,14 +61,21 @@ class Client(ClientMixin): """ api_key: str + """API key for all endpoints.""" + http_client: httpx.Client + """HTTP client for making requests.""" - def __init__(self, api_key: str = "") -> None: + def __init__( + self, api_key: str = "", http_client: httpx.Client | None = None + ) -> None: """ Mindee API Client. :param api_key: Your API key for all endpoints + :param http_client: HTTP client for making requests. """ self.api_key = api_key + self.http_client = http_client or httpx.Client() def parse( self, @@ -522,7 +531,8 @@ def _send_to_workflow( raise MindeeClientError("No input document provided") workflow_endpoint = WorkflowEndpoint( - WorkflowSettings(api_key=self.api_key, workflow_id=workflow_id) + WorkflowSettings(api_key=self.api_key, workflow_id=workflow_id), + self.http_client, ) response = workflow_endpoint.workflow_execution_post(input_source, options) @@ -555,8 +565,12 @@ def _build_endpoint( version=version, ) if account_name and len(account_name) > 0 and account_name != "mindee": - return CustomEndpoint(endpoint_name, account_name, version, api_settings) - return Endpoint(endpoint_name, account_name, version, api_settings) + return CustomEndpoint( + endpoint_name, account_name, version, api_settings, self.http_client + ) + return Endpoint( + endpoint_name, account_name, version, api_settings, self.http_client + ) def create_endpoint( self, diff --git a/mindee/v1/mindee_http/base_endpoint.py b/mindee/v1/mindee_http/base_endpoint.py index 6b252d8a..0f6b4418 100644 --- a/mindee/v1/mindee_http/base_endpoint.py +++ b/mindee/v1/mindee_http/base_endpoint.py @@ -1,13 +1,22 @@ +import httpx + from mindee.v1.mindee_http.base_settings import BaseSettings class BaseEndpoint: """Base endpoint class for the Mindee API.""" - def __init__(self, settings: BaseSettings) -> None: + settings: BaseSettings + http_client: httpx.Client + + def __init__( + self, settings: BaseSettings, http_client: httpx.Client | None = None + ) -> None: """ Base API endpoint class for all endpoints. :param settings: Settings relating to all endpoints. + :param http_client: HTTP client for making requests. """ self.settings = settings + self.http_client = http_client or httpx.Client() diff --git a/mindee/v1/mindee_http/endpoint.py b/mindee/v1/mindee_http/endpoint.py index 4d6e0a5d..f6dbd379 100644 --- a/mindee/v1/mindee_http/endpoint.py +++ b/mindee/v1/mindee_http/endpoint.py @@ -13,7 +13,12 @@ class Endpoint(BaseEndpoint): settings: MindeeAPI def __init__( - self, url_name: str, owner: str, version: str, settings: MindeeAPI + self, + url_name: str, + owner: str, + version: str, + settings: MindeeAPI, + http_client: httpx.Client | None = None, ) -> None: """ Generic API endpoint for a product. @@ -21,8 +26,10 @@ def __init__( :param owner: owner of the product :param url_name: name of the product as it appears in the URL :param version: interface version + :param settings: settings for the API + :param http_client: HTTP client for making requests. """ - super().__init__(settings) + super().__init__(settings, http_client) self.owner = owner self.url_name = url_name self.version = version @@ -42,7 +49,8 @@ def predict_req_post( :param include_words: Include raw OCR words in the response :param close_file: Whether to `close()` the file after parsing it. :param cropper: Including Mindee cropping results. - :param full_text: Whether to include the full OCR text response in compatible APIs. + :param full_text: Whether to include the full OCR text response in compatible + APIs. :return: httpx response """ return self._custom_request( @@ -66,7 +74,8 @@ def predict_async_req_post( :param include_words: Include raw OCR words in the response :param close_file: Whether to `close()` the file after parsing it. :param cropper: Including Mindee cropping results. - :param full_text: Whether to include the full OCR text response in compatible APIs. + :param full_text: Whether to include the full OCR text response in compatible + APIs. :param workflow_id: Workflow ID. :param rag: If set, will enable Retrieval-Augmented Generation. :return: httpx response @@ -112,7 +121,7 @@ def _custom_request( if isinstance(input_source, URLInputSource): data["document"] = input_source.url - response = httpx.post( + response = self.http_client.post( url=url, headers=self.settings.base_headers, data=data, @@ -121,7 +130,7 @@ def _custom_request( ) else: files = {"document": input_source.read_contents(close_file)} - response = httpx.post( + response = self.http_client.post( url=url, files=files, headers=self.settings.base_headers, @@ -138,7 +147,7 @@ def document_queue_req_get(self, queue_id: str) -> httpx.Response: :param queue_id: queue_id received from the API """ - return httpx.get( + return self.http_client.get( f"{self.settings.url_root}/documents/queue/{queue_id}", headers=self.settings.base_headers, timeout=self.settings.request_timeout, @@ -147,7 +156,7 @@ def document_queue_req_get(self, queue_id: str) -> httpx.Response: def openapi_get_req(self) -> httpx.Response: """Get the OpenAPI specification of the product.""" - return httpx.get( + return self.http_client.get( f"{self.settings.url_root}/openapi.json", headers=self.settings.base_headers, timeout=self.settings.request_timeout, @@ -163,7 +172,7 @@ def document_feedback_req_put( :param document_id: ID of the document to send feedback to. :param feedback: Feedback object to send. """ - return httpx.put( + return self.http_client.put( f"{self.settings.base_url}/v1/documents/{document_id}/feedback", headers=self.settings.base_headers, data=feedback, @@ -187,7 +196,7 @@ def training_req_post( files = {"document": input_source.read_contents(close_file)} params = {"training": True, "with_candidates": True} - response = httpx.post( + response = self.http_client.post( f"{self.settings.url_root}/predict", files=files, headers=self.settings.base_headers, @@ -209,7 +218,7 @@ def training_async_req_post( files = {"document": input_source.read_contents(close_file)} params = {"training": True, "async": True} - response = httpx.post( + response = self.http_client.post( f"{self.settings.url_root}/predict", files=files, headers=self.settings.base_headers, @@ -240,7 +249,7 @@ def documents_req_get(self, page_id: int = 1) -> httpx.Response: params = { "page": page_id, } - response = httpx.get( + response = self.http_client.get( f"{self.settings.url_root}/documents", headers=self.settings.base_headers, params=params, @@ -260,7 +269,7 @@ def document_req_get(self, document_id: str) -> httpx.Response: "include_candidates": True, "global_orientation": True, } - response = httpx.get( + response = self.http_client.get( f"{self.settings.url_root}/documents/{document_id}", headers=self.settings.base_headers, params=params, @@ -279,7 +288,7 @@ def annotations_req_post( :param annotations: Annotations object :return: httpx response """ - response = httpx.post( + response = self.http_client.post( f"{self.settings.url_root}/documents/{document_id}/annotations", headers=self.settings.base_headers, json=annotations, @@ -297,7 +306,7 @@ def annotations_req_put( :param annotations: Annotations object :return: httpx response """ - response = httpx.put( + response = self.http_client.put( f"{self.settings.url_root}/documents/{document_id}/annotations", headers=self.settings.base_headers, json=annotations, @@ -312,7 +321,7 @@ def annotations_req_del(self, document_id: str) -> httpx.Response: :param document_id: ID of the document to annotate :return: httpx response """ - response = httpx.delete( + response = self.http_client.delete( f"{self.settings.url_root}/documents/{document_id}/annotations", headers=self.settings.base_headers, timeout=self.settings.request_timeout, diff --git a/mindee/v1/mindee_http/workflow_endpoint.py b/mindee/v1/mindee_http/workflow_endpoint.py index 7217013c..da774e18 100644 --- a/mindee/v1/mindee_http/workflow_endpoint.py +++ b/mindee/v1/mindee_http/workflow_endpoint.py @@ -11,14 +11,17 @@ class WorkflowEndpoint(BaseEndpoint): """Workflow endpoint.""" settings: WorkflowSettings + """Settings object.""" - def __init__(self, settings: WorkflowSettings) -> None: + def __init__( + self, settings: WorkflowSettings, http_client: httpx.Client | None = None + ) -> None: """ Workflow Endpoint. :param settings: Settings object. """ - super().__init__(settings) + super().__init__(settings, http_client) def workflow_execution_post( self, @@ -50,7 +53,7 @@ def workflow_execution_post( if isinstance(input_source, URLInputSource): data["document"] = input_source.url - response = httpx.post( + response = self.http_client.post( self.settings.url_root, headers=self.settings.base_headers, data=data, @@ -59,7 +62,7 @@ def workflow_execution_post( ) else: files = {"document": input_source.read_contents(True)} - response = httpx.post( + response = self.http_client.post( self.settings.url_root, files=files, headers=self.settings.base_headers, diff --git a/mindee/v2/client.py b/mindee/v2/client.py index 22d9c5ab..183e76c8 100644 --- a/mindee/v2/client.py +++ b/mindee/v2/client.py @@ -1,6 +1,8 @@ from time import sleep from typing import TypeVar +import httpx + from mindee.client_mixin import ClientMixin from mindee.client_options.polling_options import PollingOptions from mindee.error.mindee_error import MindeeError @@ -27,14 +29,16 @@ class Client(ClientMixin): api_key: str | None mindee_api: MindeeAPIV2 - def __init__(self, api_key: str | None = None) -> None: + def __init__( + self, api_key: str | None = None, http_client: httpx.Client | None = None + ) -> None: """ Mindee API Client. :param api_key: Your API key for all endpoints """ self.api_key = api_key - self.mindee_api = MindeeAPIV2(api_key) + self.mindee_api = MindeeAPIV2(api_key, http_client) def enqueue( self, diff --git a/mindee/v2/mindee_http/mindee_api_v2.py b/mindee/v2/mindee_http/mindee_api_v2.py index d2e384b0..050b4013 100644 --- a/mindee/v2/mindee_http/mindee_api_v2.py +++ b/mindee/v2/mindee_http/mindee_api_v2.py @@ -38,8 +38,9 @@ class MindeeAPIV2(SettingsMixin): """Root of the URL to use for polling.""" api_key: str | None """API Key for the client.""" + http_client: httpx.Client - def __init__(self, api_key: str | None): + def __init__(self, api_key: str | None, http_client: httpx.Client | None = None): self.api_key = ( api_key if api_key @@ -56,6 +57,7 @@ def __init__(self, api_key: str | None): f"'{API_KEY_V2_ENV_NAME}' environment variable." ) self.url_root = f"{self.base_url.rstrip('/')}" + self.http_client = http_client or httpx.Client() @property def base_headers(self) -> dict[str, str]: @@ -96,7 +98,7 @@ def req_post_inference_enqueue( if isinstance(input_source, LocalInputSource): files = {"file": input_source.read_contents(params.close_file)} - response = httpx.post( + response = self.http_client.post( url=url, files=files, headers=self.base_headers, @@ -105,7 +107,7 @@ def req_post_inference_enqueue( ) elif isinstance(input_source, URLInputSource): data["url"] = input_source.url - response = httpx.post( + response = self.http_client.post( url=url, headers=self.base_headers, data=data, @@ -121,7 +123,7 @@ def req_get_job(self, job_id: str) -> httpx.Response: :param job_id: Job ID, returned by the enqueue request. """ - return httpx.get( + return self.http_client.get( f"{self.url_root}/v2/jobs/{job_id}", headers=self.base_headers, timeout=self.request_timeout, @@ -130,12 +132,13 @@ def req_get_job(self, job_id: str) -> httpx.Response: def req_get_inference_by_url(self, url) -> httpx.Response: """ - Sends a request matching a given inference_id. Returns either a Job or a Document. + Sends a request matching a given inference_id. Returns either a Job or a + Document. :param url: URL to use for the request. :return: Response object from the request. """ - return httpx.get( + return self.http_client.get( url, headers=self.base_headers, timeout=self.request_timeout, @@ -151,7 +154,7 @@ def req_get_inference(self, inference_id: str, slug: str) -> httpx.Response: """ url = f"{self.url_root}/v2/{slug}/{inference_id}" - return httpx.get( + return self.http_client.get( url, headers=self.base_headers, timeout=self.request_timeout, @@ -168,7 +171,7 @@ def req_get_search_models( :return: Response object containing search results. """ url = f"{self.url_root}/v2/search/models" - return httpx.get( + return self.http_client.get( url, headers=self.base_headers, params={"name": model_name, "model_type": model_type}, @@ -258,5 +261,6 @@ def _response_json(response: httpx.Response) -> StringDict: return response.json() except httpx.DecodingError as e: raise MindeeHTTPUnknownErrorV2( - f"HTTP {response.status_code} response is not valid JSON: {response.text}" + f"HTTP {response.status_code} response is not valid JSON: " + f"{response.text}" ) from e diff --git a/tests/v2/test_client_integration.py b/tests/v2/test_client_integration.py index ebf88ff0..20df7d2b 100644 --- a/tests/v2/test_client_integration.py +++ b/tests/v2/test_client_integration.py @@ -1,6 +1,7 @@ import os from pathlib import Path +import httpx import pytest from mindee import ExtractionParameters @@ -306,3 +307,35 @@ def test_data_schema_must_succeed( assert response.inference.active_options.data_schema.replace is True assert response.inference.result.fields["test_replace"] is not None assert response.inference.result.fields["test_replace"].value == "a test value" + + +@pytest.mark.integration +@pytest.mark.v2 +def test_custom_httpx_client_event_hook( + findoc_model_id: str, +) -> None: + request_urls = [] + + def log_request(request: httpx.Request): + request_urls.append(str(request.url)) + + httpx_client = httpx.Client(event_hooks={"request": [log_request]}) + client = Client(http_client=httpx_client) + + input_path = FILE_TYPES_DIR / "pdf" / "blank_1.pdf" + input_source = PathInput(input_path) + + params = ExtractionParameters( + model_id=findoc_model_id, + rag=False, + raw_text=False, + polygon=False, + confidence=False, + webhook_ids=[], + alias="py_integration_custom_httpx_client", + ) + + client.enqueue(input_source, params) + + assert len(request_urls) > 0 + assert any("enqueue" in url for url in request_urls) From 393df368370b2e0f11d2b0d6ffa104d34d17b195 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Mon, 8 Jun 2026 16:24:27 +0200 Subject: [PATCH 2/8] fix pagination metadata class name --- .../parsing/search/{pagination.py => paginationmetadata.py} | 2 +- mindee/v2/parsing/search/search_response.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) rename mindee/v2/parsing/search/{pagination.py => paginationmetadata.py} (97%) diff --git a/mindee/v2/parsing/search/pagination.py b/mindee/v2/parsing/search/paginationmetadata.py similarity index 97% rename from mindee/v2/parsing/search/pagination.py rename to mindee/v2/parsing/search/paginationmetadata.py index 34b63a3d..14e628e3 100644 --- a/mindee/v2/parsing/search/pagination.py +++ b/mindee/v2/parsing/search/paginationmetadata.py @@ -1,4 +1,4 @@ -class Pagination: +class PaginationMetadata: """Pagination metadata.""" per_page: int diff --git a/mindee/v2/parsing/search/search_response.py b/mindee/v2/parsing/search/search_response.py index 233be58d..5dea6b22 100644 --- a/mindee/v2/parsing/search/search_response.py +++ b/mindee/v2/parsing/search/search_response.py @@ -1,5 +1,5 @@ from mindee.parsing.common.string_dict import StringDict -from mindee.v2.parsing.search.pagination import Pagination +from mindee.v2.parsing.search.paginationmetadata import PaginationMetadata from mindee.v2.parsing.search.search_models import SearchModels @@ -8,12 +8,12 @@ class SearchResponse: models: SearchModels """Parsed search payload.""" - pagination: Pagination + pagination: PaginationMetadata """Pagination metadata for the search results.""" def __init__(self, raw_response: StringDict) -> None: self.models = SearchModels(raw_response["models"]) - self.pagination = Pagination(raw_response["pagination"]) + self.pagination = PaginationMetadata(raw_response["pagination"]) def __str__(self) -> str: """ From d788adbffdab2c4473fcd61e740b4d8301879ed7 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Mon, 8 Jun 2026 17:22:08 +0200 Subject: [PATCH 3/8] add better tests --- mindee/input/url_input_source.py | 1 + mindee/v1/mindee_http/base_endpoint.py | 10 +++++++ mindee/v2/client.py | 10 +++++++ pyproject.toml | 1 + tests/fixtures.py | 14 +++++++++ tests/v2/test_client.py | 5 +--- tests/v2/test_client_integration.py | 40 ++++++++++++++++++++++---- 7 files changed, 71 insertions(+), 10 deletions(-) create mode 100644 tests/fixtures.py diff --git a/mindee/input/url_input_source.py b/mindee/input/url_input_source.py index b3e33d14..69ccffb1 100644 --- a/mindee/input/url_input_source.py +++ b/mindee/input/url_input_source.py @@ -212,4 +212,5 @@ def __make_request( f"Couldn't retrieve file from server, error code {result.status_code}." ) + http_client.close() return result.content diff --git a/mindee/v1/mindee_http/base_endpoint.py b/mindee/v1/mindee_http/base_endpoint.py index 0f6b4418..d599c686 100644 --- a/mindee/v1/mindee_http/base_endpoint.py +++ b/mindee/v1/mindee_http/base_endpoint.py @@ -20,3 +20,13 @@ def __init__( """ self.settings = settings self.http_client = http_client or httpx.Client() + + def close(self) -> None: + """Closes the underlying HTTP client.""" + self.http_client.close() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() diff --git a/mindee/v2/client.py b/mindee/v2/client.py index 183e76c8..87efb314 100644 --- a/mindee/v2/client.py +++ b/mindee/v2/client.py @@ -170,3 +170,13 @@ def search_models( :return: A list of models matching the provided criteria. """ return self.mindee_api.get_models(name, model_type) + + def close(self) -> None: + """Closes the underlying HTTP client.""" + self.mindee_api.http_client.close() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() diff --git a/pyproject.toml b/pyproject.toml index 795a1390..db5b3da7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ test = [ "toml~=0.10.2", "pytest~=9.0.3", "pytest-cov~=7.1.0", + "respx~=0.23.1" ] docs = [ "sphinx~=9.1.0", diff --git a/tests/fixtures.py b/tests/fixtures.py new file mode 100644 index 00000000..835cc64a --- /dev/null +++ b/tests/fixtures.py @@ -0,0 +1,14 @@ +import os + +import pytest + + +@pytest.fixture(scope="session") +def findoc_model_id() -> str: + """Identifier of the Financial Document model, supplied through an env var.""" + findoc_model_id = os.getenv("MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID") + if not findoc_model_id: + raise ValueError( + "MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID environment variable is not set" + ) + return findoc_model_id diff --git a/tests/v2/test_client.py b/tests/v2/test_client.py index dc4ef3c2..23cf2c1b 100644 --- a/tests/v2/test_client.py +++ b/tests/v2/test_client.py @@ -30,11 +30,10 @@ def env_client(monkeypatch) -> Client: @pytest.fixture def custom_base_url_client(monkeypatch) -> Client: class _FakePostRespError: - status_code = 400 # any non-2xx will do + status_code = 400 is_error = True def json(self): - # Shape must match what handle_error_v2 expects return { "status": 0, "code": "000-000", @@ -157,8 +156,6 @@ def test_enqueue_and_parse_path_with_env_token(custom_base_url_client): def _assert_findoc_inference(response: ExtractionResponse): - # There are already detailed tests of the inference object. - # Here we are just testing whether the client can load OK. assert isinstance(response, ExtractionResponse) assert isinstance(response.inference, ExtractionInference) assert response.inference.id diff --git a/tests/v2/test_client_integration.py b/tests/v2/test_client_integration.py index 20df7d2b..e8e8009b 100644 --- a/tests/v2/test_client_integration.py +++ b/tests/v2/test_client_integration.py @@ -3,23 +3,19 @@ import httpx import pytest +import respx from mindee import ExtractionParameters from mindee.input.path_input import PathInput from mindee.input.url_input_source import URLInputSource from mindee.v2.client import Client +from mindee.v2.error import MindeeAPIV2Error from mindee.v2.error.mindee_http_error_v2 import MindeeHTTPErrorV2 from mindee.v2.parsing import InferenceActiveOptions from mindee.v2.product.extraction.extraction_response import ExtractionResponse from tests.utils import FILE_TYPES_DIR, V2_PRODUCT_DATA_DIR -@pytest.fixture(scope="session") -def findoc_model_id() -> str: - """Identifier of the Financial Document model, supplied through an env var.""" - return os.getenv("MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID") - - @pytest.fixture(scope="session") def v2_client() -> Client: return Client() @@ -339,3 +335,35 @@ def log_request(request: httpx.Request): assert len(request_urls) > 0 assert any("enqueue" in url for url in request_urls) + + +@pytest.mark.v2 +@respx.mock +def test_explicit_timeout_failure(findoc_model_id) -> None: + respx.post("https://api-v2.mindee.net/v2/inferences/enqueue").mock( + side_effect=httpx.ReadTimeout("Simulated Read Timeout") + ) + + client = Client(api_key="dummy") + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") + params = ExtractionParameters(model_id=findoc_model_id) + + with pytest.raises(httpx.ReadTimeout): + client.enqueue(input_source, params) + + +@pytest.mark.v2 +@respx.mock +def test_explicit_500_server_error(findoc_model_id) -> None: + respx.post("https://api-v2.mindee.net/v2/inferences/enqueue").mock( + return_value=httpx.Response(500, json={"message": "Internal Server Error"}) + ) + + client = Client(api_key="dummy") + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") + params = ExtractionParameters(model_id=findoc_model_id) + + with pytest.raises(MindeeAPIV2Error) as exc_info: + client.enqueue(input_source, params) + + assert exc_info.value.status_code == 500 From 02c3d207893edefca68ecd294d0ba8221dd50d57 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Mon, 8 Jun 2026 17:36:45 +0200 Subject: [PATCH 4/8] add tests --- tests/v2/test_client.py | 10 ++++++++++ tests/v2/test_client_integration.py | 26 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/tests/v2/test_client.py b/tests/v2/test_client.py index 23cf2c1b..00a256c4 100644 --- a/tests/v2/test_client.py +++ b/tests/v2/test_client.py @@ -258,3 +258,13 @@ def test_queue_get(custom_base_url_client): assert not response.job.result_url assert len(response.job.webhooks) == 0 assert not response.job.error + + +@pytest.mark.v2 +def test_client_closes_httpx_connections() -> None: + client = Client(api_key="dummy_key") + client.close() + with pytest.raises( + RuntimeError, match=r"Cannot send a request, as the client has been closed\." + ): + client.mindee_api.http_client.get("https://google.com") diff --git a/tests/v2/test_client_integration.py b/tests/v2/test_client_integration.py index e8e8009b..cded0c78 100644 --- a/tests/v2/test_client_integration.py +++ b/tests/v2/test_client_integration.py @@ -1,3 +1,4 @@ +import concurrent.futures import os from pathlib import Path @@ -367,3 +368,28 @@ def test_explicit_500_server_error(findoc_model_id) -> None: client.enqueue(input_source, params) assert exc_info.value.status_code == 500 + + +@pytest.mark.integration +@pytest.mark.v2 +def test_httpx_multiple_calls_thread_safety(findoc_model_id) -> None: + client = Client() + input_path = FILE_TYPES_DIR / "pdf" / "blank_1.pdf" + + def make_request(): + input_source = PathInput(input_path) + params = ExtractionParameters(model_id=findoc_model_id) + return client.enqueue(input_source, params) + + thread_count = 20 + successful_responses = 0 + + with concurrent.futures.ThreadPoolExecutor(max_workers=thread_count) as executor: + futures = [executor.submit(make_request) for _ in range(thread_count)] + + for future in concurrent.futures.as_completed(futures): + response = future.result() + if response.job and response.job.id: + successful_responses += 1 + + assert successful_responses == thread_count From 6f8c8f0ebc6b83e311ab6c33e25e07f04555b457 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Mon, 8 Jun 2026 17:57:14 +0200 Subject: [PATCH 5/8] fix tests --- mindee/v2/parsing/job/job.py | 2 +- tests/conftest.py | 7 ++++++ tests/fixtures.py | 14 ----------- tests/v2/test_client.py | 39 +++++++++++++++++++++++++++++ tests/v2/test_client_integration.py | 27 +------------------- 5 files changed, 48 insertions(+), 41 deletions(-) delete mode 100644 tests/fixtures.py diff --git a/mindee/v2/parsing/job/job.py b/mindee/v2/parsing/job/job.py index 9871b0b8..492c2fa5 100644 --- a/mindee/v2/parsing/job/job.py +++ b/mindee/v2/parsing/job/job.py @@ -35,7 +35,7 @@ def __init__(self, raw_response: StringDict) -> None: self.id = raw_response["id"] self.status = raw_response["status"] self.error = ( - ErrorResponse(raw_response["error"]) if raw_response["error"] else None + ErrorResponse(raw_response["error"]) if raw_response.get("error") else None ) self.created_at = datetime.fromisoformat( raw_response["created_at"].replace("Z", "+00:00") diff --git a/tests/conftest.py b/tests/conftest.py index c130f51b..b5cc6b4a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import gc +import os import pytest @@ -7,3 +8,9 @@ def force_gc(): yield gc.collect() + + +@pytest.fixture(scope="session") +def findoc_model_id() -> str: + """Identifier of the Financial Document model, supplied through an env var.""" + return os.getenv("MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID", "") diff --git a/tests/fixtures.py b/tests/fixtures.py deleted file mode 100644 index 835cc64a..00000000 --- a/tests/fixtures.py +++ /dev/null @@ -1,14 +0,0 @@ -import os - -import pytest - - -@pytest.fixture(scope="session") -def findoc_model_id() -> str: - """Identifier of the Financial Document model, supplied through an env var.""" - findoc_model_id = os.getenv("MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID") - if not findoc_model_id: - raise ValueError( - "MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID environment variable is not set" - ) - return findoc_model_id diff --git a/tests/v2/test_client.py b/tests/v2/test_client.py index 00a256c4..4dfc58ca 100644 --- a/tests/v2/test_client.py +++ b/tests/v2/test_client.py @@ -1,8 +1,12 @@ +import concurrent.futures import json import os +import re +import time import httpx import pytest +import respx from mindee import ExtractionParameters, ExtractionResponse, LocalResponse from mindee.error.mindee_error import MindeeError @@ -268,3 +272,38 @@ def test_client_closes_httpx_connections() -> None: RuntimeError, match=r"Cannot send a request, as the client has been closed\." ): client.mindee_api.http_client.get("https://google.com") + + +@pytest.mark.v2 +@respx.mock +def test_httpx_multiple_calls_thread_safety() -> None: + client = Client(api_key="dummy_key") + input_path = FILE_TYPES_DIR / "pdf" / "blank_1.pdf" + + def delayed_response(request: httpx.Request) -> httpx.Response: + job_json = json.loads((V2_DATA_DIR / "job" / "ok_processing.json").read_text()) + time.sleep(0.1) + return httpx.Response(201, json=job_json) + + url_pattern = re.compile(r"https://api-v2\.mindee\.net/v2/.+/enqueue") + respx.post(url_pattern).mock(side_effect=delayed_response) + + def make_request(): + input_source = PathInput(input_path) + params = ExtractionParameters(model_id="dummy-model-id") + return client.enqueue(input_source, params) + + thread_count = 20 + successful_responses = 0 + with concurrent.futures.ThreadPoolExecutor(max_workers=thread_count) as executor: + futures = [executor.submit(make_request) for _ in range(thread_count)] + + for future in concurrent.futures.as_completed(futures): + response = future.result() + if ( + response.job + and response.job.id == "12345678-1234-1234-1234-123456789ABC" + ): + successful_responses += 1 + + assert successful_responses == thread_count diff --git a/tests/v2/test_client_integration.py b/tests/v2/test_client_integration.py index cded0c78..f0329bc1 100644 --- a/tests/v2/test_client_integration.py +++ b/tests/v2/test_client_integration.py @@ -1,4 +1,3 @@ -import concurrent.futures import os from pathlib import Path @@ -354,6 +353,7 @@ def test_explicit_timeout_failure(findoc_model_id) -> None: @pytest.mark.v2 +@pytest.mark.integration @respx.mock def test_explicit_500_server_error(findoc_model_id) -> None: respx.post("https://api-v2.mindee.net/v2/inferences/enqueue").mock( @@ -368,28 +368,3 @@ def test_explicit_500_server_error(findoc_model_id) -> None: client.enqueue(input_source, params) assert exc_info.value.status_code == 500 - - -@pytest.mark.integration -@pytest.mark.v2 -def test_httpx_multiple_calls_thread_safety(findoc_model_id) -> None: - client = Client() - input_path = FILE_TYPES_DIR / "pdf" / "blank_1.pdf" - - def make_request(): - input_source = PathInput(input_path) - params = ExtractionParameters(model_id=findoc_model_id) - return client.enqueue(input_source, params) - - thread_count = 20 - successful_responses = 0 - - with concurrent.futures.ThreadPoolExecutor(max_workers=thread_count) as executor: - futures = [executor.submit(make_request) for _ in range(thread_count)] - - for future in concurrent.futures.as_completed(futures): - response = future.result() - if response.job and response.job.id: - successful_responses += 1 - - assert successful_responses == thread_count From 3857258bed8d314500d145bae0251b16aeed175c Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Mon, 8 Jun 2026 18:12:41 +0200 Subject: [PATCH 6/8] fix tests, again --- tests/v2/test_client_integration.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/v2/test_client_integration.py b/tests/v2/test_client_integration.py index f0329bc1..a82e7451 100644 --- a/tests/v2/test_client_integration.py +++ b/tests/v2/test_client_integration.py @@ -1,4 +1,5 @@ import os +import re from pathlib import Path import httpx @@ -9,8 +10,10 @@ from mindee.input.path_input import PathInput from mindee.input.url_input_source import URLInputSource from mindee.v2.client import Client -from mindee.v2.error import MindeeAPIV2Error -from mindee.v2.error.mindee_http_error_v2 import MindeeHTTPErrorV2 +from mindee.v2.error.mindee_http_error_v2 import ( + MindeeHTTPErrorV2, + MindeeHTTPUnknownErrorV2, +) from mindee.v2.parsing import InferenceActiveOptions from mindee.v2.product.extraction.extraction_response import ExtractionResponse from tests.utils import FILE_TYPES_DIR, V2_PRODUCT_DATA_DIR @@ -355,16 +358,15 @@ def test_explicit_timeout_failure(findoc_model_id) -> None: @pytest.mark.v2 @pytest.mark.integration @respx.mock -def test_explicit_500_server_error(findoc_model_id) -> None: - respx.post("https://api-v2.mindee.net/v2/inferences/enqueue").mock( +def test_explicit_500_server_error(findoc_model_id: str) -> None: + respx.post(re.compile(r"https://api-v2\.mindee\.net/v2/.+/enqueue")).mock( return_value=httpx.Response(500, json={"message": "Internal Server Error"}) ) client = Client(api_key="dummy") input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") params = ExtractionParameters(model_id=findoc_model_id) - - with pytest.raises(MindeeAPIV2Error) as exc_info: + with pytest.raises(MindeeHTTPUnknownErrorV2) as exc_info: client.enqueue(input_source, params) - assert exc_info.value.status_code == 500 + assert "Couldn't deserialize server error" in str(exc_info.value) From 27bcb069abb887eda1eb958533bfaca47f500a1b Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Tue, 9 Jun 2026 11:38:52 +0200 Subject: [PATCH 7/8] fix memory safety issues, simplify all http calls, allow non-client creation --- mindee/error/mindee_http_error.py | 2 +- mindee/input/url_input_source.py | 26 +- mindee/mindee_http/response_validation.py | 2 +- mindee/v1/client.py | 21 +- mindee/v1/mindee_http/base_endpoint.py | 16 +- mindee/v1/mindee_http/endpoint.py | 224 +++++++++--------- mindee/v1/mindee_http/workflow_endpoint.py | 29 +-- mindee/v1/parsing/custom/classification.py | 2 +- mindee/v1/parsing/custom/list.py | 2 +- mindee/v1/parsing/generated/generated_list.py | 2 +- .../v1/parsing/generated/generated_object.py | 2 +- mindee/v1/parsing/standard/address.py | 2 +- mindee/v1/parsing/standard/amount.py | 2 +- mindee/v1/parsing/standard/base.py | 2 +- mindee/v1/parsing/standard/boolean.py | 2 +- mindee/v1/parsing/standard/classification.py | 2 +- .../parsing/standard/company_registration.py | 3 +- mindee/v1/parsing/standard/date.py | 2 +- mindee/v1/parsing/standard/locale.py | 2 +- mindee/v1/parsing/standard/payment_details.py | 2 +- mindee/v1/parsing/standard/position.py | 2 +- mindee/v1/parsing/standard/tax.py | 2 +- mindee/v1/parsing/standard/text.py | 2 +- mindee/v2/client.py | 10 +- mindee/v2/error/mindee_http_error_v2.py | 2 +- mindee/v2/mindee_http/mindee_api_v2.py | 113 +++++---- mindee/v2/parsing/error/error_item.py | 2 +- mindee/v2/parsing/error/error_response.py | 2 +- mindee/v2/parsing/inference/base_inference.py | 2 +- .../v2/parsing/inference/field/base_field.py | 2 +- mindee/v2/parsing/inference/field/factory.py | 2 +- .../parsing/inference/field/field_location.py | 2 +- .../inference/field/inference_fields.py | 2 +- .../v2/parsing/inference/field/list_field.py | 2 +- .../parsing/inference/field/simple_field.py | 2 +- .../inference/inference_active_options.py | 2 +- mindee/v2/parsing/inference/inference_file.py | 2 +- mindee/v2/parsing/inference/inference_job.py | 2 +- .../v2/parsing/inference/inference_model.py | 2 +- mindee/v2/parsing/inference/rag_metadata.py | 2 +- mindee/v2/parsing/inference/raw_text.py | 2 +- mindee/v2/parsing/inference/raw_text_page.py | 2 +- mindee/v2/parsing/job/job.py | 2 +- mindee/v2/parsing/job/job_response.py | 2 +- mindee/v2/parsing/job/job_webhook.py | 2 +- .../classification_classifier.py | 2 +- .../classification_inference.py | 2 +- .../classification/classification_response.py | 2 +- .../classification/classification_result.py | 2 +- mindee/v2/product/crop/crop_inference.py | 2 +- mindee/v2/product/crop/crop_item.py | 2 +- mindee/v2/product/crop/crop_response.py | 2 +- mindee/v2/product/crop/crop_result.py | 2 +- .../extraction/extraction_inference.py | 2 +- .../product/extraction/extraction_response.py | 2 +- .../product/extraction/extraction_result.py | 2 +- mindee/v2/product/ocr/ocr_inference.py | 2 +- mindee/v2/product/ocr/ocr_page.py | 2 +- mindee/v2/product/ocr/ocr_response.py | 2 +- mindee/v2/product/ocr/ocr_result.py | 2 +- mindee/v2/product/split/split_inference.py | 2 +- mindee/v2/product/split/split_range.py | 2 +- mindee/v2/product/split/split_response.py | 2 +- mindee/v2/product/split/split_result.py | 2 +- pyproject.toml | 2 +- .../test_url_input_source_integration.py | 2 +- tests/v2/test_client.py | 4 +- tests/v2/test_client_integration.py | 14 ++ 68 files changed, 326 insertions(+), 250 deletions(-) diff --git a/mindee/error/mindee_http_error.py b/mindee/error/mindee_http_error.py index 40c4fbf5..89936a11 100644 --- a/mindee/error/mindee_http_error.py +++ b/mindee/error/mindee_http_error.py @@ -1,5 +1,5 @@ from mindee.error.mindee_error import MindeeError -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class MindeeHTTPError(RuntimeError): diff --git a/mindee/input/url_input_source.py b/mindee/input/url_input_source.py index 69ccffb1..972cb75b 100644 --- a/mindee/input/url_input_source.py +++ b/mindee/input/url_input_source.py @@ -10,6 +10,7 @@ from mindee.error.mindee_error import MindeeSourceError from mindee.input.bytes_input import BytesInput from mindee.logger import logger +from mindee.parsing.common.string_dict import StringDict class URLInputSource: @@ -192,10 +193,16 @@ def __make_request( :return: The content of the response. :raises MindeeSourceError: If max redirects are exceeded or the request fails. """ - http_client = http_client or httpx.Client() - result = http_client.get( - url, headers=headers, timeout=120, auth=auth, follow_redirects=True - ) + get_kwargs: StringDict = { + "headers": headers, + "timeout": 120, + "auth": auth, + "follow_redirects": True, + } + if http_client is None: + result = httpx.get(url, **get_kwargs) + else: + result = http_client.get(url, **get_kwargs) if 299 < result.status_code < 400: if redirects == max_redirects: raise MindeeSourceError( @@ -204,13 +211,18 @@ def __make_request( f"aborting operation." ) return URLInputSource.__make_request( - result.headers["Location"], auth, headers, redirects + 1, max_redirects + result.headers["Location"], + auth, + headers, + redirects + 1, + max_redirects, + http_client, ) if result.status_code >= 400 or result.status_code < 200: raise MindeeSourceError( f"Couldn't retrieve file from server, error code {result.status_code}." ) - - http_client.close() + if http_client is not None and not http_client.is_closed: + http_client.close() return result.content diff --git a/mindee/mindee_http/response_validation.py b/mindee/mindee_http/response_validation.py index fbeb423b..bcf0c771 100644 --- a/mindee/mindee_http/response_validation.py +++ b/mindee/mindee_http/response_validation.py @@ -2,7 +2,7 @@ import httpx -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict def is_valid_sync_response(response: httpx.Response) -> bool: diff --git a/mindee/v1/client.py b/mindee/v1/client.py index d0cf3128..a6ff7b51 100644 --- a/mindee/v1/client.py +++ b/mindee/v1/client.py @@ -62,7 +62,7 @@ class Client(ClientMixin): api_key: str """API key for all endpoints.""" - http_client: httpx.Client + http_client: httpx.Client | None """HTTP client for making requests.""" def __init__( @@ -75,7 +75,7 @@ def __init__( :param http_client: HTTP client for making requests. """ self.api_key = api_key - self.http_client = http_client or httpx.Client() + self.http_client = http_client def parse( self, @@ -597,3 +597,20 @@ def create_endpoint( ) version = "1" return self._build_endpoint(endpoint_name, account_name, version) + + def close(self): + """Close the HTTP client.""" + if self.http_client and not self.http_client.is_closed: + self.http_client.close() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def __del__(self): + """Ensure the HTTP client is closed when the object is garbage collected.""" + if self.http_client and self.http_client and not self.http_client.is_closed: + logger.info("Force-closing unclosed Mindee Client (V1) %s.", str(self)) + self.close() diff --git a/mindee/v1/mindee_http/base_endpoint.py b/mindee/v1/mindee_http/base_endpoint.py index d599c686..8bb06404 100644 --- a/mindee/v1/mindee_http/base_endpoint.py +++ b/mindee/v1/mindee_http/base_endpoint.py @@ -7,7 +7,9 @@ class BaseEndpoint: """Base endpoint class for the Mindee API.""" settings: BaseSettings - http_client: httpx.Client + """Settings relating to all endpoints.""" + http_client: httpx.Client | None + """HTTP client for making requests.""" def __init__( self, settings: BaseSettings, http_client: httpx.Client | None = None @@ -19,14 +21,4 @@ def __init__( :param http_client: HTTP client for making requests. """ self.settings = settings - self.http_client = http_client or httpx.Client() - - def close(self) -> None: - """Closes the underlying HTTP client.""" - self.http_client.close() - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.close() + self.http_client = http_client diff --git a/mindee/v1/mindee_http/endpoint.py b/mindee/v1/mindee_http/endpoint.py index f6dbd379..a2889b7c 100644 --- a/mindee/v1/mindee_http/endpoint.py +++ b/mindee/v1/mindee_http/endpoint.py @@ -2,7 +2,7 @@ from mindee.input.local_input_source import LocalInputSource from mindee.input.url_input_source import URLInputSource -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.mindee_http.base_endpoint import BaseEndpoint from mindee.v1.mindee_http.mindee_api import MindeeAPI @@ -114,6 +114,13 @@ def _custom_request( if rag: params["rag"] = "true" + post_kwargs: StringDict = { + "headers": self.settings.base_headers, + "data": data, + "params": params, + "timeout": self.settings.request_timeout, + } + if workflow_id: url = f"{self.settings.base_url}/v1/workflows/{workflow_id}/{route}" else: @@ -121,25 +128,11 @@ def _custom_request( if isinstance(input_source, URLInputSource): data["document"] = input_source.url - response = self.http_client.post( - url=url, - headers=self.settings.base_headers, - data=data, - params=params, - timeout=self.settings.request_timeout, - ) else: - files = {"document": input_source.read_contents(close_file)} - response = self.http_client.post( - url=url, - files=files, - headers=self.settings.base_headers, - data=data, - params=params, - timeout=self.settings.request_timeout, - ) - - return response + post_kwargs["files"] = {"document": input_source.read_contents(close_file)} + if self.http_client is None or self.http_client.is_closed: + return httpx.post(url, **post_kwargs) + return self.http_client.post(url, **post_kwargs) def document_queue_req_get(self, queue_id: str) -> httpx.Response: """ @@ -147,21 +140,27 @@ def document_queue_req_get(self, queue_id: str) -> httpx.Response: :param queue_id: queue_id received from the API """ - return self.http_client.get( - f"{self.settings.url_root}/documents/queue/{queue_id}", - headers=self.settings.base_headers, - timeout=self.settings.request_timeout, - follow_redirects=True, - ) + url = f"{self.settings.url_root}/documents/queue/{queue_id}" + get_kwargs: StringDict = { + "headers": self.settings.base_headers, + "timeout": self.settings.request_timeout, + "follow_redirects": True, + } + if self.http_client is None or self.http_client.is_closed: + return httpx.get(url, **get_kwargs) + return self.http_client.get(url, **get_kwargs) def openapi_get_req(self) -> httpx.Response: """Get the OpenAPI specification of the product.""" - return self.http_client.get( - f"{self.settings.url_root}/openapi.json", - headers=self.settings.base_headers, - timeout=self.settings.request_timeout, - follow_redirects=True, - ) + url = f"{self.settings.url_root}/openapi.json" + get_kwargs: StringDict = { + "headers": self.settings.base_headers, + "timeout": self.settings.request_timeout, + "follow_redirects": True, + } + if self.http_client is None or self.http_client.is_closed: + return httpx.get(url, **get_kwargs) + return self.http_client.get(url, **get_kwargs) def document_feedback_req_put( self, document_id: str, feedback: StringDict @@ -172,12 +171,15 @@ def document_feedback_req_put( :param document_id: ID of the document to send feedback to. :param feedback: Feedback object to send. """ - return self.http_client.put( - f"{self.settings.base_url}/v1/documents/{document_id}/feedback", - headers=self.settings.base_headers, - data=feedback, - timeout=self.settings.request_timeout, - ) + url = f"{self.settings.url_root}/documents/{document_id}/feedback" + put_kwargs: StringDict = { + "headers": self.settings.base_headers, + "data": feedback, + "timeout": self.settings.request_timeout, + } + if self.http_client is None or self.http_client.is_closed: + return httpx.put(url, **put_kwargs) + return self.http_client.put(url, **put_kwargs) class CustomEndpoint(Endpoint): @@ -193,17 +195,16 @@ def training_req_post( :return: httpx response :param close_file: Whether to `close()` the file after parsing it. """ - files = {"document": input_source.read_contents(close_file)} - params = {"training": True, "with_candidates": True} - - response = self.http_client.post( - f"{self.settings.url_root}/predict", - files=files, - headers=self.settings.base_headers, - params=params, - timeout=self.settings.request_timeout, - ) - return response + url = f"{self.settings.url_root}/predict" + post_kwargs: StringDict = { + "files": {"document": input_source.read_contents(close_file)}, + "headers": self.settings.base_headers, + "params": {"training": True, "with_candidates": True}, + "timeout": self.settings.request_timeout, + } + if self.http_client is None or self.http_client.is_closed: + return httpx.post(url, **post_kwargs) + return self.http_client.post(url, **post_kwargs) def training_async_req_post( self, input_source: LocalInputSource, close_file: bool = True @@ -215,17 +216,16 @@ def training_async_req_post( :return: httpx response :param close_file: Whether to `close()` the file after parsing it. """ - files = {"document": input_source.read_contents(close_file)} - params = {"training": True, "async": True} - - response = self.http_client.post( - f"{self.settings.url_root}/predict", - files=files, - headers=self.settings.base_headers, - params=params, - timeout=self.settings.request_timeout, - ) - return response + url = f"{self.settings.url_root}/predict" + post_kwargs: StringDict = { + "files": {"document": input_source.read_contents(close_file)}, + "headers": self.settings.base_headers, + "params": {"training": True, "async": True}, + "timeout": self.settings.request_timeout, + } + if self.http_client is None or self.http_client.is_closed: + return httpx.post(url, **post_kwargs) + return self.http_client.post(url, **post_kwargs) def document_req_del(self, document_id: str) -> httpx.Response: """ @@ -233,12 +233,14 @@ def document_req_del(self, document_id: str) -> httpx.Response: :param document_id: ID of the document """ - response = httpx.delete( - f"{self.settings.url_root}/documents/{document_id}", - headers=self.settings.base_headers, - timeout=self.settings.request_timeout, - ) - return response + url = f"{self.settings.url_root}/documents/{document_id}" + delete_kwargs: StringDict = { + "headers": self.settings.base_headers, + "timeout": self.settings.request_timeout, + } + if self.http_client is None or self.http_client.is_closed: + return httpx.delete(url, **delete_kwargs) + return httpx.delete(url, **delete_kwargs) def documents_req_get(self, page_id: int = 1) -> httpx.Response: """ @@ -246,17 +248,18 @@ def documents_req_get(self, page_id: int = 1) -> httpx.Response: :param page_id: Page number """ - params = { - "page": page_id, + url = f"{self.settings.url_root}/documents" + get_kwargs: StringDict = { + "headers": self.settings.base_headers, + "params": { + "page": page_id, + }, + "timeout": self.settings.request_timeout, + "follow_redirects": True, } - response = self.http_client.get( - f"{self.settings.url_root}/documents", - headers=self.settings.base_headers, - params=params, - timeout=self.settings.request_timeout, - follow_redirects=True, - ) - return response + if self.http_client is None or self.http_client.is_closed: + return httpx.get(url, **get_kwargs) + return self.http_client.get(url, **get_kwargs) def document_req_get(self, document_id: str) -> httpx.Response: """ @@ -264,19 +267,20 @@ def document_req_get(self, document_id: str) -> httpx.Response: :param document_id: ID of the document """ - params = { - "include_annotations": True, - "include_candidates": True, - "global_orientation": True, + url = f"{self.settings.url_root}/documents/{document_id}" + get_kwargs: StringDict = { + "headers": self.settings.base_headers, + "params": { + "include_annotations": True, + "include_candidates": True, + "global_orientation": True, + }, + "timeout": self.settings.request_timeout, + "follow_redirects": True, } - response = self.http_client.get( - f"{self.settings.url_root}/documents/{document_id}", - headers=self.settings.base_headers, - params=params, - timeout=self.settings.request_timeout, - follow_redirects=True, - ) - return response + if self.http_client is None or self.http_client.is_closed: + return httpx.get(url, **get_kwargs) + return self.http_client.get(url, **get_kwargs) def annotations_req_post( self, document_id: str, annotations: dict @@ -288,13 +292,15 @@ def annotations_req_post( :param annotations: Annotations object :return: httpx response """ - response = self.http_client.post( - f"{self.settings.url_root}/documents/{document_id}/annotations", - headers=self.settings.base_headers, - json=annotations, - timeout=self.settings.request_timeout, - ) - return response + url = f"{self.settings.url_root}/documents/{document_id}/annotations" + post_kwargs: StringDict = { + "headers": self.settings.base_headers, + "json": annotations, + "timeout": self.settings.request_timeout, + } + if self.http_client is None or self.http_client.is_closed: + return httpx.post(url, **post_kwargs) + return self.http_client.post(url, **post_kwargs) def annotations_req_put( self, document_id: str, annotations: dict @@ -306,13 +312,15 @@ def annotations_req_put( :param annotations: Annotations object :return: httpx response """ - response = self.http_client.put( - f"{self.settings.url_root}/documents/{document_id}/annotations", - headers=self.settings.base_headers, - json=annotations, - timeout=self.settings.request_timeout, - ) - return response + url = f"{self.settings.url_root}/documents/{document_id}/annotations" + put_kwargs: StringDict = { + "headers": self.settings.base_headers, + "json": annotations, + "timeout": self.settings.request_timeout, + } + if self.http_client is None or self.http_client.is_closed: + return httpx.put(url, **put_kwargs) + return self.http_client.put(url, **put_kwargs) def annotations_req_del(self, document_id: str) -> httpx.Response: """ @@ -321,9 +329,11 @@ def annotations_req_del(self, document_id: str) -> httpx.Response: :param document_id: ID of the document to annotate :return: httpx response """ - response = self.http_client.delete( - f"{self.settings.url_root}/documents/{document_id}/annotations", - headers=self.settings.base_headers, - timeout=self.settings.request_timeout, - ) - return response + url = f"{self.settings.url_root}/documents/{document_id}/annotations" + delete_kwargs: StringDict = { + "headers": self.settings.base_headers, + "timeout": self.settings.request_timeout, + } + if self.http_client is None or self.http_client.is_closed: + return httpx.delete(url, **delete_kwargs) + return self.http_client.delete(url, **delete_kwargs) diff --git a/mindee/v1/mindee_http/workflow_endpoint.py b/mindee/v1/mindee_http/workflow_endpoint.py index da774e18..2aa944a8 100644 --- a/mindee/v1/mindee_http/workflow_endpoint.py +++ b/mindee/v1/mindee_http/workflow_endpoint.py @@ -2,6 +2,7 @@ from mindee.input.local_input_source import LocalInputSource from mindee.input.url_input_source import URLInputSource +from mindee.parsing.common.string_dict import StringDict from mindee.v1.client_options.workflow_options import WorkflowOptions from mindee.v1.mindee_http.base_endpoint import BaseEndpoint from mindee.v1.mindee_http.workflow_settings import WorkflowSettings @@ -50,25 +51,17 @@ def workflow_execution_post( params["full_text_ocr"] = "true" if options.rag: params["rag"] = "true" + post_kwargs: StringDict = { + "headers": self.settings.base_headers, + "params": params, + "timeout": self.settings.request_timeout, + } if isinstance(input_source, URLInputSource): data["document"] = input_source.url - response = self.http_client.post( - self.settings.url_root, - headers=self.settings.base_headers, - data=data, - params=params, - timeout=self.settings.request_timeout, - ) else: - files = {"document": input_source.read_contents(True)} - response = self.http_client.post( - self.settings.url_root, - files=files, - headers=self.settings.base_headers, - data=data, - params=params, - timeout=self.settings.request_timeout, - ) - - return response + post_kwargs["files"] = {"document": input_source.read_contents(True)} + post_kwargs["data"] = data + if self.http_client is None or self.http_client.is_closed: + return httpx.post(self.settings.url_root, **post_kwargs) + return self.http_client.post(self.settings.url_root, **post_kwargs) diff --git a/mindee/v1/parsing/custom/classification.py b/mindee/v1/parsing/custom/classification.py index c11099ff..bc5bd4a3 100644 --- a/mindee/v1/parsing/custom/classification.py +++ b/mindee/v1/parsing/custom/classification.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class ClassificationField: diff --git a/mindee/v1/parsing/custom/list.py b/mindee/v1/parsing/custom/list.py index 91982d8f..61ca7368 100644 --- a/mindee/v1/parsing/custom/list.py +++ b/mindee/v1/parsing/custom/list.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import FieldPositionMixin diff --git a/mindee/v1/parsing/generated/generated_list.py b/mindee/v1/parsing/generated/generated_list.py index 5712a5fc..d0fe6d3b 100644 --- a/mindee/v1/parsing/generated/generated_list.py +++ b/mindee/v1/parsing/generated/generated_list.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.generated.generated_object import ( GeneratedObjectField, is_generated_object, diff --git a/mindee/v1/parsing/generated/generated_object.py b/mindee/v1/parsing/generated/generated_object.py index f29e15e0..ab4736e4 100644 --- a/mindee/v1/parsing/generated/generated_object.py +++ b/mindee/v1/parsing/generated/generated_object.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.position import PositionField diff --git a/mindee/v1/parsing/standard/address.py b/mindee/v1/parsing/standard/address.py index 7ee280bd..7b0bfc25 100644 --- a/mindee/v1/parsing/standard/address.py +++ b/mindee/v1/parsing/standard/address.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.text import StringField diff --git a/mindee/v1/parsing/standard/amount.py b/mindee/v1/parsing/standard/amount.py index 84b68063..92f8d972 100644 --- a/mindee/v1/parsing/standard/amount.py +++ b/mindee/v1/parsing/standard/amount.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import ( BaseField, FieldPositionMixin, diff --git a/mindee/v1/parsing/standard/base.py b/mindee/v1/parsing/standard/base.py index 0362728d..40e21651 100644 --- a/mindee/v1/parsing/standard/base.py +++ b/mindee/v1/parsing/standard/base.py @@ -3,7 +3,7 @@ from mindee.geometry.polygon import Polygon from mindee.geometry.quadrilateral import Quadrilateral, get_bounding_box -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class FieldPositionMixin: diff --git a/mindee/v1/parsing/standard/boolean.py b/mindee/v1/parsing/standard/boolean.py index e69804d9..add3701c 100644 --- a/mindee/v1/parsing/standard/boolean.py +++ b/mindee/v1/parsing/standard/boolean.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import BaseField, FieldPositionMixin diff --git a/mindee/v1/parsing/standard/classification.py b/mindee/v1/parsing/standard/classification.py index 8921d5a3..21a4dc02 100644 --- a/mindee/v1/parsing/standard/classification.py +++ b/mindee/v1/parsing/standard/classification.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import BaseField diff --git a/mindee/v1/parsing/standard/company_registration.py b/mindee/v1/parsing/standard/company_registration.py index 10a913ff..c309c670 100644 --- a/mindee/v1/parsing/standard/company_registration.py +++ b/mindee/v1/parsing/standard/company_registration.py @@ -1,4 +1,5 @@ -from mindee.parsing.common import StringDict, format_for_display +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.common.summary_helper import format_for_display from mindee.v1.parsing.standard.base import BaseField, FieldPositionMixin diff --git a/mindee/v1/parsing/standard/date.py b/mindee/v1/parsing/standard/date.py index b3c9ea3b..f157cbb8 100644 --- a/mindee/v1/parsing/standard/date.py +++ b/mindee/v1/parsing/standard/date.py @@ -1,6 +1,6 @@ from datetime import date, datetime -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import BaseField, FieldPositionMixin ISO8601_DATE_FORMAT = "%Y-%m-%d" diff --git a/mindee/v1/parsing/standard/locale.py b/mindee/v1/parsing/standard/locale.py index c9f097af..57790535 100644 --- a/mindee/v1/parsing/standard/locale.py +++ b/mindee/v1/parsing/standard/locale.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import BaseField diff --git a/mindee/v1/parsing/standard/payment_details.py b/mindee/v1/parsing/standard/payment_details.py index bc9b7b7f..470a8262 100644 --- a/mindee/v1/parsing/standard/payment_details.py +++ b/mindee/v1/parsing/standard/payment_details.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import BaseField, FieldPositionMixin diff --git a/mindee/v1/parsing/standard/position.py b/mindee/v1/parsing/standard/position.py index 502322f3..af7d4f77 100644 --- a/mindee/v1/parsing/standard/position.py +++ b/mindee/v1/parsing/standard/position.py @@ -1,7 +1,7 @@ from mindee.error.geometry_error import MindeeGeometryError from mindee.geometry.polygon import Polygon from mindee.geometry.quadrilateral import Quadrilateral, quadrilateral_from_prediction -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import BaseField diff --git a/mindee/v1/parsing/standard/tax.py b/mindee/v1/parsing/standard/tax.py index 3da6e431..410d0b85 100644 --- a/mindee/v1/parsing/standard/tax.py +++ b/mindee/v1/parsing/standard/tax.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import ( BaseField, FieldPositionMixin, diff --git a/mindee/v1/parsing/standard/text.py b/mindee/v1/parsing/standard/text.py index 8cf58c28..2f862ec6 100644 --- a/mindee/v1/parsing/standard/text.py +++ b/mindee/v1/parsing/standard/text.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v1.parsing.standard.base import BaseField, FieldPositionMixin diff --git a/mindee/v2/client.py b/mindee/v2/client.py index 87efb314..1d54c9ca 100644 --- a/mindee/v2/client.py +++ b/mindee/v2/client.py @@ -173,10 +173,18 @@ def search_models( def close(self) -> None: """Closes the underlying HTTP client.""" - self.mindee_api.http_client.close() + self.mindee_api.close() def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.close() + + def __del__(self): + """Ensure the HTTP client is closed when the object is garbage collected.""" + mindee_api = getattr(self, "mindee_api", None) + if mindee_api: + httpx_client = getattr(self.mindee_api, "http_client", None) + if httpx_client and self.mindee_api: + self.mindee_api.delete_http_client() diff --git a/mindee/v2/error/mindee_http_error_v2.py b/mindee/v2/error/mindee_http_error_v2.py index 4bc97c0f..ac34c6ef 100644 --- a/mindee/v2/error/mindee_http_error_v2.py +++ b/mindee/v2/error/mindee_http_error_v2.py @@ -1,6 +1,6 @@ import json -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing import ErrorItem, ErrorResponse diff --git a/mindee/v2/mindee_http/mindee_api_v2.py b/mindee/v2/mindee_http/mindee_api_v2.py index 050b4013..66e6c4c9 100644 --- a/mindee/v2/mindee_http/mindee_api_v2.py +++ b/mindee/v2/mindee_http/mindee_api_v2.py @@ -38,7 +38,8 @@ class MindeeAPIV2(SettingsMixin): """Root of the URL to use for polling.""" api_key: str | None """API Key for the client.""" - http_client: httpx.Client + http_client: httpx.Client | None + """HTTP client for making requests.""" def __init__(self, api_key: str | None, http_client: httpx.Client | None = None): self.api_key = ( @@ -57,7 +58,7 @@ def __init__(self, api_key: str | None, http_client: httpx.Client | None = None) f"'{API_KEY_V2_ENV_NAME}' environment variable." ) self.url_root = f"{self.base_url.rstrip('/')}" - self.http_client = http_client or httpx.Client() + self.http_client = http_client @property def base_headers(self) -> dict[str, str]: @@ -95,26 +96,23 @@ def req_post_inference_enqueue( """ data = params.get_form_data() url = f"{self.url_root}/v2/{slug}/enqueue" + post_kwargs: StringDict = { + "headers": self.base_headers, + "timeout": self.request_timeout, + } if isinstance(input_source, LocalInputSource): - files = {"file": input_source.read_contents(params.close_file)} - response = self.http_client.post( - url=url, - files=files, - headers=self.base_headers, - data=data, - timeout=self.request_timeout, - ) + post_kwargs["files"] = { + "file": input_source.read_contents(params.close_file) + } elif isinstance(input_source, URLInputSource): data["url"] = input_source.url - response = self.http_client.post( - url=url, - headers=self.base_headers, - data=data, - timeout=self.request_timeout, - ) + post_kwargs["data"] = data + + if self.http_client is None or self.http_client.is_closed: + response = httpx.post(url, **post_kwargs) else: - raise MindeeAPIV2Error("Invalid input source.") + response = self.http_client.post(url, **post_kwargs) return response def req_get_job(self, job_id: str) -> httpx.Response: @@ -123,12 +121,15 @@ def req_get_job(self, job_id: str) -> httpx.Response: :param job_id: Job ID, returned by the enqueue request. """ - return self.http_client.get( - f"{self.url_root}/v2/jobs/{job_id}", - headers=self.base_headers, - timeout=self.request_timeout, - follow_redirects=False, - ) + url = f"{self.url_root}/v2/jobs/{job_id}" + get_kwargs: StringDict = { + "headers": self.base_headers, + "timeout": self.request_timeout, + "follow_redirects": False, + } + if self.http_client is None or self.http_client.is_closed: + return httpx.get(url, **get_kwargs) + return self.http_client.get(url, **get_kwargs) def req_get_inference_by_url(self, url) -> httpx.Response: """ @@ -138,12 +139,14 @@ def req_get_inference_by_url(self, url) -> httpx.Response: :param url: URL to use for the request. :return: Response object from the request. """ - return self.http_client.get( - url, - headers=self.base_headers, - timeout=self.request_timeout, - follow_redirects=False, - ) + get_kwargs: StringDict = { + "headers": self.base_headers, + "timeout": self.request_timeout, + "follow_redirects": False, + } + if self.http_client is None or self.http_client.is_closed: + return httpx.get(url, **get_kwargs) + return self.http_client.get(url, **get_kwargs) def req_get_inference(self, inference_id: str, slug: str) -> httpx.Response: """ @@ -152,14 +155,15 @@ def req_get_inference(self, inference_id: str, slug: str) -> httpx.Response: :param inference_id: Inference ID, returned by the job request. :param slug: Slug of the inference, defaults to nothing. """ - url = f"{self.url_root}/v2/{slug}/{inference_id}" - return self.http_client.get( - url, - headers=self.base_headers, - timeout=self.request_timeout, - follow_redirects=False, - ) + get_kwargs: StringDict = { + "headers": self.base_headers, + "timeout": self.request_timeout, + "follow_redirects": False, + } + if self.http_client is None or self.http_client.is_closed: + return httpx.get(url, **get_kwargs) + return self.http_client.get(url, **get_kwargs) def req_get_search_models( self, model_name: str | None, model_type: str | None @@ -171,12 +175,14 @@ def req_get_search_models( :return: Response object containing search results. """ url = f"{self.url_root}/v2/search/models" - return self.http_client.get( - url, - headers=self.base_headers, - params={"name": model_name, "model_type": model_type}, - timeout=self.request_timeout, - ) + get_kwargs: StringDict = { + "headers": self.base_headers, + "params": {"name": model_name, "model_type": model_type}, + "timeout": self.request_timeout, + } + if self.http_client is None or self.http_client.is_closed: + return httpx.get(url, **get_kwargs) + return self.http_client.get(url, **get_kwargs) def enqueue( self, input_source: LocalInputSource | URLInputSource, params: BaseParameters @@ -264,3 +270,26 @@ def _response_json(response: httpx.Response) -> StringDict: f"HTTP {response.status_code} response is not valid JSON: " f"{response.text}" ) from e + + def close(self) -> None: + """Closes the underlying HTTP client.""" + if self.http_client and not self.http_client.is_closed: + self.http_client.close() + + def __enter__(self): + self.http_client = httpx.Client() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def delete_http_client(self): + """Delete the underlying HTTP client.""" + httpx_client = getattr(self, "http_client", None) + if httpx_client and not self.http_client.is_closed: + logger.info("Force-closing unclosed Mindee Client (V2) %s.", str(self)) + self.close() + + def __del__(self): + """Ensure the HTTP client is closed when the object is garbage collected.""" + self.delete_http_client() diff --git a/mindee/v2/parsing/error/error_item.py b/mindee/v2/parsing/error/error_item.py index 50ec5bb1..c997585a 100644 --- a/mindee/v2/parsing/error/error_item.py +++ b/mindee/v2/parsing/error/error_item.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class ErrorItem: diff --git a/mindee/v2/parsing/error/error_response.py b/mindee/v2/parsing/error/error_response.py index eb9a62d0..351f1339 100644 --- a/mindee/v2/parsing/error/error_response.py +++ b/mindee/v2/parsing/error/error_response.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.error.error_item import ErrorItem diff --git a/mindee/v2/parsing/inference/base_inference.py b/mindee/v2/parsing/inference/base_inference.py index 80f129da..659c6a85 100644 --- a/mindee/v2/parsing/inference/base_inference.py +++ b/mindee/v2/parsing/inference/base_inference.py @@ -1,7 +1,7 @@ from abc import ABC from typing import TypeVar -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.inference_file import InferenceFile from mindee.v2.parsing.inference.inference_job import InferenceJob from mindee.v2.parsing.inference.inference_model import InferenceModel diff --git a/mindee/v2/parsing/inference/field/base_field.py b/mindee/v2/parsing/inference/field/base_field.py index 73f8ae19..b8a12450 100644 --- a/mindee/v2/parsing/inference/field/base_field.py +++ b/mindee/v2/parsing/inference/field/base_field.py @@ -1,6 +1,6 @@ from enum import Enum -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.field.field_confidence import FieldConfidence from mindee.v2.parsing.inference.field.field_location import FieldLocation diff --git a/mindee/v2/parsing/inference/field/factory.py b/mindee/v2/parsing/inference/field/factory.py index eebe6890..7cd76738 100644 --- a/mindee/v2/parsing/inference/field/factory.py +++ b/mindee/v2/parsing/inference/field/factory.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.error.mindee_api_v2_error import MindeeAPIV2Error from mindee.v2.parsing.inference.field.list_field import ListField from mindee.v2.parsing.inference.field.object_field import ObjectField diff --git a/mindee/v2/parsing/inference/field/field_location.py b/mindee/v2/parsing/inference/field/field_location.py index 8dae3c5f..31ca7056 100644 --- a/mindee/v2/parsing/inference/field/field_location.py +++ b/mindee/v2/parsing/inference/field/field_location.py @@ -1,5 +1,5 @@ from mindee.geometry.polygon import Polygon -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class FieldLocation: diff --git a/mindee/v2/parsing/inference/field/inference_fields.py b/mindee/v2/parsing/inference/field/inference_fields.py index 9890688d..9b73043c 100644 --- a/mindee/v2/parsing/inference/field/inference_fields.py +++ b/mindee/v2/parsing/inference/field/inference_fields.py @@ -1,7 +1,7 @@ from collections.abc import Callable from typing import TYPE_CHECKING, cast -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.field.base_field import BaseField, FieldType if TYPE_CHECKING: diff --git a/mindee/v2/parsing/inference/field/list_field.py b/mindee/v2/parsing/inference/field/list_field.py index 1701c961..af844aae 100644 --- a/mindee/v2/parsing/inference/field/list_field.py +++ b/mindee/v2/parsing/inference/field/list_field.py @@ -1,6 +1,6 @@ from collections.abc import Callable -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.field.base_field import BaseField, FieldType from mindee.v2.parsing.inference.field.object_field import ObjectField from mindee.v2.parsing.inference.field.simple_field import SimpleField diff --git a/mindee/v2/parsing/inference/field/simple_field.py b/mindee/v2/parsing/inference/field/simple_field.py index a1149036..574c2289 100644 --- a/mindee/v2/parsing/inference/field/simple_field.py +++ b/mindee/v2/parsing/inference/field/simple_field.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.field.base_field import BaseField, FieldType diff --git a/mindee/v2/parsing/inference/inference_active_options.py b/mindee/v2/parsing/inference/inference_active_options.py index 730eb8b7..4c894aef 100644 --- a/mindee/v2/parsing/inference/inference_active_options.py +++ b/mindee/v2/parsing/inference/inference_active_options.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class DataSchemaActiveOptions: diff --git a/mindee/v2/parsing/inference/inference_file.py b/mindee/v2/parsing/inference/inference_file.py index 98151ec9..5cd950f5 100644 --- a/mindee/v2/parsing/inference/inference_file.py +++ b/mindee/v2/parsing/inference/inference_file.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class InferenceFile: diff --git a/mindee/v2/parsing/inference/inference_job.py b/mindee/v2/parsing/inference/inference_job.py index 19519260..ac877545 100644 --- a/mindee/v2/parsing/inference/inference_job.py +++ b/mindee/v2/parsing/inference/inference_job.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class InferenceJob: diff --git a/mindee/v2/parsing/inference/inference_model.py b/mindee/v2/parsing/inference/inference_model.py index a93f65da..c44c4d0a 100644 --- a/mindee/v2/parsing/inference/inference_model.py +++ b/mindee/v2/parsing/inference/inference_model.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class InferenceModel: diff --git a/mindee/v2/parsing/inference/rag_metadata.py b/mindee/v2/parsing/inference/rag_metadata.py index a0c88c98..c9af29b5 100644 --- a/mindee/v2/parsing/inference/rag_metadata.py +++ b/mindee/v2/parsing/inference/rag_metadata.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class RAGMetadata: diff --git a/mindee/v2/parsing/inference/raw_text.py b/mindee/v2/parsing/inference/raw_text.py index eadb09dd..dae61af3 100644 --- a/mindee/v2/parsing/inference/raw_text.py +++ b/mindee/v2/parsing/inference/raw_text.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.raw_text_page import RawTextPage diff --git a/mindee/v2/parsing/inference/raw_text_page.py b/mindee/v2/parsing/inference/raw_text_page.py index e3eaf041..b10c68c7 100644 --- a/mindee/v2/parsing/inference/raw_text_page.py +++ b/mindee/v2/parsing/inference/raw_text_page.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class RawTextPage: diff --git a/mindee/v2/parsing/job/job.py b/mindee/v2/parsing/job/job.py index 492c2fa5..0952eeff 100644 --- a/mindee/v2/parsing/job/job.py +++ b/mindee/v2/parsing/job/job.py @@ -1,6 +1,6 @@ from datetime import datetime -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.error.error_response import ErrorResponse from mindee.v2.parsing.job.job_webhook import JobWebhook diff --git a/mindee/v2/parsing/job/job_response.py b/mindee/v2/parsing/job/job_response.py index 948564c4..da9f64ca 100644 --- a/mindee/v2/parsing/job/job_response.py +++ b/mindee/v2/parsing/job/job_response.py @@ -1,5 +1,5 @@ -from mindee.parsing.common import StringDict from mindee.parsing.common.common_response import CommonResponse +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.job.job import Job diff --git a/mindee/v2/parsing/job/job_webhook.py b/mindee/v2/parsing/job/job_webhook.py index c060ed93..b9a7fb69 100644 --- a/mindee/v2/parsing/job/job_webhook.py +++ b/mindee/v2/parsing/job/job_webhook.py @@ -1,6 +1,6 @@ from datetime import datetime -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.error.error_response import ErrorResponse diff --git a/mindee/v2/product/classification/classification_classifier.py b/mindee/v2/product/classification/classification_classifier.py index 7edfe975..98c16232 100644 --- a/mindee/v2/product/classification/classification_classifier.py +++ b/mindee/v2/product/classification/classification_classifier.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.product.extraction.extraction_response import ExtractionResponse diff --git a/mindee/v2/product/classification/classification_inference.py b/mindee/v2/product/classification/classification_inference.py index 7953059e..9e00ad60 100644 --- a/mindee/v2/product/classification/classification_inference.py +++ b/mindee/v2/product/classification/classification_inference.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_inference import BaseInference from mindee.v2.product.classification.classification_result import ClassificationResult diff --git a/mindee/v2/product/classification/classification_response.py b/mindee/v2/product/classification/classification_response.py index 2e5380ed..c5e82c72 100644 --- a/mindee/v2/product/classification/classification_response.py +++ b/mindee/v2/product/classification/classification_response.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_response import BaseResponse from mindee.v2.product.classification.classification_inference import ( ClassificationInference, diff --git a/mindee/v2/product/classification/classification_result.py b/mindee/v2/product/classification/classification_result.py index 9bad332c..2d314ffb 100644 --- a/mindee/v2/product/classification/classification_result.py +++ b/mindee/v2/product/classification/classification_result.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.product.classification.classification_classifier import ( ClassificationClassifier, ) diff --git a/mindee/v2/product/crop/crop_inference.py b/mindee/v2/product/crop/crop_inference.py index 9ac140e0..6839e534 100644 --- a/mindee/v2/product/crop/crop_inference.py +++ b/mindee/v2/product/crop/crop_inference.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_inference import BaseInference from mindee.v2.product.crop.crop_result import CropResult diff --git a/mindee/v2/product/crop/crop_item.py b/mindee/v2/product/crop/crop_item.py index 5ee0d6ea..10c61756 100644 --- a/mindee/v2/product/crop/crop_item.py +++ b/mindee/v2/product/crop/crop_item.py @@ -1,7 +1,7 @@ from mindee.image.extracted_image import ExtractedImage from mindee.image.image_extractor import extract_multiple_images_from_source from mindee.input.local_input_source import LocalInputSource -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.field import FieldLocation from mindee.v2.product.extraction.extraction_response import ExtractionResponse diff --git a/mindee/v2/product/crop/crop_response.py b/mindee/v2/product/crop/crop_response.py index db9c273b..8b70cc19 100644 --- a/mindee/v2/product/crop/crop_response.py +++ b/mindee/v2/product/crop/crop_response.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_response import BaseResponse from mindee.v2.product.crop.crop_inference import CropInference diff --git a/mindee/v2/product/crop/crop_result.py b/mindee/v2/product/crop/crop_result.py index 59d11509..47561e90 100644 --- a/mindee/v2/product/crop/crop_result.py +++ b/mindee/v2/product/crop/crop_result.py @@ -1,5 +1,5 @@ from mindee.input.local_input_source import LocalInputSource -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.file_operations.crop import extract_multiple_crops from mindee.v2.file_operations.crop_files import CropFiles from mindee.v2.product.crop.crop_item import CropItem diff --git a/mindee/v2/product/extraction/extraction_inference.py b/mindee/v2/product/extraction/extraction_inference.py index c7a65aab..defdd454 100644 --- a/mindee/v2/product/extraction/extraction_inference.py +++ b/mindee/v2/product/extraction/extraction_inference.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_inference import BaseInference from mindee.v2.parsing.inference.inference_active_options import InferenceActiveOptions from mindee.v2.product.extraction.extraction_result import ExtractionResult diff --git a/mindee/v2/product/extraction/extraction_response.py b/mindee/v2/product/extraction/extraction_response.py index 39c396d8..3dac7e7a 100644 --- a/mindee/v2/product/extraction/extraction_response.py +++ b/mindee/v2/product/extraction/extraction_response.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_response import BaseResponse from mindee.v2.product.extraction.extraction_inference import ExtractionInference diff --git a/mindee/v2/product/extraction/extraction_result.py b/mindee/v2/product/extraction/extraction_result.py index 42eb9160..9d97a0b6 100644 --- a/mindee/v2/product/extraction/extraction_result.py +++ b/mindee/v2/product/extraction/extraction_result.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.field import InferenceFields from mindee.v2.parsing.inference.field.factory import parse_field from mindee.v2.parsing.inference.rag_metadata import RAGMetadata diff --git a/mindee/v2/product/ocr/ocr_inference.py b/mindee/v2/product/ocr/ocr_inference.py index acb7d330..60eda267 100644 --- a/mindee/v2/product/ocr/ocr_inference.py +++ b/mindee/v2/product/ocr/ocr_inference.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_inference import BaseInference from mindee.v2.product.ocr.ocr_result import OCRResult diff --git a/mindee/v2/product/ocr/ocr_page.py b/mindee/v2/product/ocr/ocr_page.py index fa1243a6..5673b127 100644 --- a/mindee/v2/product/ocr/ocr_page.py +++ b/mindee/v2/product/ocr/ocr_page.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.product.ocr.ocr_word import OCRWord diff --git a/mindee/v2/product/ocr/ocr_response.py b/mindee/v2/product/ocr/ocr_response.py index 32b7d052..da21bfea 100644 --- a/mindee/v2/product/ocr/ocr_response.py +++ b/mindee/v2/product/ocr/ocr_response.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_response import BaseResponse from mindee.v2.product.ocr.ocr_inference import OCRInference diff --git a/mindee/v2/product/ocr/ocr_result.py b/mindee/v2/product/ocr/ocr_result.py index 6d01c60b..d91f4c6e 100644 --- a/mindee/v2/product/ocr/ocr_result.py +++ b/mindee/v2/product/ocr/ocr_result.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.product.ocr.ocr_page import OCRPage diff --git a/mindee/v2/product/split/split_inference.py b/mindee/v2/product/split/split_inference.py index 7540f1fc..6e68bdc2 100644 --- a/mindee/v2/product/split/split_inference.py +++ b/mindee/v2/product/split/split_inference.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_inference import BaseInference from mindee.v2.product.split.split_result import SplitResult diff --git a/mindee/v2/product/split/split_range.py b/mindee/v2/product/split/split_range.py index f742b2cd..92140657 100644 --- a/mindee/v2/product/split/split_range.py +++ b/mindee/v2/product/split/split_range.py @@ -1,5 +1,5 @@ from mindee.input.local_input_source import LocalInputSource -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.pdf.extracted_pdf import ExtractedPDF from mindee.v2.file_operations.split import extract_single_split from mindee.v2.product.extraction.extraction_response import ExtractionResponse diff --git a/mindee/v2/product/split/split_response.py b/mindee/v2/product/split/split_response.py index be6e0673..ff8ace92 100644 --- a/mindee/v2/product/split/split_response.py +++ b/mindee/v2/product/split/split_response.py @@ -1,4 +1,4 @@ -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.parsing.inference.base_response import BaseResponse from mindee.v2.product.split.split_inference import SplitInference diff --git a/mindee/v2/product/split/split_result.py b/mindee/v2/product/split/split_result.py index 04025322..ab3921bf 100644 --- a/mindee/v2/product/split/split_result.py +++ b/mindee/v2/product/split/split_result.py @@ -1,5 +1,5 @@ from mindee.input.local_input_source import LocalInputSource -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict from mindee.v2.file_operations.split import extract_multiple_splits from mindee.v2.file_operations.split_files import SplitFiles from mindee.v2.product.split.split_range import SplitRange diff --git a/pyproject.toml b/pyproject.toml index db5b3da7..14b2296d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ requires-python = ">=3.10" dependencies = [ "pypdfium2>=4.0,<6.0", "Pillow>=12.2.0", - "httpx>=0.28.1,<1.0", + "httpx[http2]>=0.28.1,<1.0", ] [project.urls] diff --git a/tests/v1/input/test_url_input_source_integration.py b/tests/v1/input/test_url_input_source_integration.py index 029e61a7..3973aa88 100644 --- a/tests/v1/input/test_url_input_source_integration.py +++ b/tests/v1/input/test_url_input_source_integration.py @@ -3,7 +3,7 @@ import pytest -from mindee import URLInputSource +from mindee.input.url_input_source import URLInputSource from mindee.v1.client import Client from mindee.v1.product.invoice import InvoiceV4 from tests.utils import cleanup_output_files diff --git a/tests/v2/test_client.py b/tests/v2/test_client.py index 4dfc58ca..6eac7bdb 100644 --- a/tests/v2/test_client.py +++ b/tests/v2/test_client.py @@ -269,7 +269,7 @@ def test_client_closes_httpx_connections() -> None: client = Client(api_key="dummy_key") client.close() with pytest.raises( - RuntimeError, match=r"Cannot send a request, as the client has been closed\." + AttributeError, match=r"NoneType' object has no attribute 'get'" ): client.mindee_api.http_client.get("https://google.com") @@ -280,7 +280,7 @@ def test_httpx_multiple_calls_thread_safety() -> None: client = Client(api_key="dummy_key") input_path = FILE_TYPES_DIR / "pdf" / "blank_1.pdf" - def delayed_response(request: httpx.Request) -> httpx.Response: + def delayed_response(_: httpx.Request) -> httpx.Response: job_json = json.loads((V2_DATA_DIR / "job" / "ok_processing.json").read_text()) time.sleep(0.1) return httpx.Response(201, json=job_json) diff --git a/tests/v2/test_client_integration.py b/tests/v2/test_client_integration.py index a82e7451..b599f02f 100644 --- a/tests/v2/test_client_integration.py +++ b/tests/v2/test_client_integration.py @@ -341,6 +341,20 @@ def log_request(request: httpx.Request): @pytest.mark.v2 +@pytest.mark.integration +def test_http2_client(findoc_model_id) -> None: + httpx_client = httpx.Client(http2=True) + client = Client(http_client=httpx_client) + input_source = PathInput( + V2_PRODUCT_DATA_DIR / "extraction" / "financial_document" / "default_sample.jpg" + ) + params = ExtractionParameters(model_id=findoc_model_id) + response = client.enqueue_and_get_result(ExtractionResponse, input_source, params) + _basic_assert_success(response, page_count=1, model_id=findoc_model_id) + + +@pytest.mark.v2 +@pytest.mark.integration @respx.mock def test_explicit_timeout_failure(findoc_model_id) -> None: respx.post("https://api-v2.mindee.net/v2/inferences/enqueue").mock( From 0501fad391efd4fdcd462f788d2b154921d285ae Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:53:19 +0200 Subject: [PATCH 8/8] fix most issues and simplify syntaxes --- mindee/mindee_http/settings_mixin.py | 2 +- mindee/v1/mindee_http/endpoint.py | 241 +++++++++++++-------- mindee/v1/mindee_http/workflow_endpoint.py | 28 ++- mindee/v2/mindee_http/mindee_api_v2.py | 123 +++++++---- tests/v2/test_client.py | 31 +++ tests/v2/test_client_integration.py | 55 +---- 6 files changed, 282 insertions(+), 198 deletions(-) diff --git a/mindee/mindee_http/settings_mixin.py b/mindee/mindee_http/settings_mixin.py index a5452869..59bc7f48 100644 --- a/mindee/mindee_http/settings_mixin.py +++ b/mindee/mindee_http/settings_mixin.py @@ -3,7 +3,7 @@ class SettingsMixin: base_url: str """Base URL for all V2 requests.""" - request_timeout: int + request_timeout: float """Timeout for all requests.""" def set_timeout(self, value: str | int) -> None: diff --git a/mindee/v1/mindee_http/endpoint.py b/mindee/v1/mindee_http/endpoint.py index a2889b7c..e03fa216 100644 --- a/mindee/v1/mindee_http/endpoint.py +++ b/mindee/v1/mindee_http/endpoint.py @@ -1,3 +1,5 @@ +from collections.abc import Callable + import httpx from mindee.input.local_input_source import LocalInputSource @@ -114,12 +116,7 @@ def _custom_request( if rag: params["rag"] = "true" - post_kwargs: StringDict = { - "headers": self.settings.base_headers, - "data": data, - "params": params, - "timeout": self.settings.request_timeout, - } + post_kwargs: StringDict = {} if workflow_id: url = f"{self.settings.base_url}/v1/workflows/{workflow_id}/{route}" @@ -130,9 +127,19 @@ def _custom_request( data["document"] = input_source.url else: post_kwargs["files"] = {"document": input_source.read_contents(close_file)} + post_caller: Callable if self.http_client is None or self.http_client.is_closed: - return httpx.post(url, **post_kwargs) - return self.http_client.post(url, **post_kwargs) + post_caller = httpx.post + post_kwargs["timeout"] = self.settings.request_timeout + else: + post_caller = self.http_client.post + return post_caller( + url, + headers=self.settings.base_headers, + data=data, + params=params, + **post_kwargs, + ) def document_queue_req_get(self, queue_id: str) -> httpx.Response: """ @@ -140,27 +147,30 @@ def document_queue_req_get(self, queue_id: str) -> httpx.Response: :param queue_id: queue_id received from the API """ - url = f"{self.settings.url_root}/documents/queue/{queue_id}" - get_kwargs: StringDict = { - "headers": self.settings.base_headers, - "timeout": self.settings.request_timeout, - "follow_redirects": True, - } + get_kwargs: StringDict = {"follow_redirects": True} + get_caller: Callable if self.http_client is None or self.http_client.is_closed: - return httpx.get(url, **get_kwargs) - return self.http_client.get(url, **get_kwargs) + get_caller = httpx.get + get_kwargs["timeout"] = self.settings.request_timeout + else: + get_caller = self.http_client.get + return get_caller( + url=f"{self.settings.url_root}/documents/queue/{queue_id}", + headers=self.settings.base_headers, + **get_kwargs, + ) def openapi_get_req(self) -> httpx.Response: """Get the OpenAPI specification of the product.""" url = f"{self.settings.url_root}/openapi.json" - get_kwargs: StringDict = { - "headers": self.settings.base_headers, - "timeout": self.settings.request_timeout, - "follow_redirects": True, - } + get_kwargs: StringDict = {} + get_caller: Callable if self.http_client is None or self.http_client.is_closed: - return httpx.get(url, **get_kwargs) - return self.http_client.get(url, **get_kwargs) + get_caller = httpx.get + get_kwargs["timeout"] = self.settings.request_timeout + else: + get_caller = self.http_client.get + return get_caller(url, headers=self.settings.base_headers, **get_kwargs) def document_feedback_req_put( self, document_id: str, feedback: StringDict @@ -171,15 +181,19 @@ def document_feedback_req_put( :param document_id: ID of the document to send feedback to. :param feedback: Feedback object to send. """ - url = f"{self.settings.url_root}/documents/{document_id}/feedback" - put_kwargs: StringDict = { - "headers": self.settings.base_headers, - "data": feedback, - "timeout": self.settings.request_timeout, - } + put_kwargs: StringDict = {"follow_redirects": True} + put_caller: Callable if self.http_client is None or self.http_client.is_closed: - return httpx.put(url, **put_kwargs) - return self.http_client.put(url, **put_kwargs) + put_caller = httpx.put + put_kwargs["timeout"] = self.settings.request_timeout + else: + put_caller = self.http_client.put + return put_caller( + url=f"{self.settings.url_root}/documents/{document_id}/feedback", + headers=self.settings.base_headers, + data=feedback, + **put_kwargs, + ) class CustomEndpoint(Endpoint): @@ -195,16 +209,20 @@ def training_req_post( :return: httpx response :param close_file: Whether to `close()` the file after parsing it. """ - url = f"{self.settings.url_root}/predict" - post_kwargs: StringDict = { - "files": {"document": input_source.read_contents(close_file)}, - "headers": self.settings.base_headers, - "params": {"training": True, "with_candidates": True}, - "timeout": self.settings.request_timeout, - } + post_kwargs: StringDict = {"follow_redirects": True} + post_caller: Callable if self.http_client is None or self.http_client.is_closed: - return httpx.post(url, **post_kwargs) - return self.http_client.post(url, **post_kwargs) + post_caller = httpx.post + post_kwargs["timeout"] = self.settings.request_timeout + else: + post_caller = self.http_client.post + return post_caller( + url=f"{self.settings.url_root}/predict", + headers=self.settings.base_headers, + files={"document": input_source.read_contents(close_file)}, + params={"training": True, "with_candidates": True}, + **post_kwargs, + ) def training_async_req_post( self, input_source: LocalInputSource, close_file: bool = True @@ -216,16 +234,20 @@ def training_async_req_post( :return: httpx response :param close_file: Whether to `close()` the file after parsing it. """ - url = f"{self.settings.url_root}/predict" - post_kwargs: StringDict = { - "files": {"document": input_source.read_contents(close_file)}, - "headers": self.settings.base_headers, - "params": {"training": True, "async": True}, - "timeout": self.settings.request_timeout, - } + post_kwargs: StringDict = {"follow_redirects": True} + post_caller: Callable if self.http_client is None or self.http_client.is_closed: - return httpx.post(url, **post_kwargs) - return self.http_client.post(url, **post_kwargs) + post_caller = httpx.post + post_kwargs["timeout"] = self.settings.request_timeout + else: + post_caller = self.http_client.post + return post_caller( + url=f"{self.settings.url_root}/predict", + headers=self.settings.base_headers, + files={"document": input_source.read_contents(close_file)}, + params={"training": True, "async": True}, + **post_kwargs, + ) def document_req_del(self, document_id: str) -> httpx.Response: """ @@ -233,14 +255,19 @@ def document_req_del(self, document_id: str) -> httpx.Response: :param document_id: ID of the document """ - url = f"{self.settings.url_root}/documents/{document_id}" - delete_kwargs: StringDict = { - "headers": self.settings.base_headers, - "timeout": self.settings.request_timeout, - } + + delete_kwargs: StringDict = {"follow_redirects": True} + delete_caller: Callable if self.http_client is None or self.http_client.is_closed: - return httpx.delete(url, **delete_kwargs) - return httpx.delete(url, **delete_kwargs) + delete_caller = httpx.delete + delete_kwargs["timeout"] = self.settings.request_timeout + else: + delete_caller = self.http_client.delete + return delete_caller( + url=f"{self.settings.url_root}/documents/{document_id}", + headers=self.settings.base_headers, + **delete_kwargs, + ) def documents_req_get(self, page_id: int = 1) -> httpx.Response: """ @@ -248,18 +275,21 @@ def documents_req_get(self, page_id: int = 1) -> httpx.Response: :param page_id: Page number """ - url = f"{self.settings.url_root}/documents" - get_kwargs: StringDict = { - "headers": self.settings.base_headers, - "params": { + get_kwargs: StringDict = {"follow_redirects": True} + get_caller: Callable + if self.http_client is None or self.http_client.is_closed: + get_caller = httpx.get + get_kwargs["timeout"] = self.settings.request_timeout + else: + get_caller = self.http_client.get + return get_caller( + url=f"{self.settings.url_root}/documents", + headers=self.settings.base_headers, + params={ "page": page_id, }, - "timeout": self.settings.request_timeout, - "follow_redirects": True, - } - if self.http_client is None or self.http_client.is_closed: - return httpx.get(url, **get_kwargs) - return self.http_client.get(url, **get_kwargs) + **get_kwargs, + ) def document_req_get(self, document_id: str) -> httpx.Response: """ @@ -267,20 +297,25 @@ def document_req_get(self, document_id: str) -> httpx.Response: :param document_id: ID of the document """ - url = f"{self.settings.url_root}/documents/{document_id}" get_kwargs: StringDict = { - "headers": self.settings.base_headers, - "params": { + "follow_redirects": True, + } + get_caller: Callable + if self.http_client is None or self.http_client.is_closed: + get_caller = httpx.get + get_kwargs["timeout"] = self.settings.request_timeout + else: + get_caller = self.http_client.get + return get_caller( + url=f"{self.settings.url_root}/documents/{document_id}", + headers=self.settings.base_headers, + params={ "include_annotations": True, "include_candidates": True, "global_orientation": True, }, - "timeout": self.settings.request_timeout, - "follow_redirects": True, - } - if self.http_client is None or self.http_client.is_closed: - return httpx.get(url, **get_kwargs) - return self.http_client.get(url, **get_kwargs) + **get_kwargs, + ) def annotations_req_post( self, document_id: str, annotations: dict @@ -292,15 +327,21 @@ def annotations_req_post( :param annotations: Annotations object :return: httpx response """ - url = f"{self.settings.url_root}/documents/{document_id}/annotations" post_kwargs: StringDict = { - "headers": self.settings.base_headers, - "json": annotations, - "timeout": self.settings.request_timeout, + "follow_redirects": True, } + post_caller: Callable if self.http_client is None or self.http_client.is_closed: - return httpx.post(url, **post_kwargs) - return self.http_client.post(url, **post_kwargs) + post_caller = httpx.post + post_kwargs["timeout"] = self.settings.request_timeout + else: + post_caller = self.http_client.post + return post_caller( + url=f"{self.settings.url_root}/documents/{document_id}/annotations", + headers=self.settings.base_headers, + json=annotations, + **post_kwargs, + ) def annotations_req_put( self, document_id: str, annotations: dict @@ -312,15 +353,19 @@ def annotations_req_put( :param annotations: Annotations object :return: httpx response """ - url = f"{self.settings.url_root}/documents/{document_id}/annotations" - put_kwargs: StringDict = { - "headers": self.settings.base_headers, - "json": annotations, - "timeout": self.settings.request_timeout, - } + put_kwargs: StringDict = {"follow_redirects": True} + put_caller: Callable if self.http_client is None or self.http_client.is_closed: - return httpx.put(url, **put_kwargs) - return self.http_client.put(url, **put_kwargs) + put_caller = httpx.put + put_kwargs["timeout"] = self.settings.request_timeout + else: + put_caller = self.http_client.put + return put_caller( + url=f"{self.settings.url_root}/documents/{document_id}/annotations", + headers=self.settings.base_headers, + json=annotations, + **put_kwargs, + ) def annotations_req_del(self, document_id: str) -> httpx.Response: """ @@ -329,11 +374,15 @@ def annotations_req_del(self, document_id: str) -> httpx.Response: :param document_id: ID of the document to annotate :return: httpx response """ - url = f"{self.settings.url_root}/documents/{document_id}/annotations" - delete_kwargs: StringDict = { - "headers": self.settings.base_headers, - "timeout": self.settings.request_timeout, - } + delete_kwargs: StringDict = {"follow_redirects": True} + delete_caller: Callable if self.http_client is None or self.http_client.is_closed: - return httpx.delete(url, **delete_kwargs) - return self.http_client.delete(url, **delete_kwargs) + delete_caller = httpx.delete + delete_kwargs["timeout"] = self.settings.request_timeout + else: + delete_caller = self.http_client.delete + return delete_caller( + url=f"{self.settings.url_root}/documents/{document_id}/annotations", + headers=self.settings.base_headers, + **delete_kwargs, + ) diff --git a/mindee/v1/mindee_http/workflow_endpoint.py b/mindee/v1/mindee_http/workflow_endpoint.py index 2aa944a8..d2e098fa 100644 --- a/mindee/v1/mindee_http/workflow_endpoint.py +++ b/mindee/v1/mindee_http/workflow_endpoint.py @@ -1,3 +1,5 @@ +from collections.abc import Callable + import httpx from mindee.input.local_input_source import LocalInputSource @@ -51,17 +53,23 @@ def workflow_execution_post( params["full_text_ocr"] = "true" if options.rag: params["rag"] = "true" - post_kwargs: StringDict = { - "headers": self.settings.base_headers, - "params": params, - "timeout": self.settings.request_timeout, - } - + post_kwargs: StringDict = {} + files = None if isinstance(input_source, URLInputSource): data["document"] = input_source.url else: - post_kwargs["files"] = {"document": input_source.read_contents(True)} - post_kwargs["data"] = data + files = {"document": input_source.read_contents(True)} + post_caller: Callable if self.http_client is None or self.http_client.is_closed: - return httpx.post(self.settings.url_root, **post_kwargs) - return self.http_client.post(self.settings.url_root, **post_kwargs) + post_caller = httpx.post + post_kwargs["timeout"] = self.settings.request_timeout + else: + post_caller = self.http_client.post + return post_caller( + self.settings.url_root, + headers=self.settings.base_headers, + data=data, + params=params, + files=files, + **post_kwargs, + ) diff --git a/mindee/v2/mindee_http/mindee_api_v2.py b/mindee/v2/mindee_http/mindee_api_v2.py index 66e6c4c9..0e75baae 100644 --- a/mindee/v2/mindee_http/mindee_api_v2.py +++ b/mindee/v2/mindee_http/mindee_api_v2.py @@ -1,4 +1,6 @@ import os +from collections.abc import Callable +from typing import TypeVar import httpx @@ -18,6 +20,7 @@ is_valid_get_response, is_valid_post_response, ) +from mindee.v2.parsing import BaseResponse from mindee.v2.parsing.job.job_response import JobResponse from mindee.v2.parsing.search.search_response import SearchResponse @@ -30,6 +33,8 @@ REQUEST_TIMEOUT_ENV_NAME = "MINDEE_REQUEST_TIMEOUT" TIMEOUT_DEFAULT = 120 +ResponseT = TypeVar("ResponseT", bound=BaseResponse) + class MindeeAPIV2(SettingsMixin): """Settings class relating to API V2 requests.""" @@ -40,6 +45,7 @@ class MindeeAPIV2(SettingsMixin): """API Key for the client.""" http_client: httpx.Client | None """HTTP client for making requests.""" + request_timeout: float def __init__(self, api_key: str | None, http_client: httpx.Client | None = None): self.api_key = ( @@ -47,7 +53,6 @@ def __init__(self, api_key: str | None, http_client: httpx.Client | None = None) if api_key else os.environ.get(API_KEY_V2_ENV_NAME, API_KEY_V2_DEFAULT) ) - self.request_timeout = TIMEOUT_DEFAULT self.set_base_url(BASE_URL_DEFAULT) self.set_from_env() if not self.api_key: @@ -59,6 +64,9 @@ def __init__(self, api_key: str | None, http_client: httpx.Client | None = None) ) self.url_root = f"{self.base_url.rstrip('/')}" self.http_client = http_client + self.request_timeout = float( + os.environ.get(REQUEST_TIMEOUT_ENV_NAME, TIMEOUT_DEFAULT) + ) @property def base_headers(self) -> dict[str, str]: @@ -87,7 +95,7 @@ def req_post_inference_enqueue( slug: str, ) -> httpx.Response: """ - Make an asynchronous request to POST a document for prediction on the V2 API. + Make a request to POST a document for enqueue on the V2 API. :param input_source: Input object. :param params: Options for the enqueueing of the document. @@ -96,24 +104,26 @@ def req_post_inference_enqueue( """ data = params.get_form_data() url = f"{self.url_root}/v2/{slug}/enqueue" - post_kwargs: StringDict = { - "headers": self.base_headers, - "timeout": self.request_timeout, - } - + post_kwargs: StringDict = {} if isinstance(input_source, LocalInputSource): post_kwargs["files"] = { "file": input_source.read_contents(params.close_file) } elif isinstance(input_source, URLInputSource): data["url"] = input_source.url - post_kwargs["data"] = data + post_caller: Callable if self.http_client is None or self.http_client.is_closed: - response = httpx.post(url, **post_kwargs) + post_caller = httpx.post + post_kwargs["timeout"] = self.request_timeout else: - response = self.http_client.post(url, **post_kwargs) - return response + post_caller = self.http_client.post + return post_caller( + url, + headers=self.base_headers, + data=data, + **post_kwargs, + ) def req_get_job(self, job_id: str) -> httpx.Response: """ @@ -121,17 +131,21 @@ def req_get_job(self, job_id: str) -> httpx.Response: :param job_id: Job ID, returned by the enqueue request. """ - url = f"{self.url_root}/v2/jobs/{job_id}" - get_kwargs: StringDict = { - "headers": self.base_headers, - "timeout": self.request_timeout, - "follow_redirects": False, - } + get_caller: Callable + get_kwargs: StringDict = {} if self.http_client is None or self.http_client.is_closed: - return httpx.get(url, **get_kwargs) - return self.http_client.get(url, **get_kwargs) + get_caller = httpx.get + get_kwargs["timeout"] = self.request_timeout + else: + get_caller = self.http_client.get + return get_caller( + url=f"{self.url_root}/v2/jobs/{job_id}", + headers=self.base_headers, + follow_redirects=False, + **get_kwargs, + ) - def req_get_inference_by_url(self, url) -> httpx.Response: + def req_get_inference_by_url(self, url: str) -> httpx.Response: """ Sends a request matching a given inference_id. Returns either a Job or a Document. @@ -139,14 +153,19 @@ def req_get_inference_by_url(self, url) -> httpx.Response: :param url: URL to use for the request. :return: Response object from the request. """ - get_kwargs: StringDict = { - "headers": self.base_headers, - "timeout": self.request_timeout, - "follow_redirects": False, - } + get_caller: Callable + get_kwargs: StringDict = {} if self.http_client is None or self.http_client.is_closed: - return httpx.get(url, **get_kwargs) - return self.http_client.get(url, **get_kwargs) + get_caller = httpx.get + get_kwargs["timeout"] = self.request_timeout + else: + get_caller = self.http_client.get + return get_caller( + url=url, + headers=self.base_headers, + follow_redirects=False, + **get_kwargs, + ) def req_get_inference(self, inference_id: str, slug: str) -> httpx.Response: """ @@ -155,34 +174,43 @@ def req_get_inference(self, inference_id: str, slug: str) -> httpx.Response: :param inference_id: Inference ID, returned by the job request. :param slug: Slug of the inference, defaults to nothing. """ - url = f"{self.url_root}/v2/{slug}/{inference_id}" - get_kwargs: StringDict = { - "headers": self.base_headers, - "timeout": self.request_timeout, - "follow_redirects": False, - } + get_caller: Callable + get_kwargs: StringDict = {} if self.http_client is None or self.http_client.is_closed: - return httpx.get(url, **get_kwargs) - return self.http_client.get(url, **get_kwargs) + get_caller = httpx.get + get_kwargs["timeout"] = self.request_timeout + else: + get_caller = self.http_client.get + return get_caller( + url=f"{self.url_root}/v2/{slug}/{inference_id}", + headers=self.base_headers, + follow_redirects=False, + **get_kwargs, + ) def req_get_search_models( - self, model_name: str | None, model_type: str | None + self, name: str | None, model_type: str | None ) -> httpx.Response: """ Searches for a list of models matching criteria. - :param model_name: Name pattern to search for. + :param name: Name pattern to search for. :param model_type: Type of model to search for (exact match). :return: Response object containing search results. """ - url = f"{self.url_root}/v2/search/models" - get_kwargs: StringDict = { - "headers": self.base_headers, - "params": {"name": model_name, "model_type": model_type}, - "timeout": self.request_timeout, - } + get_caller: Callable + get_kwargs: StringDict = {} if self.http_client is None or self.http_client.is_closed: - return httpx.get(url, **get_kwargs) - return self.http_client.get(url, **get_kwargs) + get_caller = httpx.get + get_kwargs["timeout"] = self.request_timeout + else: + get_caller = self.http_client.get + return get_caller( + url=f"{self.url_root}/v2/search/models", + headers=self.base_headers, + params={"name": name, "model_type": model_type}, + follow_redirects=False, + **get_kwargs, + ) def enqueue( self, input_source: LocalInputSource | URLInputSource, params: BaseParameters @@ -193,7 +221,6 @@ def enqueue( :param params: Parameters :return: A valid inference Response. """ - response = self.req_post_inference_enqueue( input_source=input_source, params=params, slug=params.get_enqueue_slug() ) @@ -218,7 +245,7 @@ def get_job(self, job_id: str) -> JobResponse: handle_error_v2(dict_response) return JobResponse(dict_response) - def get_result(self, response_type, inference_id: str): + def get_result(self, response_type: type[ResponseT], inference_id: str): """ Get the result of an inference that was previously enqueued. @@ -232,7 +259,7 @@ def get_result(self, response_type, inference_id: str): handle_error_v2(dict_response) return response_type(dict_response) - def get_result_by_url(self, response_type, url: str): + def get_result_by_url(self, response_type: type[ResponseT], url: str): """ Get the result of an inference that was previously enqueued by its URL. diff --git a/tests/v2/test_client.py b/tests/v2/test_client.py index 6eac7bdb..ad4b9de2 100644 --- a/tests/v2/test_client.py +++ b/tests/v2/test_client.py @@ -307,3 +307,34 @@ def make_request(): successful_responses += 1 assert successful_responses == thread_count + + +@pytest.mark.v2 +@respx.mock +def test_explicit_timeout_failure(findoc_model_id) -> None: + respx.post("https://api-v2.mindee.net/v2/inferences/enqueue").mock( + side_effect=httpx.ReadTimeout("Simulated Read Timeout") + ) + + client = Client(api_key="dummy") + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") + params = ExtractionParameters(model_id=findoc_model_id) + + with pytest.raises(httpx.ReadTimeout): + client.enqueue(input_source, params) + + +@pytest.mark.v2 +@respx.mock +def test_explicit_500_server_error(findoc_model_id: str) -> None: + respx.post(re.compile(r"https://api-v2\.mindee\.net/v2/.+/enqueue")).mock( + return_value=httpx.Response(500, json={"message": "Internal Server Error"}) + ) + + client = Client(api_key="dummy") + input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") + params = ExtractionParameters(model_id=findoc_model_id) + with pytest.raises(MindeeHTTPUnknownErrorV2) as exc_info: + client.enqueue(input_source, params) + + assert "Couldn't deserialize server error" in str(exc_info.value) diff --git a/tests/v2/test_client_integration.py b/tests/v2/test_client_integration.py index b599f02f..1d9cd18b 100644 --- a/tests/v2/test_client_integration.py +++ b/tests/v2/test_client_integration.py @@ -1,10 +1,8 @@ import os -import re from pathlib import Path import httpx import pytest -import respx from mindee import ExtractionParameters from mindee.input.path_input import PathInput @@ -12,7 +10,6 @@ from mindee.v2.client import Client from mindee.v2.error.mindee_http_error_v2 import ( MindeeHTTPErrorV2, - MindeeHTTPUnknownErrorV2, ) from mindee.v2.parsing import InferenceActiveOptions from mindee.v2.product.extraction.extraction_response import ExtractionResponse @@ -344,43 +341,15 @@ def log_request(request: httpx.Request): @pytest.mark.integration def test_http2_client(findoc_model_id) -> None: httpx_client = httpx.Client(http2=True) - client = Client(http_client=httpx_client) - input_source = PathInput( - V2_PRODUCT_DATA_DIR / "extraction" / "financial_document" / "default_sample.jpg" - ) - params = ExtractionParameters(model_id=findoc_model_id) - response = client.enqueue_and_get_result(ExtractionResponse, input_source, params) - _basic_assert_success(response, page_count=1, model_id=findoc_model_id) - - -@pytest.mark.v2 -@pytest.mark.integration -@respx.mock -def test_explicit_timeout_failure(findoc_model_id) -> None: - respx.post("https://api-v2.mindee.net/v2/inferences/enqueue").mock( - side_effect=httpx.ReadTimeout("Simulated Read Timeout") - ) - - client = Client(api_key="dummy") - input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") - params = ExtractionParameters(model_id=findoc_model_id) - - with pytest.raises(httpx.ReadTimeout): - client.enqueue(input_source, params) - - -@pytest.mark.v2 -@pytest.mark.integration -@respx.mock -def test_explicit_500_server_error(findoc_model_id: str) -> None: - respx.post(re.compile(r"https://api-v2\.mindee\.net/v2/.+/enqueue")).mock( - return_value=httpx.Response(500, json={"message": "Internal Server Error"}) - ) - - client = Client(api_key="dummy") - input_source = PathInput(FILE_TYPES_DIR / "pdf" / "blank_1.pdf") - params = ExtractionParameters(model_id=findoc_model_id) - with pytest.raises(MindeeHTTPUnknownErrorV2) as exc_info: - client.enqueue(input_source, params) - - assert "Couldn't deserialize server error" in str(exc_info.value) + with Client(http_client=httpx_client) as client: + input_source = PathInput( + V2_PRODUCT_DATA_DIR + / "extraction" + / "financial_document" + / "default_sample.jpg" + ) + params = ExtractionParameters(model_id=findoc_model_id) + response = client.enqueue_and_get_result( + ExtractionResponse, input_source, params + ) + _basic_assert_success(response, page_count=1, model_id=findoc_model_id)