diff --git a/.github/workflows/release-testpypi.yml b/.github/workflows/release-testpypi.yml index 4935a55..9bff756 100644 --- a/.github/workflows/release-testpypi.yml +++ b/.github/workflows/release-testpypi.yml @@ -2,6 +2,17 @@ name: Release (TestPyPI) on: workflow_dispatch: + inputs: + ref: + description: 'Branch, tag, or commit SHA to build from' + required: true + default: 'main' + dev_version: + description: >- + Optional PEP 440 pre/dev version override (e.g. 1.1.0.dev1, 1.1.0a2, 1.1.0rc1). + Leave blank to use the version in pyproject.toml as-is. + required: false + default: '' jobs: build: @@ -9,6 +20,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + ref: ${{ inputs.ref }} - name: Set up uv uses: astral-sh/setup-uv@v5 @@ -18,11 +31,37 @@ jobs: - name: Set up Python run: uv python install 3.12 + - name: Apply dev_version override + if: inputs.dev_version != '' + # Pass the input through the environment, never interpolated into the script body: + # `${{ }}` is expanded into the script text before the shell runs, so splicing it inline + # would let a crafted dev_version inject shell commands before the validation below could reject it. + env: + DEV_VERSION: ${{ inputs.dev_version }} + run: | + # Reject anything that isn't a pre-release / dev version — final releases must go through release.yml. + if ! printf '%s' "${DEV_VERSION}" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+(a|b|rc|\.dev)[0-9]+$'; then + echo "::error::dev_version '${DEV_VERSION}' is not a PEP 440 pre/dev version (must end in aN, bN, rcN, or .devN)" + exit 1 + fi + uv run python -c " + import os, re, pathlib + version = os.environ['DEV_VERSION'] + path = pathlib.Path('pyproject.toml') + text = path.read_text() + new = re.sub(r'^version\s*=\s*\".*\"', f'version = \"{version}\"', text, count=1, flags=re.M) + if new == text: + raise SystemExit('Failed to locate version line in pyproject.toml') + path.write_text(new) + " + - name: Show package version + env: + REF: ${{ inputs.ref }} run: | VERSION="$(uv run python -c 'import tomllib; print(tomllib.loads(open("pyproject.toml","rb").read().decode())["project"]["version"])')" - echo "Publishing version: ${VERSION}" - echo "::notice title=TestPyPI version::${VERSION}" + echo "Publishing version: ${VERSION} (from ref ${REF})" + echo "::notice title=TestPyPI version::${VERSION} from ${REF}" - name: Build run: uv build diff --git a/HISTORY.rst b/HISTORY.rst index 73bb66e..dc72f9a 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,6 +2,29 @@ History ======= +1.1.0 (2026-06-24) +------------------ + +* Added discovery configuration models and management APIs. +* Added schema-discovery and file-data-discovery APIs that take a saved discovery configuration + (``start_schema_discovery_run_from_config`` / ``start_file_data_discovery_run_from_config``). + Adoption is recommended; the older APIs that take individual options will be deprecated in a future release. +* Corrected the file-data-discovery ``include``/``skip`` filter syntax and added ``ignore_rules`` support. +* Added ``InvalidDiscoveryConfigError`` and ``DiscoveryConfigNotFoundError``, + raised when a discovery run can't start due to an unusable or missing discovery config. +* Added ``get_discovery_run_config_snapshot_yaml`` to retrieve the discovery-config YAML + that was effective at the start of a given discovery run. +* Added ``is_user_subscribed`` to ``MaskingRunRequest`` to subscribe the requesting user to a run's email notifications. +* Added ``auto_pull`` / ``auto_pull_branch`` to ``MaskingRunOptions`` + to refresh the run's ruleset from git before starting. +* Added ``validation_error`` (and ``validation_error_type`` for rulesets) to ``Ruleset`` and ``RulesetLibrary``. +* Exposed git provenance on ``Ruleset`` and ``RulesetLibrary`` as a nested ``git`` field (``GitSnapshot``). +* Read-only fields (``id``, ``is_valid``, ``validation_error``, etc.) + are no longer echoed back in ``Ruleset`` / ``RulesetLibrary`` create/update request bodies. +* Fixed ``SslZipFile`` uploads to send the required ``database_type=mysql`` form field. +* **Breaking:** ``delete_ruleset_by_name_if_exists`` now requires a ``ruleset_type`` argument, + since ruleset names are unique only per type. + 1.0.5 (2026-06-18) ------------------ diff --git a/datamasque/client/__init__.py b/datamasque/client/__init__.py index c4f007d..0872d95 100644 --- a/datamasque/client/__init__.py +++ b/datamasque/client/__init__.py @@ -17,8 +17,10 @@ DataMasqueNotReadyError, DataMasqueTransportError, DataMasqueUserError, + DiscoveryConfigNotFoundError, FailedToStartError, IfmAuthError, + InvalidDiscoveryConfigError, InvalidLibraryError, InvalidRulesetError, RunNotCancellableError, @@ -54,10 +56,15 @@ from datamasque.client.models.discovery import ( ConstraintColumns, DiscoveryMatch, + FileDataDiscoveryFromConfigRequest, + FileDataDiscoveryOptions, + FileDataDiscoveryRequest, FileDiscoveryFile, FileDiscoveryLocatorResult, FileDiscoveryMatch, FileDiscoveryResult, + FileFilter, + FileFilterMatchAgainst, FileRulesetGenerationRequest, ForeignKeyRef, InDataDiscoveryConfig, @@ -65,11 +72,13 @@ ReferencingForeignKey, RulesetGenerationRequest, SchemaDiscoveryColumn, + SchemaDiscoveryFromConfigRequest, SchemaDiscoveryPage, SchemaDiscoveryRequest, SchemaDiscoveryResult, TableConstraints, ) +from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId, DiscoveryConfigType from datamasque.client.models.dm_instance import DataMasqueInstanceConfig from datamasque.client.models.files import ( DataMasqueFile, @@ -79,6 +88,7 @@ SnowflakeKeyFile, SslZipFile, ) +from datamasque.client.models.git import GitSnapshot from datamasque.client.models.ifm import ( DataMasqueIfmInstanceConfig, IfmLog, @@ -104,7 +114,12 @@ RunInfo, UnfinishedRun, ) -from datamasque.client.models.status import AsyncRulesetGenerationTaskStatus, MaskingRunStatus, ValidationStatus +from datamasque.client.models.status import ( + AsyncRulesetGenerationTaskStatus, + MaskingRunStatus, + ValidationErrorType, + ValidationStatus, +) from datamasque.client.models.user import User, UserId, UserRole __version__ = version("datamasque-python") @@ -130,18 +145,28 @@ "DatabaseConnectionConfig", "DatabaseType", "DatabricksConnectionConfig", + "DiscoveryConfig", + "DiscoveryConfigId", + "DiscoveryConfigNotFoundError", + "DiscoveryConfigType", "DiscoveryMatch", "DynamoConnectionConfig", "FailedToStartError", "FileConnectionConfig", + "FileDataDiscoveryFromConfigRequest", + "FileDataDiscoveryOptions", + "FileDataDiscoveryRequest", "FileDiscoveryFile", "FileDiscoveryLocatorResult", "FileDiscoveryMatch", "FileDiscoveryResult", + "FileFilter", + "FileFilterMatchAgainst", "FileId", "FileOrContent", "FileRulesetGenerationRequest", "ForeignKeyRef", + "GitSnapshot", "HashColumnsTableConfig", "IfmAuthError", "IfmLog", @@ -151,6 +176,7 @@ "IfmTokenInfo", "InDataDiscoveryConfig", "InDataDiscoveryRule", + "InvalidDiscoveryConfigError", "InvalidLibraryError", "InvalidRulesetError", "JsonPath", @@ -182,6 +208,7 @@ "RunNotCancellableError", "S3ConnectionConfig", "SchemaDiscoveryColumn", + "SchemaDiscoveryFromConfigRequest", "SchemaDiscoveryPage", "SchemaDiscoveryRequest", "SchemaDiscoveryResult", @@ -202,5 +229,6 @@ "UserId", "UserRole", "UserSelection", + "ValidationErrorType", "ValidationStatus", ] diff --git a/datamasque/client/discovery.py b/datamasque/client/discovery.py index 19b89f0..f6f3850 100644 --- a/datamasque/client/discovery.py +++ b/datamasque/client/discovery.py @@ -4,11 +4,15 @@ from pathlib import Path from typing import Iterator, Optional, Union +from requests import Response + from datamasque.client.base import BaseClient, UploadFile from datamasque.client.exceptions import ( AsyncRulesetGenerationInProgressError, DataMasqueException, + DiscoveryConfigNotFoundError, FailedToStartError, + InvalidDiscoveryConfigError, ) from datamasque.client.models.connection import ConnectionId from datamasque.client.models.data_selection import ( @@ -17,9 +21,12 @@ SelectedFileData, ) from datamasque.client.models.discovery import ( + FileDataDiscoveryFromConfigRequest, + FileDataDiscoveryRequest, FileDiscoveryResult, FileRulesetGenerationRequest, RulesetGenerationRequest, + SchemaDiscoveryFromConfigRequest, SchemaDiscoveryPage, SchemaDiscoveryRequest, SchemaDiscoveryResult, @@ -185,6 +192,13 @@ def get_generated_rulesets(self, connection_id: ConnectionId) -> list[Ruleset]: with zip_file.open(file_info) as file: yaml_content = file.read().decode("utf-8") rulesets.append(Ruleset(name=Path(file_info.filename).stem, yaml=yaml_content)) + + if not rulesets: + raise DataMasqueException( + f"Ruleset generation for connection {connection_id} reported `finished` " + f"but the downloaded archive contained no rulesets." + ) + return rulesets generated = response.json().get("generated_ruleset") @@ -230,6 +244,148 @@ def start_schema_discovery_run(self, discovery_config: SchemaDiscoveryRequest) - response=response, ) + def start_file_data_discovery_run(self, request: FileDataDiscoveryRequest) -> RunId: + """ + Starts a file data discovery run with the given configuration. + + Args: + request: A `FileDataDiscoveryRequest` with connection and optional settings. + + Returns: + RunId: The ID of the started discovery run + + Raises: + FailedToStartError: If run fails to start + """ + + data = request.model_dump(exclude_none=True, mode="json") + response = self.make_request( + "POST", + "/api/run-file-data-discovery/", + data=data, + require_status_check=False, + ) + run_data = response.json() + + if response.status_code == 201: + logger.info("File data discovery run %s started successfully", run_data["id"]) + return RunId(run_data["id"]) + + logger.error("File data discovery run failed to start: %s", run_data) + raise FailedToStartError( + f"File data discovery run failed to start " + f"(server responded with status {response.status_code}: {response.text}).", + response=response, + ) + + def start_schema_discovery_run_from_config(self, request: SchemaDiscoveryFromConfigRequest) -> RunId: + """ + Starts a schema discovery run from a saved discovery config. + + Args: + request: A `SchemaDiscoveryFromConfigRequest` with the `connection` and a required `discovery_config` + (a saved config, or `None` for the server's defaults). + + Returns: + RunId: The ID of the started discovery run + + Raises: + DiscoveryConfigNotFoundError: the referenced discovery config cannot be found + (it does not exist or is the wrong type for the run). + InvalidDiscoveryConfigError: the config is present but not in a `valid` validation state, + or its YAML is rejected when the run starts. + FailedToStartError: the run failed to start for any other reason. + """ + + return self._start_discovery_run_from_config(request, "/api/schema-discovery/v2/", "Schema discovery") + + def start_file_data_discovery_run_from_config(self, request: FileDataDiscoveryFromConfigRequest) -> RunId: + """ + Starts a file data discovery run from a saved discovery config. + + Args: + request: A `FileDataDiscoveryFromConfigRequest` with the `connection`, + a required `discovery_config` (a saved config, or `None` for the server's defaults), + and optional run `options`. + + Returns: + RunId: The ID of the started discovery run + + Raises: + DiscoveryConfigNotFoundError: the referenced discovery config cannot be found + (it does not exist or is the wrong type for the run). + InvalidDiscoveryConfigError: the config is present but not in a `valid` validation state, + or its YAML is rejected when the run starts. + FailedToStartError: the run failed to start for any other reason. + """ + + return self._start_discovery_run_from_config(request, "/api/run-file-data-discovery/v2/", "File data discovery") + + def _start_discovery_run_from_config( + self, + request: Union[SchemaDiscoveryFromConfigRequest, FileDataDiscoveryFromConfigRequest], + path: str, + run_kind: str, + ) -> RunId: + """Post a saved-config discovery request and return its run id, classifying config errors on failure.""" + + data = request.model_dump(exclude_none=True, mode="json") + # The server requires `discovery_config` to be present; a null selects its built-in defaults, + # so send it explicitly rather than letting `exclude_none` drop a None. + data.setdefault("discovery_config", None) + response = self.make_request("POST", path, data=data, require_status_check=False) + run_data = response.json() if response.content else {} + + if response.status_code == 201: + logger.info("%s run %s started successfully", run_kind, run_data["id"]) + return RunId(run_data["id"]) + + logger.error("%s run failed to start: %s", run_kind, run_data) + self._maybe_raise_discovery_config_error(run_data, response, run_kind) + raise FailedToStartError( + f"{run_kind} run failed to start (server responded with status {response.status_code}: {response.text}).", + response=response, + ) + + # Server key for a 400 that means the discovery config itself is unusable: + # a missing or wrong-type config, or one not in a `valid` validation state (string messages), + # or re-validation of broken saved-config YAML when the run starts + # (a `{"message", "line_number", "column_number"}` dict). + DISCOVERY_CONFIG_ERROR_FIELD = "discovery_config" + + # The phrase the server uses when the config id cannot be resolved (a missing or wrong-type config). + MISSING_DISCOVERY_CONFIG_SIGNATURE = "object does not exist" + + @classmethod + def _maybe_raise_discovery_config_error(cls, run_data: object, response: Response, run_kind: str) -> None: + """Raise a discovery-config error if the server's 400 body cites the discovery config.""" + if not isinstance(run_data, dict): + return + + if not (errors := run_data.get(cls.DISCOVERY_CONFIG_ERROR_FIELD)): + return + + detail = cls._format_discovery_config_error(errors) + if cls.MISSING_DISCOVERY_CONFIG_SIGNATURE in detail: + raise DiscoveryConfigNotFoundError( + f"{run_kind} run failed to start: the referenced discovery config could not be found: {detail}", + response=response, + ) + + raise InvalidDiscoveryConfigError( + f"{run_kind} run failed to start due to discovery config error: {detail}", + response=response, + ) + + @staticmethod + def _format_discovery_config_error(errors: object) -> str: + """Render the first server error, handling both string and `{message, ...}` dict items.""" + first = errors[0] if isinstance(errors, list) and errors else errors + if isinstance(first, dict) and "message" in first: + return str(first["message"]) + + return str(first) + def iter_schema_discovery_results(self, run_id: RunId) -> Iterator[SchemaDiscoveryResult]: """Lazily iterate all schema discovery results for a run via the paginated v2 endpoint.""" @@ -284,3 +440,22 @@ def get_file_data_discovery_report(self, run_id: RunId) -> list[FileDiscoveryRes response = self.make_request("GET", f"api/runs/{run_id}/file-discovery-results/") return [FileDiscoveryResult.model_validate(d) for d in response.json()] + + def get_discovery_run_config_snapshot_yaml(self, run_id: RunId, *, timezone: Optional[str] = None) -> str: + """ + Returns the discovery-config YAML that was effective at the start of the given discovery run. + + The YAML is prefixed with a commented provenance header naming the saved config + (or the built-in defaults) the run used, and whether it has since been modified or deleted. + `timezone`, a `±HH:MM` UTC offset, sets the timezone of the header timestamp; the server defaults to UTC. + """ + + params = {"timezone": timezone} if timezone is not None else None + response = self.make_request("GET", f"/api/discovery/runs/{run_id}/config-snapshot/", params=params) + with zipfile.ZipFile(BytesIO(response.content)) as zip_file: + names = zip_file.namelist() + if not names: + raise DataMasqueException(f"Discovery run {run_id} config snapshot archive contained no files.") + + with zip_file.open(names[0]) as snapshot_file: + return snapshot_file.read().decode("utf-8") diff --git a/datamasque/client/discovery_configs.py b/datamasque/client/discovery_configs.py new file mode 100644 index 0000000..60f448d --- /dev/null +++ b/datamasque/client/discovery_configs.py @@ -0,0 +1,158 @@ +import logging +from typing import Iterator, Optional + +from datamasque.client.base import BaseClient +from datamasque.client.exceptions import DataMasqueApiError, DataMasqueException +from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId, DiscoveryConfigType +from datamasque.client.models.pagination import Page + +logger = logging.getLogger(__name__) + + +class DiscoveryConfigClient(BaseClient): + """Discovery config CRUD API methods. Mixed into `DataMasqueClient`.""" + + def iter_discovery_configs(self) -> Iterator[DiscoveryConfig]: + """Lazily iterate all discovery configs via the paginated endpoint.""" + + return self._iter_paginated("/api/discovery/configs/", model=DiscoveryConfig) + + def list_discovery_configs(self) -> list[DiscoveryConfig]: + """ + Lists all discovery configs. + + Note: the YAML content is not included in the list response for performance. + Use `get_discovery_config` to retrieve the full config with its YAML body. + """ + + return list(self.iter_discovery_configs()) + + def get_discovery_config(self, config_id: DiscoveryConfigId) -> DiscoveryConfig: + """Retrieves a single discovery config by ID.""" + + response = self.make_request("GET", f"/api/discovery/configs/{config_id}/") + return DiscoveryConfig.model_validate(response.json()) + + def _get_discovery_config_id_by_name( + self, name: str, config_type: DiscoveryConfigType + ) -> Optional[DiscoveryConfigId]: + """Return the id of the config matching name and type via a single list request, or `None`.""" + + response = self.make_request( + "GET", + "/api/discovery/configs/", + params={"name_exact": name, "config_type": config_type.value, "limit": 1}, + ) + page = Page[DiscoveryConfig].model_validate(response.json()) + if not page.results: + return None + + config_id = page.results[0].id + if config_id is None: + raise DataMasqueApiError( + "Server returned a discovery config list entry without an `id`.", + response=response, + ) + + return config_id + + def get_discovery_config_by_name(self, name: str, config_type: DiscoveryConfigType) -> Optional[DiscoveryConfig]: + """ + Looks for a discovery config matching the given name and type (case-sensitive, exact match). + + Config names are unique per type, so a type is required to identify a single config. + Returns it if found, otherwise `None`. + """ + + config_id = self._get_discovery_config_id_by_name(name, config_type) + if config_id is None: + return None + + return self.get_discovery_config(config_id) + + def create_discovery_config(self, config: DiscoveryConfig) -> DiscoveryConfig: + """ + Creates a new discovery config on the server. + + Sets the config's server-assigned fields + (`id`, `is_valid`, `validation_error`, `created`, `modified`) and returns the config. + """ + + data = config.model_dump(exclude_none=True, by_alias=True, mode="json") + response = self.make_request("POST", "/api/discovery/configs/", data=data) + created = DiscoveryConfig.model_validate(response.json()) + config.id = created.id + config.is_valid = created.is_valid + config.validation_error = created.validation_error + config.created = created.created + config.modified = created.modified + logger.info('Creation of discovery config "%s" successful', config.name) + return config + + def update_discovery_config(self, config: DiscoveryConfig) -> DiscoveryConfig: + """ + Performs a full update of the discovery config. + + The config must have its `id` set + (i.e., it must have been previously created or retrieved from the server). + """ + + if config.id is None: + raise ValueError("Cannot update a discovery config that has not been created yet (id is None)") + + data = config.model_dump(exclude_none=True, by_alias=True, mode="json") + response = self.make_request("PUT", f"/api/discovery/configs/{config.id}/", data=data) + updated = DiscoveryConfig.model_validate(response.json()) + config.is_valid = updated.is_valid + config.validation_error = updated.validation_error + config.modified = updated.modified + logger.debug('Update of discovery config "%s" successful', config.name) + return config + + def create_or_update_discovery_config(self, config: DiscoveryConfig) -> DiscoveryConfig: + """ + Creates the config if it doesn't exist, or updates it if one with the same name already exists. + + Sets the config's `id` property. + """ + + existing_id = self._get_discovery_config_id_by_name(config.name, config.config_type) + if existing_id is not None: + config.id = existing_id + return self.update_discovery_config(config) + + return self.create_discovery_config(config) + + def delete_discovery_config_by_id_if_exists(self, config_id: DiscoveryConfigId) -> None: + """ + Deletes the discovery config with the given ID. + + No-op if the config does not exist. + """ + + self._delete_if_exists(f"/api/discovery/configs/{config_id}/") + + def delete_discovery_config_by_name_if_exists(self, name: str, config_type: DiscoveryConfigType) -> None: + """ + Deletes the discovery config with the given name and type. + + Config names are unique per type, so a type is required to identify a single config. + No-op if no such config exists. + """ + + matching = [ + config + for config in self.list_discovery_configs() + if config.name == name and config.config_type is config_type + ] + for config in matching: + if config.id is None: + raise DataMasqueException(f'Server returned a discovery config named "{config.name}" without an `id`.') + + self.delete_discovery_config_by_id_if_exists(config.id) + + def get_default_discovery_config_yaml(self) -> str: + """Returns the server's built-in default discovery configuration as a YAML string.""" + + response = self.make_request("GET", "/api/discovery/configs/defaults/") + return response.content.decode("utf-8") diff --git a/datamasque/client/dmclient.py b/datamasque/client/dmclient.py index 55cc6f3..25efcc6 100644 --- a/datamasque/client/dmclient.py +++ b/datamasque/client/dmclient.py @@ -1,6 +1,7 @@ from datamasque.client.base import FileOrContent, UploadFile from datamasque.client.connections import ConnectionClient from datamasque.client.discovery import DiscoveryClient +from datamasque.client.discovery_configs import DiscoveryConfigClient from datamasque.client.files import FileClient from datamasque.client.license import LicenseClient from datamasque.client.ruleset_libraries import RulesetLibraryClient @@ -20,6 +21,7 @@ class DataMasqueClient( FileClient, RunClient, DiscoveryClient, + DiscoveryConfigClient, UserClient, SettingsClient, ): diff --git a/datamasque/client/exceptions.py b/datamasque/client/exceptions.py index 05fe339..39c90fa 100644 --- a/datamasque/client/exceptions.py +++ b/datamasque/client/exceptions.py @@ -41,6 +41,23 @@ class InvalidLibraryError(FailedToStartError): """Specific error for when runs fail to start due to having an invalid ruleset library.""" +class InvalidDiscoveryConfigError(FailedToStartError): + """ + Raised when a discovery run fails to start because the referenced config is present but unusable. + + The config exists but is not in a `valid` validation state, or its YAML is rejected when the run starts. + A config that cannot be found raises `DiscoveryConfigNotFoundError` instead. + """ + + +class DiscoveryConfigNotFoundError(FailedToStartError): + """ + Raised when a discovery run references a discovery config that cannot be found. + + The config does not exist or is the wrong type for the run. + """ + + class DataMasqueTransportError(DataMasqueException): """ Raised when a request to the DataMasque server fails before any response is received. diff --git a/datamasque/client/files.py b/datamasque/client/files.py index 2f476e6..89d1a67 100644 --- a/datamasque/client/files.py +++ b/datamasque/client/files.py @@ -30,7 +30,7 @@ def upload_file( response = self.make_request( "POST", file_type.get_url(), - data={"name": file_name}, + data={"name": file_name, **file_type.get_extra_form_data()}, files=[ UploadFile( field_name=file_type.get_content_param_name(), diff --git a/datamasque/client/models/discovery.py b/datamasque/client/models/discovery.py index 7532526..7bccefa 100644 --- a/datamasque/client/models/discovery.py +++ b/datamasque/client/models/discovery.py @@ -1,12 +1,15 @@ """Typed request and response shapes for schema-discovery and ruleset-generation endpoints.""" +from enum import Enum from typing import Any, Optional, Union -from pydantic import BaseModel, ConfigDict, Field, field_validator +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator from datamasque.client.models.connection import ConnectionConfig, ConnectionId, unwrap_connection_id from datamasque.client.models.data_selection import HashColumnsTableConfig, Locator, UserSelection +from datamasque.client.models.discovery_config import DiscoveryConfig, DiscoveryConfigId, unwrap_discovery_config_id from datamasque.client.models.pagination import Page +from datamasque.client.models.runs import RunConnectionRef class InDataDiscoveryRule(BaseModel): @@ -27,15 +30,17 @@ class InDataDiscoveryConfig(BaseModel): row_sample_size: Optional[int] = None custom_rules: Optional[list[InDataDiscoveryRule]] = None non_sensitive_rules: Optional[list[InDataDiscoveryRule]] = None + ignore_rules: Optional[list[InDataDiscoveryRule]] = None force: Optional[bool] = None class SchemaDiscoveryRequest(BaseModel): """ - Request body for `POST /api/schema-discovery/`. + Request body for `POST /api/schema-discovery/` (the keyword-driven schema-discovery trigger). - `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` returned by an earlier client call. - Every other field uses the server's default value when omitted. + `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` + returned by an earlier client call. + This request does not accept a `discovery_config`. """ model_config = ConfigDict(extra="forbid") @@ -54,6 +59,45 @@ class SchemaDiscoveryRequest(BaseModel): def _unwrap_connection(cls, value: Any) -> Any: return unwrap_connection_id(value) + @model_validator(mode="before") + @classmethod + def _reject_discovery_config(cls, data: Any) -> Any: + if isinstance(data, dict) and "discovery_config" in data: + raise ValueError( + "`discovery_config` is not accepted by the keyword-driven schema-discovery request; " + "use `start_schema_discovery_run_from_config` with a `SchemaDiscoveryFromConfigRequest` " + "to run from a saved discovery config." + ) + return data + + +class SchemaDiscoveryFromConfigRequest(BaseModel): + """ + Request body for `POST /api/schema-discovery/v2/` (start a run from a saved discovery config). + + `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` + returned by an earlier client call. + `discovery_config` is required: pass a `DiscoveryConfigId`, a full `DiscoveryConfig`, + or `None` to run with the default discovery options. + `schemas` optionally scopes the run to specific schemas; omit it to scan the connection's default schema. + """ + + model_config = ConfigDict(extra="forbid") + + connection: Union[ConnectionId, ConnectionConfig] + discovery_config: Optional[Union[DiscoveryConfigId, DiscoveryConfig]] + schemas: Optional[list[str]] = None + + @field_validator("connection", mode="before") + @classmethod + def _unwrap_connection(cls, value: Any) -> Any: + return unwrap_connection_id(value) + + @field_validator("discovery_config", mode="before") + @classmethod + def _unwrap_discovery_config(cls, value: Any) -> Any: + return unwrap_discovery_config_id(value) + class RulesetGenerationRequest(BaseModel): """ @@ -77,6 +121,114 @@ def _unwrap_connection(cls, value: Any) -> Any: return unwrap_connection_id(value) +class FileFilterMatchAgainst(Enum): + """Which part of a file's path an `include`/`skip` filter is matched against.""" + + path = "path" + filename = "filename" + + +class FileFilter(BaseModel): + """ + A single `include` or `skip` filter for file data discovery. + + Exactly one of `glob` or `regex` must be set. + `match_against` selects whether the pattern is applied to the full path or just the filename + (defaults to the full path when omitted). + """ + + model_config = ConfigDict(extra="forbid") + + glob: Optional[str] = Field(default=None, min_length=1) + regex: Optional[str] = Field(default=None, min_length=1) + match_against: Optional[FileFilterMatchAgainst] = None + + @model_validator(mode="after") + def _check_glob_xor_regex(self) -> "FileFilter": + if (self.glob is None) == (self.regex is None): + raise ValueError("A `FileFilter` must set exactly one of `glob` or `regex`.") + return self + + +class FileDataDiscoveryOptions(BaseModel): + """Run options nested under `FileDataDiscoveryRequest.options`.""" + + model_config = ConfigDict(extra="forbid") + + diagnostic_logging: Optional[bool] = None + + +class FileDataDiscoveryRequest(BaseModel): + """ + Request body for `POST /api/run-file-data-discovery/` (the keyword-driven file-data-discovery trigger). + + `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` + returned by an earlier client call. + This request does not accept a `discovery_config`. + """ + + model_config = ConfigDict(extra="forbid") + + connection: Union[ConnectionId, ConnectionConfig] + options: Optional[FileDataDiscoveryOptions] = None + custom_keywords: list[str] = Field(default_factory=list) + ignored_keywords: list[str] = Field(default_factory=list) + disable_built_in_keywords: bool = False + disable_global_custom_keywords: Optional[bool] = None + disable_global_ignored_keywords: Optional[bool] = None + in_data_discovery: Optional[InDataDiscoveryConfig] = None + recurse: Optional[bool] = None + include: Optional[list[FileFilter]] = None + skip: Optional[list[FileFilter]] = None + encoding: Optional[str] = None + workers: Optional[int] = None + + @field_validator("connection", mode="before") + @classmethod + def _unwrap_connection(cls, value: Any) -> Any: + return unwrap_connection_id(value) + + @model_validator(mode="before") + @classmethod + def _reject_discovery_config(cls, data: Any) -> Any: + if isinstance(data, dict) and "discovery_config" in data: + raise ValueError( + "`discovery_config` is not accepted by the keyword-driven file-data-discovery request; " + "use `start_file_data_discovery_run_from_config` with a `FileDataDiscoveryFromConfigRequest` " + "to run from a saved discovery config." + ) + return data + + +class FileDataDiscoveryFromConfigRequest(BaseModel): + """ + Request body for `POST /api/run-file-data-discovery/v2/` (start a run from a saved discovery config). + + `connection` accepts either a `ConnectionId` or a full `ConnectionConfig` + returned by an earlier client call. + `discovery_config` is required: pass a `DiscoveryConfigId`, a full `DiscoveryConfig`, + or `None` to run with the server's default discovery options. + `options` carries the `diagnostic_logging` run-time toggle; + detection and file-handling settings come from the discovery config, not the request. + """ + + model_config = ConfigDict(extra="forbid") + + connection: Union[ConnectionId, ConnectionConfig] + discovery_config: Optional[Union[DiscoveryConfigId, DiscoveryConfig]] + options: Optional[FileDataDiscoveryOptions] = None + + @field_validator("connection", mode="before") + @classmethod + def _unwrap_connection(cls, value: Any) -> Any: + return unwrap_connection_id(value) + + @field_validator("discovery_config", mode="before") + @classmethod + def _unwrap_discovery_config(cls, value: Any) -> Any: + return unwrap_discovery_config_id(value) + + class FileRulesetGenerationRequest(BaseModel): """ Request body for `POST /api/generate-file-ruleset/`. @@ -189,11 +341,11 @@ class FileDiscoveryMatch(BaseModel): model_config = ConfigDict(extra="allow") - categories: Optional[list[str]] = None - flagged_by: Optional[str] = None - description: Optional[str] = None - label: Optional[str] = None - hit_ratio: Optional[int] = None + flagged_by: str + description: str + label: Optional[str] = None # Omitted for non-sensitive and ignored matches. + categories: Optional[list[str]] = None # Omitted for ignored matches. + hit_ratio: Optional[int] = None # None for metadata matches, percentage 0-100 for IDD matches. class FileDiscoveryLocatorResult(BaseModel): @@ -201,9 +353,9 @@ class FileDiscoveryLocatorResult(BaseModel): model_config = ConfigDict(extra="allow") - locator: Optional[Locator] = None - matches: Optional[list[FileDiscoveryMatch]] = None - data_types: Optional[list[str]] = None + locator: Locator + matches: list[FileDiscoveryMatch] + data_types: list[str] class FileDiscoveryFile(BaseModel): @@ -211,8 +363,8 @@ class FileDiscoveryFile(BaseModel): model_config = ConfigDict(extra="allow") - path: Optional[str] = None - file_type: Optional[str] = None + path: str + file_type: str delimiter: Optional[str] = None encoding: Optional[str] = None @@ -222,8 +374,8 @@ class FileDiscoveryResult(BaseModel): model_config = ConfigDict(extra="allow") - id: Optional[int] = None - connection: Optional[Any] = None - file_type: Optional[str] = None - files: Optional[list[FileDiscoveryFile]] = None - results: Optional[list[FileDiscoveryLocatorResult]] = None + id: int + connection: RunConnectionRef + file_type: str + files: list[FileDiscoveryFile] + results: list[FileDiscoveryLocatorResult] diff --git a/datamasque/client/models/discovery_config.py b/datamasque/client/models/discovery_config.py new file mode 100644 index 0000000..09b123f --- /dev/null +++ b/datamasque/client/models/discovery_config.py @@ -0,0 +1,53 @@ +import enum +from datetime import datetime +from typing import Any, NewType, Optional + +from pydantic import BaseModel, ConfigDict, Field + +from datamasque.client.models.status import ValidationStatus + +DiscoveryConfigId = NewType("DiscoveryConfigId", str) + + +class DiscoveryConfigType(enum.Enum): + """Which discovery config variant a config targets: database (qualified columns) or file (locators).""" + + database = "database" + file = "file" + + +def unwrap_discovery_config_id(value: Any) -> Any: + """ + Coerce a `DiscoveryConfig` to its `id`; pass other values through unchanged. + + Used by request-model validators that accept either a `DiscoveryConfigId` + or a full `DiscoveryConfig` for user convenience. + Raises `ValueError` if the config has no `id` + (i.e. the caller hasn't yet created it on the server). + """ + + if isinstance(value, DiscoveryConfig): + if value.id is None: + raise ValueError("Discovery config has not been created yet (id is None)") + return value.id + + return value + + +class DiscoveryConfig(BaseModel): + """Represents a named, persisted YAML discovery configuration.""" + + model_config = ConfigDict(extra="allow", populate_by_name=True) + + name: str + yaml: Optional[str] = Field(default=None, alias="config_yaml") + config_type: DiscoveryConfigType + + # Server-populated read-only fields, excluded from request bodies. + id: Optional[DiscoveryConfigId] = Field(default=None, exclude=True) + is_valid: Optional[ValidationStatus] = Field(default=None, exclude=True) + """Validation status; may be `in_progress` briefly after creating a large config.""" + validation_error: Optional[str] = Field(default=None, exclude=True) + """Human-readable validation error, or `None` when valid.""" + created: Optional[datetime] = Field(default=None, exclude=True) + modified: Optional[datetime] = Field(default=None, exclude=True) diff --git a/datamasque/client/models/files.py b/datamasque/client/models/files.py index 59bfb7f..13c9846 100644 --- a/datamasque/client/models/files.py +++ b/datamasque/client/models/files.py @@ -40,6 +40,12 @@ def get_content_param_name(cls) -> str: raise NotImplementedError # pragma: no cover + @classmethod + def get_extra_form_data(cls) -> dict[str, str]: + """Extra multipart form fields to send alongside the file on upload. Empty by default.""" + + return {} + class SeedFile(DataMasqueFile): """Represents a seed file (CSV file).""" @@ -76,6 +82,11 @@ def get_url(cls) -> str: def get_content_param_name(cls) -> str: return "zip_archive" + @classmethod + def get_extra_form_data(cls) -> dict[str, str]: + # The connection-filesets endpoint requires a database_type; SSL filesets are MySQL-only today. + return {"database_type": "mysql"} + class SnowflakeKeyFile(DataMasqueFile): """Represents a private SSH key file for Snowflake connections.""" diff --git a/datamasque/client/models/git.py b/datamasque/client/models/git.py new file mode 100644 index 0000000..df8e71a --- /dev/null +++ b/datamasque/client/models/git.py @@ -0,0 +1,60 @@ +from collections.abc import Mapping +from datetime import datetime +from typing import Any, Optional + +from pydantic import BaseModel, ConfigDict, Field, model_validator + + +class GitSnapshot(BaseModel): + """ + Git provenance for a ruleset or ruleset library. + + Identifies the commit the entity's contents came from — + `commit_sha` on `branch` in `repo_url`, as of `synced_at`. + """ + + branch: str + commit_sha: str + repo_url: str + synced_at: datetime + + +GIT_RESPONSE_FIELDS = ("git_branch", "git_commit_sha", "git_repo_url", "git_synced_at") + + +def git_snapshot_from_response(data: Mapping[str, Any]) -> Optional[GitSnapshot]: + """Build a `GitSnapshot` from the server's flat `git_*` fields, or `None` when not git-synced.""" + if data.get("git_branch") is None: + return None + + return GitSnapshot( + branch=data["git_branch"], + commit_sha=data["git_commit_sha"], + repo_url=data["git_repo_url"], + synced_at=data["git_synced_at"], + ) + + +class GitTrackedEntity(BaseModel): + """Base class for rulesets and ruleset libraries that carry git provenance.""" + + model_config = ConfigDict(extra="allow", populate_by_name=True) + + git: Optional[GitSnapshot] = Field(default=None, exclude=True) + """Git provenance, or `None` when the entity is not currently in sync with a git commit.""" + + @model_validator(mode="before") + @classmethod + def _collapse_git_fields(cls, data: Any) -> Any: + if not isinstance(data, dict) or "git" in data: + return data + + data = dict(data) + snapshot = git_snapshot_from_response(data) + for field in GIT_RESPONSE_FIELDS: + data.pop(field, None) + + if snapshot is not None: + data["git"] = snapshot + + return data diff --git a/datamasque/client/models/ruleset.py b/datamasque/client/models/ruleset.py index 564b7c1..362fc8b 100644 --- a/datamasque/client/models/ruleset.py +++ b/datamasque/client/models/ruleset.py @@ -1,9 +1,10 @@ import enum from typing import Any, NewType, Optional -from pydantic import BaseModel, ConfigDict, Field +from pydantic import Field -from datamasque.client.models.status import ValidationStatus +from datamasque.client.models.git import GitTrackedEntity +from datamasque.client.models.status import ValidationErrorType, ValidationStatus RulesetId = NewType("RulesetId", str) @@ -33,13 +34,17 @@ class RulesetType(enum.Enum): database = "database" -class Ruleset(BaseModel): +class Ruleset(GitTrackedEntity): """Represents a ruleset.""" - model_config = ConfigDict(extra="allow", populate_by_name=True) - name: str yaml: str = Field(default="", alias="config_yaml") ruleset_type: RulesetType = Field(default=RulesetType.database, alias="mask_type") - id: Optional[RulesetId] = None - is_valid: Optional[ValidationStatus] = None + + # Server-populated read-only fields, excluded from request bodies. + id: Optional[RulesetId] = Field(default=None, exclude=True) + is_valid: Optional[ValidationStatus] = Field(default=None, exclude=True) + validation_error: Optional[str] = Field(default=None, exclude=True) + """Human-readable validation error, or `None` when valid.""" + validation_error_type: Optional[ValidationErrorType] = Field(default=None, exclude=True) + """Category of the validation failure, or `None` when valid.""" diff --git a/datamasque/client/models/ruleset_library.py b/datamasque/client/models/ruleset_library.py index 1ebe789..d1e1d3b 100644 --- a/datamasque/client/models/ruleset_library.py +++ b/datamasque/client/models/ruleset_library.py @@ -1,22 +1,25 @@ from datetime import datetime from typing import NewType, Optional -from pydantic import BaseModel, ConfigDict, Field +from pydantic import Field +from datamasque.client.models.git import GitTrackedEntity from datamasque.client.models.status import ValidationStatus RulesetLibraryId = NewType("RulesetLibraryId", str) -class RulesetLibrary(BaseModel): +class RulesetLibrary(GitTrackedEntity): """Represents a ruleset library.""" - model_config = ConfigDict(extra="allow", populate_by_name=True) - name: str namespace: str = "" yaml: Optional[str] = Field(default=None, alias="config_yaml") - id: Optional[RulesetLibraryId] = None - is_valid: Optional[ValidationStatus] = None - created: Optional[datetime] = None - modified: Optional[datetime] = None + + # Server-populated read-only fields, excluded from request bodies. + id: Optional[RulesetLibraryId] = Field(default=None, exclude=True) + is_valid: Optional[ValidationStatus] = Field(default=None, exclude=True) + validation_error: Optional[str] = Field(default=None, exclude=True) + """Human-readable validation error, or `None` when valid.""" + created: Optional[datetime] = Field(default=None, exclude=True) + modified: Optional[datetime] = Field(default=None, exclude=True) diff --git a/datamasque/client/models/runs.py b/datamasque/client/models/runs.py index 379f82a..dc15663 100644 --- a/datamasque/client/models/runs.py +++ b/datamasque/client/models/runs.py @@ -30,6 +30,7 @@ class MaskingRunOptions(BaseModel): if supplied, must be 16–256 characters and is used as the per-run encryption key; the server auto-generates one when omitted. + Set `auto_pull` to refresh the run's ruleset from git before the run starts. """ model_config = ConfigDict(extra="forbid") @@ -41,6 +42,10 @@ class MaskingRunOptions(BaseModel): diagnostic_logging: Optional[bool] = None run_secret: Optional[str] = Field(default=None, min_length=16, max_length=256) disable_instance_secret: Optional[bool] = None + auto_pull: Optional[bool] = None + """When `True`, pull the run's ruleset from git before starting.""" + auto_pull_branch: Optional[str] = None + """Branch to auto-pull from; omitted or empty uses the instance's configured branch.""" class MaskingRunRequest(BaseModel): @@ -50,6 +55,9 @@ class MaskingRunRequest(BaseModel): `connection`, `destination_connection`, and `ruleset` accept either the server-assigned ID or the corresponding object returned by an earlier client call (e.g. a `ConnectionConfig` or `Ruleset`); the object's `id` is extracted at construction time. + + Set `is_user_subscribed=True` to subscribe the requesting user to the run's email notifications; + when omitted the server applies its default (no subscription). """ model_config = ConfigDict(extra="forbid") @@ -60,6 +68,7 @@ class MaskingRunRequest(BaseModel): destination_connection: Optional[Union[ConnectionId, ConnectionConfig]] = None options: MaskingRunOptions = Field(default_factory=MaskingRunOptions) name: Optional[str] = None + is_user_subscribed: Optional[bool] = None @field_validator("connection", "destination_connection", mode="before") @classmethod diff --git a/datamasque/client/models/status.py b/datamasque/client/models/status.py index b09e18f..f470fd0 100644 --- a/datamasque/client/models/status.py +++ b/datamasque/client/models/status.py @@ -10,6 +10,15 @@ class ValidationStatus(enum.Enum): unknown = "unknown" +class ValidationErrorType(enum.Enum): + """Categorises why a ruleset failed validation (see `Ruleset.validation_error_type`).""" + + ruleset = "ruleset" + library_missing = "library_missing" + library_invalid = "library_invalid" + expansion = "expansion" # The ruleset is not valid once its library references are expanded. + + class MaskingRunStatus(enum.Enum): """List of valid masking run statuses.""" diff --git a/datamasque/client/ruleset_libraries.py b/datamasque/client/ruleset_libraries.py index 81acb8f..e734bc2 100644 --- a/datamasque/client/ruleset_libraries.py +++ b/datamasque/client/ruleset_libraries.py @@ -63,7 +63,8 @@ def create_ruleset_library(self, library: RulesetLibrary) -> RulesetLibrary: """ Creates a new ruleset library on the server. - Sets the library's server-assigned fields (`id`, `is_valid`, `created`, `modified`) and returns the library. + Sets the library's server-assigned fields + (`id`, `is_valid`, `validation_error`, `git`, `created`, `modified`) and returns the library. """ data = library.model_dump(exclude_none=True, by_alias=True, mode="json") @@ -71,6 +72,8 @@ def create_ruleset_library(self, library: RulesetLibrary) -> RulesetLibrary: created_library = RulesetLibrary.model_validate(response.json()) library.id = created_library.id library.is_valid = created_library.is_valid + library.validation_error = created_library.validation_error + library.git = created_library.git library.created = created_library.created library.modified = created_library.modified logger.info('Creation of ruleset library "%s" successful', library.name) @@ -90,6 +93,8 @@ def update_ruleset_library(self, library: RulesetLibrary) -> RulesetLibrary: response = self.make_request("PUT", f"/api/ruleset-libraries/{library.id}/", data=data) updated_library = RulesetLibrary.model_validate(response.json()) library.is_valid = updated_library.is_valid + library.validation_error = updated_library.validation_error + library.git = updated_library.git library.modified = updated_library.modified logger.debug('Update of ruleset library "%s" successful', library.name) return library @@ -110,7 +115,7 @@ def create_or_update_ruleset_library(self, library: RulesetLibrary) -> RulesetLi def delete_ruleset_library_by_id_if_exists(self, library_id: RulesetLibraryId, *, force: bool = False) -> None: """ - Deletes (archives) the ruleset library with the given ID. + Deletes the ruleset library with the given ID. No-op if the library does not exist. @@ -139,13 +144,13 @@ def delete_ruleset_library_by_name_if_exists( self.delete_ruleset_library_by_id_if_exists(lib.id, force=force) def iter_rulesets_using_library(self, library_id: RulesetLibraryId) -> Iterator[Ruleset]: - """Lazily iterate non-archived rulesets that import the given library.""" + """Lazily iterate rulesets that import the given library.""" return self._iter_paginated(f"/api/ruleset-libraries/{library_id}/rulesets/", model=Ruleset) def list_rulesets_using_library(self, library_id: RulesetLibraryId) -> list[Ruleset]: """ - Lists non-archived rulesets that import the given library. + Lists rulesets that import the given library. Note: The YAML content is not included in the response for performance. Each returned Ruleset will have an empty string for `yaml`. diff --git a/datamasque/client/rulesets.py b/datamasque/client/rulesets.py index 4b9e038..5ea0ce9 100644 --- a/datamasque/client/rulesets.py +++ b/datamasque/client/rulesets.py @@ -2,8 +2,7 @@ from datamasque.client.base import BaseClient from datamasque.client.exceptions import DataMasqueException -from datamasque.client.models.ruleset import Ruleset, RulesetId -from datamasque.client.models.status import ValidationStatus +from datamasque.client.models.ruleset import Ruleset, RulesetId, RulesetType logger = logging.getLogger(__name__) @@ -21,17 +20,18 @@ def create_or_update_ruleset(self, ruleset: Ruleset) -> Ruleset: """ Creates or updates a ruleset. - Populates the given ruleset's `id` and `is_valid` fields from the server response, - and returns the same ruleset instance for convenience. + Populates the given ruleset's `id`, `is_valid`, `validation_error`, `validation_error_type`, + and `git` fields from the server response, and returns the same ruleset instance for convenience. """ data = ruleset.model_dump(exclude_none=True, by_alias=True, mode="json") response = self.make_request("POST", "/api/rulesets/", data=data, params={"upsert": "true"}) - response_data = response.json() - ruleset.id = RulesetId(response_data["id"]) - is_valid = response_data.get("is_valid") - if is_valid is not None: - ruleset.is_valid = ValidationStatus(is_valid) + created = Ruleset.model_validate(response.json()) + ruleset.id = created.id + ruleset.is_valid = created.is_valid + ruleset.validation_error = created.validation_error + ruleset.validation_error_type = created.validation_error_type + ruleset.git = created.git if response.status_code == 201: logger.info('Creation of ruleset "%s" successful', ruleset.name) @@ -45,12 +45,20 @@ def delete_ruleset_by_id_if_exists(self, ruleset_id: RulesetId) -> None: self._delete_if_exists(f"/api/rulesets/{ruleset_id}/") - def delete_ruleset_by_name_if_exists(self, ruleset_name: str) -> None: - """Deletes the ruleset with the given name. No-op if the ruleset does not exist.""" + def delete_ruleset_by_name_if_exists(self, ruleset_name: str, ruleset_type: RulesetType) -> None: + """ + Deletes the ruleset with the given name and type. + + Ruleset names are unique per type, so a type is required to identify a single ruleset. + No-op if no such ruleset exists. + """ - all_rulesets = self.list_rulesets() - rulesets_matching_name = [ruleset for ruleset in all_rulesets if ruleset.name == ruleset_name] - for ruleset in rulesets_matching_name: + matching = [ + ruleset + for ruleset in self.list_rulesets() + if ruleset.name == ruleset_name and ruleset.ruleset_type is ruleset_type + ] + for ruleset in matching: if ruleset.id is None: raise DataMasqueException(f'Server returned a ruleset named "{ruleset.name}" without an `id`.') diff --git a/pyproject.toml b/pyproject.toml index 5fdc7d0..1d226e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "datamasque-python" -version = "1.0.5" +version = "1.1.0" description = "Official Python client for the DataMasque data-masking API." authors = [ { name = "DataMasque Ltd" }, diff --git a/setup.cfg b/setup.cfg index f04494b..26b71a7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.0.4 +current_version = 1.1.0 commit = True tag = True diff --git a/tests/test_discovery.py b/tests/test_discovery.py index 2830287..42debfd 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -6,12 +6,23 @@ import pytest import requests_mock +from pydantic import ValidationError from datamasque.client import ( DataMasqueClient, + DiscoveryConfig, + DiscoveryConfigId, + FileDataDiscoveryFromConfigRequest, + FileDataDiscoveryOptions, + FileDataDiscoveryRequest, + FileDiscoveryResult, + FileFilter, + FileFilterMatchAgainst, FileRulesetGenerationRequest, + InDataDiscoveryConfig, RulesetGenerationRequest, RunId, + SchemaDiscoveryFromConfigRequest, SchemaDiscoveryPage, SchemaDiscoveryRequest, SchemaDiscoveryResult, @@ -20,13 +31,17 @@ AsyncRulesetGenerationInProgressError, DataMasqueApiError, DataMasqueException, + DiscoveryConfigNotFoundError, FailedToStartError, + InvalidDiscoveryConfigError, ) from datamasque.client.models.connection import ConnectionId, DatabaseConnectionConfig, DatabaseType from datamasque.client.models.data_selection import SelectedColumns, SelectedFileData, UserSelection from datamasque.client.models.status import AsyncRulesetGenerationTaskStatus from tests.helpers import parse_multipart_form +DISCOVERY_CONFIG_ID = "aaaaaaaa-1111-2222-3333-444444444444" + def test_generate_ruleset(client): req = RulesetGenerationRequest(connection="conn-1", selected_columns={"public": {"users": ["email"]}}) @@ -178,6 +193,33 @@ def test_get_generated_rulesets_success(client): assert rulesets[1].yaml == yaml_content_2.decode("utf-8") +def test_get_generated_rulesets_empty_archive_raises(client): + """A finished task whose download archive contains no ruleset files raises a clear error.""" + connection_id = ConnectionId("1") + + with requests_mock.Mocker() as m: + m.get( + f"http://test-server/api/async-generate-ruleset/{connection_id}/", + json={"status": "finished"}, + status_code=200, + ) + + zip_buffer = BytesIO() + with zipfile.ZipFile(zip_buffer, "w") as zip_file: + zip_file.writestr("README.txt", "no rulesets here") + zip_buffer.seek(0) + + m.get( + f"http://test-server/api/async-generate-ruleset/{connection_id}/download-rulesets/", + content=zip_buffer.getvalue(), + headers={"Content-Disposition": 'attachment; filename="rulesets.zip"'}, + status_code=200, + ) + + with pytest.raises(DataMasqueException, match="contained no rulesets"): + client.get_generated_rulesets(connection_id) + + def test_get_generated_rulesets_from_selection_success(client): """Non-CSV async RG: server 303s to the task-status endpoint, whose JSON body carries `generated_ruleset`.""" connection_id = ConnectionId("1") @@ -727,3 +769,497 @@ def test_start_schema_discovery_run_raises_on_non_201(client): ) with pytest.raises(FailedToStartError, match="Schema discovery run failed to start"): client.start_schema_discovery_run(SchemaDiscoveryRequest(connection="nope")) + + +def test_schema_discovery_request_rejects_discovery_config(): + """The v1 schema-discovery request rejects `discovery_config` and points the user at the from-config method.""" + with pytest.raises(ValidationError, match="start_schema_discovery_run_from_config"): + SchemaDiscoveryRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)) + + +def test_file_data_discovery_request_rejects_discovery_config(): + """The v1 file-data-discovery request rejects `discovery_config` and points the user at the from-config method.""" + with pytest.raises(ValidationError, match="start_file_data_discovery_run_from_config"): + FileDataDiscoveryRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)) + + +def test_start_file_data_discovery_run_minimal(client): + """A minimal FDD request — only `connection` set — round-trips through the server.""" + req = FileDataDiscoveryRequest(connection="conn-1") + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/run-file-data-discovery/", + json={"id": 42}, + status_code=201, + ) + run_id = client.start_file_data_discovery_run(req) + + assert run_id == 42 + body = m.last_request.json() + assert body["connection"] == "conn-1" + assert "discovery_config" not in body + + +def test_start_file_data_discovery_run_full(client): + """All legacy FDD request fields populate the wire payload and pass through unwrap helpers.""" + req = FileDataDiscoveryRequest( + connection="conn-1", + options=FileDataDiscoveryOptions(diagnostic_logging=True), + custom_keywords=["foo"], + ignored_keywords=["bar"], + disable_built_in_keywords=True, + disable_global_custom_keywords=True, + disable_global_ignored_keywords=False, + in_data_discovery={"enabled": True, "row_sample_size": 50}, + recurse=True, + include=[{"glob": "*.csv"}], + skip=[{"regex": r".*/tmp/.*", "match_against": "path"}], + encoding="utf-8", + workers=4, + ) + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/run-file-data-discovery/", + json={"id": 99}, + status_code=201, + ) + assert client.start_file_data_discovery_run(req) == 99 + + body = m.last_request.json() + assert body == { + "connection": "conn-1", + "options": {"diagnostic_logging": True}, + "custom_keywords": ["foo"], + "ignored_keywords": ["bar"], + "disable_built_in_keywords": True, + "disable_global_custom_keywords": True, + "disable_global_ignored_keywords": False, + "in_data_discovery": {"enabled": True, "row_sample_size": 50}, + "recurse": True, + "include": [{"glob": "*.csv"}], + "skip": [{"regex": r".*/tmp/.*", "match_against": "path"}], + "encoding": "utf-8", + "workers": 4, + } + + +def test_file_filter_requires_exactly_one_of_glob_or_regex(): + """A `FileFilter` with neither, or both, of `glob`/`regex` is rejected.""" + FileFilter(glob="*.csv") + FileFilter(regex=r".*\.csv") + FileFilter(glob="*.csv", match_against=FileFilterMatchAgainst.filename) + + with pytest.raises(ValidationError, match="exactly one of `glob` or `regex`"): + FileFilter() + + with pytest.raises(ValidationError, match="exactly one of `glob` or `regex`"): + FileFilter(glob="*.csv", regex=r".*\.csv") + + +def test_file_filter_rejects_empty_pattern(): + """An empty `glob`/`regex` is rejected rather than silently treated as unset.""" + with pytest.raises(ValidationError): + FileFilter(glob="") + + with pytest.raises(ValidationError): + FileFilter(regex="", glob="*.csv") + + +def test_file_discovery_result_parses_server_response(): + """A file-discovery-results record parses; match label/categories stay optional for non-sensitive matches.""" + result = FileDiscoveryResult.model_validate( + { + "id": 7, + "connection": {"id": "conn-1", "name": "my files"}, + "file_type": "csv", + "files": [{"path": "data/people.csv", "file_type": "csv", "delimiter": ",", "encoding": "utf-8"}], + "results": [ + { + "locator": "email", + "data_types": ["string"], + "matches": [ + {"flagged_by": "MDD", "description": "Email", "label": "email", "categories": ["PII"]}, + {"flagged_by": "MDD", "description": "Not sensitive"}, + ], + } + ], + } + ) + + assert result.id == 7 + assert result.connection.name == "my files" + assert result.files[0].path == "data/people.csv" + non_sensitive_match = result.results[0].matches[1] + assert non_sensitive_match.label is None + assert non_sensitive_match.categories is None + + +def test_file_data_discovery_ignore_rules_serialize(): + """`in_data_discovery.ignore_rules` round-trips into the wire payload.""" + req = FileDataDiscoveryRequest( + connection="conn-1", + in_data_discovery=InDataDiscoveryConfig( + enabled=True, + custom_rules=[{"name": "cc", "pattern": r"^1234"}], + non_sensitive_rules=[{"pattern": r"^5555"}], + ignore_rules=[{"pattern": r"^4321"}], + ), + ) + dumped = req.model_dump(exclude_none=True, mode="json") + assert dumped["in_data_discovery"]["ignore_rules"] == [{"pattern": r"^4321"}] + + +def test_start_file_data_discovery_run_raises_on_non_201(client): + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/run-file-data-discovery/", + json={"detail": "connection not found"}, + status_code=400, + ) + with pytest.raises(FailedToStartError, match="File data discovery run failed to start"): + client.start_file_data_discovery_run(FileDataDiscoveryRequest(connection="nope")) + + +def test_schema_discovery_from_config_request_accepts_discovery_config_id(): + """A `DiscoveryConfigId` string passes through unchanged in the v2 request body.""" + req = SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)) + assert req.model_dump(exclude_none=True, mode="json") == { + "connection": "conn-1", + "discovery_config": DISCOVERY_CONFIG_ID, + } + + +def test_schema_discovery_from_config_request_unwraps_discovery_config_model(): + """Passing a full `DiscoveryConfig` object substitutes its `id` for the wire payload.""" + config = DiscoveryConfig(name="my_cfg", config_type="database", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID)) + req = SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=config) + assert req.model_dump(exclude_none=True, mode="json")["discovery_config"] == DISCOVERY_CONFIG_ID + + +def test_schema_discovery_from_config_request_rejects_unsaved_discovery_config(): + """A `DiscoveryConfig` without an `id` cannot be used yet — raises immediately.""" + config = DiscoveryConfig(name="my_cfg", config_type="database") + with pytest.raises(ValueError, match="id is None"): + SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=config) + + +def test_schema_discovery_from_config_request_rejects_legacy_fields(): + """The saved-config request rejects legacy detection options — they live in the config.""" + with pytest.raises(ValidationError): + SchemaDiscoveryFromConfigRequest( + connection="conn-1", + discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID), + custom_keywords=["foo"], + ) + + +def test_schema_discovery_from_config_request_accepts_schemas(): + """`schemas` scopes the saved-config schema run to specific schemas and is forwarded as-is.""" + req = SchemaDiscoveryFromConfigRequest( + connection="conn-1", + discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID), + schemas=["public", "sales"], + ) + assert req.model_dump(exclude_none=True, mode="json") == { + "connection": "conn-1", + "discovery_config": DISCOVERY_CONFIG_ID, + "schemas": ["public", "sales"], + } + + +@pytest.mark.parametrize( + "extra_field", + [ + {"custom_keywords": ["foo"]}, + {"in_data_discovery": {"enabled": True}}, + {"recurse": True}, + {"include": [{"glob": "*.csv"}]}, + {"skip": [{"glob": "*.tmp"}]}, + {"encoding": "utf-8"}, + {"workers": 4}, + ], +) +def test_file_data_discovery_from_config_request_rejects_non_config_fields(extra_field): + """Detection options and file-handling params are rejected — they live in the config, not the request.""" + with pytest.raises(ValidationError): + FileDataDiscoveryFromConfigRequest( + connection="conn-1", + discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID), + **extra_field, + ) + + +def test_schema_discovery_from_config_request_requires_discovery_config(): + """`discovery_config` must be set explicitly; omitting it is a validation error (it may be None, but not absent).""" + with pytest.raises(ValidationError, match="discovery_config"): + SchemaDiscoveryFromConfigRequest(connection="conn-1") + + +def test_schema_discovery_from_config_request_accepts_none_discovery_config(): + """An explicit None is accepted and means the server uses its built-in defaults.""" + req = SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=None) + assert req.discovery_config is None + + +def test_file_data_discovery_from_config_request_requires_discovery_config(): + """`discovery_config` must be set explicitly; omitting it is a validation error (it may be None, but not absent).""" + with pytest.raises(ValidationError, match="discovery_config"): + FileDataDiscoveryFromConfigRequest(connection="conn-1") + + +def test_file_data_discovery_from_config_request_accepts_none_discovery_config(): + """An explicit None is accepted and means the server uses its built-in defaults.""" + req = FileDataDiscoveryFromConfigRequest(connection="conn-1", discovery_config=None) + assert req.discovery_config is None + + +def test_start_schema_discovery_run_from_config_sends_discovery_config(client): + """`start_schema_discovery_run_from_config` posts the discovery_config id to the saved-config endpoint.""" + req = SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID)) + with requests_mock.Mocker() as m: + m.post("http://test-server/api/schema-discovery/v2/", json={"id": 11}, status_code=201) + assert client.start_schema_discovery_run_from_config(req) == 11 + + assert m.last_request.json() == {"connection": "conn-1", "discovery_config": DISCOVERY_CONFIG_ID} + + +def test_start_schema_discovery_run_from_config_sends_schemas(client): + """`start_schema_discovery_run_from_config` forwards a `schemas` scope to the saved-config endpoint.""" + req = SchemaDiscoveryFromConfigRequest( + connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID), schemas=["public"] + ) + with requests_mock.Mocker() as m: + m.post("http://test-server/api/schema-discovery/v2/", json={"id": 12}, status_code=201) + assert client.start_schema_discovery_run_from_config(req) == 12 + + assert m.last_request.json() == { + "connection": "conn-1", + "discovery_config": DISCOVERY_CONFIG_ID, + "schemas": ["public"], + } + + +def test_start_file_data_discovery_run_from_config_sends_discovery_config(client): + """`start_file_data_discovery_run_from_config` posts only the connection and discovery_config id.""" + config = DiscoveryConfig(name="my_cfg", config_type="file", id=DiscoveryConfigId(DISCOVERY_CONFIG_ID)) + req = FileDataDiscoveryFromConfigRequest(connection="conn-1", discovery_config=config) + with requests_mock.Mocker() as m: + m.post("http://test-server/api/run-file-data-discovery/v2/", json={"id": 99}, status_code=201) + assert client.start_file_data_discovery_run_from_config(req) == 99 + + assert m.last_request.json() == {"connection": "conn-1", "discovery_config": DISCOVERY_CONFIG_ID} + + +def test_start_file_data_discovery_run_from_config_sends_options(client): + """`options` (diagnostic_logging) is posted alongside the config on the file from-config trigger.""" + req = FileDataDiscoveryFromConfigRequest( + connection="conn-1", + discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID), + options=FileDataDiscoveryOptions(diagnostic_logging=True), + ) + with requests_mock.Mocker() as m: + m.post("http://test-server/api/run-file-data-discovery/v2/", json={"id": 77}, status_code=201) + assert client.start_file_data_discovery_run_from_config(req) == 77 + + assert m.last_request.json() == { + "connection": "conn-1", + "discovery_config": DISCOVERY_CONFIG_ID, + "options": {"diagnostic_logging": True}, + } + + +def test_start_schema_discovery_run_from_config_none_sends_null_discovery_config(client): + """With `discovery_config=None` the client posts an explicit null so the server applies its defaults.""" + req = SchemaDiscoveryFromConfigRequest(connection="conn-1", discovery_config=None) + with requests_mock.Mocker() as m: + m.post("http://test-server/api/schema-discovery/v2/", json={"id": 13}, status_code=201) + assert client.start_schema_discovery_run_from_config(req) == 13 + + assert m.last_request.json() == {"connection": "conn-1", "discovery_config": None} + + +def test_start_file_data_discovery_run_from_config_none_sends_null_discovery_config(client): + """With `discovery_config=None` the file-data trigger posts an explicit null so the server applies its defaults.""" + req = FileDataDiscoveryFromConfigRequest(connection="conn-1", discovery_config=None) + with requests_mock.Mocker() as m: + m.post("http://test-server/api/run-file-data-discovery/v2/", json={"id": 21}, status_code=201) + assert client.start_file_data_discovery_run_from_config(req) == 21 + + assert m.last_request.json() == {"connection": "conn-1", "discovery_config": None} + + +def test_start_schema_discovery_run_from_config_raises_invalid_discovery_config_when_not_valid(client): + """A 400 with a `discovery_config` validation message raises the specific subclass.""" + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/schema-discovery/v2/", + json={ + "discovery_config": ['Discovery config "my_cfg" cannot be used: validation status is `invalid`.'], + }, + status_code=400, + ) + with pytest.raises( + InvalidDiscoveryConfigError, + match=r'Schema discovery run failed to start due to discovery config error: .*"my_cfg".*invalid', + ): + client.start_schema_discovery_run_from_config( + SchemaDiscoveryFromConfigRequest( + connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID) + ), + ) + + +def test_start_schema_discovery_run_from_config_raises_not_found_when_missing(client): + """ + A 400 for a config that cannot be found raises `DiscoveryConfigNotFoundError`. + + This is a bad reference rather than an unusable-but-present config, + so it must not be conflated with `InvalidDiscoveryConfigError`. + The not-found subclass still inherits `FailedToStartError` for callers that catch the base. + """ + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/schema-discovery/v2/", + json={"discovery_config": [f'Invalid pk "{DISCOVERY_CONFIG_ID}" - object does not exist.']}, + status_code=400, + ) + with pytest.raises(DiscoveryConfigNotFoundError, match="object does not exist") as exc_info: + client.start_schema_discovery_run_from_config( + SchemaDiscoveryFromConfigRequest( + connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID) + ), + ) + + assert isinstance(exc_info.value, FailedToStartError) + + +def test_start_file_data_discovery_run_from_config_raises_not_found_when_missing(client): + """A not-found saved config on the file-data trigger also raises `DiscoveryConfigNotFoundError`.""" + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/run-file-data-discovery/v2/", + json={"discovery_config": [f'Invalid pk "{DISCOVERY_CONFIG_ID}" - object does not exist.']}, + status_code=400, + ) + with pytest.raises(DiscoveryConfigNotFoundError, match="object does not exist"): + client.start_file_data_discovery_run_from_config( + FileDataDiscoveryFromConfigRequest( + connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID) + ), + ) + + +def test_start_schema_discovery_run_from_config_raises_invalid_discovery_config_on_broken_yaml(client): + """ + A 400 carrying trigger-time re-validation of broken saved YAML is classified. + + The server reports these under `discovery_config` as `{"message", "line_number", "column_number"}` dicts, + not strings. + """ + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/schema-discovery/v2/", + json={ + "discovery_config": [ + {"message": "Unknown mask 'no_such_mask'.", "line_number": 4, "column_number": 7}, + ], + }, + status_code=400, + ) + with pytest.raises( + InvalidDiscoveryConfigError, + match=r"Schema discovery run failed to start due to discovery config error: Unknown mask 'no_such_mask'\.", + ): + client.start_schema_discovery_run_from_config( + SchemaDiscoveryFromConfigRequest( + connection="conn-1", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID) + ), + ) + + +def test_start_file_data_discovery_run_from_config_raises_invalid_discovery_config(client): + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/run-file-data-discovery/v2/", + json={ + "discovery_config": ['Discovery config "my_cfg" cannot be used: validation status is `in_progress`.'], + }, + status_code=400, + ) + with pytest.raises( + InvalidDiscoveryConfigError, + match=r"File data discovery run failed to start due to discovery config error: .*in_progress", + ): + client.start_file_data_discovery_run_from_config( + FileDataDiscoveryFromConfigRequest( + connection="conn-1", + discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID), + ), + ) + + +def test_start_schema_discovery_run_from_config_non_discovery_config_400_still_raises_generic_error(client): + """Other 400s (e.g. unknown connection) keep raising the base FailedToStartError, not the config subclass.""" + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/schema-discovery/v2/", + json={"connection": ['Invalid pk "nope" - object does not exist.']}, + status_code=400, + ) + with pytest.raises(FailedToStartError) as exc_info: + client.start_schema_discovery_run_from_config( + SchemaDiscoveryFromConfigRequest( + connection="nope", discovery_config=DiscoveryConfigId(DISCOVERY_CONFIG_ID) + ), + ) + assert not isinstance(exc_info.value, InvalidDiscoveryConfigError) + + +def _zip_bytes(*members: tuple[str, str]) -> bytes: + buffer = BytesIO() + with zipfile.ZipFile(buffer, "w") as zip_file: + for name, content in members: + zip_file.writestr(name, content) + + return buffer.getvalue() + + +def test_get_discovery_run_config_snapshot_yaml_returns_yaml(client): + """The single zipped snapshot member is unzipped and returned as a YAML string.""" + snapshot = "# Discovery configuration: my_cfg.\nlabels: []\nmetadata_rules: []\n" + with requests_mock.Mocker() as m: + m.get( + "http://test-server/api/discovery/runs/7/config-snapshot/", + content=_zip_bytes(("config-7.yaml", snapshot)), + headers={"Content-Disposition": 'attachment; filename="config-7.yaml.zip"'}, + status_code=200, + ) + result = client.get_discovery_run_config_snapshot_yaml(RunId(7)) + + assert result == snapshot + assert "timezone" not in (m.last_request.qs or {}) + + +def test_get_discovery_run_config_snapshot_yaml_forwards_timezone(client): + """An explicit `timezone` is forwarded as a query param for the provenance-header timestamp.""" + with requests_mock.Mocker() as m: + m.get( + "http://test-server/api/discovery/runs/7/config-snapshot/", + content=_zip_bytes(("config-7.yaml", "labels: []\n")), + status_code=200, + ) + client.get_discovery_run_config_snapshot_yaml(RunId(7), timezone="+12:00") + + assert m.last_request.qs["timezone"] == ["+12:00"] + + +def test_get_discovery_run_config_snapshot_yaml_empty_archive_raises(client): + """An archive with no members raises a clear error rather than an IndexError.""" + with requests_mock.Mocker() as m: + m.get( + "http://test-server/api/discovery/runs/7/config-snapshot/", + content=_zip_bytes(), + status_code=200, + ) + with pytest.raises(DataMasqueException, match="contained no files"): + client.get_discovery_run_config_snapshot_yaml(RunId(7)) diff --git a/tests/test_discovery_configs.py b/tests/test_discovery_configs.py new file mode 100644 index 0000000..caa4b31 --- /dev/null +++ b/tests/test_discovery_configs.py @@ -0,0 +1,520 @@ +"""Tests for discovery-config support in the DataMasque client.""" + +from datetime import datetime +from typing import Any + +import pytest +import requests_mock + +from datamasque.client import DataMasqueClient +from datamasque.client.exceptions import DataMasqueApiError, DataMasqueException +from datamasque.client.models.discovery_config import ( + DiscoveryConfig, + DiscoveryConfigId, + DiscoveryConfigType, + unwrap_discovery_config_id, +) +from datamasque.client.models.status import ValidationStatus + +CONFIG_ID_1 = "aaaaaaaa-1111-2222-3333-444444444444" +CONFIG_ID_2 = "bbbbbbbb-1111-2222-3333-444444444444" + + +@pytest.fixture +def sample_config_list_response() -> dict[str, Any]: + return { + "count": 2, + "next": None, + "previous": None, + "results": [ + { + "id": CONFIG_ID_1, + "name": "my_config", + "config_type": "database", + "archived": False, + "created": "2025-01-01T12:00:00Z", + "modified": "2025-01-02T12:00:00Z", + }, + { + "id": CONFIG_ID_2, + "name": "another_config", + "config_type": "database", + "archived": False, + "created": "2025-02-01T12:00:00Z", + "modified": "2025-02-02T12:00:00Z", + }, + ], + } + + +@pytest.fixture +def sample_config_detail_response() -> dict[str, Any]: + return { + "id": CONFIG_ID_1, + "name": "my_config", + "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n", + "config_type": "database", + "archived": False, + "created": "2025-01-01T12:00:00Z", + "modified": "2025-01-02T12:00:00Z", + } + + +@pytest.fixture +def discovery_config() -> DiscoveryConfig: + return DiscoveryConfig( + name="test_config", + yaml="labels: []\nmetadata_rules: []\nidd_rules: []\n", + config_type="database", + ) + + +def test_list_discovery_configs(client: DataMasqueClient, sample_config_list_response: dict[str, Any]) -> None: + with requests_mock.Mocker() as m: + m.get( + "http://test-server/api/discovery/configs/", + json=sample_config_list_response, + status_code=200, + ) + configs = client.list_discovery_configs() + + assert len(configs) == 2 + assert configs[0].id == DiscoveryConfigId(CONFIG_ID_1) + assert configs[0].name == "my_config" + assert configs[0].yaml is None + assert configs[1].name == "another_config" + + +def test_list_discovery_configs_pagination(client: DataMasqueClient) -> None: + page1 = { + "count": 3, + "next": "http://test-server/api/discovery/configs/?limit=2&offset=2", + "previous": None, + "results": [ + { + "id": CONFIG_ID_1, + "name": "c1", + "config_type": "database", + "archived": False, + "created": "2025-01-01T12:00:00Z", + "modified": "2025-01-01T12:00:00Z", + }, + { + "id": CONFIG_ID_2, + "name": "c2", + "config_type": "database", + "archived": False, + "created": "2025-01-01T12:00:00Z", + "modified": "2025-01-01T12:00:00Z", + }, + ], + } + page2 = { + "count": 3, + "next": None, + "previous": "http://test-server/api/discovery/configs/?limit=2", + "results": [ + { + "id": "cccccccc-1111-2222-3333-444444444444", + "name": "c3", + "config_type": "database", + "archived": False, + "created": "2025-01-01T12:00:00Z", + "modified": "2025-01-01T12:00:00Z", + }, + ], + } + + with requests_mock.Mocker() as m: + m.get( + "http://test-server/api/discovery/configs/", + [{"json": page1, "status_code": 200}, {"json": page2, "status_code": 200}], + ) + configs = client.list_discovery_configs() + + assert [c.name for c in configs] == ["c1", "c2", "c3"] + + +def test_list_discovery_configs_empty(client: DataMasqueClient) -> None: + empty_response = {"count": 0, "next": None, "previous": None, "results": []} + + with requests_mock.Mocker() as m: + m.get("http://test-server/api/discovery/configs/", json=empty_response, status_code=200) + configs = client.list_discovery_configs() + + assert configs == [] + + +def test_get_discovery_config(client: DataMasqueClient, sample_config_detail_response: dict[str, Any]) -> None: + with requests_mock.Mocker() as m: + m.get( + f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/", + json=sample_config_detail_response, + status_code=200, + ) + config = client.get_discovery_config(DiscoveryConfigId(CONFIG_ID_1)) + + assert config.id == DiscoveryConfigId(CONFIG_ID_1) + assert config.name == "my_config" + assert config.yaml == "labels: []\nmetadata_rules: []\nidd_rules: []\n" + + +def test_get_discovery_config_by_name_found( + client: DataMasqueClient, sample_config_detail_response: dict[str, Any] +) -> None: + list_response = { + "count": 1, + "next": None, + "previous": None, + "results": [ + { + "id": CONFIG_ID_1, + "name": "my_config", + "config_type": "database", + "archived": False, + "created": "2025-01-01T12:00:00Z", + "modified": "2025-01-02T12:00:00Z", + }, + ], + } + + with requests_mock.Mocker() as m: + m.get( + "http://test-server/api/discovery/configs/", + json=list_response, + status_code=200, + ) + m.get( + f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/", + json=sample_config_detail_response, + status_code=200, + ) + config = client.get_discovery_config_by_name("my_config", DiscoveryConfigType.database) + + assert config is not None + assert config.name == "my_config" + assert "name_exact=my_config" in m.request_history[0].url + assert "config_type=database" in m.request_history[0].url + + +def test_get_discovery_config_by_name_not_found(client: DataMasqueClient) -> None: + empty_response = {"count": 0, "next": None, "previous": None, "results": []} + + with requests_mock.Mocker() as m: + m.get( + "http://test-server/api/discovery/configs/", + json=empty_response, + status_code=200, + ) + config = client.get_discovery_config_by_name("nonexistent", DiscoveryConfigType.database) + + assert config is None + + +def test_get_discovery_config_by_name_raises_when_server_omits_id(client: DataMasqueClient) -> None: + list_response_without_id = { + "count": 1, + "next": None, + "previous": None, + "results": [ + { + "name": "my_config", + "config_type": "database", + "archived": False, + "created": "2025-01-01T12:00:00Z", + "modified": "2025-01-02T12:00:00Z", + }, + ], + } + + with requests_mock.Mocker() as m: + m.get( + "http://test-server/api/discovery/configs/", + json=list_response_without_id, + status_code=200, + ) + with pytest.raises(DataMasqueApiError, match="without an `id`"): + client.get_discovery_config_by_name("my_config", DiscoveryConfigType.database) + + +def test_create_discovery_config(client: DataMasqueClient, discovery_config: DiscoveryConfig) -> None: + create_response = { + "id": CONFIG_ID_1, + "name": "test_config", + "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n", + "config_type": "database", + "is_valid": "in_progress", + "archived": False, + "created": "2025-06-01T10:00:00Z", + "modified": "2025-06-01T10:00:00Z", + } + + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/discovery/configs/", + json=create_response, + status_code=201, + ) + result = client.create_discovery_config(discovery_config) + + assert result is discovery_config + assert result.id == DiscoveryConfigId(CONFIG_ID_1) + assert result.is_valid is ValidationStatus.in_progress + assert result.created == datetime.fromisoformat("2025-06-01T10:00:00+00:00") + assert result.modified == datetime.fromisoformat("2025-06-01T10:00:00+00:00") + + request_body = m.last_request.json() + assert request_body["name"] == "test_config" + assert request_body["config_yaml"] == "labels: []\nmetadata_rules: []\nidd_rules: []\n" + assert request_body["config_type"] == "database" + # Server-managed read-only fields must not be echoed back in the request body. + assert "id" not in request_body + assert "is_valid" not in request_body + assert "created" not in request_body + assert "modified" not in request_body + + +def test_update_discovery_config(client: DataMasqueClient, discovery_config: DiscoveryConfig) -> None: + discovery_config.id = DiscoveryConfigId(CONFIG_ID_1) + + update_response = { + "id": CONFIG_ID_1, + "name": "test_config", + "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n", + "config_type": "database", + "archived": False, + "created": "2025-06-01T10:00:00Z", + "modified": "2025-06-02T10:00:00Z", + } + + with requests_mock.Mocker() as m: + m.put( + f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/", + json=update_response, + status_code=200, + ) + result = client.update_discovery_config(discovery_config) + + assert result is discovery_config + assert result.modified == datetime.fromisoformat("2025-06-02T10:00:00+00:00") + + request_body = m.last_request.json() + assert request_body["name"] == "test_config" + assert request_body["config_yaml"] == "labels: []\nmetadata_rules: []\nidd_rules: []\n" + # The id identifies the config in the URL, not the request body. + assert "id" not in request_body + + +def test_update_discovery_config_no_id_raises(client: DataMasqueClient, discovery_config: DiscoveryConfig) -> None: + with pytest.raises(ValueError, match="id is None"): + client.update_discovery_config(discovery_config) + + +def test_create_or_update_discovery_config_create(client: DataMasqueClient, discovery_config: DiscoveryConfig) -> None: + empty_list = {"count": 0, "next": None, "previous": None, "results": []} + create_response = { + "id": CONFIG_ID_1, + "name": "test_config", + "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n", + "config_type": "database", + "archived": False, + "created": "2025-06-01T10:00:00Z", + "modified": "2025-06-01T10:00:00Z", + } + + with requests_mock.Mocker() as m: + m.get("http://test-server/api/discovery/configs/", json=empty_list, status_code=200) + m.post("http://test-server/api/discovery/configs/", json=create_response, status_code=201) + result = client.create_or_update_discovery_config(discovery_config) + + assert result.id == DiscoveryConfigId(CONFIG_ID_1) + assert m.request_history[0].method == "GET" + assert m.request_history[1].method == "POST" + + +def test_create_or_update_discovery_config_update(client: DataMasqueClient, discovery_config: DiscoveryConfig) -> None: + list_response = { + "count": 1, + "next": None, + "previous": None, + "results": [ + { + "id": CONFIG_ID_1, + "name": "test_config", + "config_type": "database", + "archived": False, + "created": "2025-06-01T10:00:00Z", + "modified": "2025-06-01T10:00:00Z", + }, + ], + } + update_response = { + "id": CONFIG_ID_1, + "name": "test_config", + "config_yaml": "labels: []\nmetadata_rules: []\nidd_rules: []\n", + "config_type": "database", + "archived": False, + "created": "2025-06-01T10:00:00Z", + "modified": "2025-06-02T10:00:00Z", + } + + with requests_mock.Mocker() as m: + m.get("http://test-server/api/discovery/configs/", json=list_response, status_code=200) + m.put( + f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/", + json=update_response, + status_code=200, + ) + result = client.create_or_update_discovery_config(discovery_config) + + assert result.id == DiscoveryConfigId(CONFIG_ID_1) + # The upsert resolves the id from the list response alone — no detail GET before the PUT. + assert [r.method for r in m.request_history] == ["GET", "PUT"] + + +def test_delete_discovery_config_by_id(client: DataMasqueClient) -> None: + with requests_mock.Mocker() as m: + m.delete(f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/", status_code=204) + client.delete_discovery_config_by_id_if_exists(DiscoveryConfigId(CONFIG_ID_1)) + + assert m.call_count == 1 + + +def test_delete_discovery_config_by_id_not_found(client: DataMasqueClient) -> None: + with requests_mock.Mocker() as m: + m.delete(f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/", status_code=404) + client.delete_discovery_config_by_id_if_exists(DiscoveryConfigId(CONFIG_ID_1)) + + +def test_delete_discovery_config_by_name(client: DataMasqueClient, sample_config_list_response: dict[str, Any]) -> None: + with requests_mock.Mocker() as m: + m.get("http://test-server/api/discovery/configs/", json=sample_config_list_response, status_code=200) + m.delete(f"http://test-server/api/discovery/configs/{CONFIG_ID_1}/", status_code=204) + client.delete_discovery_config_by_name_if_exists("my_config", DiscoveryConfigType.database) + + assert m.request_history[0].method == "GET" + assert m.request_history[1].method == "DELETE" + + +def test_delete_discovery_config_by_name_not_found( + client: DataMasqueClient, sample_config_list_response: dict[str, Any] +) -> None: + with requests_mock.Mocker() as m: + m.get("http://test-server/api/discovery/configs/", json=sample_config_list_response, status_code=200) + client.delete_discovery_config_by_name_if_exists("nonexistent", DiscoveryConfigType.database) + + assert m.call_count == 1 + assert m.request_history[0].method == "GET" + + +def test_delete_discovery_config_by_name_raises_when_server_omits_id(client: DataMasqueClient) -> None: + list_response_without_id = { + "count": 1, + "next": None, + "previous": None, + "results": [ + { + "name": "my_config", + "config_type": "database", + "archived": False, + "created": "2025-01-01T12:00:00Z", + "modified": "2025-01-02T12:00:00Z", + }, + ], + } + + with requests_mock.Mocker() as m: + m.get( + "http://test-server/api/discovery/configs/", + json=list_response_without_id, + status_code=200, + ) + with pytest.raises(DataMasqueException, match="without an `id`"): + client.delete_discovery_config_by_name_if_exists("my_config", DiscoveryConfigType.database) + + +def test_delete_discovery_config_by_name_only_deletes_matching_type(client: DataMasqueClient) -> None: + shared_name_response = { + "count": 2, + "next": None, + "previous": None, + "results": [ + {"id": CONFIG_ID_1, "name": "shared", "config_type": "database", "archived": False}, + {"id": CONFIG_ID_2, "name": "shared", "config_type": "file", "archived": False}, + ], + } + + with requests_mock.Mocker() as m: + m.get("http://test-server/api/discovery/configs/", json=shared_name_response, status_code=200) + m.delete(f"http://test-server/api/discovery/configs/{CONFIG_ID_2}/", status_code=204) + client.delete_discovery_config_by_name_if_exists("shared", DiscoveryConfigType.file) + + assert m.request_history[-1].method == "DELETE" + assert CONFIG_ID_2 in m.request_history[-1].url + assert CONFIG_ID_1 not in m.request_history[-1].url + + +def test_get_default_discovery_config_yaml(client: DataMasqueClient) -> None: + yaml_body = "labels: []\nmetadata_rules: []\nidd_rules: []\n" + + with requests_mock.Mocker() as m: + m.get( + "http://test-server/api/discovery/configs/defaults/", + text=yaml_body, + status_code=200, + headers={"Content-Type": "application/x-yaml"}, + ) + result = client.get_default_discovery_config_yaml() + + assert result == yaml_body + + +def test_discovery_config_parses_validation_fields() -> None: + """`is_valid` and `validation_error` round-trip from API responses.""" + config = DiscoveryConfig.model_validate( + { + "id": CONFIG_ID_1, + "name": "my_config", + "config_type": "database", + "config_yaml": "labels: []", + "is_valid": "invalid", + "validation_error": "bad shape on line 3", + "created": "2025-01-01T12:00:00Z", + "modified": "2025-01-02T12:00:00Z", + } + ) + + assert config.is_valid is ValidationStatus.invalid + assert config.validation_error == "bad shape on line 3" + + +def test_discovery_config_validation_fields_optional() -> None: + """Older / lighter API responses without validation fields still parse.""" + config = DiscoveryConfig.model_validate( + { + "id": CONFIG_ID_1, + "name": "my_config", + "config_type": "database", + "created": "2025-01-01T12:00:00Z", + "modified": "2025-01-02T12:00:00Z", + } + ) + + assert config.is_valid is None + assert config.validation_error is None + + +def test_unwrap_discovery_config_id_passes_through_strings() -> None: + assert unwrap_discovery_config_id(CONFIG_ID_1) == CONFIG_ID_1 + assert unwrap_discovery_config_id(None) is None + + +def test_unwrap_discovery_config_id_extracts_id_from_model() -> None: + config = DiscoveryConfig(name="x", config_type="database", id=DiscoveryConfigId(CONFIG_ID_1)) + assert unwrap_discovery_config_id(config) == CONFIG_ID_1 + + +def test_unwrap_discovery_config_id_raises_without_id() -> None: + config = DiscoveryConfig(name="x", config_type="database") + with pytest.raises(ValueError, match="id is None"): + unwrap_discovery_config_id(config) diff --git a/tests/test_files.py b/tests/test_files.py index 5f5574c..3b8f99b 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -271,3 +271,42 @@ def test_upload_file_retries_on_401(config): second_form = parse_multipart_form(m_request.request_history[1]) assert first_form["seed_file"]["content"] == b"seed content" assert second_form["seed_file"]["content"] == b"seed content" + + +def test_upload_ssl_zip_file_sends_mysql_database_type(client): + """`SslZipFile` uploads must include `database_type=mysql`; the connection-filesets endpoint requires it.""" + with requests_mock.Mocker() as m_request: + m_request.post( + f"http://test-server/{SslZipFile.get_url()}", + status_code=201, + json={ + "id": str(uuid.uuid4()), + "name": "certs.zip", + "created_date": "2024-01-01T00:00:00.000000Z", + "modified_date": "2024-01-01T00:00:00.000000Z", + }, + ) + client.upload_file(SslZipFile, "certs.zip", b"zip bytes") + + form = parse_multipart_form(m_request.request_history[0]) + assert form["database_type"] == "mysql" + + +@pytest.mark.parametrize("file_type", [SeedFile, OracleWalletFile, SnowflakeKeyFile]) +def test_upload_non_ssl_zip_file_omits_database_type(client, file_type): + """Only `SslZipFile` carries the `database_type` form field; other file types must not send it.""" + with requests_mock.Mocker() as m_request: + m_request.post( + f"http://test-server/{file_type.get_url()}", + status_code=201, + json={ + "id": str(uuid.uuid4()), + "name": "file.bin", + "created_date": "2024-01-01T00:00:00.000000Z", + "modified_date": "2024-01-01T00:00:00.000000Z", + }, + ) + client.upload_file(file_type, "file.bin", b"content") + + form = parse_multipart_form(m_request.request_history[0]) + assert "database_type" not in form diff --git a/tests/test_ruleset_library.py b/tests/test_ruleset_library.py index 3e2e476..6698531 100644 --- a/tests/test_ruleset_library.py +++ b/tests/test_ruleset_library.py @@ -70,61 +70,6 @@ def ruleset_library(): ) -def test_ruleset_library_model_dump() -> None: - lib = RulesetLibrary(name="lib1", namespace="ns", yaml="content: true") - assert lib.model_dump(exclude_none=True, by_alias=True, mode="json") == { - "name": "lib1", - "namespace": "ns", - "config_yaml": "content: true", - } - - -def test_ruleset_library_model_dump_no_yaml() -> None: - lib = RulesetLibrary(name="lib1", namespace="ns") - api_dict = lib.model_dump(exclude_none=True, by_alias=True, mode="json") - assert "config_yaml" not in api_dict - assert api_dict == {"name": "lib1", "namespace": "ns"} - - -def test_ruleset_library_model_validate() -> None: - response = { - "id": LIBRARY_ID_1, - "name": "my_library", - "namespace": "org", - "config_yaml": "version: '1.0'", - "is_valid": "valid", - "created": "2025-01-01T12:00:00Z", - "modified": "2025-01-02T12:00:00Z", - } - lib = RulesetLibrary.model_validate(response) - assert lib.id == RulesetLibraryId(LIBRARY_ID_1) - assert lib.name == "my_library" - assert lib.namespace == "org" - assert lib.yaml == "version: '1.0'" - assert lib.is_valid is ValidationStatus.valid - assert lib.created == datetime.fromisoformat("2025-01-01T12:00:00+00:00") - assert lib.modified == datetime.fromisoformat("2025-01-02T12:00:00+00:00") - - -def test_ruleset_library_model_validate_list( - sample_library_list_response: dict[str, Any], -) -> None: - """List responses omit config_yaml, so yaml should be None.""" - result = sample_library_list_response["results"][0] - lib = RulesetLibrary.model_validate(result) - assert lib.yaml is None - assert lib.is_valid is ValidationStatus.valid - - -def test_ruleset_library_equality() -> None: - """Pydantic structural equality compares all fields.""" - lib1 = RulesetLibrary(name="lib", namespace="ns", yaml="content") - lib2 = RulesetLibrary(name="lib", namespace="ns", yaml="content") - lib3 = RulesetLibrary(name="lib", namespace="other", yaml="content") - assert lib1 == lib2 - assert lib1 != lib3 - - def test_list_ruleset_libraries(client: DataMasqueClient, sample_library_list_response: dict[str, Any]) -> None: with requests_mock.Mocker() as m: m.get( @@ -671,3 +616,123 @@ def test_list_rulesets_using_library_pagination(client: DataMasqueClient) -> Non assert rulesets[0].name == "r1" assert rulesets[1].name == "r2" assert rulesets[2].name == "r3" + + +def test_create_ruleset_library_populates_validation_error( + client: DataMasqueClient, ruleset_library: RulesetLibrary +) -> None: + """The server's `validation_error` string is surfaced on the returned library.""" + create_response = { + "id": LIBRARY_ID_1, + "name": "test_library", + "namespace": "test_ns", + "config_yaml": "version: '1.0'\nfunctions: []", + "is_valid": "invalid", + "validation_error": "Invalid function definition", + "created": "2025-06-01T10:00:00Z", + "modified": "2025-06-01T10:00:00Z", + } + + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/ruleset-libraries/", + json=create_response, + status_code=201, + ) + result = client.create_ruleset_library(ruleset_library) + + assert result.validation_error == "Invalid function definition" + + +def test_update_ruleset_library_populates_validation_error( + client: DataMasqueClient, ruleset_library: RulesetLibrary +) -> None: + """A full update also surfaces the server's `validation_error` string.""" + ruleset_library.id = RulesetLibraryId(LIBRARY_ID_1) + update_response = { + "id": LIBRARY_ID_1, + "name": "test_library", + "namespace": "test_ns", + "config_yaml": "version: '1.0'\nfunctions: []", + "is_valid": "invalid", + "validation_error": "Invalid function definition", + "created": "2025-06-01T10:00:00Z", + "modified": "2025-06-02T10:00:00Z", + } + + with requests_mock.Mocker() as m: + m.put( + f"http://test-server/api/ruleset-libraries/{LIBRARY_ID_1}/", + json=update_response, + status_code=200, + ) + result = client.update_ruleset_library(ruleset_library) + + assert result.validation_error == "Invalid function definition" + + +def test_create_ruleset_library_does_not_send_read_only_fields( + client: DataMasqueClient, ruleset_library: RulesetLibrary +) -> None: + """Read-only server fields must never appear in the create request body.""" + ruleset_library.id = RulesetLibraryId(LIBRARY_ID_1) + ruleset_library.is_valid = ValidationStatus.invalid + ruleset_library.validation_error = "stale error" + create_response = { + "id": LIBRARY_ID_1, + "name": "test_library", + "namespace": "test_ns", + "config_yaml": "version: '1.0'\nfunctions: []", + "is_valid": "invalid", + "validation_error": "Invalid function definition", + "created": "2025-06-01T10:00:00Z", + "modified": "2025-06-01T10:00:00Z", + } + + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/ruleset-libraries/", + json=create_response, + status_code=201, + ) + client.create_ruleset_library(ruleset_library) + + body = m.last_request.json() + for read_only_field in ("id", "is_valid", "validation_error", "created", "modified"): + assert read_only_field not in body + # Input fields are still present. + assert body["name"] == "test_library" + assert body["config_yaml"] == "version: '1.0'\nfunctions: []" + + +def test_create_ruleset_library_collapses_git_snapshot( + client: DataMasqueClient, ruleset_library: RulesetLibrary +) -> None: + """The server's flat `git_*` fields are collapsed into a nested `git` snapshot on the library.""" + create_response = { + "id": LIBRARY_ID_1, + "name": "test_library", + "namespace": "test_ns", + "config_yaml": "version: '1.0'\nfunctions: []", + "is_valid": "valid", + "git_branch": "main", + "git_commit_sha": "abc123", + "git_repo_url": "https://git.example.com/repo.git", + "git_synced_at": "2025-06-01T10:00:00Z", + "created": "2025-06-01T10:00:00Z", + "modified": "2025-06-01T10:00:00Z", + } + + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/ruleset-libraries/", + json=create_response, + status_code=201, + ) + result = client.create_ruleset_library(ruleset_library) + + assert result.git is not None + assert result.git.branch == "main" + assert result.git.commit_sha == "abc123" + assert result.git.repo_url == "https://git.example.com/repo.git" + assert result.git.synced_at == datetime.fromisoformat("2025-06-01T10:00:00+00:00") diff --git a/tests/test_rulesets.py b/tests/test_rulesets.py index ae97b3a..1200a68 100644 --- a/tests/test_rulesets.py +++ b/tests/test_rulesets.py @@ -1,11 +1,13 @@ """Tests for `RulesetClient`.""" +from datetime import datetime + import pytest import requests_mock from datamasque.client.exceptions import DataMasqueApiError from datamasque.client.models.ruleset import RulesetType -from datamasque.client.models.status import ValidationStatus +from datamasque.client.models.status import ValidationErrorType, ValidationStatus def test_list_rulesets(client, existing_rulesets_json): @@ -32,7 +34,7 @@ def test_create_or_update_ruleset_create(client, ruleset): # Test creating a new ruleset with upsert m.post( "http://test-server/api/rulesets/?upsert=true", - json={"id": "2", "is_valid": "in_progress"}, + json={"id": "2", "name": "test_ruleset", "is_valid": "in_progress"}, status_code=201, ) @@ -62,7 +64,7 @@ def test_create_or_update_ruleset_update(client, ruleset): # Test updating an existing ruleset with upsert (returns 200 status for update) m.post( "http://test-server/api/rulesets/?upsert=true", - json={"id": "1", "is_valid": "valid"}, + json={"id": "1", "name": "test_ruleset", "is_valid": "valid"}, status_code=200, ) @@ -99,13 +101,26 @@ def test_delete_ruleset_by_name(client, existing_rulesets_json): status_code=200, ) m.delete("http://test-server/api/rulesets/2/", status_code=204) - client.delete_ruleset_by_name_if_exists("file_masking_ruleset") + client.delete_ruleset_by_name_if_exists("file_masking_ruleset", RulesetType.file) assert m.call_count == 2 assert m.request_history[0].method == "GET" assert m.request_history[1].method == "DELETE" +def test_delete_ruleset_by_name_skips_other_type(client, existing_rulesets_json): + with requests_mock.Mocker() as m: + m.get( + "http://test-server/api/v2/rulesets/", + json=existing_rulesets_json, + status_code=200, + ) + client.delete_ruleset_by_name_if_exists("file_masking_ruleset", RulesetType.database) + + assert m.call_count == 1 + assert m.request_history[0].method == "GET" + + def test_delete_ruleset_that_does_not_exist(client, existing_rulesets_json): with requests_mock.Mocker() as m: m.get( @@ -113,7 +128,150 @@ def test_delete_ruleset_that_does_not_exist(client, existing_rulesets_json): json=existing_rulesets_json, status_code=200, ) - client.delete_ruleset_by_name_if_exists("not_a_ruleset") + client.delete_ruleset_by_name_if_exists("not_a_ruleset", RulesetType.database) assert m.call_count == 1 assert m.request_history[0].method == "GET" + + +def test_create_or_update_ruleset_populates_validation_error(client, ruleset): + """The server's `validation_error` string and `validation_error_type` are surfaced on the returned ruleset.""" + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/rulesets/?upsert=true", + json={ + "id": "2", + "name": "test_ruleset", + "is_valid": "invalid", + "validation_error": "Ruleset error: Missing required field: table", + "validation_error_type": "ruleset", + }, + status_code=201, + ) + result = client.create_or_update_ruleset(ruleset) + + assert result.validation_error == "Ruleset error: Missing required field: table" + assert result.validation_error_type is ValidationErrorType.ruleset + + +def test_create_or_update_ruleset_validation_error_none_when_absent(client, ruleset): + """`validation_error` and `validation_error_type` are `None` when the server omits them (a valid ruleset).""" + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/rulesets/?upsert=true", + json={"id": "2", "name": "test_ruleset", "is_valid": "valid"}, + status_code=201, + ) + result = client.create_or_update_ruleset(ruleset) + + assert result.validation_error is None + assert result.validation_error_type is None + + +def test_create_or_update_ruleset_does_not_send_read_only_fields(client, ruleset): + """Read-only server fields must never be echoed back into a re-submit's request body.""" + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/rulesets/?upsert=true", + json={ + "id": "2", + "name": "test_ruleset", + "is_valid": "invalid", + "validation_error": "Ruleset error: bad", + "validation_error_type": "ruleset", + }, + status_code=201, + ) + client.create_or_update_ruleset(ruleset) + + # Re-submit the same object (read-only fields now populated). + client.create_or_update_ruleset(ruleset) + + body = m.last_request.json() + for read_only_field in ("id", "is_valid", "validation_error", "validation_error_type"): + assert read_only_field not in body + # Input fields are still present. + assert body["config_yaml"] == "version: '1.0'\ntasks: []" + + +def test_create_or_update_ruleset_collapses_git_snapshot(client, ruleset): + """The server's flat `git_*` fields are collapsed into a nested `git` snapshot.""" + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/rulesets/?upsert=true", + json={ + "id": "2", + "name": "test_ruleset", + "is_valid": "valid", + "git_branch": "main", + "git_commit_sha": "abc123", + "git_repo_url": "https://git.example.com/repo.git", + "git_synced_at": "2025-06-01T10:00:00Z", + }, + status_code=201, + ) + result = client.create_or_update_ruleset(ruleset) + + assert result.git is not None + assert result.git.branch == "main" + assert result.git.commit_sha == "abc123" + assert result.git.repo_url == "https://git.example.com/repo.git" + assert result.git.synced_at == datetime.fromisoformat("2025-06-01T10:00:00+00:00") + + +def test_create_or_update_ruleset_git_none_when_not_synced(client, ruleset): + """`git` is `None` when the server reports no git provenance.""" + with requests_mock.Mocker() as m: + m.post( + "http://test-server/api/rulesets/?upsert=true", + json={ + "id": "2", + "name": "test_ruleset", + "is_valid": "valid", + "git_branch": None, + "git_commit_sha": None, + "git_repo_url": None, + "git_synced_at": None, + }, + status_code=201, + ) + result = client.create_or_update_ruleset(ruleset) + + assert result.git is None + + +def test_list_rulesets_collapses_git_snapshot(client): + """`git_*` fields are collapsed (and consumed, not left as extras) when listing rulesets.""" + with requests_mock.Mocker() as m: + m.get( + "http://test-server/api/v2/rulesets/", + json=[ + { + "id": "1", + "name": "synced_ruleset", + "mask_type": "database", + "config_yaml": "version: '1.0'", + "is_valid": "valid", + "git_branch": "main", + "git_commit_sha": "abc123", + "git_repo_url": "https://git.example.com/repo.git", + "git_synced_at": "2025-06-01T10:00:00Z", + }, + { + "id": "2", + "name": "local_ruleset", + "mask_type": "database", + "config_yaml": "version: '1.0'", + "is_valid": "valid", + }, + ], + status_code=200, + ) + rulesets = client.list_rulesets() + + assert rulesets[0].git is not None + assert rulesets[0].git.branch == "main" + assert rulesets[1].git is None + # The flat keys were consumed, not retained as extras that would leak back on re-submit. + assert "git_branch" not in rulesets[0].model_dump(by_alias=True) + assert "git" not in rulesets[0].model_dump(by_alias=True) diff --git a/tests/test_runs.py b/tests/test_runs.py index 3360093..d4be2e8 100644 --- a/tests/test_runs.py +++ b/tests/test_runs.py @@ -48,12 +48,21 @@ def test_get_file_data_discovery_report(client): with requests_mock.Mocker() as m: m.get( "http://test-server/api/runs/1/file-discovery-results/", - json=[{"id": 1, "file_type": "csv", "files": [], "results": []}], + json=[ + { + "id": 1, + "connection": {"id": "conn-1", "name": "files"}, + "file_type": "csv", + "files": [], + "results": [], + } + ], status_code=200, ) results = client.get_file_data_discovery_report(RunId(1)) assert len(results) == 1 assert results[0].id == 1 + assert results[0].connection.name == "files" def test_unfinished_run_str_with_destination(): @@ -161,6 +170,53 @@ def test_start_masking_run(client): assert client.start_masking_run(MaskingRunRequest(connection="1", ruleset="rs-1", name=fake.word())) == "1" +def test_start_masking_run_sends_is_user_subscribed_when_set(client): + """`is_user_subscribed=True` is sent top-level so the server subscribes the requesting user to the run.""" + with requests_mock.Mocker() as m: + m.post("http://test-server/api/runs/", json={"id": "1", "name": "r"}, status_code=201) + client.start_masking_run(MaskingRunRequest(connection="1", ruleset="rs-1", name="r", is_user_subscribed=True)) + + assert m.last_request.json()["is_user_subscribed"] is True + + +def test_start_masking_run_omits_is_user_subscribed_by_default(client): + """An unset `is_user_subscribed` is excluded from the request body rather than sent as null.""" + with requests_mock.Mocker() as m: + m.post("http://test-server/api/runs/", json={"id": "1", "name": "r"}, status_code=201) + client.start_masking_run(MaskingRunRequest(connection="1", ruleset="rs-1", name="r")) + + assert "is_user_subscribed" not in m.last_request.json() + + +def test_start_masking_run_sends_auto_pull_options(client): + """`auto_pull` / `auto_pull_branch` are sent inside `options` so the server refreshes the ruleset from git.""" + with requests_mock.Mocker() as m: + m.post("http://test-server/api/runs/", json={"id": "1", "name": "r"}, status_code=201) + client.start_masking_run( + MaskingRunRequest( + connection="1", + ruleset="rs-1", + name="r", + options=MaskingRunOptions(auto_pull=True, auto_pull_branch="main"), + ) + ) + + options = m.last_request.json()["options"] + assert options["auto_pull"] is True + assert options["auto_pull_branch"] == "main" + + +def test_start_masking_run_omits_auto_pull_options_by_default(client): + """Unset auto-pull options are excluded from the request body rather than sent as null.""" + with requests_mock.Mocker() as m: + m.post("http://test-server/api/runs/", json={"id": "1", "name": "r"}, status_code=201) + client.start_masking_run(MaskingRunRequest(connection="1", ruleset="rs-1", name="r")) + + options = m.last_request.json()["options"] + assert "auto_pull" not in options + assert "auto_pull_branch" not in options + + def test_start_masking_run_invalid_ruleset(client): with requests_mock.Mocker() as m: m.post( diff --git a/uv.lock b/uv.lock index 6fe9d57..31e21ef 100644 --- a/uv.lock +++ b/uv.lock @@ -428,7 +428,7 @@ toml = [ [[package]] name = "datamasque-python" -version = "1.0.3" +version = "1.1.0.dev0" source = { editable = "." } dependencies = [ { name = "pydantic" },