From 8d129ca6931b943690ee00d61ccb202929f77c1f Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 20 Jun 2026 20:02:32 -0400 Subject: [PATCH] Add Ledger target profile contracts --- README.md | 56 +-- .../target_profiles/__init__.py | 19 + policyengine_ledger/target_profiles/model.py | 299 +++++++++++++++ .../target_profiles/uk_local_geography.json | 346 ++++++++++++++++++ policyengine_ledger/targets/__init__.py | 6 +- ...est_policyengine_ledger_target_profiles.py | 177 +++++++++ 6 files changed, 873 insertions(+), 30 deletions(-) create mode 100644 policyengine_ledger/target_profiles/__init__.py create mode 100644 policyengine_ledger/target_profiles/model.py create mode 100644 policyengine_ledger/target_profiles/uk_local_geography.json create mode 100644 tests/test_policyengine_ledger_target_profiles.py diff --git a/README.md b/README.md index 686681b..0441b6d 100644 --- a/README.md +++ b/README.md @@ -11,14 +11,15 @@ captures source publications, preserves provenance, and represents published values as structured, queryable facts. Ledger may normalize structure: parse files, type values, declare units and -scales, assign geography and period identifiers, and preserve lineage back to -source artifacts. Ledger does not choose among sources, reconcile inconsistent -sources, age values, impute missing data, select active calibration targets, or -apply simulator-specific mappings. +scales, assign geography and period identifiers, preserve lineage back to +source artifacts, and publish target profiles that identify source-backed facts +and measurement contracts. Ledger does not reconcile inconsistent sources, +impute missing data, or execute simulator-specific calibration. -Populace consumes Ledger facts to build simulation datasets and Populace -targets. Thesis can consume the same facts as official observations. Modeling -choices live in those consumers, not Ledger. 
+Populace consumes Ledger facts and target profiles, selects the subset its +current support universe can target, applies minimal period alignment when +declared, and runs calibration. Thesis can consume the same facts and +measurement contracts as official observations. ## Purpose @@ -30,8 +31,8 @@ This repository provides: geography, period, source table, and lineage. - **Normalization**: Low-assumption representation changes such as unit/scale conversion and source-published total/share arithmetic. -- **Target inputs**: Source-published aggregates, projections, rates, counts, - and metadata that Populace may use to compose calibration targets. +- **Target profiles**: Source-backed target contracts and model-measurement + bindings that Populace, Thesis, and future rule engines can consume. - **Microdata**: Survey and administrative microdata ingestion for CPS, PUF, FRS, and related datasets. - **Jurisdiction loaders**: Source-specific ETL that emits the shared Arch @@ -44,16 +45,17 @@ They are source-backed claims with provenance. The load-bearing rule: -> Ledger may re-express a published value, but may not choose among, reconcile, -> age, impute, or transform published values in ways that change their meaning. +> Ledger may re-express a published value and declare target contracts, but may +> not reconcile, impute, or transform published values in ways that change their +> meaning. 
| Layer | Owns | Examples | |-------|------|----------| | Ledger Sources | Source artifacts and provenance | URLs, checksums, source files, parsed tables/cells | | Ledger Facts | Structured source claims | SOI cells, ACS estimates, CPI values, CBO-published projections | | Ledger Normalization | Representation changes | Unit scales, typed values, geography/date identifiers | -| Ledger Target Inputs | Source facts shaped for calibration | SOI EITC totals, CBO baselines, source-published growth factors | -| Populace Targets | Model-ready target sets | Source selection, reconciliation, aging, activation profiles | +| Ledger Target Profiles | Source-backed calibration contracts | SOI EITC totals, CBO baselines, source-published growth factors, measurement bindings | +| Populace Targets | Build-ready active subset | Support-aware activation, solver inputs, diagnostics | The storage split is documented in [`docs/storage-architecture.md`](docs/storage-architecture.md): `arch-raw` @@ -113,10 +115,10 @@ arch/ ``` New code should prefer `policyengine_ledger` for source-backed fact and target -input consumers. Existing in-repo implementation code may continue using +profile consumers. Existing in-repo implementation code may continue using `arch.sources`, `arch.facts`, `arch.normalization`, `arch.targets`, and -`arch.microdata` while the rename is phased in. Populace-specific target -composition and calibration code belongs in Populace. +`arch.microdata` while the rename is phased in. Solver execution and calibrated +dataset construction belong in Populace. ## Quick Start @@ -493,8 +495,8 @@ Target inputs use a three-table schema: - **stratum_constraints**: Rules defining each stratum. - **targets**: Source-published aggregate values linked to strata. -These are inputs to Populace target composition. Populace owns the active, -reconciled, aged target sets used for calibration. +These are inputs to Ledger target profiles. 
Populace owns the active +support-aware subset and calibrated solver execution. ## Ledger Facts And Populace Targets @@ -504,9 +506,10 @@ Normalization is about representation, not modeling: units, scales, typed values, geography IDs, period IDs, and same-source arithmetic where the source publishes the total/share relationship. -Inflation, aging, cross-source reconciliation, source selection, and target -activation belong in Populace unless the source itself publishes the adjusted -or projected series. +Inflation, cross-source reconciliation, and support-aware activation belong in +Populace unless the source itself publishes the adjusted or projected series. +Target profiles in Ledger may declare the source-backed rows and measurement +bindings Populace is allowed to activate. ```python from arch.facts import SourceFact @@ -554,12 +557,11 @@ target_input = as_target( ## Boundaries -- **Ledger** owns source data, provenance, source facts, aggregate facts, and - microdata ingestion. -- **Populace Targets** owns source selection, reconciliation, aging, imputation, - active target sets, and calibration profiles. -- **Populace** owns simulation interfaces, entity modeling, weights, and - calibration execution. +- **Ledger** owns source data, provenance, source facts, aggregate facts, + microdata ingestion, target profiles, and measurement contracts. +- **Populace** owns support-aware target activation, minimal period alignment, + simulation interfaces, entity modeling, weights, diagnostics, and calibration + execution. - **Jurisdiction source packages** such as `arch-us` and `arch-uk` own source-specific parsers and specs that emit shared Arch records. 
- **Jurisdiction simulation packages** own simulation-specific variable diff --git a/policyengine_ledger/target_profiles/__init__.py b/policyengine_ledger/target_profiles/__init__.py new file mode 100644 index 0000000..91dce5e --- /dev/null +++ b/policyengine_ledger/target_profiles/__init__.py @@ -0,0 +1,19 @@ +"""Packaged Ledger target profiles.""" + +from policyengine_ledger.target_profiles.model import ( + TARGET_PROFILE_SCHEMA_VERSION, + TargetProfile, + TargetProfileBinding, + TargetProfileTarget, + load_target_profile, + target_profile_from_mapping, +) + +__all__ = [ + "TARGET_PROFILE_SCHEMA_VERSION", + "TargetProfile", + "TargetProfileBinding", + "TargetProfileTarget", + "load_target_profile", + "target_profile_from_mapping", +] diff --git a/policyengine_ledger/target_profiles/model.py b/policyengine_ledger/target_profiles/model.py new file mode 100644 index 0000000..9c27d9a --- /dev/null +++ b/policyengine_ledger/target_profiles/model.py @@ -0,0 +1,299 @@ +"""Ledger-owned target profiles and measurement contracts. + +Target profiles describe which source-backed Ledger facts a calibration build +may select and how a model should measure the matching quantity on microdata. +They do not contain target values. Values come from Ledger fact rows selected by +the profile's selectors. +""" + +from __future__ import annotations + +import json +from collections.abc import Mapping +from dataclasses import dataclass +from importlib.resources import files +from typing import Any + +TARGET_PROFILE_SCHEMA_VERSION = "policyengine_ledger.target_profile.v1" + + +@dataclass(frozen=True) +class TargetProfileBinding: + """Backend-specific executable binding for one measurement contract.""" + + backend: str + metric_name: str + payload: Mapping[str, Any] + + +@dataclass(frozen=True) +class TargetProfileTarget: + """One profile target family and its microdata measurement contract.""" + + target_id: str + family: str + geography_levels: tuple[str, ...] 
+ ledger_selector: Mapping[str, Any] + measurement: Mapping[str, Any] + bindings: Mapping[str, TargetProfileBinding] + tolerance: float | None = None + + def binding(self, backend: str) -> TargetProfileBinding: + """Return the binding for ``backend`` or raise a useful error.""" + try: + return self.bindings[backend] + except KeyError: + raise KeyError( + f"Target profile row {self.target_id!r} has no {backend!r} binding." + ) from None + + +@dataclass(frozen=True) +class TargetProfile: + """A Ledger-owned target profile consumed by Populace or other solvers.""" + + profile_id: str + country: str + label: str + base_period_policy: str + default_operation: str + targets: tuple[TargetProfileTarget, ...] + + def targets_for_geography( + self, + geography_level: str, + ) -> tuple[TargetProfileTarget, ...]: + """Return profile rows active for a geography level.""" + return tuple( + target + for target in self.targets + if geography_level in target.geography_levels + ) + + +def load_target_profile(profile_id: str) -> TargetProfile: + """Load a packaged Ledger target profile by ID (JSON decoded as UTF-8).""" + if not profile_id or "/" in profile_id or "\\" in profile_id: + raise ValueError(f"Invalid target profile id {profile_id!r}.") + path = files(__package__).joinpath(f"{profile_id}.json") + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except FileNotFoundError as exc: + raise FileNotFoundError(f"No packaged target profile {profile_id!r}.") from exc + return target_profile_from_mapping(payload) + + +def target_profile_from_mapping(raw: Mapping[str, Any]) -> TargetProfile: + """Validate and parse a JSON-like target profile mapping.""" + schema_version = raw.get("schema_version") + if schema_version != TARGET_PROFILE_SCHEMA_VERSION: + raise ValueError( + "target profile schema_version must be " + f"{TARGET_PROFILE_SCHEMA_VERSION!r}, got {schema_version!r}." 
+ ) + _reject_forbidden_value_keys(raw, context="target profile") + profile_id = _required_string(raw, "profile_id") + country = _required_string(raw, "country") + label = _required_string(raw, "label") + defaults = _required_mapping(raw, "defaults") + base_period_policy = _required_string(defaults, "base_period_policy") + default_operation = _required_string(defaults, "operation") + if default_operation != "sum": + raise ValueError( + f"target profile {profile_id!r} must use operation 'sum', " + f"got {default_operation!r}." + ) + targets = tuple( + _target_from_mapping(target) + for target in _required_mapping_sequence(raw, "targets") + ) + if not targets: + raise ValueError(f"target profile {profile_id!r} must declare targets.") + duplicate_ids = sorted( + target_id + for target_id in {target.target_id for target in targets} + if sum(target.target_id == target_id for target in targets) > 1 + ) + if duplicate_ids: + raise ValueError( + f"target profile {profile_id!r} has duplicate target_id(s): " + f"{duplicate_ids}." 
+ ) + return TargetProfile( + profile_id=profile_id, + country=country, + label=label, + base_period_policy=base_period_policy, + default_operation=default_operation, + targets=targets, + ) + + +def _target_from_mapping(raw: Mapping[str, Any]) -> TargetProfileTarget: + _reject_forbidden_value_keys(raw, context="target profile row") + target_id = _required_string(raw, "target_id") + family = _required_string(raw, "family") + geography_levels = tuple(_required_string_sequence(raw, "geography_levels")) + if not geography_levels: + raise ValueError(f"target profile row {target_id!r} needs geography_levels.") + ledger_selector = _required_mapping(raw, "ledger_selector") + measurement = _required_mapping(raw, "measurement") + _reject_forbidden_contract_keys( + ledger_selector, + context=f"target profile row {target_id!r} ledger_selector", + ) + _reject_forbidden_contract_keys( + measurement, + context=f"target profile row {target_id!r} measurement", + ) + bindings_payload = _required_mapping(raw, "bindings") + bindings = { + backend: _binding_from_mapping( + backend, + payload, + target_id=target_id, + ) + for backend, payload in bindings_payload.items() + } + if not bindings: + raise ValueError(f"target profile row {target_id!r} needs bindings.") + tolerance = raw.get("tolerance") + if tolerance is not None: + if not isinstance(tolerance, int | float) or isinstance(tolerance, bool): + raise ValueError(f"target profile row {target_id!r}: invalid tolerance.") + tolerance = float(tolerance) + return TargetProfileTarget( + target_id=target_id, + family=family, + geography_levels=geography_levels, + ledger_selector=ledger_selector, + measurement=measurement, + bindings=bindings, + tolerance=tolerance, + ) + + +def _binding_from_mapping( + backend: str, + raw: Any, + *, + target_id: str, +) -> TargetProfileBinding: + if not isinstance(backend, str) or not backend: + raise ValueError(f"target profile row {target_id!r}: bad binding backend.") + if not isinstance(raw, Mapping): 
+ raise ValueError( + f"target profile row {target_id!r}: binding {backend!r} must be an object." + ) + _reject_forbidden_value_keys(raw, context=f"{backend} binding") + _reject_forbidden_contract_keys( + raw, + context=f"target profile row {target_id!r} {backend} binding", + ) + metric_name = _required_string(raw, "metric_name") + return TargetProfileBinding( + backend=backend, + metric_name=metric_name, + payload=dict(raw), + ) + + +def _reject_forbidden_value_keys(raw: Mapping[str, Any], *, context: str) -> None: + forbidden = {"aggregation", "operation", "registry", "target_value", "value"} + present = sorted(key for key in forbidden if key in raw) + if present: + raise ValueError( + f"{context} must not declare {present}; Ledger profiles use implicit " + "Ledger source selection and sum-only measurement, with values " + "coming from Ledger facts." + ) + + +def _reject_forbidden_contract_keys(value: Any, *, context: str) -> None: + """Reject target-value or registry controls nested in contract payloads. + + Filter thresholds such as ``{"operator": ">", "value": 0}`` are valid + measurement predicates, so this recursive guard allows ``value`` only in + recognized filter predicate objects. Other ``value`` keys are rejected so + target amounts cannot hide inside selectors or measurement contracts. + """ + + if isinstance(value, Mapping): + forbidden = {"aggregation", "operation", "registry", "target_value"} + if not _is_filter_predicate(value): + forbidden = forbidden | {"value"} + present = sorted(key for key in forbidden if key in value) + if present: + raise ValueError( + f"{context} must not declare {present}; Ledger target profiles " + "use implicit source selection and sum-only measurement, with " + "values coming from Ledger facts." 
+ ) + for key, item in value.items(): + _reject_forbidden_contract_keys(item, context=f"{context}.{key}") + elif isinstance(value, list | tuple): + for index, item in enumerate(value): + _reject_forbidden_contract_keys(item, context=f"{context}[{index}]") + + +def _is_filter_predicate(value: Mapping[str, Any]) -> bool: + return ( + "value" in value + and "operator" in value + and ("concept" in value or "variable" in value) + ) + + +def _required_string(raw: Mapping[str, Any], key: str) -> str: + value = raw.get(key) + if not isinstance(value, str) or not value: + raise ValueError(f"target profile field {key!r} must be a non-empty string.") + return value + + +def _required_mapping(raw: Mapping[str, Any], key: str) -> Mapping[str, Any]: + value = raw.get(key) + if not isinstance(value, Mapping): + raise ValueError(f"target profile field {key!r} must be an object.") + return value + + +def _required_mapping_sequence( + raw: Mapping[str, Any], + key: str, +) -> tuple[Mapping[str, Any], ...]: + value = raw.get(key) + if not isinstance(value, list | tuple): + raise ValueError(f"target profile field {key!r} must be a list.") + rows: list[Mapping[str, Any]] = [] + for index, row in enumerate(value): + if not isinstance(row, Mapping): + raise ValueError( + f"target profile field {key!r} row {index} must be an object." + ) + rows.append(row) + return tuple(rows) + + +def _required_string_sequence(raw: Mapping[str, Any], key: str) -> tuple[str, ...]: + value = raw.get(key) + if not isinstance(value, list | tuple): + raise ValueError(f"target profile field {key!r} must be a list.") + strings: list[str] = [] + for index, item in enumerate(value): + if not isinstance(item, str) or not item: + raise ValueError( + f"target profile field {key!r} item {index} must be a non-empty string." 
+ ) + strings.append(item) + return tuple(strings) + + +__all__ = [ + "TARGET_PROFILE_SCHEMA_VERSION", + "TargetProfile", + "TargetProfileBinding", + "TargetProfileTarget", + "load_target_profile", + "target_profile_from_mapping", +] diff --git a/policyengine_ledger/target_profiles/uk_local_geography.json b/policyengine_ledger/target_profiles/uk_local_geography.json new file mode 100644 index 0000000..42065c1 --- /dev/null +++ b/policyengine_ledger/target_profiles/uk_local_geography.json @@ -0,0 +1,346 @@ +{ + "schema_version": "policyengine_ledger.target_profile.v1", + "profile_id": "uk_local_geography", + "country": "uk", + "label": "UK local geography calibration", + "defaults": { + "base_period_policy": "latest_not_after_build_base_period", + "operation": "sum" + }, + "targets": [ + { + "target_id": "hmrc.self_employment_income.amount", + "family": "hmrc", + "geography_levels": ["constituency", "local_authority"], + "ledger_selector": { + "source_name": "hmrc", + "source_measure_id": "self_employment_income_amount" + }, + "measurement": { + "entity": "person", + "map_to": "household", + "concept": "uk.income.self_employment.amount", + "filters": [{"concept": "uk.tax.income_tax", "operator": ">", "value": 0}] + }, + "bindings": { + "policyengine": { + "metric_name": "hmrc/self_employment_income/amount", + "value_variable": "self_employment_income", + "from_entity": "person", + "map_to": "household", + "filters": [{"variable": "income_tax", "operator": ">", "value": 0}] + }, + "axiom": { + "metric_name": "hmrc/self_employment_income/amount", + "status": "pending", + "value_rule": "uk.income.self_employment.amount" + } + } + }, + { + "target_id": "hmrc.self_employment_income.count", + "family": "hmrc", + "geography_levels": ["constituency", "local_authority"], + "ledger_selector": { + "source_name": "hmrc", + "source_measure_id": "self_employment_income_count" + }, + "measurement": { + "entity": "person", + "map_to": "household", + "concept": "uk.person.count", + 
"filters": [ + {"concept": "uk.income.self_employment.amount", "operator": "!=", "value": 0}, + {"concept": "uk.tax.income_tax", "operator": ">", "value": 0} + ] + }, + "bindings": { + "policyengine": { + "metric_name": "hmrc/self_employment_income/count", + "value_variable": "person_count", + "from_entity": "person", + "map_to": "household", + "filters": [ + {"variable": "self_employment_income", "operator": "!=", "value": 0}, + {"variable": "income_tax", "operator": ">", "value": 0} + ] + }, + "axiom": { + "metric_name": "hmrc/self_employment_income/count", + "status": "pending", + "value_rule": "uk.person.count" + } + } + }, + { + "target_id": "hmrc.employment_income.amount", + "family": "hmrc", + "geography_levels": ["constituency", "local_authority"], + "ledger_selector": { + "source_name": "hmrc", + "source_measure_id": "employment_income_amount" + }, + "measurement": { + "entity": "person", + "map_to": "household", + "concept": "uk.income.employment.amount", + "filters": [{"concept": "uk.tax.income_tax", "operator": ">", "value": 0}] + }, + "bindings": { + "policyengine": { + "metric_name": "hmrc/employment_income/amount", + "value_variable": "employment_income", + "from_entity": "person", + "map_to": "household", + "filters": [{"variable": "income_tax", "operator": ">", "value": 0}] + }, + "axiom": { + "metric_name": "hmrc/employment_income/amount", + "status": "pending", + "value_rule": "uk.income.employment.amount" + } + } + }, + { + "target_id": "hmrc.employment_income.count", + "family": "hmrc", + "geography_levels": ["constituency", "local_authority"], + "ledger_selector": { + "source_name": "hmrc", + "source_measure_id": "employment_income_count" + }, + "measurement": { + "entity": "person", + "map_to": "household", + "concept": "uk.person.count", + "filters": [ + {"concept": "uk.income.employment.amount", "operator": "!=", "value": 0}, + {"concept": "uk.tax.income_tax", "operator": ">", "value": 0} + ] + }, + "bindings": { + "policyengine": { + 
"metric_name": "hmrc/employment_income/count", + "value_variable": "person_count", + "from_entity": "person", + "map_to": "household", + "filters": [ + {"variable": "employment_income", "operator": "!=", "value": 0}, + {"variable": "income_tax", "operator": ">", "value": 0} + ] + }, + "axiom": { + "metric_name": "hmrc/employment_income/count", + "status": "pending", + "value_rule": "uk.person.count" + } + } + }, + { + "target_id": "ons.age.0_10", + "family": "ons_population", + "geography_levels": ["constituency", "local_authority"], + "ledger_selector": {"source_name": "ons", "source_measure_id": "population"}, + "measurement": { + "entity": "person", + "map_to": "household", + "concept": "uk.person.count", + "filters": [{"concept": "uk.demographics.age", "lower": 0, "upper": 10}] + }, + "bindings": { + "policyengine": { + "metric_name": "age/0_10", + "value_variable": "person_count", + "from_entity": "person", + "map_to": "household", + "filters": [{"variable": "age", "lower": 0, "upper": 10}] + }, + "axiom": {"metric_name": "age/0_10", "status": "pending", "value_rule": "uk.person.count"} + } + }, + { + "target_id": "ons.age.10_20", + "family": "ons_population", + "geography_levels": ["constituency", "local_authority"], + "ledger_selector": {"source_name": "ons", "source_measure_id": "population"}, + "measurement": {"entity": "person", "map_to": "household", "concept": "uk.person.count", "filters": [{"concept": "uk.demographics.age", "lower": 10, "upper": 20}]}, + "bindings": { + "policyengine": {"metric_name": "age/10_20", "value_variable": "person_count", "from_entity": "person", "map_to": "household", "filters": [{"variable": "age", "lower": 10, "upper": 20}]}, + "axiom": {"metric_name": "age/10_20", "status": "pending", "value_rule": "uk.person.count"} + } + }, + { + "target_id": "ons.age.20_30", + "family": "ons_population", + "geography_levels": ["constituency", "local_authority"], + "ledger_selector": {"source_name": "ons", "source_measure_id": 
"population"}, + "measurement": {"entity": "person", "map_to": "household", "concept": "uk.person.count", "filters": [{"concept": "uk.demographics.age", "lower": 20, "upper": 30}]}, + "bindings": { + "policyengine": {"metric_name": "age/20_30", "value_variable": "person_count", "from_entity": "person", "map_to": "household", "filters": [{"variable": "age", "lower": 20, "upper": 30}]}, + "axiom": {"metric_name": "age/20_30", "status": "pending", "value_rule": "uk.person.count"} + } + }, + { + "target_id": "ons.age.30_40", + "family": "ons_population", + "geography_levels": ["constituency", "local_authority"], + "ledger_selector": {"source_name": "ons", "source_measure_id": "population"}, + "measurement": {"entity": "person", "map_to": "household", "concept": "uk.person.count", "filters": [{"concept": "uk.demographics.age", "lower": 30, "upper": 40}]}, + "bindings": { + "policyengine": {"metric_name": "age/30_40", "value_variable": "person_count", "from_entity": "person", "map_to": "household", "filters": [{"variable": "age", "lower": 30, "upper": 40}]}, + "axiom": {"metric_name": "age/30_40", "status": "pending", "value_rule": "uk.person.count"} + } + }, + { + "target_id": "ons.age.40_50", + "family": "ons_population", + "geography_levels": ["constituency", "local_authority"], + "ledger_selector": {"source_name": "ons", "source_measure_id": "population"}, + "measurement": {"entity": "person", "map_to": "household", "concept": "uk.person.count", "filters": [{"concept": "uk.demographics.age", "lower": 40, "upper": 50}]}, + "bindings": { + "policyengine": {"metric_name": "age/40_50", "value_variable": "person_count", "from_entity": "person", "map_to": "household", "filters": [{"variable": "age", "lower": 40, "upper": 50}]}, + "axiom": {"metric_name": "age/40_50", "status": "pending", "value_rule": "uk.person.count"} + } + }, + { + "target_id": "ons.age.50_60", + "family": "ons_population", + "geography_levels": ["constituency", "local_authority"], + "ledger_selector": 
{"source_name": "ons", "source_measure_id": "population"}, + "measurement": {"entity": "person", "map_to": "household", "concept": "uk.person.count", "filters": [{"concept": "uk.demographics.age", "lower": 50, "upper": 60}]}, + "bindings": { + "policyengine": {"metric_name": "age/50_60", "value_variable": "person_count", "from_entity": "person", "map_to": "household", "filters": [{"variable": "age", "lower": 50, "upper": 60}]}, + "axiom": {"metric_name": "age/50_60", "status": "pending", "value_rule": "uk.person.count"} + } + }, + { + "target_id": "ons.age.60_70", + "family": "ons_population", + "geography_levels": ["constituency", "local_authority"], + "ledger_selector": {"source_name": "ons", "source_measure_id": "population"}, + "measurement": {"entity": "person", "map_to": "household", "concept": "uk.person.count", "filters": [{"concept": "uk.demographics.age", "lower": 60, "upper": 70}]}, + "bindings": { + "policyengine": {"metric_name": "age/60_70", "value_variable": "person_count", "from_entity": "person", "map_to": "household", "filters": [{"variable": "age", "lower": 60, "upper": 70}]}, + "axiom": {"metric_name": "age/60_70", "status": "pending", "value_rule": "uk.person.count"} + } + }, + { + "target_id": "ons.age.70_80", + "family": "ons_population", + "geography_levels": ["constituency", "local_authority"], + "ledger_selector": {"source_name": "ons", "source_measure_id": "population"}, + "measurement": {"entity": "person", "map_to": "household", "concept": "uk.person.count", "filters": [{"concept": "uk.demographics.age", "lower": 70, "upper": 80}]}, + "bindings": { + "policyengine": {"metric_name": "age/70_80", "value_variable": "person_count", "from_entity": "person", "map_to": "household", "filters": [{"variable": "age", "lower": 70, "upper": 80}]}, + "axiom": {"metric_name": "age/70_80", "status": "pending", "value_rule": "uk.person.count"} + } + }, + { + "target_id": "dwp.universal_credit.households", + "family": "dwp_universal_credit", + 
"geography_levels": ["constituency", "local_authority"], + "ledger_selector": {"source_name": "dwp", "source_measure_id": "universal_credit_households"}, + "measurement": {"entity": "benunit", "map_to": "household", "concept": "uk.benefit_unit.count", "filters": [{"concept": "uk.benefits.universal_credit.amount", "operator": ">", "value": 0}]}, + "bindings": { + "policyengine": {"metric_name": "uc_households", "value_variable": "benunit_count", "from_entity": "benunit", "map_to": "household", "filters": [{"variable": "universal_credit", "operator": ">", "value": 0}]}, + "axiom": {"metric_name": "uc_households", "status": "pending", "value_rule": "uk.benefit_unit.count"} + } + }, + { + "target_id": "dwp.universal_credit.households.0_children", + "family": "dwp_universal_credit", + "geography_levels": ["constituency"], + "ledger_selector": {"source_name": "dwp", "source_measure_id": "universal_credit_households_0_children"}, + "measurement": {"entity": "household", "concept": "uk.household.count", "filters": [{"concept": "uk.benefits.universal_credit.household_receives", "equals": true}, {"concept": "uk.household.children", "equals": 0}]}, + "bindings": {"policyengine": {"metric_name": "uc_hh_0_children", "value_variable": "household_count"}, "axiom": {"metric_name": "uc_hh_0_children", "status": "pending", "value_rule": "uk.household.count"}} + }, + { + "target_id": "dwp.universal_credit.households.1_child", + "family": "dwp_universal_credit", + "geography_levels": ["constituency"], + "ledger_selector": {"source_name": "dwp", "source_measure_id": "universal_credit_households_1_child"}, + "measurement": {"entity": "household", "concept": "uk.household.count", "filters": [{"concept": "uk.benefits.universal_credit.household_receives", "equals": true}, {"concept": "uk.household.children", "equals": 1}]}, + "bindings": {"policyengine": {"metric_name": "uc_hh_1_child", "value_variable": "household_count"}, "axiom": {"metric_name": "uc_hh_1_child", "status": "pending", 
"value_rule": "uk.household.count"}} + }, + { + "target_id": "dwp.universal_credit.households.2_children", + "family": "dwp_universal_credit", + "geography_levels": ["constituency"], + "ledger_selector": {"source_name": "dwp", "source_measure_id": "universal_credit_households_2_children"}, + "measurement": {"entity": "household", "concept": "uk.household.count", "filters": [{"concept": "uk.benefits.universal_credit.household_receives", "equals": true}, {"concept": "uk.household.children", "equals": 2}]}, + "bindings": {"policyengine": {"metric_name": "uc_hh_2_children", "value_variable": "household_count"}, "axiom": {"metric_name": "uc_hh_2_children", "status": "pending", "value_rule": "uk.household.count"}} + }, + { + "target_id": "dwp.universal_credit.households.3plus_children", + "family": "dwp_universal_credit", + "geography_levels": ["constituency"], + "ledger_selector": {"source_name": "dwp", "source_measure_id": "universal_credit_households_3plus_children"}, + "measurement": {"entity": "household", "concept": "uk.household.count", "filters": [{"concept": "uk.benefits.universal_credit.household_receives", "equals": true}, {"concept": "uk.household.children", "lower": 3}]}, + "bindings": {"policyengine": {"metric_name": "uc_hh_3plus_children", "value_variable": "household_count"}, "axiom": {"metric_name": "uc_hh_3plus_children", "status": "pending", "value_rule": "uk.household.count"}} + }, + { + "target_id": "ons.equiv_net_income_bhc", + "family": "ons_income", + "geography_levels": ["local_authority"], + "ledger_selector": {"source_name": "ons", "source_measure_id": "equivalised_net_income_before_housing_costs"}, + "measurement": {"entity": "household", "concept": "uk.household.equivalised_net_income_bhc"}, + "bindings": {"policyengine": {"metric_name": "ons/equiv_net_income_bhc", "value_variable": "equiv_hbai_household_net_income"}, "axiom": {"metric_name": "ons/equiv_net_income_bhc", "status": "pending", "value_rule": 
"uk.household.equivalised_net_income_bhc"}} + }, + { + "target_id": "ons.equiv_net_income_ahc", + "family": "ons_income", + "geography_levels": ["local_authority"], + "ledger_selector": {"source_name": "ons", "source_measure_id": "equivalised_net_income_after_housing_costs"}, + "measurement": {"entity": "household", "concept": "uk.household.equivalised_net_income_ahc"}, + "bindings": {"policyengine": {"metric_name": "ons/equiv_net_income_ahc", "value_variable": "equiv_hbai_household_net_income_ahc"}, "axiom": {"metric_name": "ons/equiv_net_income_ahc", "status": "pending", "value_rule": "uk.household.equivalised_net_income_ahc"}} + }, + { + "target_id": "ons.equiv_housing_costs", + "family": "ons_income", + "geography_levels": ["local_authority"], + "ledger_selector": {"source_name": "ons", "source_measure_id": "equivalised_housing_costs"}, + "measurement": {"entity": "household", "concept": "uk.household.equivalised_housing_costs"}, + "bindings": {"policyengine": {"metric_name": "ons/equiv_housing_costs", "value_expression": "equiv_hbai_household_net_income - equiv_hbai_household_net_income_ahc"}, "axiom": {"metric_name": "ons/equiv_housing_costs", "status": "pending", "value_rule": "uk.household.equivalised_housing_costs"}} + }, + { + "target_id": "ons.tenure.owned_outright", + "family": "ons_housing", + "geography_levels": ["local_authority"], + "ledger_selector": {"source_name": "ons", "source_measure_id": "owned_outright_households"}, + "measurement": {"entity": "household", "concept": "uk.household.count", "filters": [{"concept": "uk.household.tenure", "equals": "owned_outright"}]}, + "bindings": {"policyengine": {"metric_name": "tenure/owned_outright", "value_variable": "household_count", "filters": [{"variable": "tenure_type", "equals": "OWNED_OUTRIGHT"}]}, "axiom": {"metric_name": "tenure/owned_outright", "status": "pending", "value_rule": "uk.household.count"}} + }, + { + "target_id": "ons.tenure.owned_mortgage", + "family": "ons_housing", + 
"geography_levels": ["local_authority"], + "ledger_selector": {"source_name": "ons", "source_measure_id": "owned_with_mortgage_households"}, + "measurement": {"entity": "household", "concept": "uk.household.count", "filters": [{"concept": "uk.household.tenure", "equals": "owned_with_mortgage"}]}, + "bindings": {"policyengine": {"metric_name": "tenure/owned_mortgage", "value_variable": "household_count", "filters": [{"variable": "tenure_type", "equals": "OWNED_WITH_MORTGAGE"}]}, "axiom": {"metric_name": "tenure/owned_mortgage", "status": "pending", "value_rule": "uk.household.count"}} + }, + { + "target_id": "ons.tenure.private_rent", + "family": "ons_housing", + "geography_levels": ["local_authority"], + "ledger_selector": {"source_name": "ons", "source_measure_id": "private_rent_households"}, + "measurement": {"entity": "household", "concept": "uk.household.count", "filters": [{"concept": "uk.household.tenure", "equals": "private_rent"}]}, + "bindings": {"policyengine": {"metric_name": "tenure/private_rent", "value_variable": "household_count", "filters": [{"variable": "tenure_type", "equals": "RENT_PRIVATELY"}]}, "axiom": {"metric_name": "tenure/private_rent", "status": "pending", "value_rule": "uk.household.count"}} + }, + { + "target_id": "ons.tenure.social_rent", + "family": "ons_housing", + "geography_levels": ["local_authority"], + "ledger_selector": {"source_name": "ons", "source_measure_id": "social_rent_households"}, + "measurement": {"entity": "household", "concept": "uk.household.count", "filters": [{"concept": "uk.household.tenure", "in": ["rent_from_council", "rent_from_housing_association"]}]}, + "bindings": {"policyengine": {"metric_name": "tenure/social_rent", "value_variable": "household_count", "filters": [{"variable": "tenure_type", "in": ["RENT_FROM_COUNCIL", "RENT_FROM_HA"]}]}, "axiom": {"metric_name": "tenure/social_rent", "status": "pending", "value_rule": "uk.household.count"}} + }, + { + "target_id": "ons.rent.private_rent", + "family": 
"ons_housing", + "geography_levels": ["local_authority"], + "ledger_selector": {"source_name": "ons", "source_measure_id": "private_rent"}, + "measurement": {"entity": "benunit", "map_to": "household", "concept": "uk.housing.private_rent.amount"}, + "bindings": {"policyengine": {"metric_name": "rent/private_rent", "value_variable": "benunit_rent", "from_entity": "benunit", "map_to": "household", "filters": [{"variable": "tenure_type", "equals": "RENT_PRIVATELY"}]}, "axiom": {"metric_name": "rent/private_rent", "status": "pending", "value_rule": "uk.housing.private_rent.amount"}} + } + ] +} diff --git a/policyengine_ledger/targets/__init__.py b/policyengine_ledger/targets/__init__.py index acd3795..c47c726 100644 --- a/policyengine_ledger/targets/__init__.py +++ b/policyengine_ledger/targets/__init__.py @@ -1,8 +1,8 @@ """Ledger target-input helpers. -Ledger owns source-backed facts and target-eligible source inputs. Consumers -such as Populace decide which subset is active and how those facts map to model -variables. +Ledger owns source-backed facts, target-eligible source inputs, and target +profiles. Consumers such as Populace decide which profile rows their support +universe can activate and how to execute calibration. 
""" from arch.targets import * # noqa: F403 diff --git a/tests/test_policyengine_ledger_target_profiles.py b/tests/test_policyengine_ledger_target_profiles.py new file mode 100644 index 0000000..5fa50f4 --- /dev/null +++ b/tests/test_policyengine_ledger_target_profiles.py @@ -0,0 +1,177 @@ +from __future__ import annotations + +import pytest + +from policyengine_ledger.target_profiles import ( + TARGET_PROFILE_SCHEMA_VERSION, + load_target_profile, + target_profile_from_mapping, +) + + +def test__given_uk_local_profile__then_it_declares_measurement_contracts() -> None: + # When + profile = load_target_profile("uk_local_geography") + + # Then + assert profile.country == "uk" + assert profile.default_operation == "sum" + assert profile.base_period_policy == "latest_not_after_build_base_period" + + constituency_metrics = [ + target.binding("policyengine").metric_name + for target in profile.targets_for_geography("constituency") + ] + assert constituency_metrics[:4] == [ + "hmrc/self_employment_income/amount", + "hmrc/self_employment_income/count", + "hmrc/employment_income/amount", + "hmrc/employment_income/count", + ] + assert "uc_hh_3plus_children" in constituency_metrics + assert "rent/private_rent" not in constituency_metrics + + local_authority_metrics = [ + target.binding("policyengine").metric_name + for target in profile.targets_for_geography("local_authority") + ] + assert "uc_households" in local_authority_metrics + assert "ons/equiv_net_income_bhc" in local_authority_metrics + assert "rent/private_rent" in local_authority_metrics + assert "uc_hh_0_children" not in local_authority_metrics + + +def test__given_count_like_profile_rows__then_they_are_still_sum_measurements() -> None: + # When + profile = load_target_profile("uk_local_geography") + employment_count = next( + target + for target in profile.targets + if target.target_id == "hmrc.employment_income.count" + ) + + # Then + assert profile.default_operation == "sum" + assert 
employment_count.measurement["concept"] == "uk.person.count" + assert employment_count.binding("policyengine").payload["value_variable"] == ( + "person_count" + ) + + +@pytest.mark.parametrize("forbidden", ["registry", "aggregation", "target_value"]) +def test__given_forbidden_profile_option__then_profile_is_rejected( + forbidden: str, +) -> None: + # Given + payload = { + "schema_version": TARGET_PROFILE_SCHEMA_VERSION, + "profile_id": "bad", + "country": "uk", + "label": "Bad profile", + "defaults": { + "base_period_policy": "latest_not_after_build_base_period", + "operation": "sum", + }, + "targets": [ + { + "target_id": "bad.target", + "family": "bad", + "geography_levels": ["country"], + "ledger_selector": {"source_name": "bad"}, + "measurement": {"entity": "household", "concept": "bad"}, + "bindings": { + "policyengine": { + "metric_name": "bad", + forbidden: "not allowed", + } + }, + } + ], + } + + # When / Then + with pytest.raises(ValueError, match=forbidden): + target_profile_from_mapping(payload) + + +@pytest.mark.parametrize( + ("container", "forbidden"), + [ + ("ledger_selector", "value"), + ("ledger_selector", "target_value"), + ("measurement", "value"), + ("measurement", "aggregation"), + ("measurement", "registry"), + ], +) +def test__given_nested_forbidden_profile_option__then_profile_is_rejected( + container: str, + forbidden: str, +) -> None: + # Given + payload = _minimal_profile_payload() + payload["targets"][0][container][forbidden] = "not allowed" + + # When / Then + with pytest.raises(ValueError, match=forbidden): + target_profile_from_mapping(payload) + + +def test__given_filter_threshold_values__then_profile_is_allowed() -> None: + # Given + payload = _minimal_profile_payload() + payload["targets"][0]["measurement"]["filters"] = [ + {"concept": "uk.tax.income_tax", "operator": ">", "value": 0} + ] + + # When + profile = target_profile_from_mapping(payload) + + # Then + assert profile.targets[0].measurement["filters"][0]["value"] == 0 + + 
def test__given_non_sum_default_operation__then_profile_is_rejected() -> None:
    """Expect a ``ValueError`` when the profile's default operation is ``"count"``."""
    # Given
    non_sum_defaults = {
        "base_period_policy": "latest_not_after_build_base_period",
        "operation": "count",
    }
    profile_mapping = {
        "schema_version": TARGET_PROFILE_SCHEMA_VERSION,
        "profile_id": "bad",
        "country": "uk",
        "label": "Bad profile",
        "defaults": non_sum_defaults,
        "targets": [],
    }

    # When / Then
    # The error text must mention the required operation 'sum'.
    with pytest.raises(ValueError, match="operation 'sum'"):
        target_profile_from_mapping(profile_mapping)


def _minimal_profile_payload() -> dict[str, object]:
    """Return a newly built profile mapping that declares a single target.

    All nested containers are created inside this function, so each call
    hands back independent objects that a test can mutate freely.
    """
    policyengine_binding = {
        "metric_name": "test",
    }
    only_target = {
        "target_id": "test.target",
        "family": "test",
        "geography_levels": ["country"],
        "ledger_selector": {"source_name": "test"},
        "measurement": {"entity": "household", "concept": "test"},
        "bindings": {
            "policyengine": policyengine_binding,
        },
    }
    profile_defaults = {
        "base_period_policy": "latest_not_after_build_base_period",
        "operation": "sum",
    }
    return {
        "schema_version": TARGET_PROFILE_SCHEMA_VERSION,
        "profile_id": "test_profile",
        "country": "uk",
        "label": "Test profile",
        "defaults": profile_defaults,
        "targets": [only_target],
    }