diff --git a/arch/core.py b/arch/core.py index b0684e5..2a86a05 100644 --- a/arch/core.py +++ b/arch/core.py @@ -43,6 +43,7 @@ "government", "dwelling", "institutional_sector", + "firm", } ALLOWED_AGGREGATIONS = { "sum", diff --git a/arch/source_package.py b/arch/source_package.py index 62cef55..b41c5bd 100644 --- a/arch/source_package.py +++ b/arch/source_package.py @@ -157,9 +157,13 @@ "ssa-ssi-table-7b1-2024": Path("ssa/ssi_table_7b1_2024"), "hhs-acf-tanf-caseload-2024": Path("hhs_acf/tanf_caseload_2024"), "hhs-acf-tanf-financial-2024": Path("hhs_acf/tanf_financial_2024"), + "hmrc-vat-firm-targets-2024-25": Path("hmrc/vat_firm_targets_2024_25"), "kff-marketplace-effectuated-enrollment": Path( "kff/marketplace_effectuated_enrollment" ), + "ons-uk-business-firm-targets-2025": Path( + "ons/uk_business_firm_targets_2025" + ), "usda-snap-fy69-to-current": Path("usda_snap/fy69_to_current"), } SOURCE_ARTIFACT_CACHE_ENV = "ARCH_SOURCE_ARTIFACT_CACHE_DIR" diff --git a/arch/sources/cells.py b/arch/sources/cells.py index 2d8c1f9..ff00677 100644 --- a/arch/sources/cells.py +++ b/arch/sources/cells.py @@ -935,6 +935,8 @@ def _delimited_scalar(value: str) -> Scalar: stripped = value.strip() if not stripped: return None + if "_" in stripped: + return stripped numeric = stripped.replace("$", "").replace(",", "") if numeric.lstrip("-").isdigit(): return int(numeric) diff --git a/arch/sources/rows.py b/arch/sources/rows.py index 65563ef..9891c3d 100644 --- a/arch/sources/rows.py +++ b/arch/sources/rows.py @@ -906,6 +906,8 @@ def _delimited_scalar(value: str) -> Scalar: stripped = value.strip() if not stripped: return None + if "_" in stripped: + return stripped numeric = stripped.replace("$", "").replace(",", "") if numeric.lstrip("-").isdigit(): return int(numeric) diff --git a/db/data/hmrc/vat_firm_targets_2024_25/hmrc_vat_firm_targets_2024_25.csv b/db/data/hmrc/vat_firm_targets_2024_25/hmrc_vat_firm_targets_2024_25.csv new file mode 100644 index 0000000..c881a2e --- /dev/null +++ b/db/data/hmrc/vat_firm_targets_2024_25/hmrc_vat_firm_targets_2024_25.csv @@ -0,0 +1,18 @@ +target_group,band_id,band_label,value +vat_registered_count_by_turnover,negative_or_zero,"Negative or zero annual turnover",211400 +vat_registered_count_by_turnover,1_to_threshold,"GBP 1 to VAT registration threshold annual turnover",683700 +vat_registered_count_by_turnover,threshold_to_150k,"VAT registration threshold to GBP 150,000 annual turnover",280400 +vat_registered_count_by_turnover,150k_to_300k,"GBP 150,000 to GBP 300,000 annual turnover",338600 +vat_registered_count_by_turnover,300k_to_500k,"GBP 300,000 to GBP 500,000 annual turnover",186700 +vat_registered_count_by_turnover,500k_to_1m,"GBP 500,000 to GBP 1,000,000 annual turnover",184100 +vat_registered_count_by_turnover,1m_to_10m,"GBP 1,000,000 to GBP 10,000,000 annual turnover",240500 +vat_registered_count_by_turnover,greater_than_10m,"Greater than GBP 10,000,000 annual turnover",45800 +vat_registered_count_by_turnover,unknown,"Unknown annual turnover",159300 +vat_liability_by_turnover,negative_or_zero,"Negative or zero annual turnover",-2910 +vat_liability_by_turnover,1_to_threshold,"GBP 1 to VAT registration threshold annual turnover",-300 +vat_liability_by_turnover,threshold_to_150k,"VAT registration threshold to GBP 150,000 annual turnover",2420 +vat_liability_by_turnover,150k_to_300k,"GBP 150,000 to GBP 300,000 annual turnover",4830 +vat_liability_by_turnover,300k_to_500k,"GBP 300,000 to GBP 500,000 annual turnover",3970 +vat_liability_by_turnover,500k_to_1m,"GBP 500,000 to GBP 1,000,000 annual turnover",6840 +vat_liability_by_turnover,1m_to_10m,"GBP 1,000,000 to GBP 10,000,000 annual turnover",29520 +vat_liability_by_turnover,greater_than_10m,"Greater than GBP 10,000,000 annual turnover",132800 diff --git a/db/data/hmrc/vat_firm_targets_2024_25/manifest.yaml b/db/data/hmrc/vat_firm_targets_2024_25/manifest.yaml new file mode 100644 index 0000000..188728e --- /dev/null +++ b/db/data/hmrc/vat_firm_targets_2024_25/manifest.yaml @@ -0,0 +1,23 @@ +source_id: hmrc-vat-firm-targets-2024-25 +source_name: HMRC Annual UK VAT Statistics 2024 to 2025 +publisher: HM Revenue and Customs +source_page: https://www.gov.uk/government/statistics/value-added-tax-vat-annual-statistics +files: + 2024: + filename: hmrc_vat_firm_targets_2024_25.csv + source_url: https://www.gov.uk/government/statistics/value-added-tax-vat-annual-statistics + source_table: Annual UK VAT Statistics 2024 to 2025 VAT trader population and net VAT liability by turnover band + sha256: 8bea6fdc9a5e397950fbc5bc58b6f26586e02ff76622b21b949592520a7d6d79 + size_bytes: 1675 + storage: + r2: + provider: r2 + bucket: arch-raw + key: raw/hmrc/hmrc-vat-firm-targets-2024-25/2024/8bea6fdc9a5e397950fbc5bc58b6f26586e02ff76622b21b949592520a7d6d79/hmrc_vat_firm_targets_2024_25.csv + uri: r2://arch-raw/raw/hmrc/hmrc-vat-firm-targets-2024-25/2024/8bea6fdc9a5e397950fbc5bc58b6f26586e02ff76622b21b949592520a7d6d79/hmrc_vat_firm_targets_2024_25.csv + source_urls: + - https://www.gov.uk/government/statistics/value-added-tax-vat-annual-statistics + notes: >- + Curated national extract from HMRC Annual UK VAT Statistics 2024 to 2025. + Net VAT liability rows retain source values in GBP millions and are scaled + to GBP by the Ledger source package. diff --git a/db/data/ons/uk_business_firm_targets_2025/manifest.yaml b/db/data/ons/uk_business_firm_targets_2025/manifest.yaml new file mode 100644 index 0000000..0c87806 --- /dev/null +++ b/db/data/ons/uk_business_firm_targets_2025/manifest.yaml @@ -0,0 +1,22 @@ +source_id: ons-uk-business-firm-targets-2025 +source_name: ONS UK Business, Activity, Size and Location 2025 +publisher: Office for National Statistics +source_page: https://www.ons.gov.uk/businessindustryandtrade/business/activitysizeandlocation/datasets/ukbusinessactivitysizeandlocation +files: + 2025: + filename: ons_uk_business_firm_targets_2025.csv + source_url: https://www.ons.gov.uk/businessindustryandtrade/business/activitysizeandlocation/datasets/ukbusinessactivitysizeandlocation + source_table: UK Business, Activity, Size and Location 2025 enterprise turnover and employment size bands + sha256: 929717d7cd58dbd615b8383462c3c9da961663f8c50a16a666c58a2d12df0cf3 + size_bytes: 900 + storage: + r2: + provider: r2 + bucket: arch-raw + key: raw/ons/ons-uk-business-firm-targets-2025/2025/929717d7cd58dbd615b8383462c3c9da961663f8c50a16a666c58a2d12df0cf3/ons_uk_business_firm_targets_2025.csv + uri: r2://arch-raw/raw/ons/ons-uk-business-firm-targets-2025/2025/929717d7cd58dbd615b8383462c3c9da961663f8c50a16a666c58a2d12df0cf3/ons_uk_business_firm_targets_2025.csv + source_urls: + - https://www.ons.gov.uk/businessindustryandtrade/business/activitysizeandlocation/datasets/ukbusinessactivitysizeandlocation + notes: >- + Curated national extract from the ONS UK Business 2025 workbook. Rows + retain published enterprise counts by turnover and employment size band. diff --git a/db/data/ons/uk_business_firm_targets_2025/ons_uk_business_firm_targets_2025.csv b/db/data/ons/uk_business_firm_targets_2025/ons_uk_business_firm_targets_2025.csv new file mode 100644 index 0000000..23a7e6a --- /dev/null +++ b/db/data/ons/uk_business_firm_targets_2025/ons_uk_business_firm_targets_2025.csv @@ -0,0 +1,15 @@ +target_group,band_id,band_label,value +turnover_band,0_49k,"GBP 0 to GBP 49,999 annual turnover",387285 +turnover_band,50_99k,"GBP 50,000 to GBP 99,999 annual turnover",525225 +turnover_band,100_249k,"GBP 100,000 to GBP 249,999 annual turnover",873705 +turnover_band,250_499k,"GBP 250,000 to GBP 499,999 annual turnover",390055 +turnover_band,500_999k,"GBP 500,000 to GBP 999,999 annual turnover",244225 +turnover_band,1000_4999k,"GBP 1,000,000 to GBP 4,999,999 annual turnover",232795 +turnover_band,5000k_plus,"GBP 5,000,000 or more annual turnover",81325 +employment_band,0_4,"0 to 4 employees",2137200 +employment_band,5_9,"5 to 9 employees",300645 +employment_band,10_19,"10 to 19 employees",156590 +employment_band,20_49,"20 to 49 employees",84595 +employment_band,50_99,"50 to 99 employees",29335 +employment_band,100_249,"100 to 249 employees",14835 +employment_band,250_plus,"250 or more employees",11415 diff --git a/packages/hmrc/vat_firm_targets_2024_25/source_package.yaml b/packages/hmrc/vat_firm_targets_2024_25/source_package.yaml new file mode 100644 index 0000000..01528ad --- /dev/null +++ b/packages/hmrc/vat_firm_targets_2024_25/source_package.yaml @@ -0,0 +1,382 @@ +schema_version: arch.source_package.v1 +package_id: hmrc-vat-firm-targets-2024-25 +label: HMRC Annual UK VAT Statistics 2024-25 national VAT firm targets +artifact: + source_name: hmrc + source_table: Annual UK VAT Statistics 2024 to 2025 VAT trader population and net VAT liability by turnover band + resource_package: db + resource_directory: data/hmrc/vat_firm_targets_2024_25 + manifest: manifest.yaml + vintage: fiscal_year_2024_25 + extracted_at: "2026-06-27" + extraction_method: >- + curated CSV extract from HMRC Annual UK VAT Statistics 2024 to 2025 + turnover-band tables; 2024-25 sub-bands between GBP 1 million and GBP 10 + million are re-aggregated to the historical GBP 1m to GBP 10m band + parser: delimited_text_full_rows + artifact_year: 2024 + sheet_name: hmrc_vat_firm_targets_2024_25 +record_sets: + - record_set_id: hmrc.vat.fy2024_25.registered_trader_count.by_turnover_band + record_set_spec_id: hmrc.vat.registered_trader_count.by_turnover_band.v1 + source_record_id_prefix: hmrc.vat.fy2024_25.registered_trader_count.by_turnover_band + sheet_name: hmrc_vat_firm_targets_2024_25 + period_type: fiscal_year + period: 2024 + geography_id: K02000001 + geography_level: country + geography_name: United Kingdom + geography_vintage: current + entity: firm + entity_role: vat_registered_trader + domain: uk_vat_registered_traders + groupby_dimension: uk.firm.annual_turnover + shared_filters: + uk.firm.vat_registered: true + shared_constraints: + - variable: uk.firm.vat_registered + operator: == + value: true + label: VAT registered + rows: + - &negative_or_zero_count + value_id: negative_or_zero + label: Negative or zero annual turnover + ordinal: 0 + row_number: 2 + expected_row_header_column: B + expected_row_header: negative_or_zero + guard_cells: + - column: A + expected_value: vat_registered_count_by_turnover + label: target group + filters: + uk.firm.turnover_band: negative_or_zero + constraints: + - variable: uk.firm.turnover_band + operator: == + value: negative_or_zero + label: Annual turnover band + - variable: uk.firm.annual_turnover + operator: <= + value: 0 + unit: gbp + label: Annual turnover upper bound + - &one_to_threshold_count + value_id: 1_to_threshold + label: GBP 1 to VAT registration threshold annual turnover + ordinal: 1 + row_number: 3 + expected_row_header_column: B + expected_row_header: 1_to_threshold + guard_cells: + - column: A + expected_value: vat_registered_count_by_turnover + label: target group + filters: + uk.firm.turnover_band: 1_to_threshold + constraints: + - variable: uk.firm.turnover_band + operator: == + value: 1_to_threshold + label: Annual turnover band + - variable: uk.firm.annual_turnover + operator: ">" + value: 0 + unit: gbp + label: Annual turnover lower bound + - variable: uk.firm.annual_turnover + operator: <= + value: 90000 + unit: gbp + label: Annual turnover upper bound + - &threshold_to_150k_count + value_id: threshold_to_150k + label: VAT registration threshold to GBP 150,000 annual turnover + ordinal: 2 + row_number: 4 + expected_row_header_column: B + expected_row_header: threshold_to_150k + guard_cells: + - column: A + expected_value: vat_registered_count_by_turnover + label: target group + filters: + uk.firm.turnover_band: threshold_to_150k + constraints: + - variable: uk.firm.turnover_band + operator: == + value: threshold_to_150k + label: Annual turnover band + - variable: uk.firm.annual_turnover + operator: ">" + value: 90000 + unit: gbp + label: Annual turnover lower bound + - variable: uk.firm.annual_turnover + operator: <= + value: 150000 + unit: gbp + label: Annual turnover upper bound + - &band_150k_to_300k_count + value_id: 150k_to_300k + label: GBP 150,000 to GBP 300,000 annual turnover + ordinal: 3 + row_number: 5 + expected_row_header_column: B + expected_row_header: 150k_to_300k + guard_cells: + - column: A + expected_value: vat_registered_count_by_turnover + label: target group + filters: + uk.firm.turnover_band: 150k_to_300k + constraints: + - variable: uk.firm.turnover_band + operator: == + value: 150k_to_300k + label: Annual turnover band + - variable: uk.firm.annual_turnover + operator: ">" + value: 150000 + unit: gbp + label: Annual turnover lower bound + - variable: uk.firm.annual_turnover + operator: <= + value: 300000 + unit: gbp + label: Annual turnover upper bound + - &band_300k_to_500k_count + value_id: 300k_to_500k + label: GBP 300,000 to GBP 500,000 annual turnover + ordinal: 4 + row_number: 6 + expected_row_header_column: B + expected_row_header: 300k_to_500k + guard_cells: + - column: A + expected_value: vat_registered_count_by_turnover + label: target group + filters: + uk.firm.turnover_band: 300k_to_500k + constraints: + - variable: uk.firm.turnover_band + operator: == + value: 300k_to_500k + label: Annual turnover band + - variable: uk.firm.annual_turnover + operator: ">" + value: 300000 + unit: gbp + label: Annual turnover lower bound + - variable: uk.firm.annual_turnover + operator: <= + value: 500000 + unit: gbp + label: Annual turnover upper bound + - &band_500k_to_1m_count + value_id: 500k_to_1m + label: GBP 500,000 to GBP 1,000,000 annual turnover + ordinal: 5 + row_number: 7 + expected_row_header_column: B + expected_row_header: 500k_to_1m + guard_cells: + - column: A + expected_value: vat_registered_count_by_turnover + label: target group + filters: + uk.firm.turnover_band: 500k_to_1m + constraints: + - variable: uk.firm.turnover_band + operator: == + value: 500k_to_1m + label: Annual turnover band + - variable: uk.firm.annual_turnover + operator: ">" + value: 500000 + unit: gbp + label: Annual turnover lower bound + - variable: uk.firm.annual_turnover + operator: <= + value: 1000000 + unit: gbp + label: Annual turnover upper bound + - &band_1m_to_10m_count + value_id: 1m_to_10m + label: GBP 1,000,000 to GBP 10,000,000 annual turnover + ordinal: 6 + row_number: 8 + expected_row_header_column: B + expected_row_header: 1m_to_10m + guard_cells: + - column: A + expected_value: vat_registered_count_by_turnover + label: target group + filters: + uk.firm.turnover_band: 1m_to_10m + constraints: + - variable: uk.firm.turnover_band + operator: == + value: 1m_to_10m + label: Annual turnover band + - variable: uk.firm.annual_turnover + operator: ">" + value: 1000000 + unit: gbp + label: Annual turnover lower bound + - variable: uk.firm.annual_turnover + operator: <= + value: 10000000 + unit: gbp + label: Annual turnover upper bound + - &greater_than_10m_count + value_id: greater_than_10m + label: Greater than GBP 10,000,000 annual turnover + ordinal: 7 + row_number: 9 + expected_row_header_column: B + expected_row_header: greater_than_10m + guard_cells: + - column: A + expected_value: vat_registered_count_by_turnover + label: target group + filters: + uk.firm.turnover_band: greater_than_10m + constraints: + - variable: uk.firm.turnover_band + operator: == + value: greater_than_10m + label: Annual turnover band + - variable: uk.firm.annual_turnover + operator: ">" + value: 10000000 + unit: gbp + label: Annual turnover lower bound + - value_id: unknown + label: Unknown annual turnover + ordinal: 8 + row_number: 10 + expected_row_header_column: B + expected_row_header: unknown + guard_cells: + - column: A + expected_value: vat_registered_count_by_turnover + label: target group + filters: + uk.firm.turnover_band: unknown + constraints: + - variable: uk.firm.turnover_band + operator: == + value: unknown + label: Annual turnover band + measures: + - measure_id: vat_registered_trader_count + label: HMRC VAT-registered trader count + ordinal: 0 + column: D + source_column_id: value + expected_column_header_row: 1 + expected_column_header: value + concept: uk.firm.count + source_concept: hmrc.vat.registered_trader_count + concept_relation: approximate + concept_authority: arch-uk + concept_evidence_url: https://www.gov.uk/government/statistics/value-added-tax-vat-annual-statistics + concept_evidence_notes: >- + HMRC Annual UK VAT Statistics reports VAT-registered traders by + annual turnover band; Ledger represents those traders as VAT-registered + firm count facts. + unit: count + aggregation: sum + expected_cell_type: number + - record_set_id: hmrc.vat.fy2024_25.net_liability.by_turnover_band + record_set_spec_id: hmrc.vat.net_liability.by_turnover_band.v1 + source_record_id_prefix: hmrc.vat.fy2024_25.net_liability.by_turnover_band + sheet_name: hmrc_vat_firm_targets_2024_25 + period_type: fiscal_year + period: 2024 + geography_id: K02000001 + geography_level: country + geography_name: United Kingdom + geography_vintage: current + entity: firm + entity_role: vat_registered_trader + domain: uk_vat_registered_traders + groupby_dimension: uk.firm.annual_turnover + shared_filters: + uk.firm.vat_registered: true + shared_constraints: + - variable: uk.firm.vat_registered + operator: == + value: true + label: VAT registered + rows: + - <<: *negative_or_zero_count + row_number: 11 + guard_cells: + - column: A + expected_value: vat_liability_by_turnover + label: target group + - <<: *one_to_threshold_count + row_number: 12 + guard_cells: + - column: A + expected_value: vat_liability_by_turnover + label: target group + - <<: *threshold_to_150k_count + row_number: 13 + guard_cells: + - column: A + expected_value: vat_liability_by_turnover + label: target group + - <<: *band_150k_to_300k_count + row_number: 14 + guard_cells: + - column: A + expected_value: vat_liability_by_turnover + label: target group + - <<: *band_300k_to_500k_count + row_number: 15 + guard_cells: + - column: A + expected_value: vat_liability_by_turnover + label: target group + - <<: *band_500k_to_1m_count + row_number: 16 + guard_cells: + - column: A + expected_value: vat_liability_by_turnover + label: target group + - <<: *band_1m_to_10m_count + row_number: 17 + guard_cells: + - column: A + expected_value: vat_liability_by_turnover + label: target group + - <<: *greater_than_10m_count + row_number: 18 + guard_cells: + - column: A + expected_value: vat_liability_by_turnover + label: target group + measures: + - measure_id: net_vat_liability + label: HMRC net VAT liability + ordinal: 0 + column: D + source_column_id: value_millions + expected_column_header_row: 1 + expected_column_header: value + concept: uk.tax.vat.net_liability + source_concept: hmrc.vat.net_liability + concept_relation: approximate + concept_authority: arch-uk + concept_evidence_url: https://www.gov.uk/government/statistics/value-added-tax-vat-annual-statistics + concept_evidence_notes: >- + HMRC Annual UK VAT Statistics reports net VAT declared by annual + turnover band in GBP millions. Ledger scales the selected cells to GBP. + unit: gbp + aggregation: sum + value_scale: 1000000 + expected_cell_type: number diff --git a/packages/ons/uk_business_firm_targets_2025/source_package.yaml b/packages/ons/uk_business_firm_targets_2025/source_package.yaml new file mode 100644 index 0000000..fef7742 --- /dev/null +++ b/packages/ons/uk_business_firm_targets_2025/source_package.yaml @@ -0,0 +1,454 @@ +schema_version: arch.source_package.v1 +package_id: ons-uk-business-firm-targets-2025 +label: ONS UK Business 2025 national firm calibration targets +artifact: + source_name: ons + source_table: UK Business, Activity, Size and Location 2025 enterprise turnover and employment size bands + resource_package: db + resource_directory: data/ons/uk_business_firm_targets_2025 + manifest: manifest.yaml + vintage: uk_business_2025 + extracted_at: "2026-06-27" + extraction_method: >- + curated CSV extract from ONS UK Business, Activity, Size and Location 2025 + workbook national enterprise counts by turnover and employment size band + parser: delimited_text_full_rows + artifact_year: 2025 + sheet_name: ons_uk_business_firm_targets_2025 +record_sets: + - record_set_id: ons.uk_business.cy2025.enterprise_count.by_turnover_band + record_set_spec_id: ons.uk_business.enterprise_count.by_turnover_band.v1 + source_record_id_prefix: ons.uk_business.cy2025.enterprise_count.by_turnover_band + sheet_name: ons_uk_business_firm_targets_2025 + period_type: calendar_year + period: 2025 + geography_id: K02000001 + geography_level: country + geography_name: United Kingdom + geography_vintage: current + entity: firm + entity_role: enterprise + domain: uk_business_enterprises + groupby_dimension: uk.firm.annual_turnover + rows: + - value_id: 0_49k + label: GBP 0 to GBP 49,999 annual turnover + ordinal: 0 + row_number: 2 + expected_row_header_column: B + expected_row_header: 0_49k + guard_cells: + - column: A + expected_value: turnover_band + label: target group + filters: + uk.firm.turnover_band: 0_49k + constraints: + - variable: uk.firm.turnover_band + operator: == + value: 0_49k + label: Annual turnover band + - variable: uk.firm.annual_turnover + operator: ">=" + value: 0 + unit: gbp + label: Annual turnover lower bound + - variable: uk.firm.annual_turnover + operator: < + value: 50000 + unit: gbp + label: Annual turnover upper bound + - value_id: 50_99k + label: GBP 50,000 to GBP 99,999 annual turnover + ordinal: 1 + row_number: 3 + expected_row_header_column: B + expected_row_header: 50_99k + guard_cells: + - column: A + expected_value: turnover_band + label: target group + filters: + uk.firm.turnover_band: 50_99k + constraints: + - variable: uk.firm.turnover_band + operator: == + value: 50_99k + label: Annual turnover band + - variable: uk.firm.annual_turnover + operator: ">=" + value: 50000 + unit: gbp + label: Annual turnover lower bound + - variable: uk.firm.annual_turnover + operator: < + value: 100000 + unit: gbp + label: Annual turnover upper bound + - value_id: 100_249k + label: GBP 100,000 to GBP 249,999 annual turnover + ordinal: 2 + row_number: 4 + expected_row_header_column: B + expected_row_header: 100_249k + guard_cells: + - column: A + expected_value: turnover_band + label: target group + filters: + uk.firm.turnover_band: 100_249k + constraints: + - variable: uk.firm.turnover_band + operator: == + value: 100_249k + label: Annual turnover band + - variable: uk.firm.annual_turnover + operator: ">=" + value: 100000 + unit: gbp + label: Annual turnover lower bound + - variable: uk.firm.annual_turnover + operator: < + value: 250000 + unit: gbp + label: Annual turnover upper bound + - value_id: 250_499k + label: GBP 250,000 to GBP 499,999 annual turnover + ordinal: 3 + row_number: 5 + expected_row_header_column: B + expected_row_header: 250_499k + guard_cells: + - column: A + expected_value: turnover_band + label: target group + filters: + uk.firm.turnover_band: 250_499k + constraints: + - variable: uk.firm.turnover_band + operator: == + value: 250_499k + label: Annual turnover band + - variable: uk.firm.annual_turnover + operator: ">=" + value: 250000 + unit: gbp + label: Annual turnover lower bound + - variable: uk.firm.annual_turnover + operator: < + value: 500000 + unit: gbp + label: Annual turnover upper bound + - value_id: 500_999k + label: GBP 500,000 to GBP 999,999 annual turnover + ordinal: 4 + row_number: 6 + expected_row_header_column: B + expected_row_header: 500_999k + guard_cells: + - column: A + expected_value: turnover_band + label: target group + filters: + uk.firm.turnover_band: 500_999k + constraints: + - variable: uk.firm.turnover_band + operator: == + value: 500_999k + label: Annual turnover band + - variable: uk.firm.annual_turnover + operator: ">=" + value: 500000 + unit: gbp + label: Annual turnover lower bound + - variable: uk.firm.annual_turnover + operator: < + value: 1000000 + unit: gbp + label: Annual turnover upper bound + - value_id: 1000_4999k + label: GBP 1,000,000 to GBP 4,999,999 annual turnover + ordinal: 5 + row_number: 7 + expected_row_header_column: B + expected_row_header: 1000_4999k + guard_cells: + - column: A + expected_value: turnover_band + label: target group + filters: + uk.firm.turnover_band: 1000_4999k + constraints: + - variable: uk.firm.turnover_band + operator: == + value: 1000_4999k + label: Annual turnover band + - variable: uk.firm.annual_turnover + operator: ">=" + value: 1000000 + unit: gbp + label: Annual turnover lower bound + - variable: uk.firm.annual_turnover + operator: < + value: 5000000 + unit: gbp + label: Annual turnover upper bound + - value_id: 5000k_plus + label: GBP 5,000,000 or more annual turnover + ordinal: 6 + row_number: 8 + expected_row_header_column: B + expected_row_header: 5000k_plus + guard_cells: + - column: A + expected_value: turnover_band + label: target group + filters: + uk.firm.turnover_band: 5000k_plus + constraints: + - variable: uk.firm.turnover_band + operator: == + value: 5000k_plus + label: Annual turnover band + - variable: uk.firm.annual_turnover + operator: ">=" + value: 5000000 + unit: gbp + label: Annual turnover lower bound + measures: + - measure_id: enterprise_count + label: ONS enterprise count + ordinal: 0 + column: D + source_column_id: value + expected_column_header_row: 1 + expected_column_header: value + concept: uk.firm.count + source_concept: ons.uk_business.enterprise_count + concept_relation: approximate + concept_authority: arch-uk + concept_evidence_url: https://www.ons.gov.uk/businessindustryandtrade/business/activitysizeandlocation/datasets/ukbusinessactivitysizeandlocation + concept_evidence_notes: >- + ONS UK Business publishes enterprise counts by annual turnover band; + Ledger represents these enterprises as firm calibration count facts. + unit: count + aggregation: sum + expected_cell_type: number + - record_set_id: ons.uk_business.cy2025.enterprise_count.by_employment_band + record_set_spec_id: ons.uk_business.enterprise_count.by_employment_band.v1 + source_record_id_prefix: ons.uk_business.cy2025.enterprise_count.by_employment_band + sheet_name: ons_uk_business_firm_targets_2025 + period_type: calendar_year + period: 2025 + geography_id: K02000001 + geography_level: country + geography_name: United Kingdom + geography_vintage: current + entity: firm + entity_role: enterprise + domain: uk_business_enterprises + groupby_dimension: uk.firm.employees + rows: + - value_id: "0_4" + label: 0 to 4 employees + ordinal: 0 + row_number: 9 + expected_row_header_column: B + expected_row_header: "0_4" + guard_cells: + - column: A + expected_value: employment_band + label: target group + filters: + uk.firm.employment_band: "0_4" + constraints: + - variable: uk.firm.employment_band + operator: == + value: "0_4" + label: Employment band + - variable: uk.firm.employees + operator: ">=" + value: 0 + unit: count + label: Employee lower bound + - variable: uk.firm.employees + operator: < + value: 5 + unit: count + label: Employee upper bound + - value_id: "5_9" + label: 5 to 9 employees + ordinal: 1 + row_number: 10 + expected_row_header_column: B + expected_row_header: "5_9" + guard_cells: + - column: A + expected_value: employment_band + label: target group + filters: + uk.firm.employment_band: "5_9" + constraints: + - variable: uk.firm.employment_band + operator: == + value: "5_9" + label: Employment band + - variable: uk.firm.employees + operator: ">=" + value: 5 + unit: count + label: Employee lower bound + - variable: uk.firm.employees + operator: < + value: 10 + unit: count + label: Employee upper bound + - value_id: "10_19" + label: 10 to 19 employees + ordinal: 2 + row_number: 11 + expected_row_header_column: B + expected_row_header: "10_19" + guard_cells: + - column: A + expected_value: employment_band + label: target group + filters: + uk.firm.employment_band: "10_19" + constraints: + - variable: uk.firm.employment_band + operator: == + value: "10_19" + label: Employment band + - variable: uk.firm.employees + operator: ">=" + value: 10 + unit: count + label: Employee lower bound + - variable: uk.firm.employees + operator: < + value: 20 + unit: count + label: Employee upper bound + - value_id: "20_49" + label: 20 to 49 employees + ordinal: 3 + row_number: 12 + expected_row_header_column: B + expected_row_header: "20_49" + guard_cells: + - column: A + expected_value: employment_band + label: target group + filters: + uk.firm.employment_band: "20_49" + constraints: + - variable: uk.firm.employment_band + operator: == + value: "20_49" + label: Employment band + - variable: uk.firm.employees + operator: ">=" + value: 20 + unit: count + label: Employee lower bound + - variable: uk.firm.employees + operator: < + value: 50 + unit: count + label: Employee upper bound + - value_id: "50_99" + label: 50 to 99 employees + ordinal: 4 + row_number: 13 + expected_row_header_column: B + expected_row_header: "50_99" + guard_cells: + - column: A + expected_value: employment_band + label: target group + filters: + uk.firm.employment_band: "50_99" + constraints: + - variable: uk.firm.employment_band + operator: == + value: "50_99" + label: Employment band + - variable: uk.firm.employees + operator: ">=" + value: 50 + unit: count + label: Employee lower bound + - variable: uk.firm.employees + operator: < + value: 100 + unit: count + label: Employee upper bound + - value_id: "100_249" + label: 100 to 249 employees + ordinal: 5 + row_number: 14 + expected_row_header_column: B + expected_row_header: "100_249" + guard_cells: + - column: A + expected_value: employment_band + label: target group + filters: + uk.firm.employment_band: "100_249" + constraints: + - variable: uk.firm.employment_band + operator: == + value: "100_249" + label: Employment band + - variable: uk.firm.employees + operator: ">=" + value: 100 + unit: count + label: Employee lower bound + - variable: uk.firm.employees + operator: < + value: 250 + unit: count + label: Employee upper bound + - value_id: 250_plus + label: 250 or more employees + ordinal: 6 + row_number: 15 + expected_row_header_column: B + expected_row_header: 250_plus + guard_cells: + - column: A + expected_value: employment_band + label: target group + filters: + uk.firm.employment_band: 250_plus + constraints: + - variable: uk.firm.employment_band + operator: == + value: 250_plus + label: Employment band + - variable: uk.firm.employees + operator: ">=" + value: 250 + unit: count + label: Employee lower bound + measures: + - measure_id: enterprise_count + label: ONS enterprise count + ordinal: 0 + column: D + source_column_id: value + expected_column_header_row: 1 + expected_column_header: value + concept: uk.firm.count + source_concept: ons.uk_business.enterprise_count + concept_relation: approximate + concept_authority: arch-uk + concept_evidence_url: https://www.ons.gov.uk/businessindustryandtrade/business/activitysizeandlocation/datasets/ukbusinessactivitysizeandlocation + concept_evidence_notes: >- + ONS UK Business publishes enterprise counts by employment size band; + Ledger represents these enterprises as firm calibration count facts. + unit: count + aggregation: sum + expected_cell_type: number diff --git a/policyengine_ledger/target_profiles/uk_firms.json b/policyengine_ledger/target_profiles/uk_firms.json new file mode 100644 index 0000000..9dd4890 --- /dev/null +++ b/policyengine_ledger/target_profiles/uk_firms.json @@ -0,0 +1,142 @@ +{ + "schema_version": "policyengine_ledger.target_profile.v1", + "profile_id": "uk_firms", + "country": "uk", + "label": "UK firm calibration", + "defaults": { + "base_period_policy": "latest_not_after_build_base_period", + "operation": "sum" + }, + "targets": [ + { + "target_id": "ons.uk_business.enterprise_count.turnover_bands", + "family": "ons_uk_business", + "geography_levels": ["country"], + "ledger_selector": { + "source_name": "ons", + "source_measure_id": "enterprise_count", + "record_set_id": "ons.uk_business.cy2025.enterprise_count.by_turnover_band", + "groupby_dimension": "uk.firm.annual_turnover" + }, + "measurement": { + "entity": "firm", + "concept": "uk.firm.count", + "groupby_dimension": "uk.firm.annual_turnover" + }, + "bindings": { + "populace": { + "metric_name": "ons/uk_business/enterprise_count/turnover_bands", + "value_variable": "firm_count", + "from_entity": "firm", + "groupby_variable": "annual_turnover" + }, + "axiom": { + "metric_name": "ons/uk_business/enterprise_count/turnover_bands", + "status": "pending", + "value_rule": "uk.firm.count" + } + } + }, + { + "target_id": "ons.uk_business.enterprise_count.employment_bands", + "family": "ons_uk_business", + "geography_levels": ["country"], + "ledger_selector": { + "source_name": "ons", + "source_measure_id": "enterprise_count", + "record_set_id": "ons.uk_business.cy2025.enterprise_count.by_employment_band", + "groupby_dimension": "uk.firm.employees" + }, + "measurement": { + "entity": "firm", + "concept": "uk.firm.count", + "groupby_dimension": "uk.firm.employees" + }, + "bindings": { + "populace": { + "metric_name": "ons/uk_business/enterprise_count/employment_bands", + "value_variable": "firm_count", + "from_entity": "firm", + "groupby_variable": "employment" + }, + "axiom": { + "metric_name": "ons/uk_business/enterprise_count/employment_bands", + "status": "pending", + "value_rule": "uk.firm.count" + } + } + }, + { + "target_id": "hmrc.vat.registered_trader_count.turnover_bands", + "family": "hmrc_vat", + "geography_levels": ["country"], + "ledger_selector": { + "source_name": "hmrc", + "source_measure_id": "vat_registered_trader_count", + "record_set_id": "hmrc.vat.fy2024_25.registered_trader_count.by_turnover_band", + "groupby_dimension": "uk.firm.annual_turnover" + }, + "measurement": { + "entity": "firm", + "concept": "uk.firm.count", + "groupby_dimension": "uk.firm.annual_turnover", + "filters": [ + {"concept": "uk.firm.vat_registered", "operator": "==", "value": true} + ] + }, + "bindings": { + "populace": { + "metric_name": "hmrc/vat/registered_trader_count/turnover_bands", + "value_variable": "firm_count", + "from_entity": "firm", + "groupby_variable": "annual_turnover", + "filters": [ + {"variable": "vat_registered", "operator": "==", "value": true} + ] + }, + "axiom": { + "metric_name": "hmrc/vat/registered_trader_count/turnover_bands", + "status": "pending", + "value_rule": "uk.firm.count", + "filter_rule": "uk:policies/govuk/vat#firm_vat_registered" + } + } + }, + { + "target_id": "hmrc.vat.net_liability.turnover_bands", + "family": "hmrc_vat", + "geography_levels": ["country"], + "ledger_selector": { + "source_name": "hmrc", + "source_measure_id": "net_vat_liability", + "record_set_id": "hmrc.vat.fy2024_25.net_liability.by_turnover_band", + "groupby_dimension": "uk.firm.annual_turnover" + }, + "measurement": { + "entity": "firm", + "concept": "uk.tax.vat.net_liability", + "groupby_dimension": "uk.firm.annual_turnover", + "filters": [ + {"concept": "uk.firm.vat_registered", "operator": "==", "value": true} + ] + }, + "bindings": { + "populace": { + "metric_name": "hmrc/vat/net_liability/turnover_bands", + "value_variable": "vat_liability", + "from_entity": "firm", + "groupby_variable": "annual_turnover", + "filters": [ + {"variable": "vat_registered", "operator": "==", "value": true} + ] + }, + "axiom": { + "metric_name": "hmrc/vat/net_liability/turnover_bands", + "status": "pending", + "value_rule": "uk:policies/govuk/vat#net_vat_liability", + "filter_rule": "uk:policies/govuk/vat#firm_vat_registered" + } + } + } + ] +} diff --git a/tests/test_arch_source_cells.py b/tests/test_arch_source_cells.py index 683d031..5afb6ca 100644 --- a/tests/test_arch_source_cells.py +++ b/tests/test_arch_source_cells.py @@ -21,7 +21,11 @@ validate_source_cells, ) from arch.sources.cells import SourceArtifactMetadata -from arch.sources.rows import SourceRow, source_cells_from_source_rows +from arch.sources.rows import ( + SourceRow, + source_cells_from_source_rows, + source_rows_from_delimited_text, +) from arch.sources.specs import resolve_source_record @@ -143,6 +147,39 @@ def test_delimited_text_selected_rows_preserves_requested_order_with_shared_keys assert values[(3, 4)] == 10 +def test_delimited_text_parsers_preserve_underscore_identifiers(): + artifact = SourceArtifactMetadata( + source_name="ons", + source_table="test", + source_file="test.csv", + url="https://example.test/test.csv", + vintage="test", + sha256="abc123", + size_bytes=10, + extracted_at="2026-06-27", + extraction_method="test", + ) + content = b"band,value\n0_4,123\n10_19,456\n" + + rows = source_rows_from_delimited_text( + content, + artifact, + sheet_name="test", + ) + cells = source_cells_from_delimited_text( + content, + artifact, + sheet_name="test", + ) + + assert [row.values["band"] for row in rows] == ["0_4", "10_19"] + assert [ + cell.raw_value + for cell in cells + if cell.column_number == 1 and cell.row_number > 1 + ] == ["0_4", "10_19"] + + def test_html_tables_and_text_parser_preserves_tables_and_document_numbers(): artifact = SourceArtifactMetadata( source_name="dwp", diff --git a/tests/test_arch_source_package.py b/tests/test_arch_source_package.py index 301e50e..dc07ac0 100644 --- a/tests/test_arch_source_package.py +++ b/tests/test_arch_source_package.py @@ -1400,6 +1400,188 @@ def test_ssa_ssi_table_7b1_source_package_builds_area_category_facts(): assert ca_disabled_payments.geography.id == "0400000US06" +def test_ons_uk_business_firm_targets_source_package_alias_validates_counts(): + report = validate_source_package("ons-uk-business-firm-targets-2025", year=2025) + + assert report.valid + assert report.counts == { + "record_set_count": 2, + "row_count": 14, + "measure_count": 2, + "source_record_count": 14, + "source_region_count": 2, + } + + +def test_ons_uk_business_firm_targets_source_package_builds_firm_facts(): + package = load_source_package("ons-uk-business-firm-targets-2025") + rows = package.build_source_rows(2025) + cells = package.build_source_cells(2025, source_rows=rows) + facts = package.build_facts(2025, cells=cells, source_rows=rows) + values_by_record = {fact.source_record_id: fact for fact in facts} + + assert package.package_id == "ons-uk-business-firm-targets-2025" + assert len(rows) == 14 + assert validate_source_rows(rows).valid + assert validate_source_cells(cells).valid + assert len(cells) == 60 + assert len(facts) == 14 + assert validate_facts(facts).valid + assert validate_consumer_fact_contract(facts).valid + assert all(fact.entity.name == "firm" for fact in facts) + assert all(fact.source.raw_r2_uri for fact in facts) + + small_turnover = values_by_record[ + "ons.uk_business.cy2025.enterprise_count.by_turnover_band." + "0_49k.enterprise_count" + ] + + assert small_turnover.value == 387_285 + assert small_turnover.period.type == "calendar_year" + assert small_turnover.period.value == 2025 + assert small_turnover.geography.id == "K02000001" + assert { + (constraint.variable, constraint.operator, constraint.value) + for constraint in small_turnover.constraints + } == { + ("uk.firm.turnover_band", "==", "0_49k"), + ("uk.firm.annual_turnover", ">=", 0), + ("uk.firm.annual_turnover", "<", 50_000), + } + + expected_employment_band_facts = { + "0_4": ( + 2_137_200, + { + ("uk.firm.employment_band", "==", "0_4"), + ("uk.firm.employees", ">=", 0), + ("uk.firm.employees", "<", 5), + }, + ), + "5_9": ( + 300_645, + { + ("uk.firm.employment_band", "==", "5_9"), + ("uk.firm.employees", ">=", 5), + ("uk.firm.employees", "<", 10), + }, + ), + "10_19": ( + 156_590, + { + ("uk.firm.employment_band", "==", "10_19"), + ("uk.firm.employees", ">=", 10), + ("uk.firm.employees", "<", 20), + }, + ), + "20_49": ( + 84_595, + { + ("uk.firm.employment_band", "==", "20_49"), + ("uk.firm.employees", ">=", 20), + ("uk.firm.employees", "<", 50), + }, + ), + "50_99": ( + 29_335, + { + ("uk.firm.employment_band", "==", "50_99"), + ("uk.firm.employees", ">=", 50), + ("uk.firm.employees", "<", 100), + }, + ), + "100_249": ( + 14_835, + { + ("uk.firm.employment_band", "==", "100_249"), + ("uk.firm.employees", ">=", 100), + ("uk.firm.employees", "<", 250), + }, + ), + "250_plus": ( + 11_415, + { + ("uk.firm.employment_band", "==", "250_plus"), + ("uk.firm.employees", ">=", 250), + }, + ), + } + + for band_id, (expected_value, expected_constraints) in ( + expected_employment_band_facts.items() + ): + fact = values_by_record[ + "ons.uk_business.cy2025.enterprise_count.by_employment_band." + f"{band_id}.enterprise_count" + ] + + assert fact.value == expected_value + assert { + (constraint.variable, constraint.operator, constraint.value) + for constraint in fact.constraints + } == expected_constraints + + +def test_hmrc_vat_firm_targets_source_package_alias_validates_counts(): + report = validate_source_package("hmrc-vat-firm-targets-2024-25", year=2024) + + assert report.valid + assert report.counts == { + "record_set_count": 2, + "row_count": 17, + "measure_count": 2, + "source_record_count": 17, + "source_region_count": 2, + } + + +def test_hmrc_vat_firm_targets_source_package_builds_vat_facts(): + package = load_source_package("hmrc-vat-firm-targets-2024-25") + rows = package.build_source_rows(2024) + cells = package.build_source_cells(2024, source_rows=rows) + facts = package.build_facts(2024, cells=cells, source_rows=rows) + values_by_record = {fact.source_record_id: fact for fact in facts} + + assert package.package_id == "hmrc-vat-firm-targets-2024-25" + assert len(rows) == 17 + assert validate_source_rows(rows).valid + assert validate_source_cells(cells).valid + assert len(cells) == 72 + assert len(facts) == 17 + assert validate_facts(facts).valid + assert validate_consumer_fact_contract(facts).valid + assert all(fact.entity.name == "firm" for fact in facts) + assert all(fact.source.raw_r2_uri for fact in facts) + + threshold_count = values_by_record[ + "hmrc.vat.fy2024_25.registered_trader_count.by_turnover_band." + "threshold_to_150k.vat_registered_trader_count" + ] + large_liability = values_by_record[ + "hmrc.vat.fy2024_25.net_liability.by_turnover_band." + "greater_than_10m.net_vat_liability" + ] + + assert threshold_count.value == 280_400 + assert threshold_count.period.type == "fiscal_year" + assert threshold_count.period.value == 2024 + assert threshold_count.measure.concept == "uk.firm.count" + assert threshold_count.filters["uk.firm.vat_registered"] is True + assert { + (constraint.variable, constraint.operator, constraint.value) + for constraint in threshold_count.constraints + } == { + ("uk.firm.vat_registered", "==", True), + ("uk.firm.turnover_band", "==", "threshold_to_150k"), + ("uk.firm.annual_turnover", ">", 90_000), + ("uk.firm.annual_turnover", "<=", 150_000), + } + + assert large_liability.value == 132_800_000_000 + assert large_liability.measure.concept == "uk.tax.vat.net_liability" + assert large_liability.measure.unit == "gbp" + + def test_census_pep_source_package_alias_validates_fixture_counts(): report = validate_source_package("census-pep-2024-national-age-sex", year=2023) diff --git a/tests/test_policyengine_ledger_target_profiles.py b/tests/test_policyengine_ledger_target_profiles.py index 5fa50f4..7be88c1 100644 --- a/tests/test_policyengine_ledger_target_profiles.py +++ b/tests/test_policyengine_ledger_target_profiles.py @@ -58,6 +58,48 @@ def test__given_count_like_profile_rows__then_they_are_still_sum_measurements() ) +def test__given_uk_firms_profile__then_it_declares_ledger_only_firm_targets() -> None: + # When + profile = load_target_profile("uk_firms") + + # Then + assert profile.country == "uk" + assert profile.default_operation == "sum" + assert profile.base_period_policy == "latest_not_after_build_base_period" + assert [target.target_id for target in profile.targets_for_geography("country")] == [ + "ons.uk_business.enterprise_count.turnover_bands", + "ons.uk_business.enterprise_count.employment_bands", + "hmrc.vat.registered_trader_count.turnover_bands", + "hmrc.vat.net_liability.turnover_bands", + ] + + turnover_count = profile.targets[0] + assert turnover_count.measurement["entity"] == "firm" + assert turnover_count.ledger_selector == { + "source_name": "ons", + "source_measure_id": "enterprise_count", + "record_set_id": "ons.uk_business.cy2025.enterprise_count.by_turnover_band", + "groupby_dimension": "uk.firm.annual_turnover", + } + assert turnover_count.binding("populace").metric_name == ( + "ons/uk_business/enterprise_count/turnover_bands" + ) + + registered_count = profile.targets[2] + assert registered_count.binding("axiom").payload["filter_rule"] == ( + "uk:policies/govuk/vat#firm_vat_registered" + ) + + vat_liability = profile.targets[-1] + assert vat_liability.measurement["concept"] == "uk.tax.vat.net_liability" + assert vat_liability.binding("axiom").payload["value_rule"] == ( + "uk:policies/govuk/vat#net_vat_liability" + ) + assert vat_liability.binding("axiom").payload["filter_rule"] == ( + "uk:policies/govuk/vat#firm_vat_registered" + ) + + @pytest.mark.parametrize("forbidden", ["registry", "aggregation", "target_value"]) def test__given_forbidden_profile_option__then_profile_is_rejected( forbidden: str,