From 24ee840a89784a2ee9cf0e10e51f2a9aa746a1a2 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sat, 27 Jun 2026 08:10:40 -0400 Subject: [PATCH] Expand SOI congressional district source facts --- .../source_package.yaml | 674 ++++++++++++++++++ tests/test_arch_source_package.py | 28 +- 2 files changed, 701 insertions(+), 1 deletion(-) diff --git a/packages/irs_soi/congressional_district_2022/source_package.yaml b/packages/irs_soi/congressional_district_2022/source_package.yaml index d5fe2c3..06417c4 100644 --- a/packages/irs_soi/congressional_district_2022/source_package.yaml +++ b/packages/irs_soi/congressional_district_2022/source_package.yaml @@ -13995,3 +13995,677 @@ record_sets: aggregation: sum value_scale: 1000 expected_cell_type: number + - measure_id: taxable_interest_returns + label: Returns with taxable interest + ordinal: 3 + column: Z + source_column_id: N00300 + expected_column_header_row: 1 + expected_column_header: N00300 + concept: irs_soi.returns_with_taxable_interest + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: taxable_interest_amount + label: Taxable interest + ordinal: 4 + column: AA + source_column_id: A00300 + expected_column_header_row: 1 + expected_column_header: A00300 + concept: irs_soi.taxable_interest + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: tax_exempt_interest_returns + label: Returns with tax-exempt interest + ordinal: 5 + column: AB + source_column_id: N00400 + expected_column_header_row: 1 + expected_column_header: N00400 + concept: irs_soi.returns_with_tax_exempt_interest + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: tax_exempt_interest_amount + label: Tax-exempt interest + ordinal: 6 + column: AC + source_column_id: A00400 + expected_column_header_row: 1 + expected_column_header: A00400 + concept: irs_soi.tax_exempt_interest + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: ordinary_dividends_returns + label: Returns with ordinary dividends + ordinal: 7 + column: AD + source_column_id: N00600 + expected_column_header_row: 1 + expected_column_header: N00600 + concept: irs_soi.returns_with_ordinary_dividends + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: ordinary_dividends_amount + label: Ordinary dividends + ordinal: 8 + column: AE + source_column_id: A00600 + expected_column_header_row: 1 + expected_column_header: A00600 + concept: irs_soi.ordinary_dividends + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: qualified_dividends_returns + label: Returns with qualified dividends + ordinal: 9 + column: AF + source_column_id: N00650 + expected_column_header_row: 1 + expected_column_header: N00650 + concept: irs_soi.returns_with_qualified_dividends + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: qualified_dividends_amount + label: Qualified dividends + ordinal: 10 + column: AG + source_column_id: A00650 + expected_column_header_row: 1 + expected_column_header: A00650 + concept: irs_soi.qualified_dividends + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: schedule_c_income_returns + label: Returns with business or profession net income + ordinal: 11 + column: AJ + source_column_id: N00900 + expected_column_header_row: 1 + expected_column_header: N00900 + concept: irs_soi.returns_with_business_or_profession_net_income + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: schedule_c_income_amount + label: Business or profession net income + ordinal: 12 + column: AK + source_column_id: A00900 + expected_column_header_row: 1 + expected_column_header: A00900 + concept: irs_soi.business_or_profession_net_income + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: net_capital_gains_returns + label: Returns with net capital gains + ordinal: 13 + column: AL + source_column_id: N01000 + expected_column_header_row: 1 + expected_column_header: N01000 + concept: irs_soi.returns_with_net_capital_gains + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: net_capital_gains_amount + label: Net capital gains + ordinal: 14 + column: AM + source_column_id: A01000 + expected_column_header_row: 1 + expected_column_header: A01000 + concept: irs_soi.net_capital_gains + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: taxable_ira_distributions_returns + label: Returns with taxable IRA distributions + ordinal: 15 + column: AN + source_column_id: N01400 + expected_column_header_row: 1 + expected_column_header: N01400 + concept: irs_soi.returns_with_taxable_ira_distributions + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: taxable_ira_distributions_amount + label: Taxable IRA distributions + ordinal: 16 + column: AO + source_column_id: A01400 + expected_column_header_row: 1 + expected_column_header: A01400 + concept: irs_soi.taxable_ira_distributions + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: taxable_pension_income_returns + label: Returns with taxable pensions and annuities + ordinal: 17 + column: AP + source_column_id: N01700 + expected_column_header_row: 1 + expected_column_header: N01700 + concept: irs_soi.returns_with_taxable_pensions_and_annuities + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: taxable_pension_income_amount + label: Taxable pensions and annuities + ordinal: 18 + column: AQ + source_column_id: A01700 + expected_column_header_row: 1 + expected_column_header: A01700 + concept: irs_soi.taxable_pensions_and_annuities + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: unemployment_compensation_returns + label: Returns with unemployment compensation + ordinal: 19 + column: AS + source_column_id: N02300 + expected_column_header_row: 1 + expected_column_header: N02300 + concept: irs_soi.returns_with_unemployment_compensation + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: unemployment_compensation_amount + label: Unemployment compensation + ordinal: 20 + column: AT + source_column_id: A02300 + expected_column_header_row: 1 + expected_column_header: A02300 + concept: irs_soi.unemployment_compensation + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: taxable_social_security_returns + label: Returns with taxable Social Security benefits + ordinal: 21 + column: AU + source_column_id: N02500 + expected_column_header_row: 1 + expected_column_header: N02500 + concept: irs_soi.returns_with_taxable_social_security_benefits + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: taxable_social_security_amount + label: Taxable Social Security benefits + ordinal: 22 + column: AV + source_column_id: A02500 + expected_column_header_row: 1 + expected_column_header: A02500 + concept: irs_soi.taxable_social_security_benefits + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: partnership_scorp_income_returns + label: Returns with partnership and S-corporation net income + ordinal: 23 + column: AW + source_column_id: N26270 + expected_column_header_row: 1 + expected_column_header: N26270 + concept: irs_soi.returns_with_partnership_s_corporation_net_income + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: partnership_scorp_income_amount + label: Partnership and S-corporation net income + ordinal: 24 + column: AX + source_column_id: A26270 + expected_column_header_row: 1 + expected_column_header: A26270 + concept: irs_soi.partnership_s_corporation_net_income + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: rental_royalty_income_returns + label: Returns with rental and royalty net income + ordinal: 25 + column: AY + source_column_id: N25870 + expected_column_header_row: 1 + expected_column_header: N25870 + concept: irs_soi.returns_with_rental_and_royalty_net_income + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: rental_royalty_income_amount + label: Rental and royalty net income + ordinal: 26 + column: AZ + source_column_id: A25870 + expected_column_header_row: 1 + expected_column_header: A25870 + concept: irs_soi.rental_and_royalty_net_income + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: itemized_deductions_amount + label: Total itemized deductions + ordinal: 27 + column: BT + source_column_id: A04470 + expected_column_header_row: 1 + expected_column_header: A04470 + concept: irs_soi.total_itemized_deductions + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: medical_dental_expense_returns + label: Returns with medical and dental expense deductions + ordinal: 28 + column: BV + source_column_id: N17000 + expected_column_header_row: 1 + expected_column_header: N17000 + concept: irs_soi.returns_with_medical_dental_expense_deduction + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: medical_dental_expense_amount + label: Medical and dental expense deductions + ordinal: 29 + column: BW + source_column_id: A17000 + expected_column_header_row: 1 + expected_column_header: A17000 + concept: irs_soi.medical_dental_expense_deduction + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: limited_state_local_taxes_returns + label: Returns with limited state and local taxes + ordinal: 30 + column: BX + source_column_id: N18425 + expected_column_header_row: 1 + expected_column_header: N18425 + concept: irs_soi.returns_with_limited_state_local_taxes + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: limited_state_local_taxes_amount + label: Limited state and local taxes + ordinal: 31 + column: BY + source_column_id: A18425 + expected_column_header_row: 1 + expected_column_header: A18425 + concept: irs_soi.limited_state_local_taxes + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: income_tax_before_credits_returns + label: Returns with income tax before credits + ordinal: 32 + column: CZ + source_column_id: N05800 + expected_column_header_row: 1 + expected_column_header: N05800 + concept: irs_soi.returns_with_income_tax_before_credits + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: income_tax_before_credits_amount + label: Income tax before credits + ordinal: 33 + column: DA + source_column_id: A05800 + expected_column_header_row: 1 + expected_column_header: A05800 + concept: irs_soi.income_tax_before_credits + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: qualified_business_income_deduction_returns + label: Returns with qualified business income deduction + ordinal: 34 + column: CV + source_column_id: N04475 + expected_column_header_row: 1 + expected_column_header: N04475 + concept: irs_soi.returns_with_qualified_business_income_deduction + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: qualified_business_income_deduction_amount + label: Qualified business income deduction + ordinal: 35 + column: CW + source_column_id: A04475 + expected_column_header_row: 1 + expected_column_header: A04475 + concept: irs_soi.qualified_business_income_deduction + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: interest_paid_deduction_returns + label: Returns with deductible interest paid + ordinal: 36 + column: CJ + source_column_id: N19300 + expected_column_header_row: 1 + expected_column_header: N19300 + concept: irs_soi.returns_with_interest_paid_deduction + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: interest_paid_deduction_amount + label: Deductible interest paid + ordinal: 37 + column: CK + source_column_id: A19300 + expected_column_header_row: 1 + expected_column_header: A19300 + concept: irs_soi.interest_paid_deduction + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: charitable_returns + label: Returns with charitable contributions deduction + ordinal: 38 + column: CR + source_column_id: N19700 + expected_column_header_row: 1 + expected_column_header: N19700 + concept: irs_soi.returns_with_charitable_contributions_deduction + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: charitable_amount + label: Charitable contributions deduction + ordinal: 39 + column: CS + source_column_id: A19700 + expected_column_header_row: 1 + expected_column_header: A19700 + concept: irs_soi.charitable_contributions_deduction + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: eitc_claims + label: Returns with earned income credit + ordinal: 40 + column: EB + source_column_id: N59660 + expected_column_header_row: 1 + expected_column_header: N59660 + concept: irs_soi.returns_with_earned_income_credit + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: eitc_amount + label: Earned income credit + ordinal: 41 + column: EC + source_column_id: A59660 + expected_column_header_row: 1 + expected_column_header: A59660 + concept: irs_soi.earned_income_credit + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: eitc_no_children_claims + label: Returns with earned income credit and no qualifying children + ordinal: 42 + column: ED + source_column_id: N59661 + expected_column_header_row: 1 + expected_column_header: N59661 + concept: irs_soi.returns_with_earned_income_credit + unit: count + aggregation: sum + expected_cell_type: number + filters: + eitc_child_count: '0' + constraints: + - variable: us.tax.earned_income_credit_qualifying_children + operator: == + value: 0 + unit: count + label: EITC qualifying children + - measure_id: eitc_no_children_amount + label: Earned income credit with no qualifying children + ordinal: 43 + column: EE + source_column_id: A59661 + expected_column_header_row: 1 + expected_column_header: A59661 + concept: irs_soi.earned_income_credit + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + filters: + eitc_child_count: '0' + constraints: + - variable: us.tax.earned_income_credit_qualifying_children + operator: == + value: 0 + unit: count + label: EITC qualifying children + - measure_id: eitc_one_child_claims + label: Returns with earned income credit and one qualifying child + ordinal: 44 + column: EF + source_column_id: N59662 + expected_column_header_row: 1 + expected_column_header: N59662 + concept: irs_soi.returns_with_earned_income_credit + unit: count + aggregation: sum + expected_cell_type: number + filters: + eitc_child_count: '1' + constraints: + - variable: us.tax.earned_income_credit_qualifying_children + operator: == + value: 1 + unit: count + label: EITC qualifying children + - measure_id: eitc_one_child_amount + label: Earned income credit with one qualifying child + ordinal: 45 + column: EG + source_column_id: A59662 + expected_column_header_row: 1 + expected_column_header: A59662 + concept: irs_soi.earned_income_credit + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + filters: + eitc_child_count: '1' + constraints: + - variable: us.tax.earned_income_credit_qualifying_children + operator: == + value: 1 + unit: count + label: EITC qualifying children + - measure_id: eitc_two_children_claims + label: Returns with earned income credit and two qualifying children + ordinal: 46 + column: EH + source_column_id: N59663 + expected_column_header_row: 1 + expected_column_header: N59663 + concept: irs_soi.returns_with_earned_income_credit + unit: count + aggregation: sum + expected_cell_type: number + filters: + eitc_child_count: '2' + constraints: + - variable: us.tax.earned_income_credit_qualifying_children + operator: == + value: 2 + unit: count + label: EITC qualifying children + - measure_id: eitc_two_children_amount + label: Earned income credit with two qualifying children + ordinal: 47 + column: EI + source_column_id: A59663 + expected_column_header_row: 1 + expected_column_header: A59663 + concept: irs_soi.earned_income_credit + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + filters: + eitc_child_count: '2' + constraints: + - variable: us.tax.earned_income_credit_qualifying_children + operator: == + value: 2 + unit: count + label: EITC qualifying children + - measure_id: eitc_three_or_more_children_claims + label: Returns with earned income credit and three or more qualifying children + ordinal: 48 + column: EJ + source_column_id: N59664 + expected_column_header_row: 1 + expected_column_header: N59664 + concept: irs_soi.returns_with_earned_income_credit + unit: count + aggregation: sum + expected_cell_type: number + filters: + eitc_child_count: 3plus + constraints: + - variable: us.tax.earned_income_credit_qualifying_children + operator: '>=' + value: 3 + unit: count + label: EITC qualifying children + - measure_id: eitc_three_or_more_children_amount + label: Earned income credit with three or more qualifying children + ordinal: 49 + column: EK + source_column_id: A59664 + expected_column_header_row: 1 + expected_column_header: A59664 + concept: irs_soi.earned_income_credit + unit: usd + aggregation: sum + value_scale: 1 + expected_cell_type: number + filters: + eitc_child_count: 3plus + constraints: + - variable: us.tax.earned_income_credit_qualifying_children + operator: '>=' + value: 3 + unit: count + label: EITC qualifying children + - measure_id: actc_claims + label: Returns with additional child tax credit + ordinal: 50 + column: EN + source_column_id: N11070 + expected_column_header_row: 1 + expected_column_header: N11070 + concept: irs_soi.returns_with_additional_child_tax_credit + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: actc_amount + label: Additional child tax credit + ordinal: 51 + column: EO + source_column_id: A11070 + expected_column_header_row: 1 + expected_column_header: A11070 + concept: irs_soi.additional_child_tax_credit + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: income_tax_liability_returns + label: Returns with income tax liability after credits + ordinal: 52 + column: ET + source_column_id: N06500 + expected_column_header_row: 1 + expected_column_header: N06500 + concept: irs_soi.returns_with_income_tax_liability_after_credits + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: income_tax_liability_amount + label: Income tax liability after credits + ordinal: 53 + column: EU + source_column_id: A06500 + expected_column_header_row: 1 + expected_column_header: A06500 + concept: irs_soi.income_tax_liability_after_credits + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number + - measure_id: premium_tax_credit_returns + label: Returns with premium tax credit + ordinal: 54 + column: EX + source_column_id: N85530 + expected_column_header_row: 1 + expected_column_header: N85530 + concept: irs_soi.returns_with_premium_tax_credit + unit: count + aggregation: sum + expected_cell_type: number + - measure_id: premium_tax_credit_amount + label: Premium tax credit + ordinal: 55 + column: EY + source_column_id: A85530 + expected_column_header_row: 1 + expected_column_header: A85530 + concept: irs_soi.premium_tax_credit + unit: usd + aggregation: sum + value_scale: 1000 + expected_cell_type: number diff --git a/tests/test_arch_source_package.py b/tests/test_arch_source_package.py index f13446e..301e50e 100644 --- a/tests/test_arch_source_package.py +++ b/tests/test_arch_source_package.py @@ -1983,7 +1983,7 @@ def test_soi_congressional_district_2022_builds_all_return_facts(): assert package.package_id == "soi-congressional-district-2022" assert len(rows) == 4_791 assert len(cells) == 79_365 - assert len(facts) == 1_440 + assert len(facts) == 26_880 assert validate_source_rows(rows).valid assert validate_source_cells(cells).valid assert validate_facts(facts).valid @@ -2002,6 +2002,26 @@ def test_soi_congressional_district_2022_builds_all_return_facts(): ].value == 14_424_810_411_000 ) + assert ( + values_by_record[ + "irs_soi.ty2022.congressional_district_2022.all_returns." + "us.eitc_amount" + ].value + == 58_124_026_000 + ) + eitc_three_or_more_amount = values_by_record[ + "irs_soi.ty2022.congressional_district_2022.all_returns." + "us.eitc_three_or_more_children_amount" + ] + assert eitc_three_or_more_amount.value == 13_600_954_503 + assert eitc_three_or_more_amount.layout.source_column_id == "A59664" + assert eitc_three_or_more_amount.filters["eitc_child_count"] == "3plus" + assert { + (constraint.variable, constraint.operator, constraint.value) + for constraint in eitc_three_or_more_amount.constraints + } == { + ("us.tax.earned_income_credit_qualifying_children", ">=", 3), + } assert ( values_by_record[ "irs_soi.ty2022.congressional_district_2022.all_returns." @@ -2020,6 +2040,12 @@ def test_soi_congressional_district_2022_builds_all_return_facts(): assert al_01_agi.value == 22_915_824_000 assert al_01_agi.geography.id == "5001700US0101" assert al_01_agi.geography.name == "Alabama Congressional District 1" + al_01_taxable_interest = values_by_record[ + "irs_soi.ty2022.congressional_district_2022.all_returns." + "al_01.taxable_interest_amount" + ] + assert al_01_taxable_interest.value == 135_822_000 + assert al_01_taxable_interest.layout.source_column_id == "A00300" assert ca_53_returns.value == 383_160 assert ca_53_returns.geography.id == "5001700US0653"