From e567403e69ad410face6d912491bb79a24abaac4 Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Wed, 27 May 2026 06:28:46 +0000 Subject: [PATCH 1/7] ci: add kernel-e2e workflow + KERNEL_REV pin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires up CI coverage for use_kernel=True. The kernel is a private repo with no published wheel, so we pin a kernel SHA in KERNEL_REV and build the wheel inline via maturin develop using the existing INTEGRATION_TEST_APP GitHub App (extended to include databricks/databricks-sql-kernel in its repo allowlist). Gate semantics mirror trigger-integration-tests.yml: - Plain PR events post a synthetic-success Kernel E2E check so the required check doesn't block PRs that don't touch kernel code. - The kernel-e2e label triggers a preview run on the PR and is auto-removed on synchronize for the same security reason as the integration-test label. - merge_group is the real gate — runs when kernel-relevant files change (src/databricks/sql/backend/kernel/, test_kernel_backend.py, KERNEL_REV, etc.), auto-passes otherwise. Unit tests are unchanged: tests/unit/test_kernel_*.py already runs in every code-quality-checks.yml matrix combo against a fake databricks_sql_kernel module injected at sys.modules import time. Required follow-up before this merges: 1. Extend the INTEGRATION_TEST_APP allowlist to include databricks/databricks-sql-kernel. 2. Create the kernel-e2e label in this repo. 3. Add Kernel E2E as a required check on main once a green run lands. 
Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- .github/workflows/kernel-e2e.yml | 354 +++++++++++++++++++++++++++++++ KERNEL_REV | 1 + 2 files changed, 355 insertions(+) create mode 100644 .github/workflows/kernel-e2e.yml create mode 100644 KERNEL_REV diff --git a/.github/workflows/kernel-e2e.yml b/.github/workflows/kernel-e2e.yml new file mode 100644 index 000000000..efc38a6d8 --- /dev/null +++ b/.github/workflows/kernel-e2e.yml @@ -0,0 +1,354 @@ +name: Kernel E2E Tests + +# Runs tests/e2e/test_kernel_backend.py against a real Databricks +# warehouse with a freshly-built databricks-sql-kernel wheel. +# +# The kernel is a private repo with no published artifact. We pin a +# kernel SHA in the `KERNEL_REV` file at the repo root, check the +# kernel out via a GitHub App token, and run `maturin develop` to +# install the wheel into the same venv as the connector. Bumping +# `KERNEL_REV` is the only way to pick up a new kernel version — +# this keeps the connector ↔ kernel pair bisectable. +# +# Gate semantics mirror trigger-integration-tests.yml: +# - Plain PR events post a synthetic-success check so the required +# "Kernel E2E" check doesn't block PRs that don't touch the kernel +# path. Real tests run in the merge queue. +# - `kernel-e2e` label triggers a preview run on the PR. The label +# is auto-removed on `synchronize` for the same security reason +# trigger-integration-tests.yml does it. +# - merge_group fires the real gate — dispatches when kernel-relevant +# files changed, auto-passes otherwise. +# +# Required external setup: +# 1. `kernel-e2e` label exists in this repo. +# 2. `INTEGRATION_TEST_APP_ID` / `INTEGRATION_TEST_PRIVATE_KEY` +# secrets exist (already installed for the proxy-tests workflow). +# The GitHub App's repo allowlist must include +# `databricks/databricks-sql-kernel` — extend the existing App +# config; do not create a new App. +# 3. `KERNEL_REV` file at the repo root containing a 40-char kernel +# commit SHA. +# 4. 
`azure-prod` environment exposes DATABRICKS_HOST / +# TEST_PECO_WAREHOUSE_HTTP_PATH / DATABRICKS_TOKEN +# (already configured for code-coverage.yml). + +on: + pull_request: + types: [opened, synchronize, reopened, labeled] + merge_group: + +permissions: + contents: read + +# Cancel in-flight kernel-e2e runs on PR pushes — the warehouse state +# is shared with code-coverage.yml so we already pay this cost there. +# Don't cancel on main / merge_group; each commit needs its own signal. +concurrency: + group: kernel-e2e-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + +jobs: + # ─────────────────────────────────────────────────────────────── + # Security: auto-remove `kernel-e2e` label on new commits, same as + # trigger-integration-tests.yml. + # ─────────────────────────────────────────────────────────────── + remove-label-on-new-commit: + if: github.event_name == 'pull_request' && github.event.action == 'synchronize' + runs-on: + group: databricks-protected-runner-group + labels: linux-ubuntu-latest + permissions: + pull-requests: write + issues: write + steps: + - name: Remove kernel-e2e label + uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0 + with: + script: | + const labels = context.payload.pull_request.labels.map(l => l.name); + if (!labels.includes('kernel-e2e')) { + console.log('Label not present, nothing to remove.'); + return; + } + try { + await github.rest.issues.removeLabel({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + name: 'kernel-e2e' + }); + console.log('Removed kernel-e2e label.'); + } catch (error) { + if (error.status !== 404) throw error; + } + + # ─────────────────────────────────────────────────────────────── + # Synthetic success on every non-label PR event so the required + # "Kernel E2E" check doesn't permablock PRs that don't touch kernel + # code. 
Real run happens in the merge queue (or via explicit label). + # ─────────────────────────────────────────────────────────────── + skip-kernel-e2e-pr: + if: github.event_name == 'pull_request' && github.event.action != 'labeled' + runs-on: + group: databricks-protected-runner-group + labels: linux-ubuntu-latest + permissions: + checks: write + steps: + - name: Post synthetic-success check + uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0 + with: + github-token: ${{ github.token }} + script: | + await github.rest.checks.create({ + owner: context.repo.owner, + repo: context.repo.repo, + name: 'Kernel E2E', + head_sha: context.payload.pull_request.head.sha, + status: 'completed', + conclusion: 'success', + completed_at: new Date().toISOString(), + output: { + title: 'Skipped on PR — runs in merge queue', + summary: 'Kernel E2E is skipped on PRs and runs as a required gate in the merge queue. Add the `kernel-e2e` label to preview on this PR.' + } + }); + + # ─────────────────────────────────────────────────────────────── + # Detect whether kernel-relevant files changed. Used by both the + # labelled PR path and the merge-queue path to decide between + # "really run the suite" and "auto-pass the check". 
+ # ─────────────────────────────────────────────────────────────── + detect-changes: + if: | + github.event_name == 'merge_group' || + (github.event_name == 'pull_request' && + github.event.action == 'labeled' && + contains(github.event.pull_request.labels.*.name, 'kernel-e2e')) + runs-on: + group: databricks-protected-runner-group + labels: linux-ubuntu-latest + outputs: + run_tests: ${{ steps.changed.outputs.run_tests }} + head_sha: ${{ steps.refs.outputs.head_sha }} + pr_number: ${{ steps.refs.outputs.pr_number }} + steps: + - name: Resolve head SHA + PR number + id: refs + env: + MERGE_QUEUE_REF: ${{ github.event.merge_group.head_ref }} + uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0 + with: + script: | + if (context.eventName === 'pull_request') { + core.setOutput('head_sha', context.payload.pull_request.head.sha); + core.setOutput('pr_number', String(context.payload.pull_request.number)); + return; + } + // merge_group — extract PR # from gh-readonly-queue/{base_branch}/pr-{number}-{sha} + const ref = process.env.MERGE_QUEUE_REF || ''; + const m = ref.match(/pr-(\d+)/); + if (!m) core.setFailed(`could not extract pr number from ${ref}`); + core.setOutput('head_sha', context.payload.merge_group.head_sha); + core.setOutput('pr_number', m ? m[1] : ''); + + - name: Check out repo at head SHA + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + ref: ${{ steps.refs.outputs.head_sha }} + fetch-depth: 0 + + - name: Detect kernel-relevant changes + id: changed + env: + HEAD_SHA: ${{ steps.refs.outputs.head_sha }} + BASE_SHA: ${{ github.event_name == 'merge_group' && github.event.merge_group.base_sha || github.event.pull_request.base.sha }} + run: | + CHANGED=$(git diff --name-only "$BASE_SHA" "$HEAD_SHA") + echo "Changed files:" + echo "$CHANGED" + # Run when the connector kernel backend, kernel e2e tests, + # this workflow, the kernel revision pin, or core deps move. 
+ if echo "$CHANGED" | grep -qE "^(src/databricks/sql/backend/kernel/|tests/e2e/test_kernel_backend\.py|tests/unit/test_kernel_|\.github/workflows/kernel-e2e\.yml|KERNEL_REV|pyproject\.toml|poetry\.lock)"; then + echo "run_tests=true" >> "$GITHUB_OUTPUT" + else + echo "run_tests=false" >> "$GITHUB_OUTPUT" + fi + + # ─────────────────────────────────────────────────────────────── + # Real test job. Builds the kernel wheel from the pinned SHA and + # runs the connector's kernel e2e suite against the dogfood + # warehouse. + # ─────────────────────────────────────────────────────────────── + run-kernel-e2e: + needs: detect-changes + if: needs.detect-changes.outputs.run_tests == 'true' + runs-on: + group: databricks-protected-runner-group + labels: linux-ubuntu-latest + # azure-prod holds the warehouse secrets. Fork PRs are paused at + # "approval required" — same model as code-coverage.yml. + environment: azure-prod + permissions: + contents: read + checks: write + env: + DATABRICKS_SERVER_HOSTNAME: ${{ secrets.DATABRICKS_HOST }} + DATABRICKS_HTTP_PATH: ${{ secrets.TEST_PECO_WAREHOUSE_HTTP_PATH }} + DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} + steps: + - name: Check out connector + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + ref: ${{ needs.detect-changes.outputs.head_sha }} + + - name: Read pinned kernel SHA + id: kernel-rev + run: | + if [[ ! -f KERNEL_REV ]]; then + echo "::error::KERNEL_REV file missing" + exit 1 + fi + REV=$(tr -d '[:space:]' < KERNEL_REV) + if [[ ! 
"$REV" =~ ^[0-9a-f]{40}$ ]]; then + echo "::error::KERNEL_REV must be a 40-char commit SHA, got: $REV" + exit 1 + fi + echo "rev=$REV" >> "$GITHUB_OUTPUT" + echo "Pinned kernel SHA: $REV" + + - name: Generate GitHub App token (kernel repo read access) + id: app-token + uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3.0.0 + with: + app-id: ${{ secrets.INTEGRATION_TEST_APP_ID }} + private-key: ${{ secrets.INTEGRATION_TEST_PRIVATE_KEY }} + owner: databricks + repositories: databricks-sql-kernel + + - name: Check out kernel at pinned SHA + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + repository: databricks/databricks-sql-kernel + ref: ${{ steps.kernel-rev.outputs.rev }} + token: ${{ steps.app-token.outputs.token }} + path: databricks-sql-kernel + + - name: Set up Python 3.10 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + with: + python-version: "3.10" + + - name: Set up Rust toolchain + uses: actions-rust-lang/setup-rust-toolchain@1780873c7b576612439a134613cc4cc74ce5538c # v1.15.2 + + - name: Cache cargo build artifacts (keyed on kernel SHA) + uses: Swatinem/rust-cache@98c8021b550208e191a6a3145459bfc9fb29c4c0 # v2.8.0 + with: + workspaces: databricks-sql-kernel + # Keying on the kernel SHA means each pinned version gets a + # warm cache; bumping KERNEL_REV invalidates and rewarms. 
+ key: kernel-${{ steps.kernel-rev.outputs.rev }} + + - name: Install Kerberos system deps + run: | + sudo apt-get update + sudo apt-get install -y libkrb5-dev + + - name: Setup Poetry + connector deps + uses: ./.github/actions/setup-poetry + with: + python-version: "3.10" + install-args: "--all-extras" + cache-suffix: "kernel-e2e-" + + - name: Install maturin into the poetry venv + run: poetry run pip install 'maturin>=1.5,<2.0' + + - name: Build + install kernel wheel into poetry venv + working-directory: databricks-sql-kernel/pyo3 + # `maturin develop` builds the extension and installs it into + # whichever Python is on PATH. `poetry run` resolves to the + # connector's .venv, so the wheel lands where pytest will + # import it. + run: poetry run maturin develop --release + + - name: Smoke-check kernel import + run: | + poetry run python -c "import databricks_sql_kernel as k; assert k.__file__, 'kernel module has no __file__ — wheel install failed'; print('kernel ok:', k.__file__)" + + - name: Run kernel e2e tests + run: poetry run pytest tests/e2e/test_kernel_backend.py -v + + - name: Post Kernel E2E check (success) + if: success() && github.event_name == 'merge_group' + uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0 + with: + github-token: ${{ github.token }} + script: | + await github.rest.checks.create({ + owner: context.repo.owner, + repo: context.repo.repo, + name: 'Kernel E2E', + head_sha: '${{ needs.detect-changes.outputs.head_sha }}', + status: 'completed', + conclusion: 'success', + completed_at: new Date().toISOString(), + output: { + title: 'Kernel E2E passed', + summary: 'tests/e2e/test_kernel_backend.py ran green against the pinned kernel SHA.' 
+ } + }); + + - name: Post Kernel E2E check (failure) + if: failure() && github.event_name == 'merge_group' + uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0 + with: + github-token: ${{ github.token }} + script: | + await github.rest.checks.create({ + owner: context.repo.owner, + repo: context.repo.repo, + name: 'Kernel E2E', + head_sha: '${{ needs.detect-changes.outputs.head_sha }}', + status: 'completed', + conclusion: 'failure', + completed_at: new Date().toISOString(), + output: { + title: 'Kernel E2E failed', + summary: 'See workflow logs for details.' + } + }); + + # ─────────────────────────────────────────────────────────────── + # Auto-pass the Kernel E2E check in the merge queue when no kernel- + # relevant files changed. + # ─────────────────────────────────────────────────────────────── + auto-pass-merge-queue: + needs: detect-changes + if: github.event_name == 'merge_group' && needs.detect-changes.outputs.run_tests != 'true' + runs-on: + group: databricks-protected-runner-group + labels: linux-ubuntu-latest + permissions: + checks: write + steps: + - name: Auto-pass + uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0 + with: + github-token: ${{ github.token }} + script: | + await github.rest.checks.create({ + owner: context.repo.owner, + repo: context.repo.repo, + name: 'Kernel E2E', + head_sha: '${{ github.event.merge_group.head_sha }}', + status: 'completed', + conclusion: 'success', + completed_at: new Date().toISOString(), + output: { + title: 'Skipped — no kernel-relevant changes', + summary: 'No files under src/databricks/sql/backend/kernel/, tests/e2e/test_kernel_backend.py, KERNEL_REV, pyproject.toml, or poetry.lock changed.' 
+ } + }); diff --git a/KERNEL_REV b/KERNEL_REV new file mode 100644 index 000000000..dfd1d198b --- /dev/null +++ b/KERNEL_REV @@ -0,0 +1 @@ +aed2efbed8087171d61848f5ad98c7e171827698 From f7ed5e3ec7dbede6e5e6a9a3fef4a2cd116ec5a3 Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Wed, 27 May 2026 06:33:20 +0000 Subject: [PATCH 2/7] ci(kernel-e2e): add id-token: write for JFrog OIDC exchange MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit setup-poetry runs setup-jfrog, which exchanges a GitHub OIDC token for a JFrog access token to reach the internal PyPI mirror. That needs id-token: write on the job, which was missing — the labelled preview run failed at setup-poetry with "ACTIONS_ID_TOKEN_REQUEST_TOKEN: unbound variable". Declared at both workflow scope and on run-kernel-e2e directly: a job-level permissions block fully overrides workflow scope, so the redundancy is intentional. Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- .github/workflows/kernel-e2e.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/kernel-e2e.yml b/.github/workflows/kernel-e2e.yml index efc38a6d8..e0104b5bd 100644 --- a/.github/workflows/kernel-e2e.yml +++ b/.github/workflows/kernel-e2e.yml @@ -40,6 +40,12 @@ on: permissions: contents: read + # id-token: write is needed by .github/actions/setup-jfrog (OIDC + # exchange with JFrog for the connector's PyPI mirror). Declared + # workflow-wide so the labelled-PR / merge-queue jobs that invoke + # setup-poetry inherit it. Individual jobs still scope down to the + # minimum they actually use (checks: write etc.). + id-token: write # Cancel in-flight kernel-e2e runs on PR pushes — the warehouse state # is shared with code-coverage.yml so we already pay this cost there. @@ -193,6 +199,10 @@ jobs: permissions: contents: read checks: write + # OIDC token exchange with JFrog inside setup-poetry. 
A job-level + # permissions block fully overrides workflow-level, so this must + # be redeclared here even though the workflow declares it too. + id-token: write env: DATABRICKS_SERVER_HOSTNAME: ${{ secrets.DATABRICKS_HOST }} DATABRICKS_HTTP_PATH: ${{ secrets.TEST_PECO_WAREHOUSE_HTTP_PATH }} From 93f76dde5a3ae0622331f3dbf0fc429267f19997 Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Wed, 27 May 2026 06:39:05 +0000 Subject: [PATCH 3/7] ci(kernel-e2e): build kernel wheel into connector venv, not a new one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `poetry run maturin develop` from inside databricks-sql-kernel/pyo3/ makes poetry create a fresh, empty .venv next to the kernel source (it discovers pyo3/pyproject.toml first and treats it as the project root). That venv has no maturin → "Command not found: maturin". Resolve the connector venv's python path explicitly before changing working directory, then call maturin from that python via `-m maturin`. `--interpreter $CONNECTOR_VENV_PY` pins the produced wheel to the connector venv so the resulting extension is installed where pytest will look for it. Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- .github/workflows/kernel-e2e.yml | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/.github/workflows/kernel-e2e.yml b/.github/workflows/kernel-e2e.yml index e0104b5bd..126bbb21d 100644 --- a/.github/workflows/kernel-e2e.yml +++ b/.github/workflows/kernel-e2e.yml @@ -273,16 +273,29 @@ jobs: install-args: "--all-extras" cache-suffix: "kernel-e2e-" - - name: Install maturin into the poetry venv - run: poetry run pip install 'maturin>=1.5,<2.0' + - name: Install maturin into the connector venv + # The connector's poetry venv is in-project (.venv at repo + # root). 
The kernel's pyo3/ subtree carries its own + # pyproject.toml — running `poetry run …` from inside it + # makes poetry create a *second* venv next to the kernel + # source, which won't have maturin or the connector + # installed. We side-step that by resolving the connector + # venv's python here and calling maturin via its absolute + # path for the build step. + run: | + poetry run pip install 'maturin>=1.5,<2.0' + VENV_PY=$(poetry run python -c "import sys; print(sys.executable)") + echo "CONNECTOR_VENV_PY=$VENV_PY" >> "$GITHUB_ENV" + echo "Using connector venv python: $VENV_PY" - - name: Build + install kernel wheel into poetry venv + - name: Build + install kernel wheel into connector venv working-directory: databricks-sql-kernel/pyo3 - # `maturin develop` builds the extension and installs it into - # whichever Python is on PATH. `poetry run` resolves to the - # connector's .venv, so the wheel lands where pytest will - # import it. - run: poetry run maturin develop --release + # `--interpreter $CONNECTOR_VENV_PY` pins maturin to the + # connector venv even though we're cwd'd inside the kernel's + # own pyproject tree. `python -m maturin` invokes the + # maturin we just installed into that same venv, sidestepping + # the nested-poetry-project detection entirely. + run: $CONNECTOR_VENV_PY -m maturin develop --release --interpreter $CONNECTOR_VENV_PY - name: Smoke-check kernel import run: | From dd0245d97e20e77004f387b7de8747c858a6f9ac Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Wed, 27 May 2026 06:44:02 +0000 Subject: [PATCH 4/7] ci(kernel-e2e): drop --interpreter from maturin develop (not a valid flag) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit maturin develop installs into whichever python invoked it; the flag exists on `maturin build` only. 
The previous commit's extra `--interpreter $CONNECTOR_VENV_PY` was redundant — we're already calling maturin via `$CONNECTOR_VENV_PY -m maturin`, so the venv python is the one doing the build and install. Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- .github/workflows/kernel-e2e.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/kernel-e2e.yml b/.github/workflows/kernel-e2e.yml index 126bbb21d..6769ee331 100644 --- a/.github/workflows/kernel-e2e.yml +++ b/.github/workflows/kernel-e2e.yml @@ -290,12 +290,12 @@ jobs: - name: Build + install kernel wheel into connector venv working-directory: databricks-sql-kernel/pyo3 - # `--interpreter $CONNECTOR_VENV_PY` pins maturin to the - # connector venv even though we're cwd'd inside the kernel's - # own pyproject tree. `python -m maturin` invokes the - # maturin we just installed into that same venv, sidestepping - # the nested-poetry-project detection entirely. - run: $CONNECTOR_VENV_PY -m maturin develop --release --interpreter $CONNECTOR_VENV_PY + # `maturin develop` builds the extension against — and installs + # it into — whichever python invoked it. Calling it via + # `$CONNECTOR_VENV_PY -m maturin` from inside the kernel's + # pyo3/ tree is what targets the connector venv without + # tripping poetry's nested-project detection. + run: $CONNECTOR_VENV_PY -m maturin develop --release - name: Smoke-check kernel import run: | From 58babaccd89e780ea8b378f48fdc789977793f75 Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Wed, 27 May 2026 06:56:04 +0000 Subject: [PATCH 5/7] ci(kernel-e2e): route cargo through JFrog + audit cleanups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit databricks-protected-runner-group blocks direct egress to index.crates.io, so the maturin build was failing with SSL EOF on the cargo metadata step. 
Extend setup-jfrog with an opt-in `configure-cargo` input that writes ~/.cargo/config.toml + credentials.toml against the JFrog db-cargo-remote proxy (recipe borrowed verbatim from databricks-odbc's setup-jfrog action) and forward it through setup-poetry so the kernel-e2e workflow can enable it without bypassing the wrapper. Bundled cleanups from a workflow audit: - Drop the redundant `Set up Python 3.10` step — setup-poetry runs actions/setup-python internally at the matching version. - Smoke-check now uses `$CONNECTOR_VENV_PY` (same interpreter we built the wheel with), so a wheel installed into the wrong venv would surface here rather than be masked by `poetry run python` re-resolving. - Post `Kernel E2E` check on the labelled-PR path as well as the merge-queue path; previously the PR would still show the synthetic-success check forever even after a real labelled run failed. - Add a comment to fetch-depth: 0 explaining why we keep it. Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- .github/actions/setup-jfrog/action.yml | 43 ++++++++++++++++++++++++- .github/actions/setup-poetry/action.yml | 9 ++++++ .github/workflows/kernel-e2e.yml | 33 +++++++++++++------ 3 files changed, 75 insertions(+), 10 deletions(-) diff --git a/.github/actions/setup-jfrog/action.yml b/.github/actions/setup-jfrog/action.yml index 97ae146ba..a19859b68 100644 --- a/.github/actions/setup-jfrog/action.yml +++ b/.github/actions/setup-jfrog/action.yml @@ -1,5 +1,15 @@ name: Setup JFrog OIDC -description: Obtain a JFrog access token via GitHub OIDC and configure pip to use JFrog PyPI proxy +description: Obtain a JFrog access token via GitHub OIDC and configure pip / cargo to use JFrog package proxies + +inputs: + configure-cargo: + description: | + Write ~/.cargo/config.toml + credentials.toml pointing at the + Databricks JFrog Cargo proxy. Required for any job that runs + `cargo` on `databricks-protected-runner-group`, where direct + access to index.crates.io is blocked. 
Off by default because + most jobs in this repo are Python-only. + default: "false" runs: using: composite @@ -30,3 +40,34 @@ runs: set -euo pipefail echo "PIP_INDEX_URL=https://gha-service-account:${JFROG_ACCESS_TOKEN}@databricks.jfrog.io/artifactory/api/pypi/db-pypi/simple" >> "$GITHUB_ENV" echo "pip configured to use JFrog registry" + + - name: Configure Cargo + if: inputs.configure-cargo == 'true' + shell: bash + # databricks-protected-runner-group blocks direct egress to + # index.crates.io, so cargo must route through JFrog's + # db-cargo-remote proxy. Mirrors the recipe used in + # databricks-odbc's setup-jfrog action. + # + # Note: JFrog's Cargo proxy quarantines crates released within + # the last 7 days. If a fresh dependency version isn't yet + # mirrored, the build will fail until JFrog ingests it — bump + # Cargo.lock to an older version or wait it out. + run: | + set -euo pipefail + mkdir -p ~/.cargo + cat > ~/.cargo/config.toml << 'EOF' + [source.crates-io] + replace-with = "jfrog" + [source.jfrog] + registry = "sparse+https://databricks.jfrog.io/artifactory/api/cargo/db-cargo-remote/index/" + [registries.jfrog] + index = "sparse+https://databricks.jfrog.io/artifactory/api/cargo/db-cargo-remote/index/" + credential-provider = ["cargo:token"] + EOF + cat > ~/.cargo/credentials.toml << EOF + [registries.jfrog] + token = "Bearer ${JFROG_ACCESS_TOKEN}" + EOF + echo "CARGO_REGISTRIES_JFROG_TOKEN=Bearer ${JFROG_ACCESS_TOKEN}" >> "$GITHUB_ENV" + echo "Cargo configured to use JFrog registry" diff --git a/.github/actions/setup-poetry/action.yml b/.github/actions/setup-poetry/action.yml index f7e15b1c0..a2b502527 100644 --- a/.github/actions/setup-poetry/action.yml +++ b/.github/actions/setup-poetry/action.yml @@ -17,12 +17,21 @@ inputs: description: Extra suffix for the cache key to avoid collisions across job variants required: false default: "" + configure-cargo: + description: | + Forwarded to setup-jfrog. 
Set to "true" for jobs that also need + Cargo configured against the JFrog crates proxy (e.g. anything + that builds a Rust extension via maturin). + required: false + default: "false" runs: using: composite steps: - name: Setup JFrog uses: ./.github/actions/setup-jfrog + with: + configure-cargo: ${{ inputs.configure-cargo }} - name: Set up python ${{ inputs.python-version }} id: setup-python diff --git a/.github/workflows/kernel-e2e.yml b/.github/workflows/kernel-e2e.yml index 6769ee331..8984e457a 100644 --- a/.github/workflows/kernel-e2e.yml +++ b/.github/workflows/kernel-e2e.yml @@ -163,6 +163,9 @@ jobs: uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: ref: ${{ steps.refs.outputs.head_sha }} + # Full history so `git diff BASE_SHA HEAD_SHA` resolves both + # commits regardless of how far base has diverged. The repo + # is small enough that depth 0 costs only a few seconds. fetch-depth: 0 - name: Detect kernel-relevant changes @@ -245,11 +248,10 @@ jobs: token: ${{ steps.app-token.outputs.token }} path: databricks-sql-kernel - - name: Set up Python 3.10 - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: "3.10" - + # `setup-poetry` below runs `actions/setup-python` internally + # with the matching version, so we don't repeat it here. We do + # set up the Rust toolchain + cargo cache before maturin so they + # are on PATH when the kernel build step runs. 
- name: Set up Rust toolchain uses: actions-rust-lang/setup-rust-toolchain@1780873c7b576612439a134613cc4cc74ce5538c # v1.15.2 @@ -266,12 +268,16 @@ jobs: sudo apt-get update sudo apt-get install -y libkrb5-dev - - name: Setup Poetry + connector deps + - name: Setup Poetry + connector deps (and Cargo via JFrog) uses: ./.github/actions/setup-poetry with: python-version: "3.10" install-args: "--all-extras" cache-suffix: "kernel-e2e-" + # databricks-protected-runner-group blocks index.crates.io; + # route cargo through the JFrog db-cargo-remote proxy so + # maturin's cargo invocation below can resolve deps. + configure-cargo: "true" - name: Install maturin into the connector venv # The connector's poetry venv is in-project (.venv at repo @@ -298,14 +304,23 @@ jobs: run: $CONNECTOR_VENV_PY -m maturin develop --release - name: Smoke-check kernel import + # Use the same interpreter we built the wheel with, so a wheel + # accidentally installed into the wrong venv would be visible + # here rather than masked by `poetry run python` re-resolving. run: | - poetry run python -c "import databricks_sql_kernel as k; assert k.__file__, 'kernel module has no __file__ — wheel install failed'; print('kernel ok:', k.__file__)" + $CONNECTOR_VENV_PY -c "import databricks_sql_kernel as k; assert k.__file__, 'kernel module has no __file__ — wheel install failed'; print('kernel ok:', k.__file__)" - name: Run kernel e2e tests run: poetry run pytest tests/e2e/test_kernel_backend.py -v + # Post a Kernel E2E check on both the labeled-PR and merge-queue + # paths so the named check on the PR reflects the latest real + # run (overwriting the synthetic-success check that + # skip-kernel-e2e-pr posted on the initial open). Without this + # the PR would still show synthetic-success even after a real + # labeled run failed. 
- name: Post Kernel E2E check (success) - if: success() && github.event_name == 'merge_group' + if: success() uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0 with: github-token: ${{ github.token }} @@ -325,7 +340,7 @@ jobs: }); - name: Post Kernel E2E check (failure) - if: failure() && github.event_name == 'merge_group' + if: failure() uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0 with: github-token: ${{ github.token }} From 1b9ff05211c2759cdc6125cf33a3038f4f39d342 Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Wed, 27 May 2026 07:06:58 +0000 Subject: [PATCH 6/7] ci(kernel-e2e): bump KERNEL_REV to current kernel main The original pin (aed2efb) predates kernel PR #36 which added `complex_types_as_json` to Session.__new__. Connector main already passes that kwarg (added in PR #795), so every e2e test was failing with: TypeError: Session.__new__() got an unexpected keyword argument 'complex_types_as_json' Bump to current kernel main (3aa25b21) which has the kwarg plus the rest of the comparator-parity changes the connector code already expects. This is a good demonstration of why the bisectable KERNEL_REV pin matters: the connector and kernel evolved in lockstep on `main` before this CI existed, so the very first thing the workflow does once it can actually build the wheel is catch that we'd been shipping a stale pin. 
Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- KERNEL_REV | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KERNEL_REV b/KERNEL_REV index dfd1d198b..a0b62cf0e 100644 --- a/KERNEL_REV +++ b/KERNEL_REV @@ -1 +1 @@ -aed2efbed8087171d61848f5ad98c7e171827698 +3aa25b219ac4ec2c1e95c6f836b67d5475ae9a7d From bcfab283e0e7c1b861d50628871d68f63577346a Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Wed, 27 May 2026 08:46:29 +0000 Subject: [PATCH 7/7] ci(kernel-e2e): disable bundled rust-cache in setup-rust-toolchain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit actions-rust-lang/setup-rust-toolchain invokes Swatinem/rust-cache internally, which runs `cargo metadata` from the workflow's working directory. Our job's CWD is the connector repo root (no Cargo.toml there — the kernel checkout is in a subdir), so the bundled cache attempt fails with exit 101 and dumps a Node stack trace into the log. It's cosmetic — the action handles its own errors — but reads as a failure on first glance, and the bundled cache races with the explicit rust-cache step we already configure with the correct `workspaces: databricks-sql-kernel` path. Disabling the bundled cache leaves a single, correctly-keyed rust-cache invocation and cleans up the log. Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- .github/workflows/kernel-e2e.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/kernel-e2e.yml b/.github/workflows/kernel-e2e.yml index 8984e457a..d24d4bd20 100644 --- a/.github/workflows/kernel-e2e.yml +++ b/.github/workflows/kernel-e2e.yml @@ -254,6 +254,13 @@ jobs: # are on PATH when the kernel build step runs. 
- name: Set up Rust toolchain uses: actions-rust-lang/setup-rust-toolchain@1780873c7b576612439a134613cc4cc74ce5538c # v1.15.2 + with: + # Disable the bundled Swatinem/rust-cache invocation; it tries + # `cargo metadata` from the connector repo root (no Cargo.toml) + # and dumps a scary-looking exit-101 stack into the log even + # though the action ignores it. We run our own rust-cache step + # below with the correct workspaces path. + cache: false - name: Cache cargo build artifacts (keyed on kernel SHA) uses: Swatinem/rust-cache@98c8021b550208e191a6a3145459bfc9fb29c4c0 # v2.8.0