From 3a5581f80d44f71219fe8a848ee833186f8dca47 Mon Sep 17 00:00:00 2001 From: Drew VanDine Date: Sun, 31 May 2026 11:55:05 -0400 Subject: [PATCH 01/12] ci: add Linux and Windows CUDA builds per compute capability Adds ubuntu-latest-cuda and windows-latest-cuda jobs covering sm_75 through sm_120, modeled after the llama.cpp CUDA CI. Linux installs CUDA 12.8 via apt, bundles cublas runtime libs, and patches RPATHs. Windows uses Jimver/cuda-toolkit@v0.2.22 with MSVC + Ninja Multi-Config. Both jobs are wired into the release job. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yml | 170 ++++++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index dd3e5e284..cff04c095 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -95,6 +95,174 @@ jobs: path: | sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-${{ steps.system-info.outputs.OS_NAME }}-${{ steps.system-info.outputs.OS_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}.zip + ubuntu-latest-cuda: + runs-on: ubuntu-22.04 + + strategy: + fail-fast: false + matrix: + include: + - sm: sm_75 + - sm: sm_80 + - sm: sm_86 + - sm: sm_89 + - sm: sm_90 + - sm: sm_100 + - sm: sm_120 + + steps: + - name: Clone + uses: actions/checkout@v6 + with: + submodules: recursive + fetch-depth: 0 + repository: 'leejet/stable-diffusion.cpp' + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-cuda-${{ matrix.sm }} + evict-old-files: 1d + + - name: Free disk space + run: | + sudo apt-get remove -y '^aspnetcore-.*' '^dotnet-.*' '^llvm-.*' 'php.*' 'ruby.*' \ + google-cloud-cli azure-cli google-chrome-stable firefox powershell 2>/dev/null || true + sudo apt-get autoremove -y + df -h + + - name: Install CUDA Toolkit + run: | + wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb + sudo 
dpkg -i cuda-keyring_1.1-1_all.deb + sudo apt-get update + sudo apt-get install -y cuda-toolkit-12-8 cmake ninja-build patchelf + + - name: Set CUDA environment + run: | + echo "CUDA_PATH=/usr/local/cuda" >> "$GITHUB_ENV" + echo "/usr/local/cuda/bin" >> "$GITHUB_PATH" + echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH:-}" >> "$GITHUB_ENV" + + - name: Build + id: cmake_build + run: | + cmake_arch="${{ matrix.sm }}" + cmake_arch="${cmake_arch#sm_}" + cmake -B build -S . \ + -DSD_CUBLAS=ON \ + -DCMAKE_CUDA_ARCHITECTURES="${cmake_arch}" \ + -DGGML_NATIVE=OFF \ + -DSD_BUILD_SHARED_LIBS=ON \ + -DCMAKE_BUILD_TYPE=Release + cmake --build build --config Release -j $(nproc) + + - name: Bundle CUDA runtime libraries + run: | + cuda_lib=/usr/local/cuda/lib64 + cp -v ${cuda_lib}/libcublas.so* build/bin/ 2>/dev/null || true + cp -v ${cuda_lib}/libcublasLt.so* build/bin/ 2>/dev/null || true + cp -v ${cuda_lib}/libcurand.so* build/bin/ 2>/dev/null || true + + - name: Set RPATH for portable distribution + run: | + for f in build/bin/*.so* build/bin/sd; do + [ -f "$f" ] && ! 
[ -L "$f" ] && patchelf --set-rpath '$ORIGIN' "$f" 2>/dev/null || true + done + + - name: Get commit hash + id: commit + if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} + uses: prompt/actions-commit-hash@v2 + + - name: Pack artifacts + id: pack_artifacts + if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} + run: | + cp ggml/LICENSE ./build/bin/ggml.txt + cp LICENSE ./build/bin/stable-diffusion.cpp.txt + zip -y -r sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-Linux-Ubuntu-22.04-x86_64-cuda-${{ matrix.sm }}.zip ./build/bin + + - name: Upload artifacts + if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-Linux-Ubuntu-22.04-x86_64-cuda-${{ matrix.sm }}.zip + path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-Linux-Ubuntu-22.04-x86_64-cuda-${{ matrix.sm }}.zip + + windows-latest-cuda: + runs-on: windows-2022 + + strategy: + fail-fast: false + matrix: + include: + - sm: sm_75 + - sm: sm_80 + - sm: sm_86 + - sm: sm_89 + - sm: sm_90 + - sm: sm_100 + - sm: sm_120 + + steps: + - name: Clone + uses: actions/checkout@v6 + with: + submodules: recursive + fetch-depth: 0 + repository: 'leejet/stable-diffusion.cpp' + + - name: Install CUDA Toolkit + uses: Jimver/cuda-toolkit@v0.2.22 + with: + cuda: '12.8.0' + method: 'network' + sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]' + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: windows-cuda-${{ matrix.sm }} + variant: ccache + evict-old-files: 1d + + - name: Install Ninja + run: choco install ninja + + - name: Build + shell: cmd + run: | + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + set sm=${{ matrix.sm }} + set cmake_arch=%sm:sm_=% 
+ cmake -S . -B build -G "Ninja Multi-Config" ^ + -DSD_CUBLAS=ON ^ + -DCMAKE_CUDA_ARCHITECTURES=%cmake_arch% ^ + -DGGML_NATIVE=OFF ^ + -DSD_BUILD_SHARED_LIBS=ON + cmake --build build --config Release + + - name: Get commit hash + id: commit + if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} + uses: prompt/actions-commit-hash@v2 + + - name: Pack artifacts + id: pack_artifacts + if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} + run: | + Copy-Item ggml/LICENSE .\build\bin\Release\ggml.txt + Copy-Item LICENSE .\build\bin\Release\stable-diffusion.cpp.txt + 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-cuda-${{ matrix.sm }}-x64.zip .\build\bin\Release\* + + - name: Upload artifacts + if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-cuda-${{ matrix.sm }}-x64.zip + path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-cuda-${{ matrix.sm }}-x64.zip + windows-latest-cmake: runs-on: windows-2022 @@ -640,9 +808,11 @@ jobs: needs: - ubuntu-latest-rocm - ubuntu-latest-cmake + - ubuntu-latest-cuda - windows-latest-cmake-hip - windows-latest-rocm - windows-latest-cmake + - windows-latest-cuda - macos-arm64-cmake steps: From a0d011290f3378966f0f26c92388d17bf4a315a0 Mon Sep 17 00:00:00 2001 From: Drew VanDine Date: Sun, 31 May 2026 15:47:35 -0400 Subject: [PATCH 02/12] ci: fix CUDA artifact names to match Lemonade SDK expectations Linux CUDA artifacts are now named ubuntu-cuda-sm_XX-x64.tar.xz and Windows CUDA artifacts are named windows-cuda-sm_XX-x64.zip, matching the filenames Lemonade SDK constructs when downloading from releases. 
Also removes the sd-* pattern filter from the release download step so CUDA artifacts (which don't carry the sd- prefix) are included, and adds .tar.xz support to the release upload script. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yml | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index cff04c095..926276034 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -170,25 +170,20 @@ jobs: [ -f "$f" ] && ! [ -L "$f" ] && patchelf --set-rpath '$ORIGIN' "$f" 2>/dev/null || true done - - name: Get commit hash - id: commit - if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} - uses: prompt/actions-commit-hash@v2 - - name: Pack artifacts id: pack_artifacts if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} run: | cp ggml/LICENSE ./build/bin/ggml.txt cp LICENSE ./build/bin/stable-diffusion.cpp.txt - zip -y -r sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-Linux-Ubuntu-22.04-x86_64-cuda-${{ matrix.sm }}.zip ./build/bin + tar -cJf ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz -C ./build/bin . 
- name: Upload artifacts if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} uses: actions/upload-artifact@v4 with: - name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-Linux-Ubuntu-22.04-x86_64-cuda-${{ matrix.sm }}.zip - path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-Linux-Ubuntu-22.04-x86_64-cuda-${{ matrix.sm }}.zip + name: ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz + path: ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz windows-latest-cuda: runs-on: windows-2022 @@ -243,25 +238,20 @@ jobs: -DSD_BUILD_SHARED_LIBS=ON cmake --build build --config Release - - name: Get commit hash - id: commit - if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} - uses: prompt/actions-commit-hash@v2 - - name: Pack artifacts id: pack_artifacts if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} run: | Copy-Item ggml/LICENSE .\build\bin\Release\ggml.txt Copy-Item LICENSE .\build\bin\Release\stable-diffusion.cpp.txt - 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-cuda-${{ matrix.sm }}-x64.zip .\build\bin\Release\* + 7z a windows-cuda-${{ matrix.sm }}-x64.zip .\build\bin\Release\* - name: Upload artifacts if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} uses: actions/upload-artifact@v4 with: - name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-cuda-${{ matrix.sm }}-x64.zip - path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-cuda-${{ matrix.sm }}-x64.zip + name: windows-cuda-${{ matrix.sm }}-x64.zip + path: windows-cuda-${{ matrix.sm }}-x64.zip windows-latest-cmake: runs-on: windows-2022 @@ -829,7 +819,6 @@ jobs: uses: actions/download-artifact@v4 with: path: ./artifact - pattern: sd-* merge-multiple: true - name: Get commit count @@ -881,7 +870,7 @@ jobs: const fs = require('fs'); const release_id = '${{ 
steps.create_release.outputs.id }}'; for (let file of await fs.readdirSync('./artifact')) { - if (path.extname(file) === '.zip') { + if (path.extname(file) === '.zip' || file.endsWith('.tar.xz')) { console.log('uploadReleaseAsset', file); await github.repos.uploadReleaseAsset({ owner: context.repo.owner, From adb7fd07ec1d261abbe8b5c663287415901900fe Mon Sep 17 00:00:00 2001 From: Drew VanDine Date: Sun, 31 May 2026 15:54:35 -0400 Subject: [PATCH 03/12] ci: use sd-{branch}-{hash}-ubuntu/windows-cuda-{sm}-x64 artifact names Renames CUDA artifacts to match the format Lemonade SDK expects: sd-master-148f69f-ubuntu-cuda-sm_89-x64.tar.xz sd-master-148f69f-windows-cuda-sm_89-x64.zip Also restores the sd-* download pattern in the release job and keeps .tar.xz support in the upload script. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yml | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 926276034..f463d928a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -170,20 +170,25 @@ jobs: [ -f "$f" ] && ! [ -L "$f" ] && patchelf --set-rpath '$ORIGIN' "$f" 2>/dev/null || true done + - name: Get commit hash + id: commit + if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} + uses: prompt/actions-commit-hash@v2 + - name: Pack artifacts id: pack_artifacts if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} run: | cp ggml/LICENSE ./build/bin/ggml.txt cp LICENSE ./build/bin/stable-diffusion.cpp.txt - tar -cJf ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz -C ./build/bin . + tar -cJf sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz -C ./build/bin . 
- name: Upload artifacts if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} uses: actions/upload-artifact@v4 with: - name: ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz - path: ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz + name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz + path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz windows-latest-cuda: runs-on: windows-2022 @@ -238,20 +243,25 @@ jobs: -DSD_BUILD_SHARED_LIBS=ON cmake --build build --config Release + - name: Get commit hash + id: commit + if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} + uses: prompt/actions-commit-hash@v2 + - name: Pack artifacts id: pack_artifacts if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} run: | Copy-Item ggml/LICENSE .\build\bin\Release\ggml.txt Copy-Item LICENSE .\build\bin\Release\stable-diffusion.cpp.txt - 7z a windows-cuda-${{ matrix.sm }}-x64.zip .\build\bin\Release\* + 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip .\build\bin\Release\* - name: Upload artifacts if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} uses: actions/upload-artifact@v4 with: - name: windows-cuda-${{ matrix.sm }}-x64.zip - path: windows-cuda-${{ matrix.sm }}-x64.zip + name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip + path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip windows-latest-cmake: runs-on: windows-2022 @@ -819,6 +829,7 @@ jobs: uses: actions/download-artifact@v4 with: path: ./artifact + pattern: sd-* merge-multiple: true - name: Get commit count From cb3206243534eb91824677a960734cd7af9b5ae7 Mon Sep 17 00:00:00 2001 From: Drew VanDine Date: Sun, 31 May 2026 16:04:17 -0400 
Subject: [PATCH 04/12] ci: revert CUDA artifact names to original sd-*-bin-* format Restores the filenames introduced in the initial CUDA PR: sd-master-{hash}-bin-Linux-Ubuntu-22.04-x86_64-cuda-{sm}.zip sd-master-{hash}-bin-win-cuda-{sm}-x64.zip Reverts the intermediate ubuntu-cuda-* and windows-cuda-* name experiments, and drops the .tar.xz branch from the release upload script. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f463d928a..cff04c095 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -181,14 +181,14 @@ jobs: run: | cp ggml/LICENSE ./build/bin/ggml.txt cp LICENSE ./build/bin/stable-diffusion.cpp.txt - tar -cJf sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz -C ./build/bin . + zip -y -r sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-Linux-Ubuntu-22.04-x86_64-cuda-${{ matrix.sm }}.zip ./build/bin - name: Upload artifacts if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} uses: actions/upload-artifact@v4 with: - name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz - path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz + name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-Linux-Ubuntu-22.04-x86_64-cuda-${{ matrix.sm }}.zip + path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-Linux-Ubuntu-22.04-x86_64-cuda-${{ matrix.sm }}.zip windows-latest-cuda: runs-on: windows-2022 @@ -254,14 +254,14 @@ jobs: run: | Copy-Item ggml/LICENSE .\build\bin\Release\ggml.txt Copy-Item LICENSE .\build\bin\Release\stable-diffusion.cpp.txt - 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip 
.\build\bin\Release\* + 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-cuda-${{ matrix.sm }}-x64.zip .\build\bin\Release\* - name: Upload artifacts if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} uses: actions/upload-artifact@v4 with: - name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip - path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip + name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-cuda-${{ matrix.sm }}-x64.zip + path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-cuda-${{ matrix.sm }}-x64.zip windows-latest-cmake: runs-on: windows-2022 @@ -881,7 +881,7 @@ jobs: const fs = require('fs'); const release_id = '${{ steps.create_release.outputs.id }}'; for (let file of await fs.readdirSync('./artifact')) { - if (path.extname(file) === '.zip' || file.endsWith('.tar.xz')) { + if (path.extname(file) === '.zip') { console.log('uploadReleaseAsset', file); await github.repos.uploadReleaseAsset({ owner: context.repo.owner, From 36778b2765bd057c6280c850b0d611a3988dae68 Mon Sep 17 00:00:00 2001 From: Drew VanDine Date: Sun, 31 May 2026 16:10:52 -0400 Subject: [PATCH 05/12] ci: standardize CUDA artifact names and compression formats Linux: sd-master-{hash}-ubuntu-cuda-sm_XX-x64.tar.xz Windows: sd-master-{hash}-windows-cuda-sm_XX-x64.zip Also adds .tar.xz support to the release upload script. 
Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index cff04c095..f463d928a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -181,14 +181,14 @@ jobs: run: | cp ggml/LICENSE ./build/bin/ggml.txt cp LICENSE ./build/bin/stable-diffusion.cpp.txt - zip -y -r sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-Linux-Ubuntu-22.04-x86_64-cuda-${{ matrix.sm }}.zip ./build/bin + tar -cJf sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz -C ./build/bin . - name: Upload artifacts if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} uses: actions/upload-artifact@v4 with: - name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-Linux-Ubuntu-22.04-x86_64-cuda-${{ matrix.sm }}.zip - path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-Linux-Ubuntu-22.04-x86_64-cuda-${{ matrix.sm }}.zip + name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz + path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-ubuntu-cuda-${{ matrix.sm }}-x64.tar.xz windows-latest-cuda: runs-on: windows-2022 @@ -254,14 +254,14 @@ jobs: run: | Copy-Item ggml/LICENSE .\build\bin\Release\ggml.txt Copy-Item LICENSE .\build\bin\Release\stable-diffusion.cpp.txt - 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-cuda-${{ matrix.sm }}-x64.zip .\build\bin\Release\* + 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip .\build\bin\Release\* - name: Upload artifacts if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} uses: actions/upload-artifact@v4 with: - name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short 
}}-bin-win-cuda-${{ matrix.sm }}-x64.zip - path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-cuda-${{ matrix.sm }}-x64.zip + name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip + path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip windows-latest-cmake: runs-on: windows-2022 @@ -881,7 +881,7 @@ jobs: const fs = require('fs'); const release_id = '${{ steps.create_release.outputs.id }}'; for (let file of await fs.readdirSync('./artifact')) { - if (path.extname(file) === '.zip') { + if (path.extname(file) === '.zip' || file.endsWith('.tar.xz')) { console.log('uploadReleaseAsset', file); await github.repos.uploadReleaseAsset({ owner: context.repo.owner, From b55463d29b7260eca79a73799dc51230643148ec Mon Sep 17 00:00:00 2001 From: Drew VanDine Date: Sun, 31 May 2026 19:58:49 -0400 Subject: [PATCH 06/12] ci: package Windows CUDA artifacts as .7z (matches Lemonade expectation) Lemonade's sd_server.cpp builds the Windows CUDA download URL with a .7z extension. The CI was producing .zip, causing a 404 on download which presented as an extraction failure. Also adds .7z to the release upload script allowlist. 
Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f463d928a..4fc066cbf 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -254,14 +254,14 @@ jobs: run: | Copy-Item ggml/LICENSE .\build\bin\Release\ggml.txt Copy-Item LICENSE .\build\bin\Release\stable-diffusion.cpp.txt - 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip .\build\bin\Release\* + 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.7z .\build\bin\Release\* - name: Upload artifacts if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} uses: actions/upload-artifact@v4 with: - name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip - path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip + name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.7z + path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.7z windows-latest-cmake: runs-on: windows-2022 @@ -881,7 +881,7 @@ jobs: const fs = require('fs'); const release_id = '${{ steps.create_release.outputs.id }}'; for (let file of await fs.readdirSync('./artifact')) { - if (path.extname(file) === '.zip' || file.endsWith('.tar.xz')) { + if (path.extname(file) === '.zip' || file.endsWith('.tar.xz') || path.extname(file) === '.7z') { console.log('uploadReleaseAsset', file); await github.repos.uploadReleaseAsset({ owner: context.repo.owner, From c69f4790fa8d634e8e43cadc6a9f115d84b4ae9f Mon Sep 17 00:00:00 2001 From: Drew VanDine Date: Sun, 31 May 2026 20:03:15 -0400 Subject: [PATCH 07/12] ci: revert Windows CUDA artifacts back to .zip Co-Authored-By: Claude Sonnet 4.6 
--- .github/workflows/build.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4fc066cbf..f463d928a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -254,14 +254,14 @@ jobs: run: | Copy-Item ggml/LICENSE .\build\bin\Release\ggml.txt Copy-Item LICENSE .\build\bin\Release\stable-diffusion.cpp.txt - 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.7z .\build\bin\Release\* + 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip .\build\bin\Release\* - name: Upload artifacts if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} uses: actions/upload-artifact@v4 with: - name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.7z - path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.7z + name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip + path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip windows-latest-cmake: runs-on: windows-2022 @@ -881,7 +881,7 @@ jobs: const fs = require('fs'); const release_id = '${{ steps.create_release.outputs.id }}'; for (let file of await fs.readdirSync('./artifact')) { - if (path.extname(file) === '.zip' || file.endsWith('.tar.xz') || path.extname(file) === '.7z') { + if (path.extname(file) === '.zip' || file.endsWith('.tar.xz')) { console.log('uploadReleaseAsset', file); await github.repos.uploadReleaseAsset({ owner: context.repo.owner, From cb803219734d536f2a655e2c5d539595b2191c49 Mon Sep 17 00:00:00 2001 From: Drew VanDine Date: Sun, 31 May 2026 21:55:41 -0400 Subject: [PATCH 08/12] ci: bundle CUDA runtime libs for portability on both platforms Linux: add libcudart.so* to the bundled runtime 
libraries so binaries can run on systems without a matching CUDA toolkit installed. Windows: add id to cuda-toolkit step and robocopy cudart64/cublas64/cublasLt64 DLLs into build\bin\Release so they are included in the zip and the build runs on machines without a pre-installed CUDA runtime. Addresses Copilot review comments on lemonade-sdk/stable-diffusion.cpp#7. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f463d928a..0fd5524a3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -160,6 +160,7 @@ jobs: - name: Bundle CUDA runtime libraries run: | cuda_lib=/usr/local/cuda/lib64 + cp -v ${cuda_lib}/libcudart.so* build/bin/ 2>/dev/null || true cp -v ${cuda_lib}/libcublas.so* build/bin/ 2>/dev/null || true cp -v ${cuda_lib}/libcublasLt.so* build/bin/ 2>/dev/null || true cp -v ${cuda_lib}/libcurand.so* build/bin/ 2>/dev/null || true @@ -214,6 +215,7 @@ jobs: repository: 'leejet/stable-diffusion.cpp' - name: Install CUDA Toolkit + id: cuda-toolkit uses: Jimver/cuda-toolkit@v0.2.22 with: cuda: '12.8.0' @@ -254,6 +256,7 @@ jobs: run: | Copy-Item ggml/LICENSE .\build\bin\Release\ggml.txt Copy-Item LICENSE .\build\bin\Release\stable-diffusion.cpp.txt + robocopy "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}\bin" .\build\bin\Release cudart64_*.dll cublas64_*.dll cublasLt64_*.dll; if ($LASTEXITCODE -le 7) { $LASTEXITCODE = 0 } 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip .\build\bin\Release\* - name: Upload artifacts From 80187053297a7c2b100b377fccd8c52e2a643741 Mon Sep 17 00:00:00 2001 From: Drew VanDine Date: Sun, 31 May 2026 22:09:36 -0400 Subject: [PATCH 09/12] ci: match lemonade-sdk/llama.cpp CUDA library bundling Linux: - Add libnvJitLink.so* to the bundled runtime libs (libcudart.so* was already added in the previous commit) - Use cp -av (preserves symlinks) instead of cp -v, and stop suppressing copy errors (drops `2>/dev/null || true`), so a missing library now fails the step - RPATH step now patches every ELF file in build/bin, detected via file(1), instead of a fixed glob, and no longer ignores patchelf failures 
Windows: - Add curand and nvjitlink to CUDA toolkit sub-packages - Copy cudart64, cublas64, cublasLt64, curand64, nvJitLink DLLs from CUDA_PATH\bin before zipping Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yml | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0fd5524a3..af439e5de 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -160,15 +160,19 @@ jobs: - name: Bundle CUDA runtime libraries run: | cuda_lib=/usr/local/cuda/lib64 - cp -v ${cuda_lib}/libcudart.so* build/bin/ 2>/dev/null || true - cp -v ${cuda_lib}/libcublas.so* build/bin/ 2>/dev/null || true - cp -v ${cuda_lib}/libcublasLt.so* build/bin/ 2>/dev/null || true - cp -v ${cuda_lib}/libcurand.so* build/bin/ 2>/dev/null || true + cp -av ${cuda_lib}/libcudart.so* build/bin/ + cp -av ${cuda_lib}/libcublas.so* build/bin/ + cp -av ${cuda_lib}/libcublasLt.so* build/bin/ + cp -av ${cuda_lib}/libcurand.so* build/bin/ + cp -av ${cuda_lib}/libnvJitLink.so* build/bin/ - name: Set RPATH for portable distribution run: | - for f in build/bin/*.so* build/bin/sd; do - [ -f "$f" ] && ! [ -L "$f" ] && patchelf --set-rpath '$ORIGIN' "$f" 2>/dev/null || true + for f in build/bin/*; do + [ -f "$f" ] && ! 
[ -L "$f" ] || continue + if file "$f" | grep -q 'ELF'; then + patchelf --set-rpath '$ORIGIN' "$f" + fi done - name: Get commit hash @@ -220,7 +224,7 @@ jobs: with: cuda: '12.8.0' method: 'network' - sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]' + sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "curand", "nvjitlink", "thrust", "visual_studio_integration"]' - name: ccache uses: ggml-org/ccache-action@v1.2.16 @@ -256,7 +260,12 @@ jobs: run: | Copy-Item ggml/LICENSE .\build\bin\Release\ggml.txt Copy-Item LICENSE .\build\bin\Release\stable-diffusion.cpp.txt - robocopy "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}\bin" .\build\bin\Release cudart64_*.dll cublas64_*.dll cublasLt64_*.dll; if ($LASTEXITCODE -le 7) { $LASTEXITCODE = 0 } + $cudaBin = Join-Path $env:CUDA_PATH 'bin' + $runtimeDllPatterns = @('cudart64_*.dll', 'cublas64_*.dll', 'cublasLt64_*.dll', 'curand64_*.dll', 'nvJitLink_*.dll') + foreach ($pattern in $runtimeDllPatterns) { + $dll = Get-ChildItem -Path $cudaBin -Filter $pattern | Sort-Object Name -Descending | Select-Object -First 1 + if ($dll) { Copy-Item $dll.FullName .\build\bin\Release\ } + } 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip .\build\bin\Release\* - name: Upload artifacts From 73bb113fc7dc99db2a1ddea25b7ef19e92140816 Mon Sep 17 00:00:00 2001 From: Ken VanDine Date: Sun, 31 May 2026 22:38:08 -0400 Subject: [PATCH 10/12] Potential fix for pull request finding Make the Windows CUDA pack step fail when a required runtime DLL is missing: Get-ChildItem now runs with -ErrorAction Stop, and the step throws if no file matches a pattern instead of silently skipping the copy. NOTE(review): the hunk below adds a closing brace after the new Copy-Item line while the foreach's existing closing brace is kept as context, so the loop appears to end up with two closing braces - confirm before applying. Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .github/workflows/build.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index af439e5de..ea9cd5e19 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -263,8 +263,10 @@ jobs: $cudaBin = Join-Path $env:CUDA_PATH 'bin' $runtimeDllPatterns = @('cudart64_*.dll', 
'cublas64_*.dll', 'cublasLt64_*.dll', 'curand64_*.dll', 'nvJitLink_*.dll') foreach ($pattern in $runtimeDllPatterns) { - $dll = Get-ChildItem -Path $cudaBin -Filter $pattern | Sort-Object Name -Descending | Select-Object -First 1 - if ($dll) { Copy-Item $dll.FullName .\build\bin\Release\ } + $dll = Get-ChildItem -Path $cudaBin -Filter $pattern -ErrorAction Stop | Sort-Object Name -Descending | Select-Object -First 1 + if (-not $dll) { throw "Required CUDA runtime DLL matching '$pattern' was not found in $cudaBin" } + Copy-Item $dll.FullName .\build\bin\Release\ + } } 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip .\build\bin\Release\* From fa19b4115bd4ab11e32d61fd42b532b9830dc9de Mon Sep 17 00:00:00 2001 From: Drew VanDine Date: Sun, 31 May 2026 22:53:11 -0400 Subject: [PATCH 11/12] ci: pin CUDA clone steps to ref: master Without an explicit ref, actions/checkout makes a GitHub API call to determine the default branch of leejet/stable-diffusion.cpp. With 14 parallel CUDA jobs all doing this simultaneously this call can fail, presenting as a checkout error. Pinning to master skips the API call. 
Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index af439e5de..f7d55f446 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -117,6 +117,7 @@ jobs: submodules: recursive fetch-depth: 0 repository: 'leejet/stable-diffusion.cpp' + ref: master - name: ccache uses: ggml-org/ccache-action@v1.2.16 @@ -217,6 +218,7 @@ jobs: submodules: recursive fetch-depth: 0 repository: 'leejet/stable-diffusion.cpp' + ref: master - name: Install CUDA Toolkit id: cuda-toolkit From 7437d050c0b7e3b37466e02e26670666ffa067ea Mon Sep 17 00:00:00 2001 From: Drew VanDine Date: Sun, 31 May 2026 23:07:24 -0400 Subject: [PATCH 12/12] ci: fix PowerShell parser error in Windows CUDA pack step Use multi-line array literal format for $runtimeDllPatterns and remove the trailing backslash from the Copy-Item destination path, which was causing an 'Unexpected token }' parser error at the foreach closing brace. 
Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/build.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f7d55f446..28a656412 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -263,10 +263,16 @@ jobs: Copy-Item ggml/LICENSE .\build\bin\Release\ggml.txt Copy-Item LICENSE .\build\bin\Release\stable-diffusion.cpp.txt $cudaBin = Join-Path $env:CUDA_PATH 'bin' - $runtimeDllPatterns = @('cudart64_*.dll', 'cublas64_*.dll', 'cublasLt64_*.dll', 'curand64_*.dll', 'nvJitLink_*.dll') + $runtimeDllPatterns = @( + 'cudart64_*.dll', + 'cublas64_*.dll', + 'cublasLt64_*.dll', + 'curand64_*.dll', + 'nvJitLink_*.dll' + ) foreach ($pattern in $runtimeDllPatterns) { $dll = Get-ChildItem -Path $cudaBin -Filter $pattern | Sort-Object Name -Descending | Select-Object -First 1 - if ($dll) { Copy-Item $dll.FullName .\build\bin\Release\ } + if ($dll) { Copy-Item $dll.FullName .\build\bin\Release } } 7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip .\build\bin\Release\*