# Skip to content
#
# Build Wheels (CU128) for Windows #16
#
# Build Wheels (CU128) for Windows
#
# Build Wheels (CU128) for Windows #16

name: Build Wheels (CU128) for Windows

# Runs only when started manually (workflow_dispatch).
on:
  workflow_dispatch:

# Write access to repository contents for the job's GITHUB_TOKEN, which the
# release step passes to softprops/action-gh-release.
permissions:
  contents: write

jobs:
  build_wheels:
    name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: ['windows-2022']
        # Quoted so that 3.10 is not read as the float 3.1.
        pyver: ['3.10', '3.11', '3.12', '3.13']
        cuda: ['12.8.1']
        # Selects one of the instruction-set branches in the "Build Wheel" step
        # (AVX, AVX2, AVXVNNI, Basic).
        releasetag: ['AVX2']
        cudaarch: ['75-real;80-real;86-real;87-real;89-real;90-real;100-real;101-real;120-real']
    defaults:
      run:
        shell: pwsh
    env:
      CUDAVER: ${{ matrix.cuda }}
      AVXVER: ${{ matrix.releasetag }}
      # Value handed to -DCMAKE_CUDA_ARCHITECTURES in the "Build Wheel" step.
      # https://cmake.org/cmake/help/latest/prop_tgt/CUDA_ARCHITECTURES.html
      # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/#gpu-feature-list
      # e.g. "all" "89" "90" "100" "120"
      CUDAARCHVER: ${{ matrix.cudaarch }}
      MAX_JOBS: '8'
    steps:
      - name: Add MSBuild to PATH
        if: runner.os == 'Windows'
        uses: microsoft/setup-msbuild@v2
        with:
          msbuild-architecture: x64

      - uses: actions/checkout@v5
        with:
          submodules: 'recursive'

      # from kingbri1/flash-attention build-wheels.yml
      - name: Install CUDA ${{ matrix.cuda }}
        uses: N-Storm/cuda-toolkit@v0.2.28
        id: cuda-toolkit
        with:
          cuda: '${{ matrix.cuda }}'
          use-github-cache: false

      # from astral-sh/setup-uv
      - name: Install the latest version of uv and set the python version
        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.pyver }}
          activate-environment: true
          enable-cache: true

      - name: Install Dependencies
        run: |
          git config --system core.longpaths true
          uv pip install --upgrade build setuptools wheel packaging

      - name: Build Wheel
        run: |
          # "12.8.1" -> "12.8" -> "128": drop the last version component, then the dots.
          $cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')
          $env:CUDA_HOME = $env:CUDA_PATH
          $env:CUDA_TOOLKIT_ROOT_DIR = $env:CUDA_PATH
          $env:VERBOSE = '1'

          # CMAKE_ARGS is assembled in stages; later stages prepend to the earlier value.
          # NOTE(review): CMake documents CMAKE_BUILD_PARALLEL_LEVEL as an environment
          # variable, so passing it as a -D cache entry may have no effect - confirm.
          $env:CMAKE_ARGS = '-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=' + $env:CUDAARCHVER + ' -DCMAKE_BUILD_PARALLEL_LEVEL=' + $env:MAX_JOBS
          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=on -DCUDA_SEPARABLE_COMPILATION=on $env:CMAKE_ARGS"
          $env:CMAKE_ARGS = "-DENABLE_CCACHE=on -DLLAMA_CURL=off -DLLAMA_HTTPLIB=on $env:CMAKE_ARGS"

          # Append the CPU instruction-set flags that match the matrix release tag.
          if ($env:AVXVER -eq 'AVX') {
            $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX=on -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
          }
          if ($env:AVXVER -eq 'AVX2') {
            $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off'
          }
          if ($env:AVXVER -eq 'AVXVNNI') {
            $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX_VNNI=on -DGGML_FMA=on -DGGML_F16C=off'
          }
          # if ($env:AVXVER -eq 'AVX512') {
          #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX512=on'
          # }
          # Basic options for compiling without AVX instructions
          if ($env:AVXVER -eq 'Basic') {
            $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off'
          }

          python -m build --wheel
          # A non-zero exit code from a native command does not stop the script at
          # this point, so check it explicitly instead of relying only on dist/.
          if ($LASTEXITCODE -ne 0) {
            Write-Error "python -m build failed with exit code $LASTEXITCODE"
            exit $LASTEXITCODE
          }

          # Check if wheel was built
          if (!(Test-Path '.\dist\*.whl')) {
            Write-Error "No wheel built in dist/ directory"
            exit 1
          }

          # Export the values the release step uses to build its tag name.
          Write-Output "CUDA_VERSION=$cudaVersion" >> $env:GITHUB_ENV
          $wheel = (Get-Item '.\dist\*.whl')[0]
          # The second '-'-separated field of the wheel file name is used as the version.
          $tagVer = $wheel.Name.Split('-')[1]
          Write-Output "TAG_VERSION=$tagVer" >> $env:GITHUB_ENV

      - name: Get Current Date
        id: get-date
        run: |
          $currentDate = Get-Date -UFormat "%Y%m%d"
          Write-Output "BUILD_DATE=$currentDate" >> $env:GITHUB_ENV

      - name: Create Release
        # TAG_VERSION is exported only after a wheel has been found, so this step is
        # skipped when the build produced nothing. !cancelled() keeps it running
        # after an earlier step failure but not after the job is cancelled.
        if: ${{ !cancelled() && env.TAG_VERSION != '' }}
        uses: softprops/action-gh-release@v2
        with:
          files: dist/*
          # Set tag_name to v<tag>-cu<cuda_version>-<avx>-win-<date>
          tag_name: v${{ env.TAG_VERSION }}-cu${{ env.CUDA_VERSION }}-${{ env.AVXVER }}-win-${{ env.BUILD_DATE }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}