@@ -132,9 +132,7 @@ WORKDIR /workspace
 COPY requirements/common.txt requirements/common.txt
 COPY requirements/cuda.txt requirements/cuda.txt
 RUN --mount=type=cache,target=/root/.cache/uv \
-    # TODO: remove apache-tvm-ffi once FlashInfer is fixed https://github.com/flashinfer-ai/flashinfer/issues/1962
-    uv pip install --python /opt/venv/bin/python3 --pre apache-tvm-ffi==0.1.0b15 \
-    && uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \
+    uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.' )

 # cuda arch list used by torch
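
For context on the `--extra-index-url` suffix: the shell pipeline reduces `CUDA_VERSION` to the two-component `cuXYZ` tag used by the PyTorch wheel index. A minimal sketch, assuming an illustrative value such as `12.8.1` (the version here is hypothetical, not taken from this commit):

```sh
# Illustrative only: derive the cuXYZ index tag from a sample CUDA_VERSION.
CUDA_VERSION=12.8.1
echo "cu$(echo "$CUDA_VERSION" | cut -d. -f1,2 | tr -d '.')"
# cut -d. -f1,2 -> "12.8"; tr -d '.' -> "128"; output: cu128
```
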
@@ -356,16 +354,14 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Install vllm wheel first, so that torch etc will be installed.
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
     --mount=type=cache,target=/root/.cache/uv \
-    # TODO: remove apache-tvm-ffi once FlashInfer is fixed https://github.com/flashinfer-ai/flashinfer/issues/1962
-    uv pip install --system --pre apache-tvm-ffi==0.1.0b15 \
-    && uv pip install --system dist/*.whl --verbose \
+    uv pip install --system dist/*.whl --verbose \
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.' )

 # Install FlashInfer pre-compiled kernel cache and binaries
 # https://docs.flashinfer.ai/installation.html
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system flashinfer-cubin==0.4.1 \
-    && uv pip install --system flashinfer-jit-cache==0.4.1 \
+    uv pip install --system flashinfer-cubin==0.5.2 \
+    && uv pip install --system flashinfer-jit-cache==0.5.2 \
         --extra-index-url https://flashinfer.ai/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.' ) \
         && flashinfer show-config

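As a sanity check after the version bump, one could verify inside the built image that both FlashInfer helper packages resolved to the new pin. A hedged sketch (this check is an assumption, not part of the commit; `flashinfer show-config` itself is already run in the Dockerfile):

```sh
# Hypothetical verification: both packages should report Version: 0.5.2.
uv pip show flashinfer-cubin flashinfer-jit-cache | grep -E '^(Name|Version)'
# Prints the resolved cubin/JIT-cache configuration; fails on a broken install.
flashinfer show-config
```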