Skip to content

Commit 9c40dbc

Browse files
bigPYJ1151 authored and devpatelio committed
[CPU] Refactor CPU attention backend (vllm-project#27954)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
1 parent f757cea commit 9c40dbc

34 files changed

+4352
-1900
lines changed

.buildkite/release-pipeline.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -132,7 +132,7 @@ steps:
132132
queue: cpu_queue_postmerge
133133
commands:
134134
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
135-
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_AVX512BF16=true --build-arg VLLM_CPU_AVX512VNNI=true --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ."
135+
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_AVX512BF16=true --build-arg VLLM_CPU_AVX512VNNI=true --build-arg VLLM_CPU_AMXBF16=true --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ."
136136
- "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest"
137137
- "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
138138
env:

.buildkite/scripts/hardware_ci/run-cpu-test.sh

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ function cpu_tests() {
4949
# Run kernel tests
5050
docker exec cpu-test-"$NUMA_NODE" bash -c "
5151
set -e
52+
pytest -x -v -s tests/kernels/attention/test_cpu_attn.py
5253
pytest -x -v -s tests/kernels/test_onednn.py"
5354

5455
# Run basic model test
@@ -116,4 +117,4 @@ function cpu_tests() {
116117

117118
# All of CPU tests are expected to be finished less than 40 mins.
118119
export -f cpu_tests
119-
timeout 2h bash -c "cpu_tests $CORE_RANGE $NUMA_NODE"
120+
timeout 2.5h bash -c "cpu_tests $CORE_RANGE $NUMA_NODE"

cmake/cpu_extension.cmake

Lines changed: 24 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ endif()
1515
#
1616
set(ENABLE_AVX512BF16 $ENV{VLLM_CPU_AVX512BF16})
1717
set(ENABLE_AVX512VNNI $ENV{VLLM_CPU_AVX512VNNI})
18+
set(ENABLE_AMXBF16 $ENV{VLLM_CPU_AMXBF16})
1819

1920
include_directories("${CMAKE_SOURCE_DIR}/csrc")
2021

@@ -140,6 +141,22 @@ if (AVX512_FOUND AND NOT AVX512_DISABLED)
140141
set(ENABLE_AVX512VNNI OFF)
141142
message(WARNING "Disable AVX512-VNNI ISA support, no avx512_vnni found in local CPU flags." " If cross-compilation is required, please set env VLLM_CPU_AVX512VNNI=1.")
142143
endif()
144+
145+
find_isa(${CPUINFO} "amx_bf16" AMXBF16_FOUND)
146+
if (AMXBF16_FOUND OR ENABLE_AMXBF16)
147+
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
148+
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3)
149+
list(APPEND CXX_COMPILE_FLAGS "-mamx-bf16" "-mamx-tile")
150+
set(ENABLE_AMXBF16 ON)
151+
add_compile_definitions(-DCPU_CAPABILITY_AMXBF16)
152+
else()
153+
set(ENABLE_AMXBF16 OFF)
154+
message(WARNING "Disable AMX_BF16 ISA support, requires gcc/g++ >= 12.3")
155+
endif()
156+
else()
157+
set(ENABLE_AMXBF16 OFF)
158+
message(WARNING "Disable AMX_BF16 ISA support, no amx_bf16 found in local CPU flags." " If cross-compilation is required, please set env VLLM_CPU_AMXBF16=1.")
159+
endif()
143160

144161
elseif (AVX2_FOUND)
145162
list(APPEND CXX_COMPILE_FLAGS "-mavx2")
@@ -275,7 +292,10 @@ if ((AVX512_FOUND AND NOT AVX512_DISABLED) OR (ASIMD_FOUND AND NOT APPLE_SILICON
275292
set(ONEDNN_VERBOSE "OFF")
276293
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
277294

295+
set(VLLM_BUILD_TYPE ${CMAKE_BUILD_TYPE})
296+
set(CMAKE_BUILD_TYPE "Release") # remove oneDNN debug symbols to reduce size
278297
FetchContent_MakeAvailable(oneDNN)
298+
set(CMAKE_BUILD_TYPE ${VLLM_BUILD_TYPE})
279299
add_library(dnnl_ext OBJECT "csrc/cpu/dnnl_helper.cpp")
280300
target_include_directories(
281301
dnnl_ext
@@ -305,14 +325,14 @@ endif()
305325
#
306326
set(VLLM_EXT_SRC
307327
"csrc/cpu/activation.cpp"
308-
"csrc/cpu/attention.cpp"
309-
"csrc/cpu/cache.cpp"
310328
"csrc/cpu/utils.cpp"
311329
"csrc/cpu/layernorm.cpp"
312330
"csrc/cpu/mla_decode.cpp"
313331
"csrc/cpu/pos_encoding.cpp"
314-
"csrc/cpu/torch_bindings.cpp"
315-
"csrc/moe/dynamic_4bit_int_moe_cpu.cpp")
332+
"csrc/moe/dynamic_4bit_int_moe_cpu.cpp"
333+
"csrc/cpu/cpu_attn.cpp"
334+
"csrc/cpu/scratchpad_manager.cpp"
335+
"csrc/cpu/torch_bindings.cpp")
316336

317337
if (AVX512_FOUND AND NOT AVX512_DISABLED)
318338
set(VLLM_EXT_SRC

0 commit comments

Comments
 (0)