This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 4f34df1

Merge pull request #38 from janhq/chore/rebase-cortex-to-rel-branch
Add cortex.tensorrt-llm
2 parents a9356d4 + 87f75c9 commit 4f34df1

17 files changed: +11076 −0 lines changed

cpp/tensorrt_llm/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
```diff
@@ -263,3 +263,7 @@ if(BUILD_PYBIND)
 endif()
 
 add_subdirectory(plugins)
+
+if(BUILD_CORTEX_TENSORRT-LLM)
+  add_subdirectory(cortex.tensorrt-llm)
+endif()
```
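The guard makes the new engine opt-in. As a minimal sketch of enabling it at configure time (assuming an out-of-source build directory under cpp/, as the Makefile added later in this commit uses):

```zsh
# From the cpp/ directory; TensorRT/CUDA-specific flags omitted for brevity
mkdir -p build && cd build
cmake .. -DBUILD_CORTEX_TENSORRT-LLM=ON -DCMAKE_BUILD_TYPE=Release
cmake --build . --config Release
```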
Lines changed: 95 additions & 0 deletions
```cmake
# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION &
# AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
# C++17
# engine init
include(CheckIncludeFileCXX)

check_include_file_cxx(any HAS_ANY)
check_include_file_cxx(string_view HAS_STRING_VIEW)
check_include_file_cxx(coroutine HAS_COROUTINE)
if(HAS_ANY
   AND HAS_STRING_VIEW
   AND HAS_COROUTINE)
  set(CMAKE_CXX_STANDARD 20)
elseif(HAS_ANY AND HAS_STRING_VIEW)
  set(CMAKE_CXX_STANDARD 17)
else()
  set(CMAKE_CXX_STANDARD 14)
endif()

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build_deps/_install)

message(STATUS "Current Source Directory CORTEX: ${CMAKE_CURRENT_SOURCE_DIR}")
message(STATUS "Current Cmake Prefix Path of CORTEX: ${CMAKE_PREFIX_PATH}")

set(OPENSSL_USE_STATIC_LIBS TRUE)

# Enable pkg-config support in CMake
find_package(PkgConfig REQUIRED)
find_library(TRANTOR
  NAMES trantor
  HINTS "${CMAKE_PREFIX_PATH}/lib"
)
find_library(JSONCPP
  NAMES jsoncpp
  HINTS "${CMAKE_PREFIX_PATH}/lib"
)

# Use pkg-config to find the SentencePiece library
if(NOT WIN32) # Linux
  pkg_search_module(SENTENCEPIECE REQUIRED sentencepiece)
else() # Windows
  set(SENTENCEPIECE_INCLUDE_DIRS "${CMAKE_PREFIX_PATH}/include")
  set(SENTENCEPIECE_LIBRARY_DIRS "${CMAKE_PREFIX_PATH}/lib")
endif()

message(STATUS "SentencePiece library dirs: ${SENTENCEPIECE_LIBRARY_DIRS}")
message(STATUS "SentencePiece header dirs: ${SENTENCEPIECE_INCLUDE_DIRS}")

include_directories(${PROJECT_SOURCE_DIR}/include ${SENTENCEPIECE_INCLUDE_DIRS})

link_directories(${SENTENCEPIECE_LIBRARY_DIRS})

set(TOP_LEVEL_DIR "${PROJECT_SOURCE_DIR}/..")

add_custom_target(engine_proj)

set(CXXOPTS_SRC_DIR ${PROJECT_SOURCE_DIR}/../3rdparty/cxxopts)
add_subdirectory(${CXXOPTS_SRC_DIR} ${CMAKE_CURRENT_BINARY_DIR}/cxxopts)

# main
# add_executable(engine main.cc)
add_library(engine SHARED src/tensorrt-llm_engine.cc)
target_link_libraries(
  engine
  PUBLIC ${SHARED_TARGET} nvinfer_plugin_tensorrt_llm cxxopts::cxxopts sentencepiece
  PRIVATE ${JSONCPP} ${TRANTOR} ${CMAKE_THREAD_LIBS_INIT})

target_compile_features(engine PRIVATE cxx_std_17)
target_compile_definitions(engine PUBLIC TOP_LEVEL_DIR="${TOP_LEVEL_DIR}")

aux_source_directory(src SRC)

target_include_directories(engine PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_sources(engine PRIVATE ${SRC})

add_dependencies(engine_proj engine)
```
Lines changed: 66 additions & 0 deletions
```makefile
CMAKE_EXTRA_FLAGS ?= ""
RUN_TESTS ?= true

# Default target, does nothing
all:
	@echo "Specify a target to run"

install-dependencies:
ifeq ($(OS),Windows_NT) # Windows
	cmd /C install_deps.bat
else # Unix-like systems (Linux and MacOS)
	bash ./install_deps.sh
endif

build-engine:
ifeq ($(OS),Windows_NT)
	@powershell -Command "mkdir -p build; cd build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release;"
else
# Go to cpp/ dir
	@cd ../../ && \
	mkdir -p build && \
	cd build && \
	cmake .. -DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DBUILD_CORTEX_TENSORRT-LLM=ON -DBUILD_BATCH_MANAGER_DEFAULT=OFF -DCMAKE_CUDA_ARCHITECTURES=89-real -DTRT_LIB_DIR=/usr/local/tensorrt/lib -DTRT_INCLUDE_DIR=/usr/local/tensorrt/include -DCMAKE_BUILD_TYPE=Release && \
	make -j $(nproc)
endif

build-example-server:
ifeq ($(OS),Windows_NT)
else
	@cd examples/server && \
	mkdir -p build && cd build && \
	cmake .. && cmake --build . --config Release -j12
endif

package:
ifeq ($(OS),Windows_NT)
else
	@mkdir -p cortex.tensorrt-llm && \
	cp ../../build/tensorrt_llm/cortex.tensorrt-llm/libengine.$(shell uname | tr '[:upper:]' '[:lower:]' | sed 's/darwin/dylib/;s/linux/so/') cortex.tensorrt-llm && \
	cp /usr/local/cuda-12.4/targets/x86_64-linux/lib/libcublas.so.12 cortex.tensorrt-llm && \
	cp /usr/local/cuda-12.4/targets/x86_64-linux/lib/libcublas.so.12.4.2.65 cortex.tensorrt-llm && \
	cp /usr/local/cuda-12.4/targets/x86_64-linux/lib/libcublasLt.so.12 cortex.tensorrt-llm && \
	cp /usr/local/cuda-12.4/targets/x86_64-linux/lib/libcublasLt.so.12.4.2.65 cortex.tensorrt-llm && \
	cp /usr/local/tensorrt/targets/x86_64-linux-gnu/lib/libnvinfer.so.10 cortex.tensorrt-llm && \
	cp /usr/local/tensorrt/targets/x86_64-linux-gnu/lib/libnvinfer.so.10.0.1 cortex.tensorrt-llm && \
	cp /usr/local/lib/python3.10/dist-packages/tensorrt_llm/libs/libnvinfer_plugin_tensorrt_llm.so cortex.tensorrt-llm && \
	cp /usr/local/lib/python3.10/dist-packages/tensorrt_llm/libs/libnvinfer_plugin_tensorrt_llm.so.10 cortex.tensorrt-llm && \
	cp /usr/local/lib/python3.10/dist-packages/tensorrt_llm/libs/libtensorrt_llm.so cortex.tensorrt-llm && \
	cp /usr/local/lib/python3.10/dist-packages/tensorrt_llm/libs/libtensorrt_llm_nvrtc_wrapper.so cortex.tensorrt-llm && \
	cp /usr/lib/x86_64-linux-gnu/libnccl.so.2 cortex.tensorrt-llm && \
	cp /usr/lib/x86_64-linux-gnu/libnccl.so.2.20.5 cortex.tensorrt-llm && \
	tar -czvf cortex.tensorrt-llm.tar.gz cortex.tensorrt-llm
endif

run-e2e-test:
ifeq ($(RUN_TESTS),false)
	@echo "Skipping tests"
else
ifeq ($(OS),Windows_NT)
else
	mkdir -p examples/server/build/engines/cortex.tensorrt-llm;
	cp ../../build/tensorrt_llm/cortex.tensorrt-llm/libengine.so examples/server/build/engines/cortex.tensorrt-llm;
	@cd ../../../ && \
	bash ./.github/scripts/e2e-test-server-linux-and-mac.sh "$$(pwd)"
endif
endif
```
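For orientation, a typical Linux pass through these targets might look like the sketch below. The target names and variables are exactly those defined above; the Makefile's relative paths suggest it lives in the engine's own directory two levels below cpp/.

```zsh
make install-dependencies    # runs ./install_deps.sh on Unix-like systems
make build-engine            # configures cpp/build with BUILD_CORTEX_TENSORRT-LLM=ON and builds
make build-example-server    # builds the test server under examples/server/build
make package                 # bundles libengine.so plus CUDA/TensorRT/NCCL shared libs into a tarball
make run-e2e-test            # pass RUN_TESTS=false to skip the end-to-end test
```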
Lines changed: 105 additions & 0 deletions
# How to set up a dev env for nitro-tensorrt-llm (and the future cortex-tensorrtllm)

Follow the steps below:

1. Get a machine with an NVIDIA GPU (Ampere generation or newer is recommended).

2. Clone this repo (the upstream TensorRT-LLM repo will also do, but the upstream commit must match).

3. Make sure the following are installed on your machine:
   - The latest CUDA toolkit, available through [Download CUDA](https://developer.nvidia.com/cuda-downloads)
   - The NVIDIA container toolkit, per [Installing with Apt](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-apt)
   - The latest NVIDIA driver
   - git lfs (`apt install git-lfs`)
   - Ubuntu or Debian is recommended

4. Build the TensorRT image with the commands below:

   ```zsh
   cd nitro-tensorrt-llm
   git lfs install
   git lfs pull
   make -C docker release_build
   ```

   After the build finishes you will have an image tagged `tensorrt_llm/release:latest`.

5. Start the dev environment properly.

   Use the docker-compose.yaml template below for the image:

   ```yaml
   services:
     ......
       deploy:
         resources:
           reservations:
             devices:
               - driver: nvidia
                 count: 1
                 capabilities: [gpu]
   ```

   You can fill in the rest of the service definition to your taste (personally I use a neovim image built from the tensorrt_llm base image), but the `deploy` section must be present. If you have two or more GPUs, increase the GPU `count` or adjust the settings; as written, the template uses the first GPU of a single-GPU machine. A fuller example is sketched below.
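   For concreteness, a complete file might look like the following sketch. The service name, command, and volume mount are illustrative assumptions; only the image tag (from the build step above) and the `deploy` block come from this guide.

   ```yaml
   # Hypothetical docker-compose.yaml; only `image` and `deploy` are prescribed above.
   services:
     trtllm-dev:                    # service name is an arbitrary choice
       image: tensorrt_llm/release:latest
       command: sleep infinity      # keep the container alive for interactive dev
       volumes:
         - ./:/workspace            # mount your checkout into the container
       deploy:
         resources:
           reservations:
             devices:
               - driver: nvidia
                 count: 1
                 capabilities: [gpu]
   ```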
   Once the container is up, you can either use VS Code to SSH into it or develop directly with neovim, whichever you prefer.
6. Install or build nitro-tensorrt-llm for the first time.

   Inside the container, clone nitro-tensorrt-llm again:

   ```zsh
   apt update && apt install git-lfs
   git clone --recurse https://github.com/janhq/nitro-tensorrt-llm
   cd nitro-tensorrt-llm
   git lfs install
   git lfs pull
   ```

   After that, install uuid-dev:

   ```zsh
   apt install uuid-dev
   ```

   Now install the nitro-tensorrt-llm dependencies:

   ```zsh
   cd cpp/tensorrt_llm/nitro
   ./install_deps.sh
   ```

   Once the dependencies are installed, go back to the main cpp folder and build nitro:

   ```zsh
   cd ../../
   ./build_nitro.sh
   ```

   **Note:** the build_nitro.sh script exposes a parameter for the GPU architecture. It is set to 89-real for Ada Lovelace; change it to match your card, per this guide: [Arch](https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/).

7. Build an engine to test with.

   The binary is already built, but to verify that it runs properly you need a TensorRT engine (in this context, a model compiled for TensorRT).

   Go to the root dir and run `cd examples/llama`.

   Make sure the linker search path is set correctly:

   ```zsh
   export LD_LIBRARY_PATH=/usr/local/tensorrt/lib
   ```

   Clone a model (it needs to be ChatML-template compatible); I use Hermes:

   ```zsh
   git clone https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B
   ```

   First, I recommend quantizing it to FP8 to make it smaller:

   ```zsh
   python ../quantization/quantize.py --model_dir ./Hermes-2-Pro-Mistral-7B \
                                      --dtype float16 \
                                      --qformat fp8 \
                                      --kv_cache_dtype fp8 \
                                      --output_dir ./tllm_checkpoint_1gpu_fp8_hermes \
                                      --calib_size 512 \
                                      --tp_size 1
   ```

   Once quantization is done, build the engine:

   ```zsh
   trtllm-build --checkpoint_dir ./tllm_checkpoint_1gpu_fp8_hermes \
                --output_dir ./tllm_checkpoint_1gpu_fp8_hermes_engine \
                --gemm_plugin float16 \
                --strongly_typed \
                --workers 1
   ```

   `./tllm_checkpoint_1gpu_fp8_hermes_engine` is now the path of the "engine" that you can load with your freshly built nitro binary.

   Go to the main README page to follow the process of testing with the engine.
Lines changed: 21 additions & 0 deletions
```cpp
#pragma once

#include <functional>
#include <memory>

#include "json/value.h"

class CortexTensorrtLlmEngineI {
 public:
  virtual ~CortexTensorrtLlmEngineI() {}

  virtual void HandleChatCompletion(
      std::shared_ptr<Json::Value> jsonBody,
      std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
  virtual void LoadModel(
      std::shared_ptr<Json::Value> jsonBody,
      std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
  virtual void Destroy(
      std::shared_ptr<Json::Value> jsonBody,
      std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
};
```
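The example server consumes this interface from the engine shared library at runtime (its CMakeLists.txt below compiles in dylib.h for exactly that purpose). As a rough sketch of the pattern using plain dlopen, assuming a factory symbol named `makeEngine` (a hypothetical name; the real export lives in src/tensorrt-llm_engine.cc, which this excerpt does not show):

```cpp
#include <dlfcn.h>

#include <iostream>
#include <memory>

#include "cortex_tensorrt_llm_engine_i.h"  // the interface above; filename is an assumption

int main() {
  // Open the library produced by `make build-engine` / `make package`.
  void* handle = dlopen("./cortex.tensorrt-llm/libengine.so", RTLD_NOW);
  if (handle == nullptr) {
    std::cerr << dlerror() << '\n';
    return 1;
  }

  // `makeEngine` is a hypothetical factory symbol, shown for illustration only.
  using Factory = CortexTensorrtLlmEngineI* (*)();
  auto make_engine = reinterpret_cast<Factory>(dlsym(handle, "makeEngine"));
  if (make_engine == nullptr) {
    std::cerr << dlerror() << '\n';
    return 1;
  }

  std::unique_ptr<CortexTensorrtLlmEngineI> engine{make_engine()};

  // Field names in the request body are assumptions; the engine defines the schema.
  auto body = std::make_shared<Json::Value>();
  (*body)["engine_path"] = "./tllm_checkpoint_1gpu_fp8_hermes_engine";
  engine->LoadModel(body, [](Json::Value&& status, Json::Value&& result) {
    std::cout << result.toStyledString() << '\n';
  });

  engine.reset();  // destroy the engine before unloading its code
  dlclose(handle);
  return 0;
}
```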
Lines changed: 68 additions & 0 deletions
```cmake
# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION &
# AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
# C++17
# cortex.tensorrt-llm init
cmake_minimum_required(VERSION 3.5)
project(server)
find_package(Threads REQUIRED)

if(UNIX AND NOT APPLE)
  set(LINKER_FLAGS -ldl)
endif()

include(CheckIncludeFileCXX)
# CPP version
check_include_file_cxx(any HAS_ANY)
check_include_file_cxx(string_view HAS_STRING_VIEW)
check_include_file_cxx(coroutine HAS_COROUTINE)
if(HAS_ANY
   AND HAS_STRING_VIEW
   AND HAS_COROUTINE)
  set(CMAKE_CXX_STANDARD 20)
elseif(HAS_ANY AND HAS_STRING_VIEW)
  set(CMAKE_CXX_STANDARD 17)
else()
  set(CMAKE_CXX_STANDARD 14)
endif()

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

add_executable(${PROJECT_NAME}
  server.cc
  dylib.h
  httplib.h
)

set(THIRD_PARTY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../build_deps/_install)
set(CORTEX_COMMON_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../base/)

find_library(JSONCPP
  NAMES jsoncpp
  HINTS "${THIRD_PARTY_PATH}/lib"
)

find_library(TRANTOR
  NAMES trantor
  HINTS "${THIRD_PARTY_PATH}/lib"
)

target_link_libraries(${PROJECT_NAME} PRIVATE ${JSONCPP} ${TRANTOR} ${LINKER_FLAGS}
                      ${CMAKE_THREAD_LIBS_INIT})

target_include_directories(${PROJECT_NAME} PRIVATE
                           ${CORTEX_COMMON_PATH}
                           ${THIRD_PARTY_PATH}/include)
```
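Building this example server by hand mirrors the `build-example-server` target in the Makefile above; on Linux that amounts to:

```zsh
cd examples/server
mkdir -p build && cd build
cmake .. && cmake --build . --config Release -j12
```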
