Commit 272e962

adding dependencies
1 parent b88558b commit 272e962

File tree

8 files changed: +4823 -0 lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -160,3 +160,4 @@ cython_debug/
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 .idea/

+.pixi

configs/pymcs-benchmark/base.yaml

Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
# start and stop GCP instances
name: pymcs-benchmark-base

resources:
  cloud: gcp
  cpus: 2+
  ports:
    # Ports for Ray head node and worker nodes
    - 6383  # GCS server (Ray head node port)
    - 8263  # Dashboard port (optional, if --include-dashboard is true)
    - 50001 # Ray client server port

num_nodes: 1
envs:
  LLM_DEVICE: NONE # CPU or CUDA

# this will be synced to the node as `~/sky_workdir`
workdir: ./
# The setup command. Will be run under the working directory.
setup: |
  set -e  # Exit if any command fails.

  # install pixi and project dependencies
  curl -fsSL https://pixi.sh/install.sh | bash
  source /home/gcpuser/.bashrc
  pixi install --manifest-path pyproject.toml -e server

  # FIXME: check why the ray client is not installed from pixi; the setup is correct according to https://pixi.sh/latest/reference/project_configuration/#version-specification
  pixi run \
    --environment server \
    --manifest-path pyproject.toml \
    pip3 install "ray[default,client]==2.37.0"

  # start a separate ray cluster for pymc-server
  # TODO: launch the head-only command only on the first node in a multi-node setup
  pixi run \
    --environment server \
    --manifest-path pyproject.toml \
    ray start \
      --head \
      --port=6383 \
      --ray-client-server-port=50001 \
      --dashboard-host=0.0.0.0 \
      --dashboard-port=8263 \
      --disable-usage-stats
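
The Ray head started above exposes the Ray Client server on port 50001 (opened in `ports`). A minimal sketch, not part of this commit, of connecting from a local Python session; it assumes the instance's external IP is reachable and that the local `ray` version matches the pinned 2.37.0, and the IP below is a placeholder:

import ray

head_ip = "203.0.113.10"  # placeholder: the instance's external IP
ray.init(f"ray://{head_ip}:50001")  # connect through the Ray client server port
print(ray.cluster_resources())      # sanity check: resources visible on the remote cluster
ray.shutdown()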

configs/remote_consumer_model.yaml

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
# load this via `pymcs -b configs -m synthetic-consumers remote_consumer_model.yaml`
# The command to run. Will be run under the working directory.
resources:
  cloud: gcp
  cpus: 1+
  accelerators: L4:1
run: |
  set -e  # Exit if any command fails.
  echo "Available models on this instance:"
  # list locally available models
  HF_HUB_ENABLE_HF_TRANSFER=1 pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    huggingface-cli scan-cache

  echo "Your instance is ready. Connect to it with pymc_server.connect(IP-ADDRESS). Find the IP-ADDRESS by running 'pymcs status', note the NODE-NAME and run 'pymcs status --ip NODE-NAME' to print your IP"
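
The message above describes the intended client workflow: find the node name with 'pymcs status', print its IP with 'pymcs status --ip NODE-NAME', then connect from Python. A hypothetical sketch follows; `pymc_server.connect` is taken from the echo text and its exact signature is an assumption, not shown in this commit:

import pymc_server

ip_address = "203.0.113.10"  # placeholder: printed by `pymcs status --ip NODE-NAME`
pymc_server.connect(ip_address)  # assumed to accept the head node's IP as a string
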
Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
# start and stop GCP instances
name: synthetic-consumers

resources:
  cloud: gcp
  cpus: 1+
  accelerators: L4:1
  ports:
    # Ports for Ray head node and worker nodes
    - 6383  # GCS server (Ray head node port)
    - 8263  # Dashboard port (optional, if --include-dashboard is true)
    - 50001 # Ray client server port

num_nodes: 1
envs:
  LLM_DEVICE: CUDA # CPU or CUDA

# this will be synced to the node as `~/sky_workdir`
workdir: ./
# The setup command. Will be run under the working directory.
setup: |
  set -e  # Exit if any command fails.

  # install pixi and project dependencies
  curl -fsSL https://pixi.sh/install.sh | bash
  source /home/gcpuser/.bashrc
  pixi install --manifest-path pyproject.toml -e ray

  # install system requirements needed for CPU-based vllm inference
  if [ "${LLM_DEVICE}" == "CPU" ]; then
    echo "THIS FEATURE IS NOT IMPLEMENTED YET. Please set envs: LLM_DEVICE to CUDA" >&2 # Print error message to stderr
    exit 1 # Exit with status code 1

    sudo apt-get install -y libssl-dev
    sudo mkdir /opt/vllm && sudo chown gcpuser /opt/vllm
    git clone https://github.com/vllm-project/vllm.git /opt/vllm && cd /opt/vllm && git fetch --all --tags && git checkout tags/v0.6.2

    # Build vllm for CPU using a docker environment. This saves us a lot of hassle with the >1-year-old Google Deep Learning base images.
    echo "NOTICE!: Building vLLM - this process can take **up to an hour** on a minimal compute instance. Switch to a stronger instance or, better, use a GPU instance to avoid this step altogether."
    # FIXME: this builds wheels for python 3.10, but we need them for 3.12
    cd /opt/vllm && DOCKER_BUILDKIT=1 docker build -f Dockerfile.cpu -t vllm-cpu-env --shm-size=4g .
    # TODO: copy wheels from /workspace/vllm/build/ to the local filesystem and install them

    # /* REMOVE
    pixi run \
      --environment ray \
      --manifest-path pyproject.toml \
      pip3 install wheel packaging ninja "setuptools>=49.4.0" numpy setuptools-scm

    # install torch for CPU
    pixi run \
      --environment ray \
      --manifest-path pyproject.toml \
      pip3 install torch --index-url https://download.pytorch.org/whl/cpu # torch CPU

    # build the vllm torch integration
    VLLM_TARGET_DEVICE=cpu pixi run \
      --environment ray \
      --manifest-path pyproject.toml \
      bash -c "cd /opt/vllm/ && python setup.py install" # vllm setup is required for CPU
    #
    # REMOVE END */
  fi

  # FIXME: check why the ray client is not installed from pixi; the setup is correct according to https://pixi.sh/latest/reference/project_configuration/#version-specification
  pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    pip3 install "ray[default,client]==2.37.0" "huggingface_hub[hf_transfer]"

  pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    pip3 install --force-reinstall "torch"

  # start a separate ray cluster for pymc-server
  # TODO: launch the head-only command only on the first node in a multi-node setup
  pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    ray start \
      --head \
      --port=6383 \
      --ray-client-server-port=50001 \
      --dashboard-host=0.0.0.0 \
      --dashboard-port=8263 \
      --disable-usage-stats

  # Download the model early. Downloads to ~/.cache/huggingface. All HF-compatible libraries will look for the model there.
  echo "Downloading your model - depending on the size of the model this may take a while"
  HF_HUB_ENABLE_HF_TRANSFER=1 pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    huggingface-cli download microsoft/Phi-3-mini-4k-instruct

  # TODO: download the model from HF via a MODEL_NAME env (might need HF_HUB_TOKEN)

# The command to run. Will be run under the working directory.
run: |
  set -e  # Exit if any command fails.
  echo "Available models on this instance:"
  # list locally available models
  HF_HUB_ENABLE_HF_TRANSFER=1 pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    huggingface-cli scan-cache

  echo "Your instance is ready. Connect to it with pymc_server.connect(IP-ADDRESS). Find the IP-ADDRESS by running 'pymcs status', note the NODE-NAME and run 'pymcs status --ip NODE-NAME' to print your IP"
Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
# start and stop GCP instances
name: synthetic-consumers-base

resources:
  cloud: gcp
  cpus: 1+
  accelerators: L4:1
  ports:
    # Ports for Ray head node and worker nodes
    - 6383  # GCS server (Ray head node port)
    - 8263  # Dashboard port (optional, if --include-dashboard is true)
    - 50001 # Ray client server port

num_nodes: 1
envs:
  LLM_DEVICE: CUDA # CPU or CUDA

# this will be synced to the node as `~/sky_workdir`
workdir: ./
# The setup command. Will be run under the working directory.
setup: |
  set -e  # Exit if any command fails.

  # install pixi and project dependencies
  curl -fsSL https://pixi.sh/install.sh | bash
  source /home/gcpuser/.bashrc
  pixi install --manifest-path pyproject.toml -e ray

  # install system requirements needed for CPU-based vllm inference
  if [ "${LLM_DEVICE}" == "CPU" ]; then
    echo "THIS FEATURE IS NOT IMPLEMENTED YET. Please set envs: LLM_DEVICE to CUDA" >&2 # Print error message to stderr
    exit 1 # Exit with status code 1

    sudo apt-get install -y libssl-dev
    sudo mkdir /opt/vllm && sudo chown gcpuser /opt/vllm
    git clone https://github.com/vllm-project/vllm.git /opt/vllm && cd /opt/vllm && git fetch --all --tags && git checkout tags/v0.6.2

    # Build vllm for CPU using a docker environment. This saves us a lot of hassle with the >1-year-old Google Deep Learning base images.
    echo "NOTICE!: Building vLLM - this process can take **up to an hour** on a minimal compute instance. Switch to a stronger instance or, better, use a GPU instance to avoid this step altogether."
    # FIXME: this builds wheels for python 3.10, but we need them for 3.12
    cd /opt/vllm && DOCKER_BUILDKIT=1 docker build -f Dockerfile.cpu -t vllm-cpu-env --shm-size=4g .
    # TODO: copy wheels from /workspace/vllm/build/ to the local filesystem and install them

    # /* REMOVE
    pixi run \
      --environment ray \
      --manifest-path pyproject.toml \
      pip3 install wheel packaging ninja "setuptools>=49.4.0" numpy setuptools-scm

    # install torch for CPU
    pixi run \
      --environment ray \
      --manifest-path pyproject.toml \
      pip3 install torch --index-url https://download.pytorch.org/whl/cpu # torch CPU

    # build the vllm torch integration
    VLLM_TARGET_DEVICE=cpu pixi run \
      --environment ray \
      --manifest-path pyproject.toml \
      bash -c "cd /opt/vllm/ && python setup.py install" # vllm setup is required for CPU
    #
    # REMOVE END */
  fi

  # FIXME: check why the ray client is not installed from pixi; the setup is correct according to https://pixi.sh/latest/reference/project_configuration/#version-specification
  pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    pip3 install "ray[default,client]==2.37.0" "huggingface_hub[hf_transfer]"

  pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    pip3 install --force-reinstall "torch"

  # start a separate ray cluster for pymc-server
  # TODO: launch the head-only command only on the first node in a multi-node setup
  pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    ray start \
      --head \
      --port=6383 \
      --ray-client-server-port=50001 \
      --dashboard-host=0.0.0.0 \
      --dashboard-port=8263 \
      --disable-usage-stats

  # Download the model early. Downloads to ~/.cache/huggingface. All HF-compatible libraries will look for the model there.
  echo "Downloading your model - depending on the size of the model this may take a while"
  HF_HUB_ENABLE_HF_TRANSFER=1 pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    huggingface-cli download microsoft/Phi-3-mini-4k-instruct

  # TODO: download the model from HF via a MODEL_NAME env (might need HF_HUB_TOKEN)
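
The TODO above suggests parameterizing the download through a MODEL_NAME environment variable. A hypothetical sketch of that step using `huggingface_hub.snapshot_download`; the MODEL_NAME and HF_HUB_TOKEN handling are assumptions, not part of this commit:

import os
from huggingface_hub import snapshot_download

model_name = os.environ.get("MODEL_NAME", "microsoft/Phi-3-mini-4k-instruct")  # hypothetical env var
token = os.environ.get("HF_HUB_TOKEN")  # may be required for gated or private models

snapshot_download(repo_id=model_name, token=token)  # stores under ~/.cache/huggingface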
