Commit 272e962

adding dependencies
1 parent b88558b commit 272e962

File tree

8 files changed: +4823 -0 lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -160,3 +160,4 @@ cython_debug/
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 .idea/

+.pixi

configs/pymcs-benchmark/base.yaml

Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
# start and stop GCP instances
name: pymcs-benchmark-base

resources:
  cloud: gcp
  cpus: 2+
  ports:
    # Ports for Ray head node and worker nodes
    - 6383  # GCS server (Ray head node port)
    - 8263  # Dashboard port (optional, if --include-dashboard is true)
    - 50001 # Ray client server port

num_nodes: 1
envs:
  LLM_DEVICE: NONE # CPU or CUDA

# this will be synced to the node as `~/sky_workdir`
workdir: ./
# The setup command. Will be run under the working directory.
setup: |
  set -e  # Exit if any command fails.

  # install pixi and project dependencies
  curl -fsSL https://pixi.sh/install.sh | bash
  source /home/gcpuser/.bashrc
  pixi install --manifest-path pyproject.toml -e server

  # FIXME: check why the ray client is not installed from pixi; the setup is correct according to https://pixi.sh/latest/reference/project_configuration/#version-specification
  pixi run \
    --environment server \
    --manifest-path pyproject.toml \
    pip3 install "ray[default,client]==2.37.0"

  # start a separate ray cluster for pymc-server
  # TODO: launch the head-only command only on the first node in a multi-node setup
  pixi run \
    --environment server \
    --manifest-path pyproject.toml \
    ray start \
      --head \
      --port=6383 \
      --ray-client-server-port=50001 \
      --dashboard-host=0.0.0.0 \
      --dashboard-port=8263 \
      --disable-usage-stats
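
The Ray head started above exposes the Ray Client server on port 50001 (opened in `ports`). A minimal sketch, not part of this commit, of connecting from a local Python session; it assumes the instance's external IP is reachable and that the local `ray` version matches the pinned 2.37.0, and the IP below is a placeholder:

import ray

head_ip = "203.0.113.10"  # placeholder: the instance's external IP
ray.init(f"ray://{head_ip}:50001")  # connect through the Ray client server port
print(ray.cluster_resources())      # sanity check: resources visible on the remote cluster
ray.shutdown()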

configs/remote_consumer_model.yaml

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
# load this via `pymcs -b configs -m synthetic-consumers remote_consumer_model.yaml`
# The command to run. Will be run under the working directory.
resources:
  cloud: gcp
  cpus: 1+
  accelerators: L4:1
run: |
  set -e  # Exit if any command fails.
  echo "Available models on this instance:"
  # list locally available models
  HF_HUB_ENABLE_HF_TRANSFER=1 pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    huggingface-cli scan-cache

  echo "Your instance is ready. Connect to it with pymc_server.connect(IP-ADDRESS). Find the IP-ADDRESS by running 'pymcs status', note the NODE-NAME and run 'pymcs status --ip NODE-NAME' to print your IP"
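
The message above describes the intended client workflow: find the node name with 'pymcs status', print its IP with 'pymcs status --ip NODE-NAME', then connect from Python. A hypothetical sketch follows; `pymc_server.connect` is taken from the echo text and its exact signature is an assumption, not shown in this commit:

import pymc_server

ip_address = "203.0.113.10"  # placeholder: printed by `pymcs status --ip NODE-NAME`
pymc_server.connect(ip_address)  # assumed to accept the head node's IP as a string
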
Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
# start and stop GCP instances
name: synthetic-consumers

resources:
  cloud: gcp
  cpus: 1+
  accelerators: L4:1
  ports:
    # Ports for Ray head node and worker nodes
    - 6383  # GCS server (Ray head node port)
    - 8263  # Dashboard port (optional, if --include-dashboard is true)
    - 50001 # Ray client server port

num_nodes: 1
envs:
  LLM_DEVICE: CUDA # CPU or CUDA

# this will be synced to the node as `~/sky_workdir`
workdir: ./
# The setup command. Will be run under the working directory.
setup: |
  set -e  # Exit if any command fails.

  # install pixi and project dependencies
  curl -fsSL https://pixi.sh/install.sh | bash
  source /home/gcpuser/.bashrc
  pixi install --manifest-path pyproject.toml -e ray

  # install system requirements needed for CPU-based vllm inference
  if [ "${LLM_DEVICE}" == "CPU" ]; then
    echo "THIS FEATURE IS NOT IMPLEMENTED YET. Please set envs: LLM_DEVICE to CUDA" >&2 # Print error message to stderr
    exit 1 # Exit with status code 1

    sudo apt-get install -y libssl-dev
    sudo mkdir /opt/vllm && sudo chown gcpuser /opt/vllm
    git clone https://github.com/vllm-project/vllm.git /opt/vllm && cd /opt/vllm && git fetch --all --tags && git checkout tags/v0.6.2

    # Build vllm for CPU using a docker environment. This saves us a lot of hassle with the >1-year-old Google Deep Learning base images.
    echo "NOTICE!: Building vLLM - this process can take **up to an hour** on a minimal compute instance. Switch to a stronger instance or, better, use a GPU instance to avoid this step altogether."
    # FIXME: this builds wheels for python 3.10, but we need them for 3.12
    cd /opt/vllm && DOCKER_BUILDKIT=1 docker build -f Dockerfile.cpu -t vllm-cpu-env --shm-size=4g .
    # TODO: copy wheels from /workspace/vllm/build/ to the local filesystem and install them

    # /* REMOVE
    pixi run \
      --environment ray \
      --manifest-path pyproject.toml \
      pip3 install wheel packaging ninja "setuptools>=49.4.0" numpy setuptools-scm

    # install torch for CPU
    pixi run \
      --environment ray \
      --manifest-path pyproject.toml \
      pip3 install torch --index-url https://download.pytorch.org/whl/cpu # torch CPU

    # build the vllm torch integration
    VLLM_TARGET_DEVICE=cpu pixi run \
      --environment ray \
      --manifest-path pyproject.toml \
      bash -c "cd /opt/vllm/ && python setup.py install" # vllm setup is required for CPU
    #
    # REMOVE END */
  fi

  # FIXME: check why the ray client is not installed from pixi; the setup is correct according to https://pixi.sh/latest/reference/project_configuration/#version-specification
  pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    pip3 install "ray[default,client]==2.37.0" "huggingface_hub[hf_transfer]"

  pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    pip3 install --force-reinstall "torch"

  # start a separate ray cluster for pymc-server
  # TODO: launch the head-only command only on the first node in a multi-node setup
  pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    ray start \
      --head \
      --port=6383 \
      --ray-client-server-port=50001 \
      --dashboard-host=0.0.0.0 \
      --dashboard-port=8263 \
      --disable-usage-stats

  # Download the model early. Downloads to ~/.cache/huggingface. All HF-compatible libraries will look for the model there.
  echo "Downloading your model - depending on the size of the model this may take a while"
  HF_HUB_ENABLE_HF_TRANSFER=1 pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    huggingface-cli download microsoft/Phi-3-mini-4k-instruct

  # TODO: download the model from HF via a MODEL_NAME env (might need HF_HUB_TOKEN)

# The command to run. Will be run under the working directory.
run: |
  set -e  # Exit if any command fails.
  echo "Available models on this instance:"
  # list locally available models
  HF_HUB_ENABLE_HF_TRANSFER=1 pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    huggingface-cli scan-cache

  echo "Your instance is ready. Connect to it with pymc_server.connect(IP-ADDRESS). Find the IP-ADDRESS by running 'pymcs status', note the NODE-NAME and run 'pymcs status --ip NODE-NAME' to print your IP"
Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
# start and stop GCP instances
name: synthetic-consumers-base

resources:
  cloud: gcp
  cpus: 1+
  accelerators: L4:1
  ports:
    # Ports for Ray head node and worker nodes
    - 6383  # GCS server (Ray head node port)
    - 8263  # Dashboard port (optional, if --include-dashboard is true)
    - 50001 # Ray client server port

num_nodes: 1
envs:
  LLM_DEVICE: CUDA # CPU or CUDA

# this will be synced to the node as `~/sky_workdir`
workdir: ./
# The setup command. Will be run under the working directory.
setup: |
  set -e  # Exit if any command fails.

  # install pixi and project dependencies
  curl -fsSL https://pixi.sh/install.sh | bash
  source /home/gcpuser/.bashrc
  pixi install --manifest-path pyproject.toml -e ray

  # install system requirements needed for CPU-based vllm inference
  if [ "${LLM_DEVICE}" == "CPU" ]; then
    echo "THIS FEATURE IS NOT IMPLEMENTED YET. Please set envs: LLM_DEVICE to CUDA" >&2 # Print error message to stderr
    exit 1 # Exit with status code 1

    sudo apt-get install -y libssl-dev
    sudo mkdir /opt/vllm && sudo chown gcpuser /opt/vllm
    git clone https://github.com/vllm-project/vllm.git /opt/vllm && cd /opt/vllm && git fetch --all --tags && git checkout tags/v0.6.2

    # Build vllm for CPU using a docker environment. This saves us a lot of hassle with the >1-year-old Google Deep Learning base images.
    echo "NOTICE!: Building vLLM - this process can take **up to an hour** on a minimal compute instance. Switch to a stronger instance or, better, use a GPU instance to avoid this step altogether."
    # FIXME: this builds wheels for python 3.10, but we need them for 3.12
    cd /opt/vllm && DOCKER_BUILDKIT=1 docker build -f Dockerfile.cpu -t vllm-cpu-env --shm-size=4g .
    # TODO: copy wheels from /workspace/vllm/build/ to the local filesystem and install them

    # /* REMOVE
    pixi run \
      --environment ray \
      --manifest-path pyproject.toml \
      pip3 install wheel packaging ninja "setuptools>=49.4.0" numpy setuptools-scm

    # install torch for CPU
    pixi run \
      --environment ray \
      --manifest-path pyproject.toml \
      pip3 install torch --index-url https://download.pytorch.org/whl/cpu # torch CPU

    # build the vllm torch integration
    VLLM_TARGET_DEVICE=cpu pixi run \
      --environment ray \
      --manifest-path pyproject.toml \
      bash -c "cd /opt/vllm/ && python setup.py install" # vllm setup is required for CPU
    #
    # REMOVE END */
  fi

  # FIXME: check why the ray client is not installed from pixi; the setup is correct according to https://pixi.sh/latest/reference/project_configuration/#version-specification
  pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    pip3 install "ray[default,client]==2.37.0" "huggingface_hub[hf_transfer]"

  pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    pip3 install --force-reinstall "torch"

  # start a separate ray cluster for pymc-server
  # TODO: launch the head-only command only on the first node in a multi-node setup
  pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    ray start \
      --head \
      --port=6383 \
      --ray-client-server-port=50001 \
      --dashboard-host=0.0.0.0 \
      --dashboard-port=8263 \
      --disable-usage-stats

  # Download the model early. Downloads to ~/.cache/huggingface. All HF-compatible libraries will look for the model there.
  echo "Downloading your model - depending on the size of the model this may take a while"
  HF_HUB_ENABLE_HF_TRANSFER=1 pixi run \
    --environment ray \
    --manifest-path pyproject.toml \
    huggingface-cli download microsoft/Phi-3-mini-4k-instruct

  # TODO: download the model from HF via a MODEL_NAME env (might need HF_HUB_TOKEN)
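
The TODO above suggests parameterizing the download through a MODEL_NAME environment variable. A hypothetical sketch of that step using `huggingface_hub.snapshot_download`; the MODEL_NAME and HF_HUB_TOKEN handling are assumptions, not part of this commit:

import os
from huggingface_hub import snapshot_download

model_name = os.environ.get("MODEL_NAME", "microsoft/Phi-3-mini-4k-instruct")  # hypothetical env var
token = os.environ.get("HF_HUB_TOKEN")  # may be required for gated or private models

snapshot_download(repo_id=model_name, token=token)  # stores under ~/.cache/huggingface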
