#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Export model to CUDA format with optional quantization

show_help() {
  cat << EOF
Usage: export_model_cuda_artifact.sh <hf_model> [quant_name] [output_dir]

Export a HuggingFace model to CUDA format with optional quantization.

Arguments:
  hf_model     HuggingFace model ID (required)
               Supported models:
                 - mistralai/Voxtral-Mini-3B-2507
                 - openai/whisper-small
                 - google/gemma-3-4b-it

  quant_name   Quantization type (optional, default: non-quantized)
               Options:
                 - non-quantized
                 - quantized-int4-tile-packed
                 - quantized-int4-weight-only

  output_dir   Output directory for artifacts (optional, default: current directory)

Examples:
  export_model_cuda_artifact.sh "openai/whisper-small"
  export_model_cuda_artifact.sh "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed"
  export_model_cuda_artifact.sh "google/gemma-3-4b-it" "non-quantized" "./output"
EOF
}

if [ "${1:-}" = "-h" ] || [ "${1:-}" = "--help" ]; then
  show_help
  exit 0
fi

if [ -z "${1:-}" ]; then
  echo "Error: hf_model argument is required" >&2
  echo "Run with -h or --help for usage information" >&2
  exit 1
fi

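# From here on, exit on any error, treat unset variables as errors, and trace
# every command (the xtrace output doubles as a log in CI).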
set -eux

HF_MODEL="$1"
QUANT_NAME="${2:-non-quantized}"
OUTPUT_DIR="${3:-.}"

# Determine model configuration based on HF model ID
case "$HF_MODEL" in
  mistralai/Voxtral-Mini-3B-2507)
    MODEL_NAME="voxtral"
    TASK="multimodal-text-to-text"
    MAX_SEQ_LEN="1024"
    EXTRA_PIP="mistral-common librosa"
    PREPROCESSOR_FEATURE_SIZE="128"
    PREPROCESSOR_OUTPUT="voxtral_preprocessor.pte"
    ;;
  openai/whisper-small)
    MODEL_NAME="whisper"
    TASK="automatic-speech-recognition"
    MAX_SEQ_LEN=""
    EXTRA_PIP="librosa"
    PREPROCESSOR_FEATURE_SIZE="80"
    PREPROCESSOR_OUTPUT="whisper_preprocessor.pte"
    ;;
  google/gemma-3-4b-it)
    MODEL_NAME="gemma3"
    TASK="multimodal-text-to-text"
    MAX_SEQ_LEN="64"
    EXTRA_PIP=""
    PREPROCESSOR_FEATURE_SIZE=""
    PREPROCESSOR_OUTPUT=""
    ;;
  *)
    echo "Error: Unsupported model '$HF_MODEL'" >&2
    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, openai/whisper-small, google/gemma-3-4b-it" >&2
    exit 1
    ;;
esac
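# An empty MAX_SEQ_LEN falls back to the exporter's default sequence length;
# empty PREPROCESSOR_* values skip the audio preprocessor export below.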

# Determine quantization args based on quant name
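# (As the flag names suggest, "4w" requests int4 weight quantization of linear
# layers; the tile-packed variant quantizes both the model and its encoder and
# selects a tile_packed_to_4d packing layout, while weight-only here touches
# only the encoder. Flag spellings follow the optimum-executorch exporter.)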
case "$QUANT_NAME" in
  non-quantized)
    EXTRA_ARGS=""
    ;;
  quantized-int4-tile-packed)
    EXTRA_ARGS="--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
    ;;
  quantized-int4-weight-only)
    EXTRA_ARGS="--qlinear_encoder 4w"
    ;;
  *)
    echo "Error: Unsupported quantization '$QUANT_NAME'" >&2
    echo "Supported quantizations: non-quantized, quantized-int4-tile-packed, quantized-int4-weight-only" >&2
    exit 1
    ;;
esac

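# ::group::/::endgroup:: markers render as collapsible sections in GitHub
# Actions logs.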
echo "::group::Export $MODEL_NAME"

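# EXTRA_PIP is deliberately left unquoted so that a multi-package value splits
# into separate pip arguments.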
if [ -n "$EXTRA_PIP" ]; then
  pip install $EXTRA_PIP
fi
pip list

MAX_SEQ_LEN_ARG=""
if [ -n "$MAX_SEQ_LEN" ]; then
  MAX_SEQ_LEN_ARG="--max_seq_len $MAX_SEQ_LEN"
fi
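# Export with the ExecuTorch "cuda" recipe into the current directory. This is
# expected to produce model.pte plus aoti_cuda_blob.ptd (presumably the
# AOTInductor-generated CUDA blob); both are verified below.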
optimum-cli export executorch \
  --model "$HF_MODEL" \
  --task "$TASK" \
  --recipe "cuda" \
  --dtype bfloat16 \
  --device cuda \
  ${MAX_SEQ_LEN_ARG} \
  ${EXTRA_ARGS} \
  --output_dir ./

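# Audio models additionally need a mel-spectrogram preprocessor exported as a
# separate .pte; feature_size is the number of mel bins (80 for Whisper, 128
# for Voxtral).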
if [ -n "$PREPROCESSOR_OUTPUT" ]; then
  python -m executorch.extension.audio.mel_spectrogram \
    --feature_size "$PREPROCESSOR_FEATURE_SIZE" \
    --stack_output \
    --max_audio_len 300 \
    --output_file "$PREPROCESSOR_OUTPUT"
fi

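# Sanity-check that the expected artifacts were actually produced before
# copying them out.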
test -f model.pte
test -f aoti_cuda_blob.ptd
if [ -n "$PREPROCESSOR_OUTPUT" ]; then
  test -f "$PREPROCESSOR_OUTPUT"
fi
echo "::endgroup::"

echo "::group::Store $MODEL_NAME Artifacts"
mkdir -p "${OUTPUT_DIR}"
cp model.pte "${OUTPUT_DIR}/"
cp aoti_cuda_blob.ptd "${OUTPUT_DIR}/"
if [ -n "$PREPROCESSOR_OUTPUT" ]; then
  cp "$PREPROCESSOR_OUTPUT" "${OUTPUT_DIR}/"
fi
ls -al "${OUTPUT_DIR}"
echo "::endgroup::"