From 7115bdab421f3f7fefb0e349fe0daaf3ad7bcf07 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 12:26:47 -0500 Subject: [PATCH 01/23] remove training support --- .../workflows/test-check-transformers.yaml | 4 +- examples/trl_mixin/README.md | 32 -- examples/trl_mixin/ex_trl_constant.py | 64 --- examples/trl_mixin/ex_trl_distillation.py | 79 --- examples/trl_mixin/sft_trainer.py | 22 - src/llmcompressor/args/dataset_arguments.py | 15 +- src/llmcompressor/args/training_arguments.py | 44 -- src/llmcompressor/entrypoints/__init__.py | 1 - src/llmcompressor/entrypoints/train.py | 146 ----- src/llmcompressor/transformers/__init__.py | 1 + .../{finetune => }/data/__init__.py | 0 .../transformers/{finetune => }/data/base.py | 0 .../transformers/{finetune => }/data/c4.py | 0 .../{finetune => }/data/cnn_dailymail.py | 0 .../{finetune => }/data/custom.py | 0 .../{finetune => }/data/data_helpers.py | 0 .../{finetune => }/data/evolcodealpaca.py | 0 .../{finetune => }/data/flickr_30k.py | 0 .../transformers/{finetune => }/data/gsm8k.py | 0 .../{finetune => }/data/open_platypus.py | 0 .../{finetune => }/data/peoples_speech.py | 0 .../{finetune => }/data/ultrachat_200k.py | 0 .../{finetune => }/data/wikitext.py | 0 .../transformers/finetune/README.md | 85 --- .../transformers/finetune/__init__.py | 4 - .../transformers/finetune/callbacks.py | 121 ---- .../transformers/finetune/session_mixin.py | 537 ------------------ .../transformers/finetune/trainer.py | 18 - .../{finetune => data}/__init__.py | 0 .../{finetune => }/data/conftest.py | 0 .../data/test_dataset_helpers.py | 0 .../data/test_dataset_loading.py | 0 .../{finetune => }/data/test_registry.py | 0 .../transformers/finetune/data/__init__.py | 0 .../finetune/finetune_custom/config1.yaml | 5 - .../finetune/finetune_custom/config2.yaml | 5 - .../finetune_custom/gpu/gpu_config.yaml | 5 - .../finetune/finetune_generic/config1.yaml | 4 - .../finetune_oneshot_configs/config.yaml | 8 - .../gpu/gpu_config.yaml | 7 - .../finetune/finetune_tokenizer/config1.yaml | 5 - .../finetune/test_alternate_recipe.yaml | 22 - .../test_finetune_no_recipe_custom_dataset.py | 137 ----- .../finetune/test_finetune_recipe.yaml | 19 - .../finetune/test_finetune_without_recipe.py | 31 - .../finetune/test_oneshot_and_finetune.py | 122 ---- ...est_oneshot_and_finetune_with_tokenizer.py | 62 -- .../finetune/test_oneshot_then_finetune.py | 160 ------ .../finetune/test_quantization.yaml | 31 - .../transformers/finetune/test_safetensors.py | 42 -- .../finetune/test_session_mixin.py | 65 --- 51 files changed, 7 insertions(+), 1896 deletions(-) delete mode 100644 examples/trl_mixin/README.md delete mode 100644 examples/trl_mixin/ex_trl_constant.py delete mode 100644 examples/trl_mixin/ex_trl_distillation.py delete mode 100644 examples/trl_mixin/sft_trainer.py delete mode 100644 src/llmcompressor/args/training_arguments.py delete mode 100644 src/llmcompressor/entrypoints/train.py rename src/llmcompressor/transformers/{finetune => }/data/__init__.py (100%) rename src/llmcompressor/transformers/{finetune => }/data/base.py (100%) rename src/llmcompressor/transformers/{finetune => }/data/c4.py (100%) rename src/llmcompressor/transformers/{finetune => }/data/cnn_dailymail.py (100%) rename src/llmcompressor/transformers/{finetune => }/data/custom.py (100%) rename src/llmcompressor/transformers/{finetune => }/data/data_helpers.py (100%) rename src/llmcompressor/transformers/{finetune => }/data/evolcodealpaca.py (100%) rename src/llmcompressor/transformers/{finetune => 
}/data/flickr_30k.py (100%) rename src/llmcompressor/transformers/{finetune => }/data/gsm8k.py (100%) rename src/llmcompressor/transformers/{finetune => }/data/open_platypus.py (100%) rename src/llmcompressor/transformers/{finetune => }/data/peoples_speech.py (100%) rename src/llmcompressor/transformers/{finetune => }/data/ultrachat_200k.py (100%) rename src/llmcompressor/transformers/{finetune => }/data/wikitext.py (100%) delete mode 100644 src/llmcompressor/transformers/finetune/README.md delete mode 100644 src/llmcompressor/transformers/finetune/__init__.py delete mode 100644 src/llmcompressor/transformers/finetune/callbacks.py delete mode 100644 src/llmcompressor/transformers/finetune/session_mixin.py delete mode 100644 src/llmcompressor/transformers/finetune/trainer.py rename tests/llmcompressor/transformers/{finetune => data}/__init__.py (100%) rename tests/llmcompressor/transformers/{finetune => }/data/conftest.py (100%) rename tests/llmcompressor/transformers/{finetune => }/data/test_dataset_helpers.py (100%) rename tests/llmcompressor/transformers/{finetune => }/data/test_dataset_loading.py (100%) rename tests/llmcompressor/transformers/{finetune => }/data/test_registry.py (100%) delete mode 100644 tests/llmcompressor/transformers/finetune/data/__init__.py delete mode 100644 tests/llmcompressor/transformers/finetune/finetune_custom/config1.yaml delete mode 100644 tests/llmcompressor/transformers/finetune/finetune_custom/config2.yaml delete mode 100644 tests/llmcompressor/transformers/finetune/finetune_custom/gpu/gpu_config.yaml delete mode 100644 tests/llmcompressor/transformers/finetune/finetune_generic/config1.yaml delete mode 100644 tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/config.yaml delete mode 100644 tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/gpu/gpu_config.yaml delete mode 100644 tests/llmcompressor/transformers/finetune/finetune_tokenizer/config1.yaml delete mode 100644 tests/llmcompressor/transformers/finetune/test_alternate_recipe.yaml delete mode 100644 tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py delete mode 100644 tests/llmcompressor/transformers/finetune/test_finetune_recipe.yaml delete mode 100644 tests/llmcompressor/transformers/finetune/test_finetune_without_recipe.py delete mode 100644 tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py delete mode 100644 tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune_with_tokenizer.py delete mode 100644 tests/llmcompressor/transformers/finetune/test_oneshot_then_finetune.py delete mode 100644 tests/llmcompressor/transformers/finetune/test_quantization.yaml delete mode 100644 tests/llmcompressor/transformers/finetune/test_safetensors.py delete mode 100644 tests/llmcompressor/transformers/finetune/test_session_mixin.py diff --git a/.github/workflows/test-check-transformers.yaml b/.github/workflows/test-check-transformers.yaml index 368a85a1da..12dc6baeb9 100644 --- a/.github/workflows/test-check-transformers.yaml +++ b/.github/workflows/test-check-transformers.yaml @@ -93,10 +93,10 @@ jobs: if: (success() || failure()) && steps.install.outcome == 'success' run: | pytest -v tests/llmcompressor/transformers/compression - - name: Run Finetune Tests + - name: Run Data Tests if: (success() || failure()) && steps.install.outcome == 'success' run: | - pytest -v tests/llmcompressor/transformers/finetune + pytest -v tests/llmcompressor/transformers/data - name: Running GPTQ Tests if: (success() || failure()) && 
steps.install.outcome == 'success' run: | diff --git a/examples/trl_mixin/README.md b/examples/trl_mixin/README.md deleted file mode 100644 index fde2d3d1c8..0000000000 --- a/examples/trl_mixin/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# Sparse Finetuning with TRL's SFTTrainer - -The `SessionManagerMixin` can be added to other Trainer classes that inherit from -[Hugging Face's Trainer](https://huggingface.co/docs/transformers/en/main_classes/trainer). - -For example, we can add LLM Compressor support to TRL's SFTTrainer like so: - -Note: install `trl` using `pip install trl` - -```python -from trl import SFTTrainer as TRLSFTTrainer - -class SFTTrainer(SessionManagerMixIn, TRLSFTTrainer): - ... -``` - -The new `SFTTrainer` class can now apply LLM Compressor recipes and modifiers during -supervised finetuning, will full support for all of the original TRL features. The full -class is defined in the script `sft_trainer.py` and requires very minimal -additional code: just a dataset load override to support passing in tokenized datasets -to the Trainer. - -### Examples - -* Script `ex_trl_constant.py`: finetunes a 50% sparse Llama-7b model, -using TRL's dataset preprocessing. Sparsity is maintained throughout training by -applying a `ConstantPruningModifier` recipe to the `SFTTrainer` - -* Script `ex_trl_distillation.py`: finetunes a 50% sparse Llama-7b -model using knowledge distillation from a dense Llama-7b model. Sparsity is maintained -throughout training with a `ConstantPruningModifier` and layer-wise knowledge -distillation is handled by the `OutputDistillationModifier` \ No newline at end of file diff --git a/examples/trl_mixin/ex_trl_constant.py b/examples/trl_mixin/ex_trl_constant.py deleted file mode 100644 index b0abb75202..0000000000 --- a/examples/trl_mixin/ex_trl_constant.py +++ /dev/null @@ -1,64 +0,0 @@ -# NOTE: Fine tuning can require more steps than is shown in the example -# See the Axolotl integration blog post for best fine tuning practices -# https://developers.redhat.com/articles/2025/06/17/axolotl-meets-llm-compressor-fast-sparse-open - -from datasets import load_dataset -from sft_trainer import SFTTrainer -from transformers import AutoModelForCausalLM, AutoTokenizer -from trl import DataCollatorForCompletionOnlyLM - -from llmcompressor.args import ModelArguments - -model_path = "neuralmagic/Llama-2-7b-pruned50-retrained" -output_dir = "./output_trl_sft_test_7b_gsm8k_sft_data" -model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype="auto") -tokenizer = AutoTokenizer.from_pretrained(model_path) -tokenizer.pad_token = tokenizer.eos_token - -# recipe for maintaining model sparsity during finetuning -recipe = """ -test_stage: - pruning_modifiers: - ConstantPruningModifier: - targets: ['re:.*q_proj.weight', 're:.*k_proj.weight', 're:.*v_proj.weight', - 're:.*o_proj.weight','re:.*gate_proj.weight', 're:.*up_proj.weight', - 're:.*down_proj.weight'] - start: 0 -""" - -# Load gsm8k using TRL dataset tools -dataset = load_dataset("gsm8k", "main", split="train") - - -def formatting_prompts_func(example): - output_texts = [] - for i in range(len(example["question"])): - text = f"Question: {example['question'][i]}\n Answer: {example['answer'][i]}" - output_texts.append(text) - return output_texts - - -response_template = "Answer:" -collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer) - -trl_sft_config_args = dict( - output_dir=output_dir, - num_train_epochs=0.6, - logging_steps=50, - gradient_checkpointing=True, - max_seq_length=512, -) 
-model_args = ModelArguments(model=model) - -# This step can be supplanted by fine tuning via integrated FT libraries such as Axolotl -trainer = SFTTrainer( - model=model, - processing_class=tokenizer, - recipe=recipe, - train_dataset=dataset, - formatting_func=formatting_prompts_func, - data_collator=collator, - trl_sft_config_args=trl_sft_config_args, - model_args=model_args, -) -trainer.train() diff --git a/examples/trl_mixin/ex_trl_distillation.py b/examples/trl_mixin/ex_trl_distillation.py deleted file mode 100644 index 421fa96f37..0000000000 --- a/examples/trl_mixin/ex_trl_distillation.py +++ /dev/null @@ -1,79 +0,0 @@ -# NOTE: Fine tuning can require more steps than is shown in the example -# See the Axolotl integration blog post for best fine tuning practices -# https://developers.redhat.com/articles/2025/06/17/axolotl-meets-llm-compressor-fast-sparse-open - -from sft_trainer import SFTTrainer -from transformers import AutoModelForCausalLM, AutoTokenizer - -from llmcompressor.args import DatasetArguments, ModelArguments -from llmcompressor.transformers import TextGenerationDataset - -model_path = "neuralmagic/Llama-2-7b-pruned50-retrained" -teacher_path = "neuralmagic/Llama-2-7b-gsm8k" -output_dir = "./output_trl_sft_test_7b_gsm8k" - -model = AutoModelForCausalLM.from_pretrained( - model_path, torch_dtype="auto", device_map="auto" -) -teacher = AutoModelForCausalLM.from_pretrained( - teacher_path, torch_dtype="auto", device_map="auto" -) - -tokenizer = AutoTokenizer.from_pretrained(model_path) -max_seq_length = 512 - -# Load gsm8k using SparseML dataset tools -dataset_args = DatasetArguments( - dataset="gsm8k", dataset_config_name="main", max_seq_length=max_seq_length -) -dataset_manager = TextGenerationDataset.load_from_registry( - dataset_args.dataset, - dataset_args=dataset_args, - split="train", - processor=tokenizer, -) -train_dataset = dataset_manager() -print(f"--> Training Set Length = {len(train_dataset)}") - -# recipe for maintaining model sparsity during finetuning -recipe = """ -test_stage: - pruning_modifiers: - ConstantPruningModifier: - targets: ['re:.*q_proj.weight', 're:.*k_proj.weight', 're:.*v_proj.weight', - 're:.*o_proj.weight', 're:.*gate_proj.weight', 're:.*up_proj.weight', - 're:.*down_proj.weight'] - start: 0 - OutputDistillationModifier: - targets: ['re:model.layers.\\d+$'] - comparison: "square_head" - start: 0 - orig_scale: 1.0 - distill_scale: 1.0 -""" - -trl_sft_config_args = dict( - output_dir=output_dir, - num_train_epochs=0.6, - logging_steps=50, - gradient_checkpointing=True, - bf16=True, - save_safetensors=False, # workaround for shared tensors - max_seq_length=max_seq_length, - packing=True, -) -model_args = ModelArguments(model=model, distill_teacher=teacher) - -# This step can be supplanted by fine tuning via integrated FT libraries such as Axolotl -trainer = SFTTrainer( - model=model, - teacher=teacher, - processing_class=tokenizer, - recipe=recipe, - train_dataset=train_dataset, - trl_sft_config_args=trl_sft_config_args, - dataset_args=dataset_args, - model_args=model_args, -) -trainer.train() -trainer.save_model(output_dir) diff --git a/examples/trl_mixin/sft_trainer.py b/examples/trl_mixin/sft_trainer.py deleted file mode 100644 index 5abb05f4ef..0000000000 --- a/examples/trl_mixin/sft_trainer.py +++ /dev/null @@ -1,22 +0,0 @@ -from typing import Dict, Optional - -from trl import SFTConfig as TRLSFTConfig -from trl import SFTTrainer as TRLSFTTrainer - -from llmcompressor.transformers.finetune.session_mixin import SessionManagerMixIn - 
-__all__ = ["SFTTrainer"] - - -class SFTTrainer(SessionManagerMixIn, TRLSFTTrainer): - def __init__(self, trl_sft_config_args: Optional[Dict] = None, *args, **kwargs): - if trl_sft_config_args is not None: - kwargs["args"] = TRLSFTConfig(**trl_sft_config_args) - super().__init__(*args, **kwargs) - - def _prepare_dataset(self, dataset, *args, **kwargs): - if "input_ids" in dataset.column_names: - # dataset is already tokenized, skip preprocessing - return dataset - - return super()._prepare_dataset(dataset, *args, **kwargs) diff --git a/src/llmcompressor/args/dataset_arguments.py b/src/llmcompressor/args/dataset_arguments.py index d94837b264..2618b90197 100644 --- a/src/llmcompressor/args/dataset_arguments.py +++ b/src/llmcompressor/args/dataset_arguments.py @@ -16,7 +16,7 @@ @dataclass class DVCDatasetArguments: """ - Arguments for training using DVC + Arguments for calibration using DVC """ dvc_data_repository: str | None = field( @@ -28,7 +28,7 @@ class DVCDatasetArguments: @dataclass class CustomDatasetArguments(DVCDatasetArguments): """ - Arguments for training using custom datasets + Arguments for calibration using custom datasets """ dataset_path: str | None = field( @@ -78,8 +78,8 @@ class CustomDatasetArguments(DVCDatasetArguments): @dataclass class DatasetArguments(CustomDatasetArguments): """ - Arguments pertaining to what data we are going to input our model for - calibration, training + Arguments pertaining to what data we are going to use for + calibration Using `HfArgumentParser` we can turn this class into argparse arguments to be able to specify them on the command line @@ -152,13 +152,6 @@ class DatasetArguments(CustomDatasetArguments): "in the batch (which can be faster on GPU but will be slower on TPU)." }, ) - max_train_samples: int | None = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number " - "of training examples to this value if set." - }, - ) min_tokens_per_module: float | None = field( default=None, metadata={ diff --git a/src/llmcompressor/args/training_arguments.py b/src/llmcompressor/args/training_arguments.py deleted file mode 100644 index b5fb508e73..0000000000 --- a/src/llmcompressor/args/training_arguments.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -Training argument classes for LLM compression workflows. - -This module defines dataclass-based argument containers for configuring -training and one-shot calibration workflows. Extends HuggingFace's -TrainingArguments with additional parameters specific to compression and -stage-based execution. -""" - -from dataclasses import dataclass, field - -from transformers import TrainingArguments as HFTrainingArgs - -__all__ = [ - "TrainingArguments", -] - - -@dataclass -class TrainingArguments(HFTrainingArgs): - """ - Training arguments specific to LLM Compressor Transformers workflow using - HFTrainingArgs as base class - - """ - - do_oneshot: bool | None = field( - default=False, - metadata={"help": "Whether to run one-shot calibration in stages"}, - ) - run_stages: bool | None = field( - default=False, metadata={"help": "Whether to trigger recipe stage by stage"} - ) - output_dir: str = field( - default="./output", - metadata={ - "help": "The output directory where the model safetensors, " - "recipe, config, and optionally checkpoints will be written." 
- }, - ) - - @property - def place_model_on_device(self): - return False diff --git a/src/llmcompressor/entrypoints/__init__.py b/src/llmcompressor/entrypoints/__init__.py index 5f1ba6b9c7..b6c5e94ba2 100644 --- a/src/llmcompressor/entrypoints/__init__.py +++ b/src/llmcompressor/entrypoints/__init__.py @@ -8,6 +8,5 @@ """ from .oneshot import Oneshot, oneshot -from .train import train from .model_free import model_free_ptq from .utils import post_process, pre_process diff --git a/src/llmcompressor/entrypoints/train.py b/src/llmcompressor/entrypoints/train.py deleted file mode 100644 index d5b9ed951e..0000000000 --- a/src/llmcompressor/entrypoints/train.py +++ /dev/null @@ -1,146 +0,0 @@ -""" -Training entrypoint for fine-tuning models with compression support. - -Provides the main training entry point that supports both vanilla -fine-tuning and compression-aware training workflows. Integrates with -HuggingFace transformers and supports knowledge distillation, pruning, -and quantization during the training process. -""" - -import math -import os - -from compressed_tensors.utils import deprecated -from loguru import logger -from transformers import PreTrainedModel - -from llmcompressor.args import parse_args -from llmcompressor.core.session_functions import active_session -from llmcompressor.datasets.utils import get_processed_dataset -from llmcompressor.transformers.finetune.trainer import Trainer -from llmcompressor.utils.dev import dispatch_for_generation - -from .utils import post_process, pre_process - - -@deprecated( - message=( - "Training support will be removed in future releases. Please use " - "the llmcompressor Axolotl integration for fine-tuning " - "https://developers.redhat.com/articles/2025/06/17/axolotl-meets-llm-compressor-fast-sparse-open" # noqa: E501 - ) -) -def train(**kwargs) -> PreTrainedModel: - """ - Fine-tuning entrypoint that supports vanilla fine-tuning and - knowledge distillation for compressed model using `oneshot`. - - - This entrypoint is responsible the entire fine-tuning lifecycle, including - preprocessing (model and tokenizer/processor initialization), fine-tuning, - and postprocessing (saving outputs). The intructions for fine-tuning compressed - model can be specified by using a recipe. - - - **Input Keyword Arguments:** - `kwargs` are parsed into: - - `model_args`: Arguments for loading and configuring a pretrained model - (e.g., `AutoModelForCausalLM`). - - `dataset_args`: Arguments for dataset-related configurations, such as - calibration dataloaders. - - `recipe_args`: Arguments for defining and configuring recipes that specify - optimization actions. - - `training_args`: rguments for defining and configuring training parameters - - Parsers are defined in `src/llmcompressor/args/`. - - - **Lifecycle Overview:** - The fine-tuning lifecycle consists of three steps: - 1. **Preprocessing**: - - Instantiates a pretrained model and tokenizer/processor. - - Ensures input and output embedding layers are untied if they share - tensors. - - Patches the model to include additional functionality for saving with - quantization configurations. - 2. **Training**: - - Finetunes the model using a global `CompressionSession` and applies - recipe-defined modifiers (e.g., `ConstantPruningModifier`, - `OutputDistillationModifier`) - 3. **Postprocessing**: - - Saves the model, tokenizer/processor, and configuration to the specified - `output_dir`. 
- - - **Usage:** - ```python - train(model=model, recipe=recipe, dataset=dataset) - - ``` - - """ - model_args, dataset_args, recipe_args, training_args, output_dir = parse_args( - include_training_args=True, **kwargs - ) - - pre_process(model_args, dataset_args, output_dir) - dispatch_for_generation(model_args.model) # train is dispatched same as generation - - processed_dataset = get_processed_dataset( - dataset_args=dataset_args, - processor=model_args.processor, - ) - training_dataset = processed_dataset.get("train") - - # create output dir for stages - original_output_dir = output_dir = training_args.output_dir - if all([output_dir, recipe_args, getattr(recipe_args, "stage", None)]): - output_dir = os.path.join(original_output_dir, recipe_args.stage) - if not os.path.exists(output_dir): - os.makedirs(output_dir) - # update output dir in training args - logger.info( - f"Stage detected for training. Updating output dir to: {output_dir}" - ) - training_args.output_dir = output_dir - - trainer = Trainer( - model=model_args.model, - teacher=model_args.distill_teacher, - recipe=recipe_args.recipe, - recipe_args=recipe_args.recipe_args, - args=training_args, - model_args=model_args, - dataset_args=dataset_args, - train_dataset=training_dataset, - processing_class=model_args.processor, - data_collator=dataset_args.data_collator, - ) - - checkpoint = None - if training_args.resume_from_checkpoint is not None: - checkpoint = training_args.resume_from_checkpoint - - logger.info("*** Train ***") - - session = active_session() - session.reset() - train_result = trainer.train( - resume_from_checkpoint=checkpoint, - stage=recipe_args.stage, - ) - - # return output - metrics = train_result.metrics - metrics["train_samples"] = len(training_dataset) - metrics["perplexity"] = math.exp(metrics["train_loss"]) - trainer.log_metrics("train", metrics) - trainer.save_metrics("train", metrics) - - # this includes saving the state, optimizer and scheduler - # TODO: support all save args, not just skip_sparsity_compression_stats - trainer.save_model( - output_dir=training_args.output_dir, skip_sparsity_compression_stats=False - ) - - post_process(recipe_args=recipe_args) - training_args.output_dir = original_output_dir - - return model_args.model diff --git a/src/llmcompressor/transformers/__init__.py b/src/llmcompressor/transformers/__init__.py index fd751a6a26..2e018413ac 100644 --- a/src/llmcompressor/transformers/__init__.py +++ b/src/llmcompressor/transformers/__init__.py @@ -7,3 +7,4 @@ # (import order matters for circular import avoidance) from .utils import * from .finetune import * +from .data import TextGenerationDataset diff --git a/src/llmcompressor/transformers/finetune/data/__init__.py b/src/llmcompressor/transformers/data/__init__.py similarity index 100% rename from src/llmcompressor/transformers/finetune/data/__init__.py rename to src/llmcompressor/transformers/data/__init__.py diff --git a/src/llmcompressor/transformers/finetune/data/base.py b/src/llmcompressor/transformers/data/base.py similarity index 100% rename from src/llmcompressor/transformers/finetune/data/base.py rename to src/llmcompressor/transformers/data/base.py diff --git a/src/llmcompressor/transformers/finetune/data/c4.py b/src/llmcompressor/transformers/data/c4.py similarity index 100% rename from src/llmcompressor/transformers/finetune/data/c4.py rename to src/llmcompressor/transformers/data/c4.py diff --git a/src/llmcompressor/transformers/finetune/data/cnn_dailymail.py b/src/llmcompressor/transformers/data/cnn_dailymail.py 
similarity index 100% rename from src/llmcompressor/transformers/finetune/data/cnn_dailymail.py rename to src/llmcompressor/transformers/data/cnn_dailymail.py diff --git a/src/llmcompressor/transformers/finetune/data/custom.py b/src/llmcompressor/transformers/data/custom.py similarity index 100% rename from src/llmcompressor/transformers/finetune/data/custom.py rename to src/llmcompressor/transformers/data/custom.py diff --git a/src/llmcompressor/transformers/finetune/data/data_helpers.py b/src/llmcompressor/transformers/data/data_helpers.py similarity index 100% rename from src/llmcompressor/transformers/finetune/data/data_helpers.py rename to src/llmcompressor/transformers/data/data_helpers.py diff --git a/src/llmcompressor/transformers/finetune/data/evolcodealpaca.py b/src/llmcompressor/transformers/data/evolcodealpaca.py similarity index 100% rename from src/llmcompressor/transformers/finetune/data/evolcodealpaca.py rename to src/llmcompressor/transformers/data/evolcodealpaca.py diff --git a/src/llmcompressor/transformers/finetune/data/flickr_30k.py b/src/llmcompressor/transformers/data/flickr_30k.py similarity index 100% rename from src/llmcompressor/transformers/finetune/data/flickr_30k.py rename to src/llmcompressor/transformers/data/flickr_30k.py diff --git a/src/llmcompressor/transformers/finetune/data/gsm8k.py b/src/llmcompressor/transformers/data/gsm8k.py similarity index 100% rename from src/llmcompressor/transformers/finetune/data/gsm8k.py rename to src/llmcompressor/transformers/data/gsm8k.py diff --git a/src/llmcompressor/transformers/finetune/data/open_platypus.py b/src/llmcompressor/transformers/data/open_platypus.py similarity index 100% rename from src/llmcompressor/transformers/finetune/data/open_platypus.py rename to src/llmcompressor/transformers/data/open_platypus.py diff --git a/src/llmcompressor/transformers/finetune/data/peoples_speech.py b/src/llmcompressor/transformers/data/peoples_speech.py similarity index 100% rename from src/llmcompressor/transformers/finetune/data/peoples_speech.py rename to src/llmcompressor/transformers/data/peoples_speech.py diff --git a/src/llmcompressor/transformers/finetune/data/ultrachat_200k.py b/src/llmcompressor/transformers/data/ultrachat_200k.py similarity index 100% rename from src/llmcompressor/transformers/finetune/data/ultrachat_200k.py rename to src/llmcompressor/transformers/data/ultrachat_200k.py diff --git a/src/llmcompressor/transformers/finetune/data/wikitext.py b/src/llmcompressor/transformers/data/wikitext.py similarity index 100% rename from src/llmcompressor/transformers/finetune/data/wikitext.py rename to src/llmcompressor/transformers/data/wikitext.py diff --git a/src/llmcompressor/transformers/finetune/README.md b/src/llmcompressor/transformers/finetune/README.md deleted file mode 100644 index f677cfd0a3..0000000000 --- a/src/llmcompressor/transformers/finetune/README.md +++ /dev/null @@ -1,85 +0,0 @@ -# Sparse Finetuning - -## Launching from Python - -```python -from llmcompressor import train - -model = "./sparsegpt_deployment" -teacher_model = "Xenova/llama2.c-stories15M" -dataset_name = "open_platypus" -concatenate_data = False -output_dir = "./output_finetune" -recipe = "test_trainer_recipe.yaml" -num_train_epochs=2 -overwrite_output_dir = True -splits = { - "train": "train[:50%]", -} - -train( - model=model, - distill_teacher=teacher_model, - dataset=dataset_name, - output_dir=output_dir, - recipe=recipe, - num_train_epochs=num_train_epochs, - overwrite_output_dir=overwrite_output_dir, - concatenate_data = 
concatenate_data, - splits = splits -) -``` - -## Additional Configuration - -Finetuning arguments are split up into 3 groups: - -* ModelArguments: `src/llmcompressor/args/model_arguments.py` -* TrainingArguments: `src/llmcompressor/args/training_arguments.py` -* DatasetArguments: `src/llmcompressor/args/dataset_arguments.py` -* RecipeArguments: `src/llmcompressor/args/recipe_arguments.py` - - -## Running Multi-Stage Recipes - -A recipe can be run stage-by-stage by setting `run_stages` to `True` or calling the -`llmcompressor.transformers.apply/compress` pathways. Each stage in the recipe should have -a `run_type` attribute set to either `oneshot` or `train` when running in sequential -mode. - -See [example_alternating_recipe.yaml](../../../../examples/finetuning/example_alternating_recipe.yaml) for an example -of a staged recipe for Llama. - -test_multi.py -```python -from llmcompressor.transformers import apply -from transformers import AutoModelForCausalLM - -model = "../ml-experiments/nlg-text_generation/llama_pretrain-llama_7b-base/dense/training" - -dataset_name = "open_platypus" -concatenate_data = False -run_stages=True -output_dir = "./output_finetune_multi" -recipe = "example_alternating_recipe.yaml" -num_train_epochs=1 -overwrite_output_dir = True -splits = { - "train": "train[:95%]", - "calibration": "train[95%:100%]" -} - -apply( - model_name_or_path=model, - dataset_name=dataset_name, - run_stages=run_stages, - output_dir=output_dir, - recipe=recipe, - num_train_epochs=num_train_epochs, - overwrite_output_dir=overwrite_output_dir, - concatenate_data = concatenate_data, - remove_unused_columns = False, - splits = splits -) - -``` \ No newline at end of file diff --git a/src/llmcompressor/transformers/finetune/__init__.py b/src/llmcompressor/transformers/finetune/__init__.py deleted file mode 100644 index 4d76c27542..0000000000 --- a/src/llmcompressor/transformers/finetune/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# ruff: noqa - -from .data import TextGenerationDataset -from .session_mixin import SessionManagerMixIn diff --git a/src/llmcompressor/transformers/finetune/callbacks.py b/src/llmcompressor/transformers/finetune/callbacks.py deleted file mode 100644 index daed32057e..0000000000 --- a/src/llmcompressor/transformers/finetune/callbacks.py +++ /dev/null @@ -1,121 +0,0 @@ -""" -Training callbacks for compression-aware fine-tuning workflows. - -This module provides custom trainer callbacks that integrate compression -session management with HuggingFace training loops. Handles precision -control, training loop monitoring, and compression lifecycle events -during model fine-tuning. -""" - -import math - -from transformers import TrainerCallback, TrainerControl, TrainingArguments -from transformers.trainer_callback import TrainerState - -from llmcompressor.core import active_session -from llmcompressor.core import callbacks as session_callbacks - -__all__ = [ - "DisableHalfPrecisionCallback", - "TrainingLoopCallbacks", -] - - -class TrainingLoopCallbacks(TrainerCallback): - """ - TrainerCallback for triggering CompressionSession callbacks in the training loop. - Used to update the model reference(for running with FSDP) and trigger the post- - optim callbacks in each modifier. 
- - :param trainer: LLM Compressor trainer that will call back into this object - :param args: args to be passed to base TrainerCallback - :param kwargs: key word arguments to be passed to base TrainerCallback - """ - - def __init__(self, trainer, *args, **kwargs): - super().__init__(*args, **kwargs) - self.trainer = trainer - - def on_train_begin( - self, - args: TrainingArguments, - state: TrainerState, - control: TrainerControl, - **kwargs, - ): - """ - Event called at the beginning of training. Update the session reference to the - model, as it will have changed to a wrapper if FSDP is enabled - """ - super().on_train_begin(args, state, control, **kwargs) - session = active_session() - session.state.model = self.trainer.model - - def on_step_end( - self, - args: TrainingArguments, - state: TrainerState, - control: TrainerControl, - **kwargs, - ): - """ - Event called at the end of a training step. If using gradient accumulation, - one training step might take several inputs. - - Triggers optimizer post_step and batch_end in the active CompressionSession - """ - super().on_step_end(args, state, control, **kwargs) - session_callbacks.optim_post_step() - session_callbacks.batch_end() - - def on_substep_end( - self, - args: TrainingArguments, - state: TrainerState, - control: TrainerControl, - **kwargs, - ): - """ - Event called at the end of an substep during gradient accumulation. - - Triggers optimizer post_step and batch_end in the active CompressionSession - """ - super().on_substep_end(args, state, control, **kwargs) - session_callbacks.optim_post_step() - session_callbacks.batch_end() - - -class DisableHalfPrecisionCallback(TrainerCallback): - """ - TrainerCallback for disabling FP16 training before QAT training begins - - :param trainer: LLM Compressor trainer that will call back into this object - :param args: args to be passed to base TrainerCallback - :param kwargs: key word arguments to be passed to base TrainerCallback - """ - - def __init__(self, trainer, *args, **kwargs): - super().__init__(*args, **kwargs) - self.trainer = trainer - self.on_begin_called = False - self.quant_start_epoch = math.inf - - def qat_active(self) -> bool: - """ - :return: True if a quantization modifier is active in the current session - """ - session = active_session() - return session.state.model.qat_active() - - def on_epoch_begin( - self, - args: TrainingArguments, - state: TrainerState, - control: TrainerControl, - **kwargs, - ): - """ - Event called at the beginning of an epoch. 
- """ - super().on_epoch_begin(args, state, control, **kwargs) - self.on_begin_called = True diff --git a/src/llmcompressor/transformers/finetune/session_mixin.py b/src/llmcompressor/transformers/finetune/session_mixin.py deleted file mode 100644 index e344705d71..0000000000 --- a/src/llmcompressor/transformers/finetune/session_mixin.py +++ /dev/null @@ -1,537 +0,0 @@ -import inspect -import math -import os -from dataclasses import asdict -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union - -import torch -from loguru import logger -from torch.nn import Module -from torch.utils.data import IterableDataset -from transformers.trainer_callback import TrainerState -from transformers.trainer_utils import get_last_checkpoint - -from llmcompressor.core import active_session, callbacks, create_session -from llmcompressor.metrics import LoggerManager -from llmcompressor.modifiers.distillation.utils.pytorch.model_wrapper import ( - KDModelWrapper, -) -from llmcompressor.pytorch.model_load.helpers import get_session_model, save_checkpoint -from llmcompressor.pytorch.utils import ModuleSparsificationInfo -from llmcompressor.transformers.finetune.callbacks import ( - DisableHalfPrecisionCallback, - TrainingLoopCallbacks, -) -from llmcompressor.utils.fsdp.context import summon_full_params_context -from llmcompressor.utils.pytorch import qat_active - -if TYPE_CHECKING: - from llmcompressor.args import DatasetArguments, ModelArguments - -__all__ = [ - "SessionManagerMixIn", -] - -TRAINER_STATE_NAME = "trainer_state.json" -METADATA_ARGS = [ - "per_device_train_batch_size", - "max_seq_length", - "save_safetensors", - "fp16", -] - - -class SessionManagerMixIn: - """ - Mix-In class to extend the Hugging Face Trainer class to support LLM Compressor - recipes for one-shot and finetuning flows. - - :param recipe: path to recipe file to apply during training - :param recipe_args: additional kwargs to use for evaluating recipe - :param dataset_args: kwargs for configuring dataset loading - :param teacher: optional teacher model to use for distillation - """ - - def __init__( - self, - recipe: str, - model_args: "ModelArguments", - dataset_args: Optional["DatasetArguments"] = None, - teacher: Optional[Union[Module, str]] = None, - recipe_args: Optional[Union[Dict[str, Any], str]] = None, - **kwargs, - ): - self.recipe = recipe - self.recipe_args = recipe_args - self.model_args = model_args - self.teacher = teacher - - # parse training and metadata args - training_args = kwargs.get("args") - - self.metadata = None - if training_args is not None: - # trl_sft_trainer pathway. Both training_args and dataset_args - # have `max_seq_length` which causes collision error. This is the - # only shared parameter, where training arg is `TRLSFTConfig` that - # inherits HuggingFace's `TrainingArguments` - training_args_dict = training_args.to_dict() - if "max_seq_length" in training_args_dict: - training_args_dict["training_args_max_seq_length"] = ( - training_args_dict.pop("max_seq_length") - ) - logger.warning( - "Detected `max_seq_length` in both dataset_args ", - "and training_args. This is expected for TRL in distillation. 
", - "Updating metadata to `training_args_max_seq_length`", - ) - - self.metadata = self._extract_metadata( - metadata_args=METADATA_ARGS, - training_args_dict=training_args_dict, - dataset_args_dict=asdict(dataset_args) if dataset_args else {}, - ) - - # setup metrics and session - self.logger_manager = LoggerManager(log_python=False) - create_session() - - # call Trainer initialization - super().__init__(**kwargs) - self.accelerator.wait_for_everyone() - - # setup callbacks and loss - self.optim_callbacks = TrainingLoopCallbacks(self) - self.callback_handler.add_callback(self.optim_callbacks) - self.callback_disable_fp16 = DisableHalfPrecisionCallback(self) - self.callback_handler.add_callback(self.callback_disable_fp16) - self.criterion = torch.nn.CrossEntropyLoss() - - model_signature = inspect.signature(self.model.forward) - self._signature_columns = list(model_signature.parameters.keys()) - - if self.teacher is not None and teacher not in ("disable", "self"): - teacher_signature = inspect.signature(self.teacher.forward) - self._teacher_signature_columns = list(teacher_signature.parameters.keys()) - else: - self._teacher_signature_columns = None - - if self.is_fsdp_enabled: - self._prepare_model_for_fsdp() - - if dataset_args is not None: - self.min_tokens_per_module = dataset_args.min_tokens_per_module - - def initialize_session( - self, - epoch: float, - checkpoint: Optional[str] = None, - stage: Optional[str] = None, - ): - """ - Initialize the CompressionSession from the specified epoch, evaluates the recipe - and initialized the modifiers for the training session - - :param epoch: Epoch to initialize session from, usually 0 unless loading - from a checkpoint - :param checkpoint: Optional checkpoint to initialize from to continue training - :param stage: Optional stage of recipe to run, or None to run all stages - """ - session = active_session() - if session.lifecycle.initialized_ or session.lifecycle.finalized: - return False - - train_data = self.get_train_dataloader() - - self.accelerator.wait_for_everyone() - with summon_full_params_context(self.model, offload_to_cpu=True): - active_session().initialize( - recipe=self.recipe, - recipe_stage=stage, - recipe_args=self.recipe_args, - model=self.model, - teacher_model=self.teacher, # TODO: what about for self/disable? - train_data=train_data, - start=epoch, - copy_data=False, - attach_optim_callbacks=True, - fsdp_active=self.is_fsdp_enabled, - metadata=self.metadata, - ) - - self.accelerator.wait_for_everyone() - model = get_session_model() - self.model_wrapped = self.model = model - - if self.recipe is None: - logger.warning( - "No training recipe was provided, finetuning will be run " - "without event callbacks to LLM Compressor. To supply a recipe " - "pass a yaml file or string to the `recipe` argument." 
- ) - - if hasattr(torch, "xpu") and torch.xpu.is_available(): - torch.xpu.empty_cache() - else: - torch.cuda.empty_cache() - - def finalize_session(self): - """ - Wrap up training by finalizing all modifiers initialized in the current session - """ - session = active_session() - if not session.lifecycle.initialized_ or session.lifecycle.finalized: - return False - - with summon_full_params_context(self.model, offload_to_cpu=True): - # in order to update each layer we need to gathers all its parameters - active_session().finalize() - logger.info("Finalized LLM Compressor session") - model = get_session_model() - self.model = model - if hasattr(torch, "xpu") and torch.xpu.is_available(): - torch.xpu.empty_cache() - else: - torch.cuda.empty_cache() - - def create_optimizer(self): - """ - Override the optimizer to apply and update the recipe while training. - create_optimizer must exist in the parent class and should set - self.optimizer to the optimizer state and optionally set self.scaler - if using amp. - """ - - self._check_super_defined("create_optimizer") - super().create_optimizer() - - # n_gpu handled internally by dataloader - total_batch_size = ( - self.args.per_device_train_batch_size - * self.args.gradient_accumulation_steps - ) - - if isinstance(self.train_dataset, IterableDataset): - logger.warning( - "Training is being run with a streamed dataset, " - "steps_per_epoch cannot be determined and will default to " - "1. LLM Compressor modifiers utilizing this statistic may not " - "behave as expected. " - ) - self.total_steps_per_epoch = 1 - else: - self.total_steps_per_epoch = math.ceil( - len(self.train_dataset) / total_batch_size - ) - - active_session().initialize( - optimizer=self.optimizer, steps_per_epoch=self.total_steps_per_epoch - ) - - return self.optimizer - - def create_scheduler( - self, num_training_steps: int, optimizer: torch.optim.Optimizer = None - ): - """ - Create an LR scheduler to work with the applied recipes. This is a placeholder - that just calls the super method, but would be expanded upon if we ever - implement a LearningRateModifier. - - :param num_training_steps: the total number of training steps - :param optimizer: pre-initialized optimizer - """ - - # TODO: we don't currently have a LR scheduler in the new modifier framework - self._check_super_defined("create_scheduler") - return super().create_scheduler( - num_training_steps=num_training_steps, optimizer=optimizer - ) - - def training_step( - self, - model: torch.nn.Module, - inputs: Dict[str, Union[torch.Tensor, Any]], - num_items_in_batch: Optional[int] = None, - ) -> torch.Tensor: - """ - Overrides the Trainer's training step to trigger the batch_start callback to - the modifiers, then calls the parent function. 
- - :param model: the model to compute the loss for - :param inputs: the inputs to pass through the model for calculating the loss - :return: output of the model - """ - self._check_super_defined("training_step") - - callbacks.batch_start(batch_data=inputs, global_step=self.state.epoch) - model_outputs = super().training_step( - model=model, inputs=inputs, num_items_in_batch=num_items_in_batch - ) - - return model_outputs - - def compute_loss( - self, - model: Module, - inputs: Dict[str, Any], - return_outputs: bool = False, - num_items_in_batch: Optional[torch.Tensor] = None, - ) -> Union[torch.Tensor, Tuple[torch.Tensor, Any]]: - """ - Override for the compute_loss to factor trigger callbacks and filter columns - - :param model: the model to compute the loss for - :param inputs: the inputs to pass through the model for calculating the loss - :param return_outputs: True to return the outputs with the loss, - False otherwise - :param num_items_in_batch: the number of items which contribute to loss - :return: the resulting loss if not return_outputs, otherwise a tuple - containing the loss and the model's outputs - """ - self._check_super_defined("compute_loss") - - # TODO: do we need these model signature columns? - inputs = {k: inputs[k] for k in inputs if k in self._signature_columns} - loss = super().compute_loss( - model=model, - inputs=inputs, - return_outputs=return_outputs, - num_items_in_batch=num_items_in_batch, - ) - - # take the mean across multiple GPUs - # this is done outside the compute_loss function in the parent, replicating it - # here for LLM Compressor logging and distillation - loss = loss.mean() - - # Log step-wise loss and perplexity, for llama-recipes comparison - # we want this before distillation loss so perplexity isn't thrown off - do_log = self.state.global_step % self.args.logging_steps == 0 - if do_log: - log = {} - log["step_loss"] = loss.item() - log["perplexity"] = torch.exp(loss).item() - - if active_session().lifecycle.initialized_: - state = callbacks.loss_calculated(loss=loss) - if state and state.loss is not None: - loss = state.loss - if do_log: - log["distill_step_loss"] = loss.item() - log["step_loss"] - callbacks.optim_pre_step() - - if do_log: - self.log(log) - - return loss - - def train(self, *args, stage: Optional[str] = None, **kwargs): - """ - Run a sparsification training cycle. Runs initialization for the sparse session - before calling super().train() and finalization of the session after. - - Logs sparsification details for the trained model. 
- - :param args: positional args to pass to super().train() - :param stage: Optional stage of recipe to run, or None to run all stages - :param kwargs: keyword args to pass to super().train() - :return: the output from super.train() - """ - - # lifecycle - checkpoint, epoch = self._calculate_checkpoint_info(kwargs) - self.initialize_session(epoch=epoch, checkpoint=checkpoint, stage=stage) - - # do not save checkpoints as compressed - original_save_compressed = self.model_args.save_compressed - self.model_args.save_compressed = False - - # train with accelerator - self.accelerator.wait_for_everyone() - output = super().train(*args, **kwargs) - self.accelerator.wait_for_everyone() - - # restore original setting for saving final model - self.model_args.save_compressed = original_save_compressed - - # lifecycle - self.finalize_session() - self.accelerator.wait_for_everyone() - - # log model sparsity - self.maybe_log_model_sparsification() - self.accelerator.wait_for_everyone() - - return output - - # TODO: support all save args, not just skip_sparsity_compression_stats - def save_model( - self, - output_dir: str, - _internal_call: bool = False, - skip_sparsity_compression_stats: Optional[bool] = True, - ): - """ - Override of the save_model function and expects it to exist in the parent. - Calls into super() to save the model and additionally saves any recipes - that were used with the model within the model folder. - - :param output_dir: the path to save the recipes into - :param _internal_call: True if this is an internal call from - the trainer in super(). Called from - self.save_model(output_dir, _internal_call=True) - in transformers/trainer/Trainer::_save_checkpoint - - """ - if active_session() is None: - logger.warning( - "No active session found, skipping saving of recipes and model." - ) - return - - # knowledge distillation requires making wrappers transparent during - if isinstance(self.model, KDModelWrapper): - self.model.prepare_for_save() # TODO: move to finalize - - # save checkpoint - # note that skip_sparsity_compression_stats - # is True by default to avoid high runtime cost - self.save_state() - if self.accelerator.is_main_process: - processor = getattr(self, "processing_class", self.tokenizer) - # TODO: need to port over all saving parameters so that all - # checkpoints are saved in the same way - save_checkpoint( - output_dir, - model=self.model, - processor=processor, - save_safetensors=self.args.save_safetensors, - save_compressed=self.model_args.save_compressed, - skip_sparsity_compression_stats=skip_sparsity_compression_stats, - ) - self.accelerator.wait_for_everyone() - - if isinstance(self.model, KDModelWrapper): - self.model.finish_save() - - def maybe_log_model_sparsification(self): - """ - Log info on model sparsity and quantization if possible. 
Only print logs on the - main process, and avoid logging for quantized FSDP models - """ - with summon_full_params_context(self.model, offload_to_cpu=True): - # offload to avoid OOM errors - if not self.accelerator.is_main_process: - # only calculate stats rank0 GPU - return - if self.is_fsdp_enabled and qat_active(self.model): - # due to state dict changes we can't log sparsity info with quantized - # models in FSDP - return - - self.log_model_sparsification() - - def log_model_sparsification(self): - """ - Log the current model sparsification info including pruned and quantized states - """ - sparsification_info = ModuleSparsificationInfo(self.model) - - logger.info( - f"Sparsification info for {type(self.model).__name__}: " - f"{sparsification_info.params_total} total params. " - ) - sparsity_percent_formatted = "{:.2f}".format( - sparsification_info.params_sparse_percent - ) - logger.info( - f"There are {sparsification_info.params_total} prunable " - f"params which have {sparsity_percent_formatted}% " - "avg sparsity." - ) - - quant_percent_formatted = "{:.2f}".format( - sparsification_info.params_quantized_percent - ) - logger.info( - f"There are {sparsification_info.params_total} quantizable " - f"params, with a quantization percentage of " - f"{quant_percent_formatted}%." - ) - - def _prepare_model_for_fsdp(self): - """ - Sets up FSDP ahead of time so we can run one-shot in FSDP mode - """ - self.model.to("cpu") - self.model = self.accelerator.prepare(self.model) - self.accelerator.wait_for_everyone() - - if self.teacher is not None: - self.teacher.to("cpu") - for n, p in self.teacher.named_parameters(): - p.requires_grad = False - self.teacher = self.accelerator.prepare(self.teacher) - self.teacher.eval() - self.accelerator.wait_for_everyone() - - def _extract_metadata( - self, - metadata_args: List[str], - training_args_dict: Dict[str, Any], - dataset_args_dict: Dict[str, Any], - ) -> Dict[str, Any]: - metadata = {} - if not training_args_dict.keys().isdisjoint(dataset_args_dict.keys()): - raise ValueError( - "Found common keys in `training_args` and `data args`. " - "This is prohibitive and may lead to undesired behavior." - ) - - args_dict = {**training_args_dict, **dataset_args_dict} - - for arg in metadata_args: - if arg not in args_dict.keys(): - logger.warning( - f"Required metadata argument {arg} was not found " - f"in the training arguments. Setting {arg} to None." - ) - metadata[arg] = None - else: - metadata[arg] = args_dict[arg] - - return metadata - - def _check_super_defined(self, func: str): - if not hasattr(super(), func): - raise NotImplementedError( - f"The super class for SessionManagerMixIn must define a {func} function" - ) - - def _calculate_checkpoint_info(self, kwargs) -> Tuple[Optional[str], float]: - """ - If resuming from checkpoint is set, get checkpoint and epoch to resume from - """ - checkpoint = None - epoch = 0.0 - - if not kwargs or "resume_from_checkpoint" not in kwargs: - logger.warning( - "resume_from_checkpoint not passed into LLM Compressor Trainer.train. " - "This will cause issues with restoring recipes when " - "running from a checkpoint." 
- ) - elif kwargs["resume_from_checkpoint"]: - if ( - isinstance(kwargs["resume_from_checkpoint"], bool) - and kwargs["resume_from_checkpoint"] - ): - checkpoint = get_last_checkpoint(self.args.output_dir) - else: - checkpoint = kwargs["resume_from_checkpoint"] - epoch = TrainerState.load_from_json( - os.path.join(checkpoint, TRAINER_STATE_NAME) - ).epoch - - return checkpoint, epoch diff --git a/src/llmcompressor/transformers/finetune/trainer.py b/src/llmcompressor/transformers/finetune/trainer.py deleted file mode 100644 index 6bb3a1739b..0000000000 --- a/src/llmcompressor/transformers/finetune/trainer.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -Enhanced trainer class for fine-tuning with compression support. - -This module provides a Trainer class that extends HuggingFace's Trainer with -LLM compression session management capabilities. Integrates compression -workflows into the standard training loop for seamless model optimization -during fine-tuning. -""" - -from transformers import Trainer as HFTransformersTrainer - -from llmcompressor.transformers.finetune.session_mixin import SessionManagerMixIn - -__all__ = ["Trainer"] - - -class Trainer(SessionManagerMixIn, HFTransformersTrainer): - pass diff --git a/tests/llmcompressor/transformers/finetune/__init__.py b/tests/llmcompressor/transformers/data/__init__.py similarity index 100% rename from tests/llmcompressor/transformers/finetune/__init__.py rename to tests/llmcompressor/transformers/data/__init__.py diff --git a/tests/llmcompressor/transformers/finetune/data/conftest.py b/tests/llmcompressor/transformers/data/conftest.py similarity index 100% rename from tests/llmcompressor/transformers/finetune/data/conftest.py rename to tests/llmcompressor/transformers/data/conftest.py diff --git a/tests/llmcompressor/transformers/finetune/data/test_dataset_helpers.py b/tests/llmcompressor/transformers/data/test_dataset_helpers.py similarity index 100% rename from tests/llmcompressor/transformers/finetune/data/test_dataset_helpers.py rename to tests/llmcompressor/transformers/data/test_dataset_helpers.py diff --git a/tests/llmcompressor/transformers/finetune/data/test_dataset_loading.py b/tests/llmcompressor/transformers/data/test_dataset_loading.py similarity index 100% rename from tests/llmcompressor/transformers/finetune/data/test_dataset_loading.py rename to tests/llmcompressor/transformers/data/test_dataset_loading.py diff --git a/tests/llmcompressor/transformers/finetune/data/test_registry.py b/tests/llmcompressor/transformers/data/test_registry.py similarity index 100% rename from tests/llmcompressor/transformers/finetune/data/test_registry.py rename to tests/llmcompressor/transformers/data/test_registry.py diff --git a/tests/llmcompressor/transformers/finetune/data/__init__.py b/tests/llmcompressor/transformers/finetune/data/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/llmcompressor/transformers/finetune/finetune_custom/config1.yaml b/tests/llmcompressor/transformers/finetune/finetune_custom/config1.yaml deleted file mode 100644 index fd4d5f07ce..0000000000 --- a/tests/llmcompressor/transformers/finetune/finetune_custom/config1.yaml +++ /dev/null @@ -1,5 +0,0 @@ -cadence: "commit" -test_type: "sanity" -model: "nm-testing/tinysmokellama-3.2" -file_extension: json -num_train_epochs: 1 \ No newline at end of file diff --git a/tests/llmcompressor/transformers/finetune/finetune_custom/config2.yaml b/tests/llmcompressor/transformers/finetune/finetune_custom/config2.yaml deleted file mode 100644 index 
9a83729922..0000000000 --- a/tests/llmcompressor/transformers/finetune/finetune_custom/config2.yaml +++ /dev/null @@ -1,5 +0,0 @@ -cadence: "commit" -test_type: "sanity" -model: "nm-testing/tinysmokellama-3.2" -file_extension: csv -num_train_epochs: 1 \ No newline at end of file diff --git a/tests/llmcompressor/transformers/finetune/finetune_custom/gpu/gpu_config.yaml b/tests/llmcompressor/transformers/finetune/finetune_custom/gpu/gpu_config.yaml deleted file mode 100644 index 1828cc6ba3..0000000000 --- a/tests/llmcompressor/transformers/finetune/finetune_custom/gpu/gpu_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -cadence: "nightly" -test_type: "regression" -model: "neuralmagic/Llama-2-7b-ultrachat200k" -file_extension: json -num_train_epochs: 0.5 \ No newline at end of file diff --git a/tests/llmcompressor/transformers/finetune/finetune_generic/config1.yaml b/tests/llmcompressor/transformers/finetune/finetune_generic/config1.yaml deleted file mode 100644 index b7a7c87d87..0000000000 --- a/tests/llmcompressor/transformers/finetune/finetune_generic/config1.yaml +++ /dev/null @@ -1,4 +0,0 @@ -cadence: "nightly" -test_type: "regression" -model: "nm-testing/tinysmokellama-3.2" -dataset: open_platypus \ No newline at end of file diff --git a/tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/config.yaml b/tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/config.yaml deleted file mode 100644 index 48ae2741b9..0000000000 --- a/tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/config.yaml +++ /dev/null @@ -1,8 +0,0 @@ -cadence: "commit" -test_type: "sanity" -model: "nm-testing/tinysmokellama-3.2" -dataset: wikitext -dataset_config_name: "wikitext-2-raw-v1" -recipe: "tests/llmcompressor/transformers/finetune/test_alternate_recipe.yaml" -num_train_epochs: 0.25 -concat_txt: False \ No newline at end of file diff --git a/tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/gpu/gpu_config.yaml b/tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/gpu/gpu_config.yaml deleted file mode 100644 index f81362ea1c..0000000000 --- a/tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/gpu/gpu_config.yaml +++ /dev/null @@ -1,7 +0,0 @@ -cadence: "nightly" -test_type: "regression" -model: "neuralmagic/Llama-2-7b-ultrachat200k" -dataset: "ultrachat-200k" -recipe: "tests/llmcompressor/transformers/finetune/test_alternate_recipe.yaml" -num_train_epochs: 0.05 -concat_txt: False diff --git a/tests/llmcompressor/transformers/finetune/finetune_tokenizer/config1.yaml b/tests/llmcompressor/transformers/finetune/finetune_tokenizer/config1.yaml deleted file mode 100644 index 2b5999c3dc..0000000000 --- a/tests/llmcompressor/transformers/finetune/finetune_tokenizer/config1.yaml +++ /dev/null @@ -1,5 +0,0 @@ -cadence: "nightly" -test_type: "regression" -model: "TinyLlama/TinyLlama-1.1B-Chat-v1.0" -dataset_config_name: wikitext-2-raw-v1 -dataset: wikitext \ No newline at end of file diff --git a/tests/llmcompressor/transformers/finetune/test_alternate_recipe.yaml b/tests/llmcompressor/transformers/finetune/test_alternate_recipe.yaml deleted file mode 100644 index 96283cbfaf..0000000000 --- a/tests/llmcompressor/transformers/finetune/test_alternate_recipe.yaml +++ /dev/null @@ -1,22 +0,0 @@ -test_oneshot_stage: - obcq_modifiers: - SparseGPTModifier: - sparsity: 0.7 - block_size: 128 - dampening_frac: 0.01 - mask_structure: "0:0" - targets: ["Linear"] - ignore: ["re:.*lm_head"] -test_train_stage: - pruning_modifiers: - 
ConstantPruningModifier: - targets: [ - "re:.*self_attn.q_proj", - "re:.*self_attn.k_proj", - "re:.*self_attn.v_proj", - "re:.*self_attn.o_proj", - "re:.*mlp.down_proj", - "re:.*mlp.gate_proj", - "re:.*mlp.up_proj" - ] - start: 0 \ No newline at end of file diff --git a/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py b/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py deleted file mode 100644 index 85a0935ff9..0000000000 --- a/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py +++ /dev/null @@ -1,137 +0,0 @@ -import csv -import json -import os -import tempfile -from io import StringIO -from pathlib import Path - -import pytest -import torch -from transformers import AutoModelForCausalLM - -from llmcompressor import train -from tests.testing_utils import parse_params, requires_gpu - -CONFIGS_DIRECTORY = "tests/llmcompressor/transformers/finetune/finetune_custom" -GPU_CONFIGS_DIRECTORY = "tests/llmcompressor/transformers/finetune/finetune_custom/gpu" - - -def create_mock_file(extension, content, path, filename): - os.makedirs(path, exist_ok=True) - - if extension == "json": - mock_data = {"text": content} - mock_content = json.dumps(mock_data, indent=2) - - else: - fieldnames = ["text"] - mock_data = [{"text": content}] - csv_output = StringIO() - csv_writer = csv.DictWriter(csv_output, fieldnames=fieldnames) - csv_writer.writeheader() - csv_writer.writerows(mock_data) - mock_content = csv_output.getvalue() - - mock_filename = f"{filename}.{extension}" - mock_filepath = os.path.join(path, mock_filename) - - with open(mock_filepath, "w") as mock_file: - mock_file.write(mock_content) - - return mock_filepath # Return the file path - - -def create_mock_custom_dataset_folder_structure(tmp_dir_data, file_extension): - train_path = os.path.join(tmp_dir_data, "train") - test_path = os.path.join(tmp_dir_data, "test") - validate_path = os.path.join(tmp_dir_data, "validate") - - # create tmp mock data files - create_mock_file( - extension=file_extension, - content="text for train data 1", - path=train_path, - filename="data1", - ) - create_mock_file( - extension=file_extension, - content="text for train data 2", - path=train_path, - filename="data2", - ) - create_mock_file( - extension=file_extension, - content="text for test data 1", - path=test_path, - filename="data3", - ) - create_mock_file( - extension=file_extension, - content="text for validate data 1", - path=validate_path, - filename="data4", - ) - return True - - -def _test_finetune_wout_recipe_custom_dataset( - model, file_extension, num_train_epochs, output -): - dataset_path = Path(tempfile.mkdtemp()) - - created_success = create_mock_custom_dataset_folder_structure( - dataset_path, file_extension - ) - assert created_success - - def preprocessing_func(example): - example["text"] = "Review: " + example["text"] - return example - - concatenate_data = False - - train( - model=model, - dataset=file_extension, - output_dir=output, - recipe=None, - num_train_epochs=num_train_epochs, - concatenate_data=concatenate_data, - text_column="text", - dataset_path=dataset_path, - preprocessing_func=preprocessing_func, - precision="bfloat16", - bf16=True, - ) - - -@pytest.mark.integration -@pytest.mark.parametrize("config", parse_params(CONFIGS_DIRECTORY)) -def test_oneshot_then_finetune_small(config, tmp_path): - model = config["model"] - file_extension = config["file_extension"] - num_train_epochs = config["num_train_epochs"] - - output = tmp_path / 
"oneshot_output" - - _test_finetune_wout_recipe_custom_dataset( - model, file_extension, num_train_epochs, output - ) - - -@requires_gpu -@pytest.mark.integration -@pytest.mark.parametrize("config", parse_params(GPU_CONFIGS_DIRECTORY)) -def test_oneshot_then_finetune_gpu(config, tmp_path): - model = config["model"] - file_extension = config["file_extension"] - num_train_epochs = config["num_train_epochs"] - output = tmp_path / "oneshot_output" - - device = "cuda:0" - model = AutoModelForCausalLM.from_pretrained( - model, device_map=device, torch_dtype=torch.bfloat16 - ) - _test_finetune_wout_recipe_custom_dataset( - model, file_extension, num_train_epochs, output - ) diff --git a/tests/llmcompressor/transformers/finetune/test_finetune_recipe.yaml b/tests/llmcompressor/transformers/finetune/test_finetune_recipe.yaml deleted file mode 100644 index a0eb314988..0000000000 --- a/tests/llmcompressor/transformers/finetune/test_finetune_recipe.yaml +++ /dev/null @@ -1,19 +0,0 @@ -test_stage: - pruning_modifiers: - ConstantPruningModifier: - targets: [ - "re:.*self_attn.q_proj", - "re:.*self_attn.k_proj", - "re:.*self_attn.v_proj", - "re:.*self_attn.o_proj", - "re:.*mlp.gate_proj", - "re:.*mlp.up_proj" - ] - start: 0 - distillation_modifiers: - OutputDistillationModifier: - targets: ["re:model.layers.\\d+$"] - comparison: "square_head" - start: 0 - orig_scale: 1.0 - distill_scale: 1.0 \ No newline at end of file diff --git a/tests/llmcompressor/transformers/finetune/test_finetune_without_recipe.py b/tests/llmcompressor/transformers/finetune/test_finetune_without_recipe.py deleted file mode 100644 index e9901eb9fc..0000000000 --- a/tests/llmcompressor/transformers/finetune/test_finetune_without_recipe.py +++ /dev/null @@ -1,31 +0,0 @@ -import pytest - -from llmcompressor import train -from tests.testing_utils import parse_params, requires_gpu - -CONFIGS_DIRECTORY = "tests/llmcompressor/transformers/finetune/finetune_generic" - - -@pytest.mark.integration -@requires_gpu -@pytest.mark.parametrize("config", parse_params(CONFIGS_DIRECTORY)) -def test_finetune_without_recipe(config, tmp_path): - model = config["model"] - dataset = config["dataset"] - output = tmp_path / "finetune_output" - - recipe_str = None - - concatenate_data = False - max_steps = 50 - splits = "train" - - train( - model=model, - dataset=dataset, - output_dir=output, - recipe=recipe_str, - max_steps=max_steps, - concatenate_data=concatenate_data, - splits=splits, - ) diff --git a/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py b/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py deleted file mode 100644 index 32c8310332..0000000000 --- a/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py +++ /dev/null @@ -1,122 +0,0 @@ -import os - -import pytest -import torch -from compressed_tensors.compressors import ModelCompressor -from transformers import AutoConfig, AutoModelForCausalLM - -from llmcompressor import oneshot, train -from llmcompressor.transformers.compression.compressed_tensors_utils import ( - get_model_compressor, -) -from tests.testing_utils import parse_params, requires_gpu - -CONFIGS_DIRECTORY = "tests/llmcompressor/transformers/finetune/finetune_oneshot_configs" -GPU_CONFIGS_DIRECTORY = ( - "tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/gpu" -) - - -def _test_oneshot_and_finetune( - model, dataset, recipe, dataset_config_name, concat_txt, output, num_train_epochs -): - splits = {"train": "train[:5%]", "calibration": "train[5%:10%]"} - if 
dataset == "ultrachat-200k": - splits = {"train": "train_gen[:5%]", "calibration": "train_gen[5%:10%]"} - - oneshot_args = dict( - dataset=dataset, - splits=splits, - recipe=recipe, - num_calibration_samples=64, - dataset_config_name=dataset_config_name, - concatenate_data=concat_txt, - output_dir=output, - ) - - oneshot_model = oneshot( - model=model, - **oneshot_args, - stage="test_oneshot_stage", - ) - - compressor = get_model_compressor(model=oneshot_model, save_compressed=True) - if compressor is not None: - compressor.decompress_model(oneshot_model) - - train_args = dict( - num_train_epochs=num_train_epochs, - precision="bfloat16", - bf16=True, - ) - train( - model=oneshot_model, - **oneshot_args, - **train_args, - stage="test_train_stage", - ) - - config_sparse_applied = ModelCompressor.parse_sparsity_config( - AutoConfig.from_pretrained( - os.path.join(output, "test_oneshot_stage") - ).quantization_config - ) - config_finetune_applied = ModelCompressor.parse_sparsity_config( - AutoConfig.from_pretrained( - os.path.join(output, "test_train_stage") - ).quantization_config - ) - # model is first sparsified, then finetuned, both should have the same sparsity - assert config_sparse_applied["global_sparsity"] == pytest.approx( - config_finetune_applied["global_sparsity"], abs=1e-5 - ) - - -@pytest.mark.integration -@pytest.mark.parametrize("config", parse_params(CONFIGS_DIRECTORY)) -def test_oneshot_and_finetune_small(config, tmp_path): - model = config["model"] - dataset = config["dataset"] - recipe = config["recipe"] - dataset_config_name = config.get("dataset_config_name") - num_train_epochs = config["num_train_epochs"] - concat_txt = config["concat_txt"] - output = tmp_path / "finetune_output" - - _test_oneshot_and_finetune( - model, - dataset, - recipe, - dataset_config_name, - concat_txt, - output, - num_train_epochs, - ) - - -@requires_gpu -@pytest.mark.integration -@pytest.mark.parametrize("config", parse_params(GPU_CONFIGS_DIRECTORY)) -def test_oneshot_and_finetune_gpu(config, tmp_path): - model = config["model"] - dataset = config["dataset"] - recipe = config["recipe"] - dataset_config_name = config.get("dataset_config_name") - num_train_epochs = config["num_train_epochs"] - concat_txt = config["concat_txt"] - output = tmp_path / "finetune_output" - - device = "cuda:0" - model = AutoModelForCausalLM.from_pretrained( - model, device_map=device, torch_dtype=torch.bfloat16 - ) - - _test_oneshot_and_finetune( - model, - dataset, - recipe, - dataset_config_name, - concat_txt, - output, - num_train_epochs, - ) diff --git a/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune_with_tokenizer.py b/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune_with_tokenizer.py deleted file mode 100644 index 5aa8ca2743..0000000000 --- a/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune_with_tokenizer.py +++ /dev/null @@ -1,62 +0,0 @@ -import pytest -from datasets import load_dataset -from transformers import AutoModelForCausalLM, AutoTokenizer - -from llmcompressor import oneshot, train -from tests.testing_utils import parse_params, requires_gpu - -CONFIGS_DIRECTORY = "tests/llmcompressor/transformers/finetune/finetune_tokenizer" - - -@pytest.mark.integration -@requires_gpu -@pytest.mark.parametrize("config", parse_params(CONFIGS_DIRECTORY)) -def test_oneshot_and_finetune_with_tokenizer(config, tmp_path): - model = config["model"] - dataset = config["dataset"] - dataset_config_name = config["dataset_config_name"] - - output = tmp_path / 
"sparsity_finetune_output" - # finetune workflows in general seem to have trouble with multi-gpus - # use just one atm - - recipe_str = "tests/llmcompressor/transformers/finetune/test_alternate_recipe.yaml" - tokenizer = AutoTokenizer.from_pretrained( - model, - ) - model_loaded = AutoModelForCausalLM.from_pretrained(model, torch_dtype="auto") - - dataset_loaded = load_dataset(dataset, dataset_config_name, split="train[:50%]") - - concatenate_data = True - run_stages = True - max_steps = 50 - splits = {"train": "train[:50%]", "calibration": "train[50%:60%]"} - - model_and_data_kwargs = dict( - dataset=dataset_loaded, - dataset_config_name=dataset_config_name, - recipe=recipe_str, - concatenate_data=concatenate_data, - splits=splits, - tokenizer=tokenizer, - ) - - oneshot_model = oneshot( - model=model_loaded, - **model_and_data_kwargs, - stage="test_oneshot_stage", - ) - - finetune_model = train( - run_stages=run_stages, - model=oneshot_model, - max_steps=max_steps, - stage="test_train_stage", - **model_and_data_kwargs, - output_dir=output, - ) - - input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to("cuda") - output = finetune_model.generate(input_ids, max_new_tokens=20) - print(tokenizer.decode(output[0])) diff --git a/tests/llmcompressor/transformers/finetune/test_oneshot_then_finetune.py b/tests/llmcompressor/transformers/finetune/test_oneshot_then_finetune.py deleted file mode 100644 index b309c07672..0000000000 --- a/tests/llmcompressor/transformers/finetune/test_oneshot_then_finetune.py +++ /dev/null @@ -1,160 +0,0 @@ -import pytest -from transformers import AutoModelForCausalLM -from transformers.utils.quantization_config import CompressedTensorsConfig - -from llmcompressor import oneshot, train -from llmcompressor.core import create_session -from llmcompressor.modifiers.quantization import QuantizationModifier - - -@pytest.mark.unit -def test_oneshot_sparsification_then_finetune(tmp_path): - output = tmp_path / "finetune_output" - quantization_config = CompressedTensorsConfig(run_compressed=False) - - recipe_str = "tests/llmcompressor/transformers/sparsegpt/recipes/test_tiny2.yaml" - model = AutoModelForCausalLM.from_pretrained( - "nm-testing/tinysmokellama-3.2", torch_dtype="auto" - ) - dataset = "open_platypus" - concatenate_data = False - num_calibration_samples = 64 - output_dir = output / "oneshot_out" - splits = {"calibration": "train[:5%]"} - - with create_session(): - oneshot( - model=model, - dataset=dataset, - output_dir=output_dir, - num_calibration_samples=num_calibration_samples, - recipe=recipe_str, - concatenate_data=concatenate_data, - splits=splits, - ) - - recipe_str = "tests/llmcompressor/transformers/finetune/test_finetune_recipe.yaml" - - # Explictly decompress the model for training using quantization_config - model = AutoModelForCausalLM.from_pretrained( - output / "oneshot_out", - torch_dtype="auto", - quantization_config=quantization_config, - ) - distill_teacher = AutoModelForCausalLM.from_pretrained( - "nm-testing/tinysmokellama-3.2", torch_dtype="auto" - ) - dataset = "open_platypus" - concatenate_data = False - output_dir = output / "finetune_out" - splits = "train[5%:7%]" - - recipe = """ - test_stage: - pruning_modifiers: - ConstantPruningModifier: - targets: ['re:.*q_proj.weight', 're:.*k_proj.weight', - 're:.*v_proj.weight', 're:.*o_proj.weight', - 're:.*gate_proj.weight', 're:.*up_proj.weight', - 're:.*down_proj.weight'] - start: 0 - """ - - with create_session(): - train( - model=model, - distill_teacher=distill_teacher, - 
dataset=dataset, - output_dir=output_dir, - num_train_epochs=0.05, - concatenate_data=concatenate_data, - splits=splits, - recipe=recipe, - ) - - # test reloading checkpoint and final model - # verify checkpoint reloading and can carry out finetune - # with the saved model - # Explictly decompress the model for training using quantization_config - model = AutoModelForCausalLM.from_pretrained( - output_dir, - torch_dtype="auto", - quantization_config=quantization_config, - ) - - with create_session(): - train( - model=model, - distill_teacher=distill_teacher, - dataset=dataset, - output_dir=output_dir, - num_train_epochs=0.05, - concatenate_data=concatenate_data, - splits=splits, - recipe=recipe, - ) - - -def test_oneshot_quantization_then_finetune(tmp_path): - recipe = QuantizationModifier( - targets="Linear", scheme="FP8_DYNAMIC", ignore=["lm_head"] - ) - - model = AutoModelForCausalLM.from_pretrained( - "TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype="auto" - ) - dataset = "open_platypus" - concatenate_data = False - num_calibration_samples = 64 - output_dir = tmp_path / "oneshot_out" - splits = {"calibration": "train[:5%]"} - - with create_session(): - oneshot( - model=model, - dataset=dataset, - output_dir=output_dir, - num_calibration_samples=num_calibration_samples, - recipe=recipe, - concatenate_data=concatenate_data, - splits=splits, - ) - - quantization_config = CompressedTensorsConfig(run_compressed=False) - model = AutoModelForCausalLM.from_pretrained( - output_dir, - torch_dtype="auto", - quantization_config=quantization_config, - ) - - dataset = "open_platypus" - concatenate_data = False - output_dir = tmp_path / "finetune_out" - splits = {"calibration": "train[:5%]", "train": "train[5%:7%]"} - - with create_session(): - train( - model=model, - dataset=dataset, - output_dir=output_dir, - concatenate_data=concatenate_data, - splits=splits, - num_train_epochs=0.05, - ) - - # test reloading checkpoint and final model - model = AutoModelForCausalLM.from_pretrained( - output_dir, - torch_dtype="auto", - quantization_config=quantization_config, - ) - - with create_session(): - train( - model=model, - dataset=dataset, - output_dir=output_dir, - concatenate_data=concatenate_data, - splits=splits, - num_train_epochs=0.05, - ) diff --git a/tests/llmcompressor/transformers/finetune/test_quantization.yaml b/tests/llmcompressor/transformers/finetune/test_quantization.yaml deleted file mode 100644 index 5651232707..0000000000 --- a/tests/llmcompressor/transformers/finetune/test_quantization.yaml +++ /dev/null @@ -1,31 +0,0 @@ -test_stage: - quant_modifiers: - QuantizationModifier: - ignore: - - model.layers.0.mlp.down_proj - - model.layers.1.mlp.down_proj - - model.layers.2.mlp.down_proj - - model.layers.3.mlp.down_proj - - model.layers.4.mlp.down_proj - - model.layers.5.mlp.down_proj - config_groups: - group_0: - weights: - num_bits: 8 - type: "int" - symmetric: False - strategy: "tensor" - input_activations: null - output_activations: null - targets: ["Linear"] - pruning_modifiers: - ConstantPruningModifier: - targets: [ - "re:.*self_attn.q_proj", - "re:.*self_attn.k_proj", - "re:.*self_attn.v_proj", - "re:.*self_attn.o_proj", - "re:.*mlp.gate_proj", - "re:.*mlp.up_proj" - ] - start: 0 diff --git a/tests/llmcompressor/transformers/finetune/test_safetensors.py b/tests/llmcompressor/transformers/finetune/test_safetensors.py deleted file mode 100644 index 7036516ab8..0000000000 --- a/tests/llmcompressor/transformers/finetune/test_safetensors.py +++ /dev/null @@ -1,42 +0,0 @@ -import os - 
-import pytest - -from llmcompressor import train -from tests.testing_utils import parse_params, requires_gpu - -CONFIGS_DIRECTORY = "tests/llmcompressor/transformers/finetune/finetune_generic" - - -@pytest.mark.integration -@requires_gpu -@pytest.mark.parametrize("config", parse_params(CONFIGS_DIRECTORY)) -def test_safetensors(config, tmp_path): - model = config["model"] - dataset = config["dataset"] - output = tmp_path / "finetune_output" - - output_dir = output / "output1" - max_steps = 10 - splits = {"train": "train[:10%]"} - - train( - model=model, - dataset=dataset, - output_dir=output_dir, - max_steps=max_steps, - splits=splits, - ) - - assert os.path.exists(output_dir / "model.safetensors") - assert not os.path.exists(output_dir / "pytorch_model.bin") - - # test we can also load - new_output_dir = output / "output2" - train( - model=output_dir, - dataset=dataset, - output_dir=new_output_dir, - max_steps=max_steps, - splits=splits, - ) diff --git a/tests/llmcompressor/transformers/finetune/test_session_mixin.py b/tests/llmcompressor/transformers/finetune/test_session_mixin.py deleted file mode 100644 index 81a83ec565..0000000000 --- a/tests/llmcompressor/transformers/finetune/test_session_mixin.py +++ /dev/null @@ -1,65 +0,0 @@ -from typing import Any, Dict, Optional, Union - -import pytest -from torch.nn import Module -from transformers import AutoModelForCausalLM, Trainer - -from llmcompressor.core import active_session -from llmcompressor.transformers.finetune.session_mixin import SessionManagerMixIn - - -class MixInTest(SessionManagerMixIn, Trainer): - def __init__( - self, - model: Module, - recipe: Optional[str], - recipe_args: Optional[Union[Dict[str, Any], str]] = None, - model_args: Optional[Union[Dict[str, Any], str]] = None, - dataset_args: Optional[Union[Dict[str, Any], str]] = None, - teacher: Optional[Union[Module, str]] = None, - **kwargs, - ): - super().__init__( - model=model, - recipe=recipe, - recipe_args=recipe_args, - model_args=model_args, - dataset_args=dataset_args, - teacher=teacher, - **kwargs, - ) - - -@pytest.mark.unit -def test_mixin_init(): - model_state_path = "nm-testing/tinysmokellama-3.2" - model = AutoModelForCausalLM.from_pretrained(model_state_path) - recipe = "tests/llmcompressor/transformers/finetune/test_quantization.yaml" - - session_mixin = MixInTest(model=model, recipe=recipe) - assert isinstance(session_mixin, SessionManagerMixIn) - assert isinstance(session_mixin, Trainer) - assert session_mixin.recipe == recipe - assert session_mixin.model == model - - -@pytest.fixture -def mixin_trainer(): - model_state_path = "nm-testing/tinysmokellama-3.2" - model = AutoModelForCausalLM.from_pretrained(model_state_path) - recipe = "tests/llmcompressor/transformers/finetune/test_quantization.yaml" - train_dataset = "open-platypus" - - return MixInTest( - model=model, - recipe=recipe, - train_dataset=train_dataset, - ) - - -@pytest.mark.unit -def test_mixin_session_init(mixin_trainer): - mixin_trainer.initialize_session(epoch=0.0, checkpoint=None) - session = active_session() - - assert session.lifecycle.initialized_ From 912060906d24b1fa826f000bba6392445078a4b0 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 12:32:51 -0500 Subject: [PATCH 02/23] fix import --- src/llmcompressor/args/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/llmcompressor/args/__init__.py b/src/llmcompressor/args/__init__.py index 22079b5139..16b605b992 100644 --- a/src/llmcompressor/args/__init__.py +++ b/src/llmcompressor/args/__init__.py @@ 
-10,5 +10,4 @@ from .dataset_arguments import DatasetArguments from .model_arguments import ModelArguments from .recipe_arguments import RecipeArguments -from .training_arguments import TrainingArguments from .utils import parse_args From b07287f585c6e42b4ae6ac8792b9e9cd0457c96d Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 12:47:02 -0500 Subject: [PATCH 03/23] fix arg parsing --- src/llmcompressor/args/README.md | 8 ++----- src/llmcompressor/args/utils.py | 27 ++++++------------------ src/llmcompressor/entrypoints/oneshot.py | 2 +- 3 files changed, 9 insertions(+), 28 deletions(-) diff --git a/src/llmcompressor/args/README.md b/src/llmcompressor/args/README.md index 4691a615cc..d5ced3d5dd 100644 --- a/src/llmcompressor/args/README.md +++ b/src/llmcompressor/args/README.md @@ -1,4 +1,4 @@ -# Input arguments for `oneshot`, `train`, `eval` entrypoints +# Input arguments for `oneshot` and `eval` entrypoints Parsers in `llm-compressor` define the input arguments required for various entry points, including `oneshot`, `train`, and `eval`. @@ -38,8 +38,4 @@ Handles model loading and saving. For example, `ModelArguments.model` can be a H Manages data loading and preprocessing. The dataset argument can specify a Hugging Face dataset stub or a local dataset compatible with [`load_dataset`](https://github.com/huggingface/datasets/blob/3a4e74a9ace62ecd5c9cde7dcb6bcabd65cc7857/src/datasets/load.py#L1905). The preprocessing_func is a callable function that applies custom logic, such as formatting the data using a chat template. ## RecipeArguments -Defines the model recipe. A `recipe` consists of user-defined instructions for optimizing the model. Examples of recipes can be found in the `/examples` directory. - -## TrainingArguments -Specifies training parameters based on Hugging Face's [TrainingArguments class](https://github.com/huggingface/transformers/blob/main/src/transformers/training_args.py). These parameters include settings like learning rate (`learning_rate`), and the optimizer to use (`optim`). - +Defines the model recipe. A `recipe` consists of user-defined instructions for optimizing the model. Examples of recipes can be found in the `/examples` directory. \ No newline at end of file diff --git a/src/llmcompressor/args/utils.py b/src/llmcompressor/args/utils.py index fd420109f9..29d42fed05 100644 --- a/src/llmcompressor/args/utils.py +++ b/src/llmcompressor/args/utils.py @@ -14,18 +14,16 @@ DatasetArguments, ModelArguments, RecipeArguments, - TrainingArguments, ) from llmcompressor.transformers.utils.helpers import resolve_processor_from_model_args def parse_args( - include_training_args: bool = False, **kwargs + **kwargs, ) -> tuple[ ModelArguments, DatasetArguments, - RecipeArguments, - TrainingArguments | None, + RecipeArguments | None, str | None, ]: """ @@ -38,31 +36,18 @@ src/llmcompressor/args/dataset_args.py * RecipeArguments in src/llmcompressor/args/recipe_args.py - * TrainingArguments in - src/llmcompressor/args/training_args.py - ModelArguments, DatasetArguments, and RecipeArguments are used for both - `oneshot` and `train`. TrainingArguments is only used for `train`. + ModelArguments, DatasetArguments, and RecipeArguments are used for + `oneshot`. 
""" - - # pop output_dir, used as an attr in TrainingArguments, where oneshot is not used output_dir = kwargs.pop("output_dir", None) parser_args = (ModelArguments, DatasetArguments, RecipeArguments) - if include_training_args: - parser_args += (TrainingArguments,) - parser = HfArgumentParser(parser_args) parsed_args = parser.parse_dict(kwargs) - training_args = None - if include_training_args: - model_args, dataset_args, recipe_args, training_args = parsed_args - if output_dir is not None: - training_args.output_dir = output_dir - else: - model_args, dataset_args, recipe_args = parsed_args + model_args, dataset_args, recipe_args = parsed_args if recipe_args.recipe_args is not None: if not isinstance(recipe_args.recipe_args, dict): @@ -83,4 +68,4 @@ def parse_args( # silently assign tokenizer to processor resolve_processor_from_model_args(model_args) - return model_args, dataset_args, recipe_args, training_args, output_dir + return model_args, dataset_args, recipe_args, output_dir diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py index 66c320d1b3..36d5706a4d 100644 --- a/src/llmcompressor/entrypoints/oneshot.py +++ b/src/llmcompressor/entrypoints/oneshot.py @@ -140,7 +140,7 @@ def __init__( level="DEBUG", ) - model_args, dataset_args, recipe_args, _, output_dir = parse_args(**kwargs) + model_args, dataset_args, recipe_args, output_dir = parse_args(**kwargs) self.model_args = model_args self.dataset_args = dataset_args From dd5c58e62d425a6f66163e8c4e68d3a8d2f6e29a Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 12:49:38 -0500 Subject: [PATCH 04/23] fix import --- src/llmcompressor/transformers/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/llmcompressor/transformers/__init__.py b/src/llmcompressor/transformers/__init__.py index 2e018413ac..401ebc8544 100644 --- a/src/llmcompressor/transformers/__init__.py +++ b/src/llmcompressor/transformers/__init__.py @@ -6,5 +6,4 @@ # (import order matters for circular import avoidance) from .utils import * -from .finetune import * from .data import TextGenerationDataset From 841723c91daaed8eb1a598d6960f16290670bd80 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 13:06:24 -0500 Subject: [PATCH 05/23] update --- src/llmcompressor/datasets/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmcompressor/datasets/utils.py b/src/llmcompressor/datasets/utils.py index 8aef67fcd4..2b80b1ed9a 100644 --- a/src/llmcompressor/datasets/utils.py +++ b/src/llmcompressor/datasets/utils.py @@ -18,7 +18,7 @@ from transformers.data import default_data_collator from llmcompressor.args import DatasetArguments -from llmcompressor.transformers.finetune.data import TextGenerationDataset +from llmcompressor.transformers.data import TextGenerationDataset from llmcompressor.typing import Processor From a68de4b7cb031a61fa1ec66eb6fdf59a5c6844b3 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 13:08:38 -0500 Subject: [PATCH 06/23] more updates --- src/llmcompressor/transformers/data/base.py | 2 +- src/llmcompressor/transformers/utils/preprocessing_functions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llmcompressor/transformers/data/base.py b/src/llmcompressor/transformers/data/base.py index a6300fe404..968c2555a8 100644 --- a/src/llmcompressor/transformers/data/base.py +++ b/src/llmcompressor/transformers/data/base.py @@ -18,7 +18,7 @@ from loguru import logger from llmcompressor.args import 
DatasetArguments -from llmcompressor.transformers.finetune.data.data_helpers import ( +from llmcompressor.transformers.data.data_helpers import ( LABELS_MASK_VALUE, get_custom_datasets_from_path, get_raw_dataset, diff --git a/src/llmcompressor/transformers/utils/preprocessing_functions.py b/src/llmcompressor/transformers/utils/preprocessing_functions.py index e6749d6a51..16466b6419 100644 --- a/src/llmcompressor/transformers/utils/preprocessing_functions.py +++ b/src/llmcompressor/transformers/utils/preprocessing_functions.py @@ -12,7 +12,7 @@ from compressed_tensors.registry import RegistryMixin if TYPE_CHECKING: - from llmcompressor.transformers.finetune.data.base import TextGenerationDataset + from llmcompressor.transformers.data.base import TextGenerationDataset class PreprocessingFunctionRegistry(RegistryMixin): From caeacc1526d37e75a7850f38b42ad95a97ebcb41 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 13:11:55 -0500 Subject: [PATCH 07/23] update --- src/llmcompressor/transformers/data/c4.py | 2 +- src/llmcompressor/transformers/data/cnn_dailymail.py | 2 +- src/llmcompressor/transformers/data/custom.py | 2 +- src/llmcompressor/transformers/data/evolcodealpaca.py | 2 +- src/llmcompressor/transformers/data/flickr_30k.py | 2 +- src/llmcompressor/transformers/data/gsm8k.py | 2 +- src/llmcompressor/transformers/data/open_platypus.py | 2 +- src/llmcompressor/transformers/data/peoples_speech.py | 4 ++-- src/llmcompressor/transformers/data/ultrachat_200k.py | 2 +- src/llmcompressor/transformers/data/wikitext.py | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/llmcompressor/transformers/data/c4.py b/src/llmcompressor/transformers/data/c4.py index e4fe6431cd..52627e5985 100644 --- a/src/llmcompressor/transformers/data/c4.py +++ b/src/llmcompressor/transformers/data/c4.py @@ -1,7 +1,7 @@ from copy import deepcopy from typing import TYPE_CHECKING -from llmcompressor.transformers.finetune.data import TextGenerationDataset +from llmcompressor.transformers.data import TextGenerationDataset from llmcompressor.typing import Processor if TYPE_CHECKING: diff --git a/src/llmcompressor/transformers/data/cnn_dailymail.py b/src/llmcompressor/transformers/data/cnn_dailymail.py index fcc67482f9..d205d44507 100644 --- a/src/llmcompressor/transformers/data/cnn_dailymail.py +++ b/src/llmcompressor/transformers/data/cnn_dailymail.py @@ -1,7 +1,7 @@ from copy import deepcopy from typing import TYPE_CHECKING -from llmcompressor.transformers.finetune.data import TextGenerationDataset +from llmcompressor.transformers.data import TextGenerationDataset from llmcompressor.typing import Processor if TYPE_CHECKING: diff --git a/src/llmcompressor/transformers/data/custom.py b/src/llmcompressor/transformers/data/custom.py index 72b6ac6bb4..80a0478964 100644 --- a/src/llmcompressor/transformers/data/custom.py +++ b/src/llmcompressor/transformers/data/custom.py @@ -7,7 +7,7 @@ user-provided datasets. 
""" -from llmcompressor.transformers.finetune.data import TextGenerationDataset +from llmcompressor.transformers.data import TextGenerationDataset @TextGenerationDataset.register(name="custom", alias=["json", "csv"]) diff --git a/src/llmcompressor/transformers/data/evolcodealpaca.py b/src/llmcompressor/transformers/data/evolcodealpaca.py index 8a7892c131..014545614c 100644 --- a/src/llmcompressor/transformers/data/evolcodealpaca.py +++ b/src/llmcompressor/transformers/data/evolcodealpaca.py @@ -1,7 +1,7 @@ from copy import deepcopy from typing import TYPE_CHECKING -from llmcompressor.transformers.finetune.data import TextGenerationDataset +from llmcompressor.transformers.data import TextGenerationDataset from llmcompressor.typing import Processor if TYPE_CHECKING: diff --git a/src/llmcompressor/transformers/data/flickr_30k.py b/src/llmcompressor/transformers/data/flickr_30k.py index 8ada07a0e2..e257f17e79 100644 --- a/src/llmcompressor/transformers/data/flickr_30k.py +++ b/src/llmcompressor/transformers/data/flickr_30k.py @@ -3,7 +3,7 @@ from loguru import logger -from llmcompressor.transformers.finetune.data import TextGenerationDataset +from llmcompressor.transformers.data import TextGenerationDataset from llmcompressor.typing import Processor if TYPE_CHECKING: diff --git a/src/llmcompressor/transformers/data/gsm8k.py b/src/llmcompressor/transformers/data/gsm8k.py index ae1318571e..55396d1df5 100644 --- a/src/llmcompressor/transformers/data/gsm8k.py +++ b/src/llmcompressor/transformers/data/gsm8k.py @@ -1,7 +1,7 @@ from copy import deepcopy from typing import TYPE_CHECKING -from llmcompressor.transformers.finetune.data import TextGenerationDataset +from llmcompressor.transformers.data import TextGenerationDataset from llmcompressor.typing import Processor if TYPE_CHECKING: diff --git a/src/llmcompressor/transformers/data/open_platypus.py b/src/llmcompressor/transformers/data/open_platypus.py index 81413e7852..fcf08bbaac 100644 --- a/src/llmcompressor/transformers/data/open_platypus.py +++ b/src/llmcompressor/transformers/data/open_platypus.py @@ -1,7 +1,7 @@ from copy import deepcopy from typing import TYPE_CHECKING -from llmcompressor.transformers.finetune.data import TextGenerationDataset +from llmcompressor.transformers.data import TextGenerationDataset from llmcompressor.typing import Processor if TYPE_CHECKING: diff --git a/src/llmcompressor/transformers/data/peoples_speech.py b/src/llmcompressor/transformers/data/peoples_speech.py index 31d0668316..9e0b9e544e 100644 --- a/src/llmcompressor/transformers/data/peoples_speech.py +++ b/src/llmcompressor/transformers/data/peoples_speech.py @@ -4,8 +4,8 @@ from datasets.formatting.formatting import LazyRow from loguru import logger -from llmcompressor.transformers.finetune.data import TextGenerationDataset -from llmcompressor.transformers.finetune.data.base import get_columns +from llmcompressor.transformers.data import TextGenerationDataset +from llmcompressor.transformers.data.base import get_columns from llmcompressor.typing import DatasetType, Processor if TYPE_CHECKING: diff --git a/src/llmcompressor/transformers/data/ultrachat_200k.py b/src/llmcompressor/transformers/data/ultrachat_200k.py index 296eb3db56..308722fbfb 100644 --- a/src/llmcompressor/transformers/data/ultrachat_200k.py +++ b/src/llmcompressor/transformers/data/ultrachat_200k.py @@ -3,7 +3,7 @@ from loguru import logger -from llmcompressor.transformers.finetune.data import TextGenerationDataset +from llmcompressor.transformers.data import TextGenerationDataset from 
llmcompressor.typing import Processor if TYPE_CHECKING: diff --git a/src/llmcompressor/transformers/data/wikitext.py b/src/llmcompressor/transformers/data/wikitext.py index 73142d671c..1bce90cc20 100644 --- a/src/llmcompressor/transformers/data/wikitext.py +++ b/src/llmcompressor/transformers/data/wikitext.py @@ -1,7 +1,7 @@ from copy import deepcopy from typing import TYPE_CHECKING -from llmcompressor.transformers.finetune.data import TextGenerationDataset +from llmcompressor.transformers.data import TextGenerationDataset from llmcompressor.typing import Processor if TYPE_CHECKING: From 9246d0507f9a2c753f5f5b01f0404ad187429b43 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 13:20:04 -0500 Subject: [PATCH 08/23] remove --- src/llmcompressor/args/model_arguments.py | 6 --- src/llmcompressor/entrypoints/utils.py | 51 +------------------ .../transformers/utils/helpers.py | 40 +-------------- 3 files changed, 3 insertions(+), 94 deletions(-) diff --git a/src/llmcompressor/args/model_arguments.py b/src/llmcompressor/args/model_arguments.py index d927bd62e7..279287524f 100644 --- a/src/llmcompressor/args/model_arguments.py +++ b/src/llmcompressor/args/model_arguments.py @@ -26,12 +26,6 @@ class ModelArguments: ) }, ) - distill_teacher: str | None = field( - default=None, - metadata={ - "help": "Teacher model (a trained text generation model)", - }, - ) config_name: str | None = field( default=None, metadata={ diff --git a/src/llmcompressor/entrypoints/utils.py b/src/llmcompressor/entrypoints/utils.py index 0b482727ee..0e6564833a 100644 --- a/src/llmcompressor/entrypoints/utils.py +++ b/src/llmcompressor/entrypoints/utils.py @@ -1,8 +1,8 @@ """ Utility functions for entrypoint pre and post-processing operations. -Provides common utility functions used by training and one-shot -compression entrypoints. Includes model loading, configuration setup, +Provides common utility functions used by the one-shot +entrypoint. Includes model loading, configuration setup, preprocessing steps, and post-processing operations for compression workflows. """ @@ -19,7 +19,6 @@ AutoModelForCausalLM, AutoProcessor, PreTrainedModel, - set_seed, ) from transformers.utils.quantization_config import CompressedTensorsConfig @@ -27,7 +26,6 @@ DatasetArguments, ModelArguments, RecipeArguments, - TrainingArguments, ) from llmcompressor.core import reset_session from llmcompressor.pytorch.model_load.helpers import parse_dtype @@ -36,7 +34,6 @@ untie_word_embeddings, ) from llmcompressor.transformers.utils.helpers import ( - detect_last_checkpoint, is_model_ct_quantized_from_path, ) from llmcompressor.typing import Processor @@ -109,8 +106,6 @@ def post_process( Saves the model and tokenizer/processor to the output directory if model_args, output_dir is provided. - Save is skipped for stage runs for `train` - saves using the trainer.save_model() - If the `output_dir` is not the default directory, the method resets lifecycle actions. The model is saved in a compressed format if specified in `model_args`. Additionally, the tokenizer or processor, if available, is also saved. 
@@ -150,7 +145,6 @@ def post_process( def initialize_model_from_path( model_args: ModelArguments, - training_args: TrainingArguments | None = None, ) -> tuple[PreTrainedModel, PreTrainedModel | None]: # Load pretrained model # The .from_pretrained methods guarantee that only one local process can @@ -167,47 +161,6 @@ def initialize_model_from_path( last_checkpoint = None teacher = None - if training_args is not None: - # Load teacher configuration if applicable - teacher_config = ( - AutoConfig.from_pretrained( - model_args.distill_teacher, - use_auth_token=True if model_args.use_auth_token else None, - trust_remote_code=model_args.trust_remote_code_model, - ) - if model_args.distill_teacher - else None - ) - - # Detect last checkpoint - last_checkpoint = detect_last_checkpoint(training_args, model_args=model_args) - - # Set seed before initializing model - set_seed(training_args.seed) - - # Initialize teacher model if teacher path is provided - if model_args.distill_teacher is not None: - teacher_device_map = ( - None - if os.environ.get("ACCELERATE_USE_FSDP", "false") == "true" - else "auto" - ) - teacher_kwargs = { - "config": teacher_config, - "cache_dir": None, - "use_auth_token": True if model_args.use_auth_token else None, - "torch_dtype": parse_dtype(model_args.precision), - "device_map": teacher_device_map, - "trust_remote_code": model_args.trust_remote_code_model, - } - - teacher = AutoModelForCausalLM.from_pretrained( - model_args.distill_teacher, - **teacher_kwargs, - ) - if "sequence_length" in teacher_kwargs: - teacher.seqlen = teacher_kwargs["sequence_length"] - model_path = ( last_checkpoint or model_args.model if hasattr(model_args, "model") diff --git a/src/llmcompressor/transformers/utils/helpers.py b/src/llmcompressor/transformers/utils/helpers.py index 5df8354870..1834c19b00 100644 --- a/src/llmcompressor/transformers/utils/helpers.py +++ b/src/llmcompressor/transformers/utils/helpers.py @@ -16,56 +16,18 @@ ) from loguru import logger from transformers import AutoConfig -from transformers.trainer_utils import get_last_checkpoint if TYPE_CHECKING: - from llmcompressor.args import ModelArguments, TrainingArguments + from llmcompressor.args import ModelArguments __all__ = [ "RECIPE_FILE_NAME", - "detect_last_checkpoint", "is_model_ct_quantized_from_path", ] RECIPE_FILE_NAME = "recipe.yaml" -def detect_last_checkpoint( - training_args: "TrainingArguments", - model_args: Optional["ModelArguments"] = None, -): - last_checkpoint = None - if ( - os.path.isdir(training_args.output_dir) - and training_args.do_train - and not training_args.overwrite_output_dir - ): - last_checkpoint = get_last_checkpoint(training_args.output_dir) - if training_args.run_stages and model_args is not None: - model = ( - model_args.model - if hasattr(model_args, "model") - else model_args.model_name_or_path - ) - if os.path.isdir(model): - last_checkpoint = get_last_checkpoint(model_args.model_name_or_path) - if last_checkpoint is None and (len(os.listdir(training_args.output_dir)) > 0): - raise ValueError( - f"Output directory ({training_args.output_dir}) already " - "exists and is not empty. Use --overwrite_output_dir to overcome." - ) - elif ( - last_checkpoint is not None and training_args.resume_from_checkpoint is None - ): - logger.info( - f"Checkpoint detected, resuming training at {last_checkpoint}. To " - "avoid this behavior, change the `--output_dir` or add " - "`--overwrite_output_dir` to train from scratch." 
- ) - - return last_checkpoint - - def is_model_ct_quantized_from_path(path: str) -> bool: """ Determine if model from path is quantized based From d82ca8e142c71fe6816ae6785a13b79225f784d7 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 13:22:56 -0500 Subject: [PATCH 09/23] fix import --- src/llmcompressor/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmcompressor/__init__.py b/src/llmcompressor/__init__.py index 2e9547a0fc..227052d94e 100644 --- a/src/llmcompressor/__init__.py +++ b/src/llmcompressor/__init__.py @@ -26,4 +26,4 @@ create_session, reset_session, ) -from llmcompressor.entrypoints import Oneshot, oneshot, train, model_free_ptq +from llmcompressor.entrypoints import Oneshot, oneshot, model_free_ptq From 39f3fcaa98f8446703c4118dc4b2337dc745cdb4 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 13:45:28 -0500 Subject: [PATCH 10/23] update --- src/llmcompressor/entrypoints/oneshot.py | 1 - src/llmcompressor/entrypoints/utils.py | 46 +++--------------------- 2 files changed, 5 insertions(+), 42 deletions(-) diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py index 36d5706a4d..c2b29aa97c 100644 --- a/src/llmcompressor/entrypoints/oneshot.py +++ b/src/llmcompressor/entrypoints/oneshot.py @@ -229,7 +229,6 @@ def apply_recipe_modifiers( def oneshot( # Model arguments model: str | PreTrainedModel, - distill_teacher: str | None = None, config_name: str | None = None, tokenizer: str | PreTrainedTokenizerBase | None = None, processor: str | ProcessorMixin | None = None, diff --git a/src/llmcompressor/entrypoints/utils.py b/src/llmcompressor/entrypoints/utils.py index 0e6564833a..3c1b354ce7 100644 --- a/src/llmcompressor/entrypoints/utils.py +++ b/src/llmcompressor/entrypoints/utils.py @@ -7,13 +7,11 @@ workflows. """ -import inspect import os from pathlib import PosixPath from compressed_tensors.utils import remove_dispatch from loguru import logger -from torch.nn import Module from transformers import ( AutoConfig, AutoModelForCausalLM, @@ -58,14 +56,13 @@ def pre_process( # Initialize model if isinstance(model_args.model, (str, PosixPath)): - model, distill_teacher = initialize_model_from_path(model_args) + model = initialize_model_from_path(model_args) if is_fsdp_model(model): raise NotImplementedError( "FSDP models are not supported in the current release but will be " "suported in future releases of LLM Compressor." ) model_args.model = model - model_args.distill_teacher = distill_teacher # Initialize processor if dataset provided if isinstance(model_args.processor, (str, type(None))): @@ -145,7 +142,7 @@ def post_process( def initialize_model_from_path( model_args: ModelArguments, -) -> tuple[PreTrainedModel, PreTrainedModel | None]: +) -> PreTrainedModel: # Load pretrained model # The .from_pretrained methods guarantee that only one local process can # concurrently download model & vocab. 
@@ -159,7 +156,6 @@ ) last_checkpoint = None - teacher = None model_path = ( last_checkpoint or model_args.model if hasattr(model_args, "model") @@ -186,17 +182,13 @@ if "sequence_length" in model_kwargs: model.seqlen = model_kwargs["sequence_length"] - return model, teacher + return model def initialize_processor_from_path( - model_args: ModelArguments, - model: PreTrainedModel, - teacher: PreTrainedModel | None = None, + model_args: ModelArguments, model: PreTrainedModel ) -> Processor: - processor_src = model_args.processor or get_processor_name_from_model( - model, teacher - ) + processor_src = model_args.processor or model.config._name_or_path # The use_fast=True option is not currently supported safely in Transformers # See: https://github.com/huggingface/transformers/pull/34836#issuecomment-2491809727 # noqa: E501 try: @@ -229,31 +221,3 @@ ) return processor - - -def get_processor_name_from_model(student: Module, teacher: Module | None) -> str: - """ - Get a processor/tokenizer source used for both student and teacher, assuming - that they could be shared - - :param student: the student model - :param teacher: the teacher model - :return: the source for the processor/tokenizer shared between teacher and model - """ - if teacher is not None and teacher not in ("disable", "self"): - student_forward_params = list( - inspect.signature(student.forward).parameters.keys() - ) - teacher_forward_params = list( - inspect.signature(teacher.forward).parameters.keys() - ) - diff = [p for p in student_forward_params if p not in teacher_forward_params] - if diff: - raise RuntimeError( - "Teacher tokenizer cannot be used for student " - f"due to missing args: {diff}" - ) - src_model = teacher - else: - src_model = student - return src_model.config._name_or_path From 2761ebaf926f4d2bfe164e5ae38d7bbaa13d3236 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 13:49:11 -0500 Subject: [PATCH 11/23] update --- .../llmcompressor/transformers/compression/test_quantization.py | 2 +- tests/llmcompressor/transformers/data/test_dataset_helpers.py | 2 +- tests/llmcompressor/transformers/data/test_registry.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/llmcompressor/transformers/compression/test_quantization.py b/tests/llmcompressor/transformers/compression/test_quantization.py index 60ed7ed94e..29d5c21c0f 100644 --- a/tests/llmcompressor/transformers/compression/test_quantization.py +++ b/tests/llmcompressor/transformers/compression/test_quantization.py @@ -7,7 +7,7 @@ from llmcompressor import oneshot from llmcompressor.args import DatasetArguments from llmcompressor.pytorch.utils import tensors_to_device -from llmcompressor.transformers.finetune.data import TextGenerationDataset +from llmcompressor.transformers.data import TextGenerationDataset from llmcompressor.utils.dev import dispatch_for_generation from tests.testing_utils import parse_params, requires_gpu diff --git a/tests/llmcompressor/transformers/data/test_dataset_helpers.py b/tests/llmcompressor/transformers/data/test_dataset_helpers.py index a7138b186d..ed0335bfd1 100644 --- a/tests/llmcompressor/transformers/data/test_dataset_helpers.py +++ b/tests/llmcompressor/transformers/data/test_dataset_helpers.py @@ -2,7 +2,7 @@ from llmcompressor.args import DatasetArguments from llmcompressor.datasets import make_dataset_splits -from llmcompressor.transformers.finetune.data.data_helpers import get_raw_dataset +from 
llmcompressor.transformers.data.data_helpers import get_raw_dataset @pytest.mark.unit diff --git a/tests/llmcompressor/transformers/data/test_registry.py b/tests/llmcompressor/transformers/data/test_registry.py index 29895b4a4c..d775f39441 100644 --- a/tests/llmcompressor/transformers/data/test_registry.py +++ b/tests/llmcompressor/transformers/data/test_registry.py @@ -1,7 +1,7 @@ import pytest from llmcompressor.args import DatasetArguments -from llmcompressor.transformers.finetune.data import ( +from llmcompressor.transformers.data import ( C4Dataset, OpenPlatypusDataset, TextGenerationDataset, From 6998fb9937a1e1a3f606c030074a5294bad5a8cf Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 13:51:30 -0500 Subject: [PATCH 12/23] update --- src/llmcompressor/args/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmcompressor/args/README.md b/src/llmcompressor/args/README.md index d5ced3d5dd..57c2a4a938 100644 --- a/src/llmcompressor/args/README.md +++ b/src/llmcompressor/args/README.md @@ -1,4 +1,4 @@ -# Input arguments for `oneshot` and `eval` entrypoints +# Input arguments for the `oneshot` entrypoint Parsers in `llm-compressor` define the input arguments required for various entry points, including `oneshot`, `train`, and `eval`. From 7958288d98486448ab06af53820368e77c813227 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 14:11:29 -0500 Subject: [PATCH 13/23] update --- .../transformers/sparsegpt/test_sparsegpt_completion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/llmcompressor/transformers/sparsegpt/test_sparsegpt_completion.py b/tests/llmcompressor/transformers/sparsegpt/test_sparsegpt_completion.py index 724a7b12a2..b1e1fcd165 100644 --- a/tests/llmcompressor/transformers/sparsegpt/test_sparsegpt_completion.py +++ b/tests/llmcompressor/transformers/sparsegpt/test_sparsegpt_completion.py @@ -10,7 +10,7 @@ from llmcompressor.transformers.compression.compressed_tensors_utils import ( get_model_compressor, ) -from llmcompressor.transformers.finetune.data import TextGenerationDataset +from llmcompressor.transformers.data import TextGenerationDataset from tests.testing_utils import parse_params, requires_gpu CONFIGS_DIRECTORY = ( From bf3acb42f3ef766c17502095037670a6181c38c5 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 14:14:55 -0500 Subject: [PATCH 14/23] remove old links --- src/llmcompressor/entrypoints/README.md | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/llmcompressor/entrypoints/README.md b/src/llmcompressor/entrypoints/README.md index f023d3c027..25a85bae30 100644 --- a/src/llmcompressor/entrypoints/README.md +++ b/src/llmcompressor/entrypoints/README.md @@ -261,11 +261,4 @@ with create_session(): distill_teacher=distill_teacher, # The teacher model recipe=recipe, # The recipe to use ) -``` - -### SFT Trainer - -TRL's SFT Trainer can be used for sparse fine-tuning or applying sparse knowledge distillation. Examples are available in the `examples/` folder. 
- -- [Sparse-fine-tune a 50% sparse Llama-7b model](../../../examples/trl_mixin/README.md) -- [Sparse-fine-tune a 50% sparse Llama-7b model using knowledge distillation](../../../examples/trl_mixin/README.md) \ No newline at end of file +``` \ No newline at end of file From 2c01fcb874a450af8a70d51ef3f2648e3389e4f8 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 14:30:31 -0500 Subject: [PATCH 15/23] update --- .../workflows/test-check-transformers.yaml | 32 ++----------------- .github/workflows/test-check.yaml | 20 ++++++++---- 2 files changed, 16 insertions(+), 36 deletions(-) diff --git a/.github/workflows/test-check-transformers.yaml b/.github/workflows/test-check-transformers.yaml index 12dc6baeb9..53ab8e0435 100644 --- a/.github/workflows/test-check-transformers.yaml +++ b/.github/workflows/test-check-transformers.yaml @@ -73,7 +73,7 @@ jobs: run: uv pip install .[dev] - uses: actions/checkout@v4 with: - repository: "neuralmagic/compressed-tensors" + repository: "vllm/compressed-tensors" path: "compressed-tensors" fetch-depth: 0 fetch-tags: true @@ -92,35 +92,7 @@ jobs: - name: "🔬 Running transformers tests" if: (success() || failure()) && steps.install.outcome == 'success' run: | - pytest -v tests/llmcompressor/transformers/compression - - name: Run Data Tests - if: (success() || failure()) && steps.install.outcome == 'success' - run: | - pytest -v tests/llmcompressor/transformers/data - - name: Running GPTQ Tests - if: (success() || failure()) && steps.install.outcome == 'success' - run: | - pytest -v tests/llmcompressor/transformers/gptq - - name: Running AutoRound Tests - if: (success() || failure()) && steps.install.outcome == 'success' - run: | - pytest -v tests/llmcompressor/transformers/autoround - - name: Running ONESHOT Tests - if: (success() || failure()) && steps.install.outcome == 'success' - run: | - pytest -v tests/llmcompressor/transformers/oneshot - - name: Running SparseGPT Tests - if: (success() || failure()) && steps.install.outcome == 'success' - run: | - pytest -v tests/llmcompressor/transformers/sparsegpt - - name: Running Tracing Tests - if: (success() || failure()) && steps.install.outcome == 'success' - run: | - pytest -v tests/llmcompressor/transformers/tracing - - name: Running KV Cache Tests - if: (success() || failure()) && steps.install.outcome == 'success' - run: | - pytest -v tests/llmcompressor/transformers/kv_cache + pytest -v tests/llmcompressor/transformers/ - name: "Upload coverage report" if: (success() || failure()) && inputs.code_coverage uses: actions/upload-artifact@v4 diff --git a/.github/workflows/test-check.yaml b/.github/workflows/test-check.yaml index 7e12aba897..63afa9b31d 100644 --- a/.github/workflows/test-check.yaml +++ b/.github/workflows/test-check.yaml @@ -22,10 +22,14 @@ jobs: runs-on: ubuntu-22.04 env: COVERAGE_FILE: ".coverage.base" + strategy: + matrix: + python: ["3.10", "3.13"] steps: - - uses: actions/setup-python@v5 + - name: Set up Python + uses: actions/setup-python@v5 with: - python-version: '3.12' + python-version: ${{ matrix.python }} - uses: actions/checkout@v4 with: fetch-depth: 0 @@ -36,7 +40,7 @@ jobs: run: uv pip install .[dev] - uses: actions/checkout@v4 with: - repository: "neuralmagic/compressed-tensors" + repository: "vllm/compressed-tensors" path: "compressed-tensors" fetch-depth: 0 fetch-tags: true @@ -73,10 +77,14 @@ jobs: runs-on: ubuntu-22.04 env: COVERAGE_FILE: ".coverage.pytorch" + strategy: + matrix: + python: ["3.10", "3.13"] steps: - - uses: actions/setup-python@v5 + - name: Set up 
Python + uses: actions/setup-python@v5 with: - python-version: '3.11' + python-version: ${{ matrix.python }} - uses: actions/checkout@v4 with: fetch-depth: 0 @@ -87,7 +95,7 @@ run: uv pip install .[dev] - uses: actions/checkout@v4 with: - repository: "neuralmagic/compressed-tensors" + repository: "vllm/compressed-tensors" path: "compressed-tensors" fetch-depth: 0 fetch-tags: true From f4cb04021d6e20dd90be6295cfe61e92362e8894 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 14:31:35 -0500 Subject: [PATCH 16/23] update --- .github/workflows/test-check-transformers.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-check-transformers.yaml b/.github/workflows/test-check-transformers.yaml index 53ab8e0435..c61f598647 100644 --- a/.github/workflows/test-check-transformers.yaml +++ b/.github/workflows/test-check-transformers.yaml @@ -62,7 +62,7 @@ jobs: steps: - uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.12' - uses: actions/checkout@v4 with: fetch-depth: 0 From ac31948ff9f08f6954fdb06ebb2a27a152064309 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 14:32:58 -0500 Subject: [PATCH 17/23] update --- .github/workflows/test-check-transformers.yaml | 2 +- .github/workflows/test-check.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-check-transformers.yaml b/.github/workflows/test-check-transformers.yaml index c61f598647..96528f51a5 100644 --- a/.github/workflows/test-check-transformers.yaml +++ b/.github/workflows/test-check-transformers.yaml @@ -73,7 +73,7 @@ jobs: run: uv pip install .[dev] - uses: actions/checkout@v4 with: - repository: "vllm/compressed-tensors" + repository: "vllm-project/compressed-tensors" path: "compressed-tensors" fetch-depth: 0 fetch-tags: true diff --git a/.github/workflows/test-check.yaml b/.github/workflows/test-check.yaml index 63afa9b31d..ea6a7d4884 100644 --- a/.github/workflows/test-check.yaml +++ b/.github/workflows/test-check.yaml @@ -40,7 +40,7 @@ run: uv pip install .[dev] - uses: actions/checkout@v4 with: - repository: "vllm/compressed-tensors" + repository: "vllm-project/compressed-tensors" path: "compressed-tensors" fetch-depth: 0 fetch-tags: true @@ -95,7 +95,7 @@ run: uv pip install .[dev] - uses: actions/checkout@v4 with: - repository: "vllm/compressed-tensors" + repository: "vllm-project/compressed-tensors" path: "compressed-tensors" fetch-depth: 0 fetch-tags: true From 048fabb976721cf69bec9632b0ab1d6c0600b58f Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 14:40:11 -0500 Subject: [PATCH 18/23] revert --- .../workflows/test-check-transformers.yaml | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-check-transformers.yaml b/.github/workflows/test-check-transformers.yaml index 96528f51a5..4835f5537d 100644 --- a/.github/workflows/test-check-transformers.yaml +++ b/.github/workflows/test-check-transformers.yaml @@ -92,7 +92,35 @@ jobs: - name: "🔬 Running transformers tests" if: (success() || failure()) && steps.install.outcome == 'success' run: | - pytest -v tests/llmcompressor/transformers/ + pytest -v tests/llmcompressor/transformers/compression + - name: Run Finetune Tests + if: (success() || failure()) && steps.install.outcome == 'success' 
+ run: | + pytest -v tests/llmcompressor/transformers/gptq + - name: Running AutoRound Tests + if: (success() || failure()) && steps.install.outcome == 'success' + run: | + pytest -v tests/llmcompressor/transformers/autoround + - name: Running ONESHOT Tests + if: (success() || failure()) && steps.install.outcome == 'success' + run: | + pytest -v tests/llmcompressor/transformers/oneshot + - name: Running SparseGPT Tests + if: (success() || failure()) && steps.install.outcome == 'success' + run: | + pytest -v tests/llmcompressor/transformers/sparsegpt + - name: Running Tracing Tests + if: (success() || failure()) && steps.install.outcome == 'success' + run: | + pytest -v tests/llmcompressor/transformers/tracing + - name: Running KV Cache Tests + if: (success() || failure()) && steps.install.outcome == 'success' + run: | + pytest -v tests/llmcompressor/transformers/kv_cache - name: "Upload coverage report" if: (success() || failure()) && inputs.code_coverage uses: actions/upload-artifact@v4 From 16098492e28e162cbba43aec5fde515658a96bec Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 14:56:32 -0500 Subject: [PATCH 19/23] update --- .github/workflows/test-check-transformers.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-check-transformers.yaml b/.github/workflows/test-check-transformers.yaml index 4835f5537d..4753e9145b 100644 --- a/.github/workflows/test-check-transformers.yaml +++ b/.github/workflows/test-check-transformers.yaml @@ -93,10 +93,10 @@ jobs: if: (success() || failure()) && steps.install.outcome == 'success' run: | pytest -v tests/llmcompressor/transformers/compression - - name: Run Finetune Tests + - name: Run Data Tests if: (success() || failure()) && steps.install.outcome == 'success' run: | - pytest -v tests/llmcompressor/transformers/finetune + pytest -v tests/llmcompressor/transformers/data - name: Running GPTQ Tests if: (success() || failure()) && steps.install.outcome == 'success' run: | From 5e2fce04a76ce4086b150e34df53a3a4063a997b Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 18:36:34 -0500 Subject: [PATCH 20/23] remove distillation modifier --- .../modifiers/distillation/__init__.py | 9 - .../modifiers/distillation/output/__init__.py | 3 - .../modifiers/distillation/output/base.py | 196 --------- .../modifiers/distillation/utils/__init__.py | 0 .../distillation/utils/pytorch/__init__.py | 5 - .../distillation/utils/pytorch/kd_factory.py | 408 ------------------ .../distillation/utils/pytorch/kd_wrapper.py | 116 ----- .../utils/pytorch/model_wrapper.py | 135 ------ 8 files changed, 872 deletions(-) delete mode 100644 src/llmcompressor/modifiers/distillation/__init__.py delete mode 100644 src/llmcompressor/modifiers/distillation/output/__init__.py delete mode 100644 src/llmcompressor/modifiers/distillation/output/base.py delete mode 100644 src/llmcompressor/modifiers/distillation/utils/__init__.py delete mode 100644 src/llmcompressor/modifiers/distillation/utils/pytorch/__init__.py delete mode 100644 src/llmcompressor/modifiers/distillation/utils/pytorch/kd_factory.py delete mode 100644 src/llmcompressor/modifiers/distillation/utils/pytorch/kd_wrapper.py delete mode 100644 src/llmcompressor/modifiers/distillation/utils/pytorch/model_wrapper.py diff --git a/src/llmcompressor/modifiers/distillation/__init__.py b/src/llmcompressor/modifiers/distillation/__init__.py deleted file mode 100644 index 735b9d3755..0000000000 --- a/src/llmcompressor/modifiers/distillation/__init__.py +++ 
/dev/null @@ -1,9 +0,0 @@ -# ruff: noqa - -""" -Provides model distillation functionality, specifically importing output-based - distillation modifiers for transferring knowledge from teacher to student - models during compression. -""" - -from .output import * diff --git a/src/llmcompressor/modifiers/distillation/output/__init__.py b/src/llmcompressor/modifiers/distillation/output/__init__.py deleted file mode 100644 index a4291054b4..0000000000 --- a/src/llmcompressor/modifiers/distillation/output/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# ruff: noqa - -from .base import * diff --git a/src/llmcompressor/modifiers/distillation/output/base.py b/src/llmcompressor/modifiers/distillation/output/base.py deleted file mode 100644 index 130e2470ca..0000000000 --- a/src/llmcompressor/modifiers/distillation/output/base.py +++ /dev/null @@ -1,196 +0,0 @@ -from typing import Any, Dict, List, Tuple, Union - -from torch.nn import Module - -from llmcompressor.core import Event, EventType, State -from llmcompressor.modifiers import Modifier -from llmcompressor.modifiers.distillation.utils.pytorch import ( - KDFactory, - KDModelWrapper, - KDModuleWrapper, -) -from llmcompressor.utils.fsdp.context import summon_full_params_context -from llmcompressor.utils.fsdp.helpers import maybe_get_wrapped, set_wrapped_model -from llmcompressor.utils.pytorch.module import get_layers, set_layer - -__all__ = ["OutputDistillationModifier"] - - -class OutputDistillationModifier(Modifier): - targets: Union[str, List[Union[str, Tuple[str, str]]]] - projection: str = None - projection_args: Dict[str, Any] = None - transforms: Union[str, List[str]] = "identity" - transforms_args: Union[Dict[str, Any], List[Dict[str, Any]]] = None - comparison: str = "kl_divergence" - comparison_args: Dict[str, Any] = None - orig_scale: float = 1.0 - distill_scale: float = 1.0 - offload_layer_output: bool = False - - wrappers_: Dict[str, Any] = None - wrapped_kd_model_: Any = None - fsdp_active_: bool = False - - def on_initialize(self, state: State, **kwargs) -> bool: - if state.model is None or state.teacher_model is None: - return False - - self.wrappers_ = {} - if kwargs.get("fsdp_active"): - self.fsdp_active_ = True - - if not hasattr(state.model.config, "hidden_size"): - raise ValueError( - "Model config must specify hidden_size in order to use " - "OutputDistillationModifier" - ) - - # needed to initialize intermediate output buffers for student and teacher - hidden_size = ( - kwargs.get("metadata").get("per_device_train_batch_size", 1), - kwargs.get("metadata").get("max_seq_length", 512), - state.model.config.hidden_size, - ) - - for target in ( - self.targets if isinstance(self.targets, list) else [self.targets] - ): - if isinstance(target, tuple): - model_target, teacher_target = target - else: - model_target, teacher_target = target, target - - model_layers = get_layers(model_target, state.model) - teacher_layers = get_layers(teacher_target, state.teacher_model) - - if len(model_layers) < 1: - raise ValueError(f"no model layers found for target {target}") - - if len(model_layers) != len(teacher_layers): - raise ValueError( - f"model and teacher model layers for target {target} do not match" - ) - - for (key, student_layer), teacher_layer in zip( - model_layers.items(), teacher_layers.values() - ): - student_wrapper = self._create_layer_wrapper( - student_layer, hidden_size, state - ) - teacher_wrapper = self._create_layer_wrapper( - teacher_layer, hidden_size, state - ) - self.wrappers_[key] = (student_wrapper, teacher_wrapper) - - 
with summon_full_params_context(state.teacher_model, offload_to_cpu=True): - for key, (student_wrapper, teacher_wrapper) in self.wrappers_.items(): - set_layer(key, student_wrapper, state.model) - set_layer(key, teacher_wrapper, state.teacher_model) - - self.wrapped_kd_model_ = self._create_model_wrapper( - student_model=maybe_get_wrapped(state.model), - teacher_model=state.teacher_model, - state=state, - ) - - set_wrapped_model(state, self.wrapped_kd_model_) - - # for square-head distillation we want to scale the loss by the number of - # layers if the user doesn't alter the default scale. This is done so the - # distillation loss is roughly equally weighted to the cross entropy loss - num_layers = len(self.wrappers_) - if self.comparison == "square_head" and self.distill_scale == 1.0: - self.distill_scale = float(num_layers) - return True - - def on_finalize(self, state: State, **kwargs) -> bool: - set_wrapped_model(state, self.wrapped_kd_model_.student_model) - - with summon_full_params_context(state.teacher_model, offload_to_cpu=True): - for key, (student_wrapper, teacher_wrapper) in self.wrappers_.items(): - set_layer(key, student_wrapper.layer, state.model) - set_layer(key, teacher_wrapper.layer, state.teacher_model) - del student_wrapper - del teacher_wrapper - - del self.wrapped_kd_model_ - return True - - def on_start(self, state: State, event: Event, **kwargs): - for student_wrapper, teacher_wrapper in self.wrappers_.values(): - student_wrapper.kd_enabled = True - teacher_wrapper.kd_enabled = True - self.wrapped_kd_model_.kd_enabled = True - - def on_update(self, state: State, event: Event, **kwargs): - if event.type_ == EventType.LOSS_CALCULATED and event.should_update( - self.start, self.end, self.update - ): - distill_loss = self.wrapped_kd_model_.kd_last_comparison - model_loss = self.orig_scale * kwargs["loss"] - distill_loss = self.distill_scale * distill_loss.to(model_loss.device) - state.loss = model_loss + distill_loss - - def on_end(self, state: State, event: Event, **kwargs): - for student_wrapper, teacher_wrapper in self.wrappers_.values(): - student_wrapper.kd_enabled = False - teacher_wrapper.kd_enabled = False - self.wrapped_kd_model_.kd_enabled = False - - def _create_model_wrapper( - self, student_model: Module, teacher_model: Module, state: State - ) -> KDModelWrapper: - comparison = KDFactory.create_comparison( - self.comparison, - student_model, - teacher_model, - state, - **(self.comparison_args or {}), - ) - - return KDModelWrapper( - student_model=student_model, - teacher_model=teacher_model, - wrappers=self.wrappers_, - comparison=comparison, - fsdp_active=self.fsdp_active_, - ) - - def _create_layer_wrapper( - self, layer: Module, hidden_size: int, state: State - ) -> KDModuleWrapper: - transforms = [] - if self.transforms: - tmp_transforms = ( - self.transforms - if isinstance(self.transforms, list) - else [self.transforms] - ) - tmp_transform_args = [ - args - for args in ( - self.transforms_args - if isinstance(self.transforms_args, list) - else [self.transforms_args if self.transforms_args else {}] - ) - for _ in range(len(tmp_transforms)) - ] - - for transform, transform_args in zip(tmp_transforms, tmp_transform_args): - transforms.append( - KDFactory.create_transform( - transform, - layer, - state, - **transform_args, - ) - ) - - return KDModuleWrapper( - layer=layer, - hidden_size=hidden_size, - transforms=transforms, - fsdp_active=self.fsdp_active_, - offload_output=self.offload_layer_output, - ) diff --git 
a/src/llmcompressor/modifiers/distillation/utils/__init__.py b/src/llmcompressor/modifiers/distillation/utils/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/llmcompressor/modifiers/distillation/utils/pytorch/__init__.py b/src/llmcompressor/modifiers/distillation/utils/pytorch/__init__.py deleted file mode 100644 index 1b5a1c4465..0000000000 --- a/src/llmcompressor/modifiers/distillation/utils/pytorch/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# ruff: noqa - -from .kd_factory import * -from .kd_wrapper import * -from .model_wrapper import * diff --git a/src/llmcompressor/modifiers/distillation/utils/pytorch/kd_factory.py b/src/llmcompressor/modifiers/distillation/utils/pytorch/kd_factory.py deleted file mode 100644 index 150a0e2220..0000000000 --- a/src/llmcompressor/modifiers/distillation/utils/pytorch/kd_factory.py +++ /dev/null @@ -1,408 +0,0 @@ -import re -from typing import Callable, Dict, Sequence, Tuple, Union - -import torch -import torch.nn.functional as TF -from torch import Tensor -from torch.nn import Module - -from llmcompressor.core import State - -__all__ = [ - "TensorOrCollectionType", - "ProjectionFuncType", - "CreateProjectionFuncType", - "TransformFuncType", - "CreateTransformFuncType", - "ComparisonFuncType", - "CreateComparisonFuncType", - "KDFactory", - "recursive_apply", - "recursive_combine", - "identity_transform", - "softmax_transform", - "log_softmax_transform", - "normalize_transform", - "l1_comparison", - "l2_comparison", - "inner_product_comparison", - "cosine_similarity_comparison", - "kl_divergence_comparison", - "cross_entropy_comparison", -] - - -TensorOrCollectionType = Union[Tensor, Sequence[Tensor], Dict[str, Tensor]] -ProjectionFuncType = Callable[ - [TensorOrCollectionType, TensorOrCollectionType], TensorOrCollectionType -] -CreateProjectionFuncType = Callable[ - [str, Module, Module, State], Tuple[ProjectionFuncType, ProjectionFuncType] -] -TransformFuncType = Callable[[TensorOrCollectionType], TensorOrCollectionType] -CreateTransformFuncType = Callable[[str, Module, Module, State], TransformFuncType] -ComparisonFuncType = Callable[ - [TensorOrCollectionType, TensorOrCollectionType], TensorOrCollectionType -] -CreateComparisonFuncType = Callable[[str, Module, Module, State], ComparisonFuncType] - - -class KDFactory: - registry_projections: Dict[str, CreateProjectionFuncType] = {} - registry_transforms: Dict[str, CreateTransformFuncType] = {} - registry_comparisons: Dict[str, CreateComparisonFuncType] = {} - - @staticmethod - def register_projection(name: str, func: CreateProjectionFuncType): - KDFactory.registry_projections[name] = func - - @staticmethod - def register_projection_decorator(name: str): - def inner(func: CreateProjectionFuncType): - KDFactory.registry_projections[name] = func - return func - - return inner - - @staticmethod - def create_projection( - name: str, student_layer: Module, teacher_layer: Module, state: State, **kwargs - ) -> Tuple[ProjectionFuncType, ProjectionFuncType]: - for pattern, creator in KDFactory.registry_projections: - match = pattern == name - - if not match: - try: - match = re.match(pattern, name) - except Exception: - pass - - if match: - return creator( - name=name, - student_layer=student_layer, - teacher_layer=teacher_layer, - state=state, - **kwargs, - ) - - raise ValueError(f"Invalid projection name: {name}") - - @staticmethod - def register_transform(name: str, func: CreateTransformFuncType): - KDFactory.registry_transforms[name] = func - - @staticmethod - def 
register_transform_decorator(name: str): - def inner(func: CreateTransformFuncType): - KDFactory.registry_transforms[name] = func - return func - - return inner - - @staticmethod - def create_transform( - name: str, - layer: Module, - state: State, - **kwargs, - ) -> TransformFuncType: - for pattern, creator in KDFactory.registry_transforms.items(): - match = pattern == name - - if not match: - try: - match = re.match(pattern, name) - except Exception: - pass - - if match: - return creator( - name=name, - layer=layer, - state=state, - **kwargs, - ) - - raise ValueError(f"Invalid transform name: {name}") - - @staticmethod - def register_comparison(name: str, func): - KDFactory.registry_comparisons[name] = func - - @staticmethod - def register_comparison_decorator(name: str): - def inner(func): - KDFactory.registry_comparisons[name] = func - return func - - return inner - - @staticmethod - def create_comparison( - name: str, student_layer: Module, teacher_layer: Module, state: State, **kwargs - ) -> ComparisonFuncType: - for pattern, creator in KDFactory.registry_comparisons.items(): - match = pattern == name - - if not match: - try: - match = re.match(pattern, name) - except Exception: - pass - - if match: - return creator( - name=name, - student_layer=student_layer, - teacher_layer=teacher_layer, - state=state, - **kwargs, - ) - - raise ValueError(f"Invalid comparison name: {name}") - - -def recursive_apply( - val: TensorOrCollectionType, - func: Callable[[Tensor], Tensor], -) -> TensorOrCollectionType: - if isinstance(val, Tensor): - return func(val) - - if isinstance(val, Sequence): - return [recursive_apply(item, func) for item in val] - - if isinstance(val, dict): - return {key: recursive_apply(item, func) for key, item in val.items()} - - raise ValueError(f"Unsupported type for recursive_apply: {type(val)}") - - -def recursive_combine( - val_one: TensorOrCollectionType, - val_two: TensorOrCollectionType, - func: Callable[[Tensor, Tensor], Tensor], -): - if not isinstance(val_one, type(val_two)): - raise ValueError( - f"val_one type of {type(val_one)} must match " - f"val_two type of {type(val_two)}" - ) - - if isinstance(val_one, Tensor): - return func(val_one, val_two) - - if isinstance(val_one, Sequence): - return [ - recursive_combine(item_one, item_two, func) - for item_one, item_two in zip(val_one, val_two) - ] - - if isinstance(val_one, dict): - return { - key: recursive_combine(val_one[key], val_two[key], func) - for key in val_one.keys() - } - - raise ValueError(f"Unsupported type for recursive_combine: {type(val_one)}") - - -@KDFactory.register_transform_decorator("identity") -def identity_transform(name: str, **kwargs): - if name != "identity": - raise ValueError(f"Invalid transform name: {name}") - - def _create_transform(val: TensorOrCollectionType) -> TensorOrCollectionType: - return val - - return _create_transform - - -@KDFactory.register_transform_decorator("softmax") -def softmax_transform(name: str, temperature: float = 1.0, dim: int = -1, **kwargs): - if name != "softmax": - raise ValueError(f"Invalid transform name: {name}") - - def _softmax(val: Tensor) -> Tensor: - val = val / temperature - - return torch.softmax(val, dim=dim) - - def _create_transform(val: TensorOrCollectionType) -> TensorOrCollectionType: - return recursive_apply(val, _softmax) - - return _create_transform - - -@KDFactory.register_transform_decorator("log_softmax") -def log_softmax_transform(name: str, temperature: float = 1.0, dim: int = -1, **kwargs): - if name != "log_softmax": - raise 
ValueError(f"Invalid transform name: {name}") - - def _log_softmax(val: Tensor) -> Tensor: - val = val / temperature - - return torch.log_softmax(val, dim=dim) - - def _create_transform(val: TensorOrCollectionType) -> TensorOrCollectionType: - return recursive_apply(val, _log_softmax) - - return _create_transform - - -@KDFactory.register_transform_decorator("normalize") -def normalize_transform( - name: str, - p: float = 1, - dim: int = -1, - eps: float = 1e-12, - mean: bool = False, - std: bool = False, - **kwargs, -): - if name != "normalize": - raise ValueError(f"Invalid transform name: {name}") - - def _normalize(val: Tensor) -> Tensor: - out = TF.normalize(val, p=p, dim=dim, eps=eps) - - if mean: - out = out - out.mean(dim=dim, keepdim=True) - - if std: - out = out / out.std(dim=dim, keepdim=True) - - return out - - def _create_transform(val: TensorOrCollectionType) -> TensorOrCollectionType: - return recursive_apply(val, _normalize) - - return _create_transform - - -@KDFactory.register_comparison_decorator("l1_distance") -def l1_comparison(name: str, dim: int = -1, **kwargs): - if name != "l1_distance": - raise ValueError(f"Invalid comparison name: {name}") - - def _l1(val_one: Tensor, val_two: Tensor) -> Tensor: - return torch.sum(torch.abs(val_one - val_two), dim=dim) - - def _create_comparison( - val_one: TensorOrCollectionType, val_two: TensorOrCollectionType - ) -> TensorOrCollectionType: - return recursive_combine(val_one, val_two, _l1) - - return _create_comparison - - -@KDFactory.register_comparison_decorator("l2_distance") -def l2_comparison(name: str, dim: int = -1, **kwargs): - if name != "l2_distance": - raise ValueError(f"Invalid comparison name: {name}") - - def _l2(val_one: Tensor, val_two: Tensor) -> Tensor: - return torch.sum((val_one - val_two) ** 2, dim=dim) - - def _create_comparison( - val_one: TensorOrCollectionType, val_two: TensorOrCollectionType - ) -> TensorOrCollectionType: - return recursive_combine(val_one, val_two, _l2) - - return _create_comparison - - -@KDFactory.register_comparison_decorator("inner_product") -def inner_product_comparison(name: str, dim: int = -1, **kwargs): - if name != "inner_product": - raise ValueError(f"Invalid comparison name: {name}") - - def _inner_product(val_one: Tensor, val_two: Tensor) -> Tensor: - return torch.sum(val_one * val_two, dim=dim) - - def _create_comparison( - val_one: TensorOrCollectionType, val_two: TensorOrCollectionType - ) -> TensorOrCollectionType: - return recursive_combine(val_one, val_two, _inner_product) - - return _create_comparison - - -@KDFactory.register_comparison_decorator("cosine_similarity") -def cosine_similarity_comparison(name: str, dim: int = -1, **kwargs): - if name != "cosine_similarity": - raise ValueError(f"Invalid comparison name: {name}") - - def _cosine_similarity(val_one: Tensor, val_two: Tensor) -> Tensor: - return torch.sum(val_one * val_two, dim=dim) / ( - torch.norm(val_one, dim=dim) * torch.norm(val_two, dim=dim) - ) - - def _create_comparison( - val_one: TensorOrCollectionType, val_two: TensorOrCollectionType - ) -> TensorOrCollectionType: - return recursive_combine(val_one, val_two, _cosine_similarity) - - return _create_comparison - - -@KDFactory.register_comparison_decorator("kl_divergence") -def kl_divergence_comparison( - name: str, dim: int = -1, temperature: float = 1.0, **kwargs -): - if name != "kl_divergence": - raise ValueError(f"Invalid comparison name: {name}") - - def _kl_divergence(val_one: Tensor, val_two: Tensor) -> Tensor: - val_one = val_one / temperature - 
val_two = val_two / temperature - - return torch.sum(val_one * torch.log(val_one / val_two), dim=dim) - - def _create_comparison( - val_one: TensorOrCollectionType, val_two: TensorOrCollectionType - ) -> TensorOrCollectionType: - return recursive_combine(val_one, val_two, _kl_divergence) - - return _create_comparison - - -@KDFactory.register_comparison_decorator("cross_entropy") -def cross_entropy_comparison( - name: str, temperature: float = 1.0, reduction: str = "none", **kwargs -): - if name != "cross_entropy": - raise ValueError(f"Invalid projection name: {name}") - - def _cross_entropy(val_one: Tensor, val_two: Tensor) -> Tensor: - val_one = val_one / temperature - val_two = val_two / temperature - - return TF.cross_entropy(val_one, val_two, reduction=reduction) - - def _create_comparison( - val_one: TensorOrCollectionType, val_two: TensorOrCollectionType - ) -> TensorOrCollectionType: - return recursive_combine(val_one, val_two, _cross_entropy) - - return _create_comparison - - -@KDFactory.register_comparison_decorator("square_head") -def square_head_comparison(name: str, **kwargs): - if name != "square_head": - raise ValueError(f"Invalid projection name: {name}") - - def _square_head(val_one: Tensor, val_two: Tensor) -> Tensor: - numerator = torch.sum(torch.square(val_two - val_one)) - denominator = torch.sum(torch.square(val_two)) - - return numerator / denominator - - def _create_comparison( - val_one: TensorOrCollectionType, val_two: TensorOrCollectionType - ) -> TensorOrCollectionType: - return recursive_combine(val_one, val_two, _square_head) - - return _create_comparison diff --git a/src/llmcompressor/modifiers/distillation/utils/pytorch/kd_wrapper.py b/src/llmcompressor/modifiers/distillation/utils/pytorch/kd_wrapper.py deleted file mode 100644 index ee96e4763d..0000000000 --- a/src/llmcompressor/modifiers/distillation/utils/pytorch/kd_wrapper.py +++ /dev/null @@ -1,116 +0,0 @@ -from typing import List, Optional, Set, Tuple - -import torch -from torch.nn import Module - -from llmcompressor.modifiers.distillation.utils.pytorch.kd_factory import ( - TransformFuncType, -) - -__all__ = ["KDModuleWrapper"] - - -class KDModuleWrapper(Module): - KD_TRANSFORMED_BUFFER = "kd_last_transformed" - - def __init__( - self, - layer: Module, - hidden_size: Tuple, - transforms: Optional[List[TransformFuncType]], - fsdp_active: bool, - offload_output: bool, - ): - super(KDModuleWrapper, self).__init__() - - self.layer = layer - self._save_active = False - self._fsdp_active = fsdp_active - self.offload_output = offload_output - self.kd_transforms = transforms - self.kd_enabled = False - self.register_buffer( - self.KD_TRANSFORMED_BUFFER, torch.zeros(hidden_size, device="cpu") - ) - self._init_called = True # make sure this is last property to be set - - def _clear_missing_keys(module, incompatible_keys): - incompatible_keys.missing_keys.clear() - - self.register_load_state_dict_post_hook(_clear_missing_keys) - - def forward(self, *args, **kwargs): - if not self.kd_enabled: - return self.layer(*args, **kwargs) - - org_output = self.layer(*args, **kwargs) - output = org_output if isinstance(org_output, torch.Tensor) else org_output[0] - - if self.kd_transforms is not None: - for transform in self.kd_transforms: - output = transform(output) - - if self.offload_output: - output = output.to("cpu") - setattr(self, self.KD_TRANSFORMED_BUFFER, output) - return org_output - - def state_dict(self, destination=None, prefix="", keep_vars=False, **kwargs): - return self.layer.state_dict( - 
destination=destination, prefix=prefix, keep_vars=keep_vars, **kwargs - ) - - def load_state_dict(self, state_dict, strict=True): - return self.layer.load_state_dict(state_dict, strict=strict) - - def _load_from_state_dict( - self, - state_dict, - prefix, - local_metadata, - strict, - missing_keys, - unexpected_keys, - error_msgs, - ): - self.layer._load_from_state_dict( - state_dict=state_dict, - prefix=prefix, - local_metadata=local_metadata, - strict=strict, - missing_keys=missing_keys, - unexpected_keys=unexpected_keys, - error_msgs=error_msgs, - ) - - def named_modules( - self, - memo: Optional[Set["Module"]] = None, - prefix: str = "", - remove_duplicate: bool = True, - ): - # outside of saving, we want the full names of modules in two cases: - # 1. trainer initialization, so teacher is moved to the correct device. This is - # caught by the kd_enabled flag, which is set when the modifier is started - # 2. running in DataParallel (non-FSDP) mode so the replicate function can pick - # up the teacher. - if self._save_active or (self.kd_enabled and self._fsdp_active): - return self.layer.named_modules( - memo=memo, prefix=prefix, remove_duplicate=remove_duplicate - ) - - return super().named_modules( - memo=memo, prefix=prefix, remove_duplicate=remove_duplicate - ) - - def prepare_for_save(self): - """ - Prepare model structure to be saved, specifically `self.named_modules` - """ - self._save_active = True - - def finish_save(self): - """ - Finish saving model - """ - self._save_active = False diff --git a/src/llmcompressor/modifiers/distillation/utils/pytorch/model_wrapper.py b/src/llmcompressor/modifiers/distillation/utils/pytorch/model_wrapper.py deleted file mode 100644 index 33ba6f6986..0000000000 --- a/src/llmcompressor/modifiers/distillation/utils/pytorch/model_wrapper.py +++ /dev/null @@ -1,135 +0,0 @@ -from typing import Any, Dict, Optional, Set - -import torch -from torch.nn import Module - -__all__ = ["KDModelWrapper"] - - -class KDModelWrapper(Module): - KD_LAST_COMPARISON = "kd_last_comparison" - - def __init__( - self, - student_model: Module, - teacher_model: Module, - wrappers: Dict[str, Any], - comparison, - fsdp_active: bool, - ): - super(KDModelWrapper, self).__init__() - - self.student_model = student_model - self.teacher_model = teacher_model - self.wrappers = wrappers - self.kd_comparison = comparison - self._save_active = False - self._fsdp_active = fsdp_active - self.kd_enabled = False - self.register_buffer(self.KD_LAST_COMPARISON, torch.zeros(1, device="cpu")) - self._init_called = True # make sure this is last property to be set - - def _clear_missing_keys(module, incompatible_keys): - incompatible_keys.missing_keys.clear() - - self.register_load_state_dict_post_hook(_clear_missing_keys) - - def forward(self, *args, **kwargs): - if not self.kd_enabled: - return self.student_model(*args, **kwargs) - - org_output = self.student_model(*args, **kwargs) - with torch.no_grad(): - self.teacher_model(*args, **kwargs) - - layerwise_comps = [] - nonpad_tokens = kwargs["attention_mask"] == 1 - device = nonpad_tokens.device - for key, (student_wrapper, teacher_wrapper) in self.wrappers.items(): - student_out = student_wrapper.kd_last_transformed.to(device)[nonpad_tokens] - teacher_out = teacher_wrapper.kd_last_transformed.to(device)[nonpad_tokens] - comp = self.kd_comparison(student_out, teacher_out) - layerwise_comps.append(comp) - - setattr(self, self.KD_LAST_COMPARISON, torch.stack(layerwise_comps).mean()) - - return org_output - - def state_dict(self, destination=None, 
prefix="", keep_vars=False, **kwargs): - return self.student_model.state_dict( - destination=destination, prefix=prefix, keep_vars=keep_vars, **kwargs - ) - - def load_state_dict(self, state_dict, strict=True): - return self.student_model.load_state_dict(state_dict, strict=strict) - - def _load_from_state_dict( - self, - state_dict, - prefix, - local_metadata, - strict, - missing_keys, - unexpected_keys, - error_msgs, - ): - self.student_model._load_from_state_dict( - state_dict=state_dict, - prefix=prefix, - local_metadata=local_metadata, - strict=strict, - missing_keys=missing_keys, - unexpected_keys=unexpected_keys, - error_msgs=error_msgs, - ) - - def named_modules( - self, - memo: Optional[Set["Module"]] = None, - prefix: str = "", - remove_duplicate: bool = True, - ): - # outside of saving, we want the full names of modules in two cases: - # 1. trainer initialization, so teacher is moved to the correct device. This is - # caught by the kd_enabled flag, which is set when the modifier is started - # 2. running in DataParallel (non-FSDP) mode so the replicate function can pick - # up the teacher. - if self._save_active or (self.kd_enabled and self._fsdp_active): - return self.student_model.named_modules( - memo=memo, prefix=prefix, remove_duplicate=remove_duplicate - ) - - return super().named_modules( - memo=memo, prefix=prefix, remove_duplicate=remove_duplicate - ) - - def named_children(self): - return self.student_model.named_children() - - def train(self, mode: bool = True): - self.student_model.train(mode) - return self - - def prepare_for_save(self): - """ - Prepare model structure to be saved, specifically `self.named_modules` - """ - self._save_active = True - for student_wrapper, teacher_wrapper in self.wrappers.values(): - student_wrapper.prepare_for_save() - teacher_wrapper.prepare_for_save() - - def finish_save(self): - """ - Finish saving model - """ - self._save_active = False - for student_wrapper, teacher_wrapper in self.wrappers.values(): - student_wrapper.finish_save() - teacher_wrapper.finish_save() - - def __getattr__(self, name: str) -> Any: - try: - return super().__getattr__(name) - except AttributeError: - return getattr(self.student_model, name) From 13f4a7eeb11361af3037a478416864c16067b853 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Tue, 18 Nov 2025 18:41:09 -0500 Subject: [PATCH 21/23] remove link --- src/llmcompressor/modifiers/README.md | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/llmcompressor/modifiers/README.md b/src/llmcompressor/modifiers/README.md index 72ff0b0586..fad84b5196 100644 --- a/src/llmcompressor/modifiers/README.md +++ b/src/llmcompressor/modifiers/README.md @@ -65,11 +65,4 @@ rather than the linear smoothing done by SmoothQuant. The implementation is base One-shot pruning algorithms often introduce accuracy degradation that can be recovered with finetuning. This modifier ensures that the sparsity mask of the model is maintained during finetuning, allowing a sparse model to recover accuracy while maintaining its sparsity structure. It is intended to be used after a pruning modifier -such as `SparseGPT` or `WANDA` has already been applied. - -### [Distillation](./distillation/output/base.py) -To better recover accuracy of sparse models during finetuning, we can also use a teacher model of the same architecture -to influence the loss. This modifier is intended to be used in conjunction with `ConstantPruning` modifier on a -pruned model, with the dense version of the model being used as the teacher. 
Both output distillation loss and -layer-by-layer distillation loss are supported. The layer-by-layer implementation follows the Square Head distillation -algorithm presented in [Sparse Fine-tuning for Inference Acceleration of Large Language Models](https://arxiv.org/pdf/2310.06927). \ No newline at end of file +such as `SparseGPT` or `WANDA` has already been applied. \ No newline at end of file From 5b3c280eed4bb53bcdfcfdcb932786fc4682bcc0 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Wed, 19 Nov 2025 12:59:15 -0500 Subject: [PATCH 22/23] update example --- .../2of4_w4a16_group-128_recipe.yaml | 13 ---- .../quantization_2of4_sparse_w4a16/README.md | 60 ++++--------------- .../llama7b_sparse_w4a16.py | 53 +++------------- 3 files changed, 20 insertions(+), 106 deletions(-) diff --git a/examples/quantization_2of4_sparse_w4a16/2of4_w4a16_group-128_recipe.yaml b/examples/quantization_2of4_sparse_w4a16/2of4_w4a16_group-128_recipe.yaml index 7a002633a1..bb76f11015 100644 --- a/examples/quantization_2of4_sparse_w4a16/2of4_w4a16_group-128_recipe.yaml +++ b/examples/quantization_2of4_sparse_w4a16/2of4_w4a16_group-128_recipe.yaml @@ -5,19 +5,6 @@ sparsity_stage: mask_structure: "2:4" targets: ["Linear"] ignore: ["re:.*lm_head"] -finetuning_stage: - finetuning_modifiers: - ConstantPruningModifier: - targets: [ - 're:.*q_proj.weight', - 're:.*k_proj.weight', - 're:.*v_proj.weight', - 're:.*o_proj.weight', - 're:.*gate_proj.weight', - 're:.*up_proj.weight', - 're:.*down_proj.weight', - ] - start: 0 quantization_stage: quantization_modifiers: GPTQModifier: diff --git a/examples/quantization_2of4_sparse_w4a16/README.md b/examples/quantization_2of4_sparse_w4a16/README.md index 932284fe47..8fcd880cc6 100644 --- a/examples/quantization_2of4_sparse_w4a16/README.md +++ b/examples/quantization_2of4_sparse_w4a16/README.md @@ -4,9 +4,10 @@ > `2:4 sparisty + int4/int8` mixed precision computation is supported in vLLM on Nvidia capability > 8.0 (Ampere, Ada Lovelace, Hopper). -## NOTE: -Fine tuning can require more steps than is shown in the example. -See the Axolotl integration blog post for best fine tuning practices +## NOTE: The following example no longer includes finetuning as training +training support has been deprecated as of v0.9.0. To apply finetuning +to your sparse model, see the Axolotl integration blog post for best +fine tuning practices https://developers.redhat.com/articles/2025/06/17/axolotl-meets-llm-compressor-fast-sparse-open @@ -78,22 +79,11 @@ output_path = Path(output_dir) splits = {"calibration": "train_gen[:5%]", "train": "train_gen"} max_seq_length = 512 num_calibration_samples = 512 - -# set training parameters for finetuning -# increase num_train_epochs for longer training -num_train_epochs = 0.01 -logging_steps = 500 -save_steps = 5000 -gradient_checkpointing = True # saves memory during training -learning_rate = 0.0001 -bf16 = False # using full precision for training -lr_scheduler_type = "cosine" -warmup_ratio = 0.1 preprocessing_num_workers = 8 ``` -## Step 2: Run `sparsification`, `fine-tuning`, and `quantization` -The compression process now runs in three stages: sparsification, fine-tuning, and quantization. +## Step 2: Run `sparsification` and `quantization` +The compression process now runs in two stages: sparsification and quantization. Each stage saves the intermediate model outputs to the `output_llama7b_2of4_w4a16_channel` directory. ```python @@ -106,47 +96,19 @@ output_path = Path(output_dir) # 1. 
Oneshot sparsification: apply pruning oneshot( model=model, - dataset=dataset, - recipe=recipe, - splits=splits, - num_calibration_samples=num_calibration_samples, - preprocessing_num_workers=preprocessing_num_workers, + **oneshot_kwargs, output_dir=output_dir, stage="sparsity_stage", ) -# 2. Sparse fine-tuning: improve accuracy on pruned model -train( - model=output_path / "sparsity_stage", - dataset=dataset, - recipe=recipe, - splits=splits, - num_calibration_samples=num_calibration_samples, - preprocessing_num_workers=preprocessing_num_workers, - bf16=bf16, - max_seq_length=max_seq_length, - num_train_epochs=num_train_epochs, - logging_steps=logging_steps, - save_steps=save_steps, - gradient_checkpointing=gradient_checkpointing, - learning_rate=learning_rate, - lr_scheduler_type=lr_scheduler_type, - warmup_ratio=warmup_ratio, - output_dir=output_dir, - stage="finetuning_stage", -) -# 3. Oneshot quantization: compress model weights to lower precision +# 2. Oneshot quantization: compress model weights to lower precision quantized_model = oneshot( - model=output_path / "finetuning_stage", - dataset=dataset, - recipe=recipe, - splits=splits, - num_calibration_samples=num_calibration_samples, - preprocessing_num_workers=preprocessing_num_workers, - output_dir=output_dir, + model=(output_path / "sparsity_stage"), + **oneshot_kwargs, stage="quantization_stage", ) + # skip_sparsity_compression_stats is set to False # to account for sparsity in the model when compressing quantized_model.save_pretrained( diff --git a/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py b/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py index 51e24f0063..1c54e906c6 100644 --- a/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py +++ b/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py @@ -1,5 +1,7 @@ -# NOTE: Fine tuning can require more steps than is shown in the example -# See the Axolotl integration blog post for best fine tuning practices +# NOTE: The following example no longer includes finetuning as training +# training support has been deprecated as of v0.9.0. 
To apply finetuning +# to your sparse model, see the Axolotl integration blog post for best +# fine tuning practices # https://developers.redhat.com/articles/2025/06/17/axolotl-meets-llm-compressor-fast-sparse-open from pathlib import Path @@ -8,7 +10,7 @@ from loguru import logger from transformers import AutoModelForCausalLM, AutoTokenizer -from llmcompressor import oneshot, train +from llmcompressor import oneshot # load the model in as bfloat16 to save on memory and compute model_stub = "neuralmagic/Llama-2-7b-ultrachat200k" @@ -26,22 +28,11 @@ output_path = Path(output_dir) # set dataset config parameters -splits = {"calibration": "train_gen[:5%]", "train": "train_gen"} +splits = {"calibration": "train_gen[:5%]"} max_seq_length = 512 -num_calibration_samples = 512 - -# set training parameters for finetuning -num_train_epochs = 0.01 -logging_steps = 500 -save_steps = 5000 -gradient_checkpointing = True # saves memory during training -learning_rate = 0.0001 -bf16 = False # using full precision for training -lr_scheduler_type = "cosine" -warmup_ratio = 0.1 +num_calibration_samples = 10 preprocessing_num_workers = 64 - oneshot_kwargs = dict( dataset=dataset, recipe=recipe, @@ -50,26 +41,10 @@ splits=splits, ) -training_kwargs = dict( - bf16=bf16, - max_seq_length=max_seq_length, - num_train_epochs=num_train_epochs, - logging_steps=logging_steps, - save_steps=save_steps, - gradient_checkpointing=gradient_checkpointing, - learning_rate=learning_rate, - lr_scheduler_type=lr_scheduler_type, - warmup_ratio=warmup_ratio, -) - -# This will run the targeted stage of the recipe -# oneshot sparsification -> finetuning -> oneshot quantization - # Models are automatically saved in -# ./output_llama7b_2of4_w4a16_channel/ + (finetuning/sparsity/quantization)_stage +# ./output_llama7b_2of4_w4a16_channel/ + (sparsity/quantization)_stage # Oneshot sparsification - oneshot( model=model, **oneshot_kwargs, @@ -77,19 +52,9 @@ stage="sparsity_stage", ) -# Sparse finetune -# This step can be supplanted by fine tuning via integrated FT libraries such as Axolotl -train( - model=(output_path / "sparsity_stage"), - **oneshot_kwargs, - **training_kwargs, - output_dir=output_dir, - stage="finetuning_stage", -) - # Oneshot quantization quantized_model = oneshot( - model=(output_path / "finetuning_stage"), + model=(output_path / "sparsity_stage"), **oneshot_kwargs, stage="quantization_stage", ) From 1cea473281574ff4ad8e5c17ea000774c953f417 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Wed, 19 Nov 2025 13:21:56 -0500 Subject: [PATCH 23/23] update readme --- .../quantization_2of4_sparse_w4a16/README.md | 2 +- .../llama7b_sparse_w4a16.py | 5 +- src/llmcompressor/entrypoints/README.md | 188 +----------------- 3 files changed, 13 insertions(+), 182 deletions(-) diff --git a/examples/quantization_2of4_sparse_w4a16/README.md b/examples/quantization_2of4_sparse_w4a16/README.md index 8fcd880cc6..178f4373b8 100644 --- a/examples/quantization_2of4_sparse_w4a16/README.md +++ b/examples/quantization_2of4_sparse_w4a16/README.md @@ -5,7 +5,7 @@ > `2:4 sparisty + int4/int8` mixed precision computation is supported in vLLM on Nvidia capability > 8.0 (Ampere, Ada Lovelace, Hopper). ## NOTE: The following example no longer includes finetuning as training -training support has been deprecated as of v0.9.0. To apply finetuning +Training support has been deprecated as of v0.9.0. 
To apply finetuning to your sparse model, see the Axolotl integration blog post for best fine tuning practices https://developers.redhat.com/articles/2025/06/17/axolotl-meets-llm-compressor-fast-sparse-open diff --git a/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py b/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py index 1c54e906c6..b2f4e57b64 100644 --- a/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py +++ b/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py @@ -1,5 +1,6 @@ -# NOTE: The following example no longer includes finetuning as training -# training support has been deprecated as of v0.9.0. To apply finetuning +# NOTE: The following example no longer includes finetuning as training. + +# Training support has been deprecated as of v0.9.0. To apply finetuning # to your sparse model, see the Axolotl integration blog post for best # fine tuning practices # https://developers.redhat.com/articles/2025/06/17/axolotl-meets-llm-compressor-fast-sparse-open diff --git a/src/llmcompressor/entrypoints/README.md b/src/llmcompressor/entrypoints/README.md index 25a85bae30..97da4fcaab 100644 --- a/src/llmcompressor/entrypoints/README.md +++ b/src/llmcompressor/entrypoints/README.md @@ -1,21 +1,18 @@ -# Compression and Fine-tuning Entrypoint +# Compression Entrypoints ## Oneshot - An ideal compression technique reduces memory footprint while maintaining accuracy. One-shot in LLM-Compressor supports faster inference on vLLM by applying post-training quantization (PTQ) or sparsification. ### PTQ -PTQ is performed to reduce the precision of quantizable weights (e.g., linear layers) to a lower bit-width. Supported formats are: -- [W4A16](../../../examples/quantization_w4a16/README.md) -- [W8A8-INT8](../../../examples/quantization_w8a8_int8/README.md) -- [W8A8-FP8](../../../examples/quantization_w8a8_fp8/README.md) +PTQ is performed to reduce the precision of quantizable weights (e.g., linear layers) to a lower bit-width. +A complete list of formats can be found here: https://docs.vllm.ai/projects/llm-compressor/en/latest/guides/compression_schemes/ ### Sparsification Sparsification reduces model complexity by pruning selected weight values to zero while retaining essential weights in a subset of parameters. Supported formats include: - [2:4-Sparsity with FP4 Weight](../../../examples/quantization_2of4_sparse_w4a16/README.md) - [2:4-Sparsity with FP8 Weight, FP8 Input Activation](../../../examples/sparse_2of4_quantization_fp8/README.md) -## Code +### Example Example scripts for all the above formats are located in the [examples](../../../examples/) folder. The [W8A8-FP8](../../../examples/quantization_w8a8_fp8/llama3_example.py) example is shown below: @@ -68,7 +65,6 @@ oneshot( ) ``` - ### Lifecycle The oneshot calibration lifecycle consists of three steps: @@ -88,177 +84,11 @@ The oneshot calibration lifecycle consists of three steps: This will automatically save the model weights to a compressed SafeTensors format. The tokenizer/processor, recipe, and the configuration file will also be saved. -## Train / Finetune -Compressed models can be trained to improve accuracy. Training is carried out using HuggingFace's Trainer. - -### Finetuning a Compressed Model -LLM-Compressor supports fine-tuning of quantized, sparsified, and sparse-quantized models. It offers both standard fine-tuning, knowledge distillation and SFT Trainer. 
- -## Code +## Model-Free PTQ +For certain cases, it may be beneficial to consider the `model_free_ptq` entrypoint such as when a model definition is lacking or if the `oneshot` entrypoint fails. +`model_free_ptq` can be applied for schemes that do not require data, such as Round-To-Nearest with FP8 or NVFP4A16. Examples applying the entrypoint can be found +here: https://github.com/vllm-project/llm-compressor/tree/main/examples/model_free_ptq. ### Finetuning -A compressed model generated using `oneshot` is saved to disk in a compressed format. To load it, the model must be decompressed using `CompressedTensorsConfig` with `AutoModelForCausalLM`. If the above `oneshot` example script was executed and the compressed model was saved to `./oneshot_model`, the following code is used to perform fine-tuning: - - -```python -from transformers.utils.quantization_config import CompressedTensorsConfig - -from llmcompressor import create_session, train - -# The saving directory -output_dir = "./oneshot_model" - -# The model to train -model = AutoModelForCausalLM.from_pretrained( - output_dir, - quantization_config=CompressedTensorsConfig(run_compressed=False), -) - -dataset = "open_platypus" # Define dataset to use for kd -output_dir = "./finetuned_model" -splits = "train[:50%]" # Use 50% of the training data -max_steps = ( - 25 # Number of training steps (updates) before stopping the training process -) -num_calibration_samples = 8 # Number of workers processing datasets in parallel - -# Create an isolated session independent from the previous runs -with create_session(): - train( - model=model, # The model to finetune - dataset=dataset, # The data to carry out finetuning - output_dir=output_dir, # The output directory to save - num_calibration_samples=num_calibration_samples, # The number of workers to carry out dataset processing - splits=splits, # The dataset key and percentage of samples to use - max_steps=max_steps, # The total number of iterations to carry out training - ) -``` - - -### Knowledge Distillation - -To perform knowledge distillation, a teacher model and a student model (the compressed model) must be defined. The loss between the student and the teacher can be specified in the recipe by defining the `comparison` key. In this case, KL divergence is used to compare the output distributions of the student and the teacher. -Comparisons are defined in `/src/llmcompressor/modifiers/distillation/utils/pytorch/kd_factory.py`. - -```python -# Define the teacher model -distill_teacher = AutoModelForCausalLM.from_pretrained( - "meta-llama/Meta-Llama-3-8B-Instruct", -) - -# Define the recipe, use knowledge distillation modifier and target the `model.layers` using a regex with -recipe = r""" -kd_stage: - distillation_modifiers: - OutputDistillationModifier: - targets: ["re:model.layers.\\d+$"] - comparison: "kl_divergence" - start: 0 - orig_scale: 1.0 - distill_scale: 1.0 -""" - -# Create an isolated session from the previous runs -with create_session(): - train( - ... - distill_teacher=distill_teacher, # The teacher model - recipe=recipe, # The recipe to use - ) - -``` - -The output terminal will provide the sparsification, quantization and training metrics: - -```bash -2025-02-25T18:39:08.984855-0500 | log_model_sparsification | INFO - There are 8033013760 prunable params which have 0.02% avg sparsity. -2025-02-25T18:39:08.987302-0500 | log_model_sparsification | INFO - There are 8033013760 quantizable params, with a quantization percentage of 86.88%. 
-***** train metrics ***** - epoch = 0.016 - perplexity = 1.5422 - total_flos = 3221945GF - train_loss = 0.4332 - train_runtime = 0:03:53.39 - train_samples = 12463 - train_samples_per_second = 0.857 - train_steps_per_second = 0.107 -``` - -### End-to-end Script -The end-to-end script for carrying out `oneshot` for `W8A8-FP8` and then knowledge distillation is shown below: - -```python -from transformers import AutoModelForCausalLM, AutoTokenizer - -from llmcompressor import oneshot -from llmcompressor.modifiers.quantization import QuantizationModifier - -MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct" - -# The directory for saving -oneshot_output_dir = "./oneshot_model" - -# Load the model -model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto") -# Load the tokenizer -tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) - -# Define the recipe. `scheme="FP8_DYNAMIC"` compresses to W8A8-FP8, which is -# FP8 channel-wise for weight, and FP8 dynamic per token activation -recipe = QuantizationModifier( - targets="Linear", scheme="FP8_DYNAMIC", ignore=["lm_head"] -) - -# compress the model -oneshot(model=model, recipe=recipe, output_dir=oneshot_output_dir) - -from transformers.utils.quantization_config import CompressedTensorsConfig - -from llmcompressor import create_session, train - -# Student model -model = AutoModelForCausalLM.from_pretrained( - oneshot_output_dir, - quantization_config=CompressedTensorsConfig(run_compressed=False), -) - -dataset = "open_platypus" # Define dataset to use for knowledge distillation -finetune_output_dir = "./finetuned_model" # The output saving directory -splits = "train[:50%]" # Use 50% of the training data -max_steps = ( - 25 # The number of training steps (updates) before stopping the training process -) -num_calibration_samples = 8 # The number of workers processing datasets in parallel - -# Define teacher model -distill_teacher = AutoModelForCausalLM.from_pretrained( - "meta-llama/Meta-Llama-3-8B-Instruct", -) - -# Define the recipe, use knowledge distillation modifier and target the `model.layers` using a regex with -# KL divergence comparison -recipe = r""" -kd_stage: - distillation_modifiers: - OutputDistillationModifier: - targets: ["re:model.layers.\\d+$"] - comparison: "kl_divergence" - start: 0 - orig_scale: 1.0 - distill_scale: 1.0 -""" - -# Create an isolated session from the previous runs -with create_session(): - train( - model=model, # The student model - dataset=dataset, # The data to carry out finetuning - output_dir=finetune_output_dir, # Output directory to save - num_calibration_samples=num_calibration_samples, # The number of workers to carry out dataset processing - splits=splits, # The percentage of the subsets of a dataset to use - max_steps=max_steps, # The number of training steps - distill_teacher=distill_teacher, # The teacher model - recipe=recipe, # The recipe to use - ) -``` \ No newline at end of file +As of LLM Compressor v0.9.0, training support has been deprecated. To apply finetuning to your model, such as in the case of sparse-finetuning, Axolotl training can be applied. A step-by-step guide explaining how to apply the Axolotl integration can be found here: https://developers.redhat.com/articles/2025/06/17/axolotl-meets-llm-compressor-fast-sparse-open# as well as in the Axolotl documentation: https://docs.axolotl.ai/docs/custom_integrations.html#llmcompressor. \ No newline at end of file
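Once the quantization stage has run, the resulting checkpoint is intended to be served with vLLM, which as noted in the example README supports `2:4 sparsity + int4/int8` mixed-precision compute on NVIDIA Ampere and newer GPUs. Below is a minimal serving sketch, assuming vLLM is installed and that the example's default output location was used (`./output_llama7b_2of4_w4a16_channel/quantization_stage`); the prompt and sampling settings are illustrative only.

```python
# Minimal vLLM inference sketch for the checkpoint produced by the quantization stage.
# Assumptions: vLLM is installed and the example's default output directory was used;
# adjust `model_path` to wherever save_pretrained() wrote the compressed model.
from vllm import LLM, SamplingParams

model_path = "./output_llama7b_2of4_w4a16_channel/quantization_stage"

# vLLM reads the compressed-tensors quantization config directly from the checkpoint.
llm = LLM(model=model_path)
sampling_params = SamplingParams(temperature=0.8, max_tokens=128)

# Illustrative prompt; any text prompt works here.
outputs = llm.generate(["What is 2:4 structured sparsity?"], sampling_params)
print(outputs[0].outputs[0].text)
```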