From 5bda9c6cb5e889e3bdf6ad6be1777e5c4df30edf Mon Sep 17 00:00:00 2001 From: Alexander Dokuchaev Date: Tue, 25 Nov 2025 14:07:37 +0200 Subject: [PATCH] Update NNCF doc --- .../compressing-models-during-training.rst | 9 +- .../filter-pruning.rst | 66 +-------------- .../quantization-aware-training.rst | 41 ---------- .../quantizing-models-post-training.rst | 2 +- .../basic-quantization-flow.rst | 24 +----- .../nncf/code/pruning_tf.py | 82 ------------------- docs/optimization_guide/nncf/code/qat_tf.py | 45 ---------- .../nncf/ptq/code/ptq_tensorflow.py | 38 --------- 8 files changed, 8 insertions(+), 299 deletions(-) delete mode 100644 docs/optimization_guide/nncf/code/pruning_tf.py delete mode 100644 docs/optimization_guide/nncf/code/qat_tf.py delete mode 100644 docs/optimization_guide/nncf/ptq/code/ptq_tensorflow.py diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training.rst index 395320487d64fd..a7624084228113 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training.rst @@ -16,13 +16,13 @@ accuracy and higher performance, and better results than post-training quantizat enables you to set the minimum acceptable accuracy value for your optimized model, determining the optimization efficiency. -With a few lines of code, you can apply NNCF compression to a PyTorch or TensorFlow training +With a few lines of code, you can apply NNCF compression to a PyTorch training script. Once the model is optimized, you may convert it to the :doc:`OpenVINO IR format <../../documentation/openvino-ir-format>`, getting even better inference results with OpenVINO Runtime. To optimize your model, you will need: -* A PyTorch or TensorFlow floating-point model. -* A training pipeline set up in the original framework (PyTorch or TensorFlow). +* A PyTorch floating-point model. +* A training pipeline set up in the PyTorch framework. * Training and validation datasets. * A `JSON configuration file `__ specifying which compression methods to use. @@ -45,9 +45,8 @@ quantization errors part of the overall training loss and tries to minimize thei To learn more, see: -* guide on quantization for :doc:`PyTorch and TensorFlow <./compressing-models-during-training/quantization-aware-training>`. +* guide on quantization for :doc:`PyTorch <./compressing-models-during-training/quantization-aware-training>`. * Jupyter notebook on `Quantization Aware Training with NNCF and PyTorch `__. -* Jupyter notebook on `Quantization Aware Training with NNCF and TensorFlow `__. Filter pruning diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst index b372e11a807d8b..f262e905cd5440 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/filter-pruning.rst @@ -46,13 +46,6 @@ In this step, NNCF-related imports are added in the beginning of the training sc :language: python :fragment: [imports] - .. tab-item:: TensorFlow 2 - :sync: tensorflow-2 - - .. doxygensnippet:: docs/optimization_guide/nncf/code/pruning_tf.py - :language: python - :fragment: [imports] - 2. Create NNCF configuration ++++++++++++++++++++++++++++ @@ -68,13 +61,6 @@ of optimization methods (`"compression"` section). :language: python :fragment: [nncf_congig] - .. tab-item:: TensorFlow 2 - :sync: tensorflow-2 - - .. doxygensnippet:: docs/optimization_guide/nncf/code/pruning_tf.py - :language: python - :fragment: [nncf_congig] - Here is a brief description of the required parameters of the Filter Pruning method. For a full description refer to the `GitHub `__ page. @@ -103,13 +89,6 @@ optimization. :language: python :fragment: [wrap_model] - .. tab-item:: TensorFlow 2 - :sync: tensorflow-2 - - .. doxygensnippet:: docs/optimization_guide/nncf/code/pruning_tf.py - :language: python - :fragment: [wrap_model] - 4. Fine-tune the model ++++++++++++++++++++++ @@ -126,14 +105,6 @@ of the original model. :language: python :fragment: [tune_model] - .. tab-item:: TensorFlow 2 - :sync: tensorflow-2 - - .. doxygensnippet:: docs/optimization_guide/nncf/code/pruning_tf.py - :language: python - :fragment: [tune_model] - - 5. Multi-GPU distributed training +++++++++++++++++++++++++++++++++ @@ -149,18 +120,11 @@ fine-tuning that will inform optimization methods to do some adjustments to func :language: python :fragment: [distributed] - .. tab-item:: TensorFlow 2 - :sync: tensorflow-2 - - .. doxygensnippet:: docs/optimization_guide/nncf/code/pruning_tf.py - :language: python - :fragment: [distributed] - 6. Export quantized model +++++++++++++++++++++++++ When fine-tuning finishes, the quantized model can be exported to the corresponding format for further inference: ONNX in -the case of PyTorch and frozen graph - for TensorFlow 2. +the case of PyTorch. .. tab-set:: @@ -171,14 +135,6 @@ the case of PyTorch and frozen graph - for TensorFlow 2. :language: python :fragment: [export] - .. tab-item:: TensorFlow 2 - :sync: tensorflow-2 - - .. doxygensnippet:: docs/optimization_guide/nncf/code/pruning_tf.py - :language: python - :fragment: [export] - - These were the basic steps to applying the QAT method from the NNCF. However, it is required in some cases to save/load model checkpoints during the training. Since NNCF wraps the original model with its own object it provides an API for these needs. @@ -197,14 +153,6 @@ To save model checkpoint use the following API: :language: python :fragment: [save_checkpoint] - .. tab-item:: TensorFlow 2 - :sync: tensorflow-2 - - .. doxygensnippet:: docs/optimization_guide/nncf/code/pruning_tf.py - :language: python - :fragment: [save_checkpoint] - - 8. (Optional) Restore from checkpoint +++++++++++++++++++++++++++++++++++++ @@ -219,20 +167,13 @@ To restore the model from checkpoint you should use the following API: :language: python :fragment: [load_checkpoint] - .. tab-item:: TensorFlow 2 - :sync: tensorflow-2 - - .. doxygensnippet:: docs/optimization_guide/nncf/code/pruning_tf.py - :language: python - :fragment: [load_checkpoint] - For more details, see the following `documentation `__. Deploying pruned model ###################### The pruned model requires an extra step that should be done to get performance improvement. This step involves removal of the -zero filters from the model. This is done at the model conversion step using :doc:`model conversion API <../../model-preparation>` tool when model is converted from the framework representation (ONNX, TensorFlow, etc.) to OpenVINO Intermediate Representation. +zero filters from the model. This is done at the model conversion step using :doc:`model conversion API <../../model-preparation>` tool when model is converted from the framework representation (ONNX, etc.) to OpenVINO Intermediate Representation. * To remove zero filters from the pruned model add the following parameter to the model conversion command: ``transform=Pruning`` @@ -244,6 +185,3 @@ Examples #################### * `PyTorch Image Classification example `__ - -* `TensorFlow Image Classification example `__ - diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/quantization-aware-training.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/quantization-aware-training.rst index a0610d7fdd5a80..e57d81db7f2d98 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/quantization-aware-training.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/compressing-models-during-training/quantization-aware-training.rst @@ -12,11 +12,6 @@ knowledgeable in Python programming and familiar with the training code for the Steps required to apply QAT to the model: -.. note:: - Currently, NNCF for TensorFlow supports the optimization of models created using the Keras - `Sequential API `__ or - `Functional API `__. - 1. Apply Post Training Quantization to the Model ################################################# @@ -31,13 +26,6 @@ Quantize the model using the :doc:`Post-Training Quantization <../quantizing-mod :language: python :fragment: [quantize] - .. tab-item:: TensorFlow 2 - :sync: tensorflow-2 - - .. doxygensnippet:: docs/optimization_guide/nncf/code/qat_tf.py - :language: python - :fragment: [quantize] - 2. Fine-tune the Model ####################### @@ -56,13 +44,6 @@ forward and backward passes. :language: python :fragment: [tune_model] - .. tab-item:: TensorFlow 2 - :sync: tensorflow-2 - - .. doxygensnippet:: docs/optimization_guide/nncf/code/qat_tf.py - :language: python - :fragment: [tune_model] - .. note:: The precision of weight transitions to INT8 only after converting the model to OpenVINO Intermediate Representation. You can expect a reduction in the model footprint only for @@ -85,13 +66,6 @@ To save a model checkpoint, use the following API: :language: python :fragment: [save_checkpoint] - .. tab-item:: TensorFlow 2 - :sync: tensorflow-2 - - .. doxygensnippet:: docs/optimization_guide/nncf/code/qat_tf.py - :language: python - :fragment: [save_checkpoint] - 4. (Optional) Restore from Checkpoint ###################################### @@ -106,13 +80,6 @@ To restore the model from checkpoint, use the following API: :language: python :fragment: [load_checkpoint] - .. tab-item:: TensorFlow 2 - :sync: tensorflow-2 - - .. doxygensnippet:: docs/optimization_guide/nncf/code/qat_tf.py - :language: python - :fragment: [load_checkpoint] - Deploying the Quantized Model ############################## @@ -128,13 +95,6 @@ any additional steps. :language: python :fragment: [inference] - .. tab-item:: TensorFlow 2 - :sync: tensorflow-2 - - .. doxygensnippet:: docs/optimization_guide/nncf/ptq/code/ptq_tensorflow.py - :language: python - :fragment: [inference] - For more details, see the corresponding :doc:`documentation <../../running-inference>`. Examples @@ -142,4 +102,3 @@ Examples * `Quantization-aware Training of Resnet18 PyTorch Model `__ * `Quantization-aware Training of STFPM PyTorch Model `__ -* `Quantization-aware Training of MobileNet v2 TensorFlow Model `__ diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training.rst index aec99cc6f296c5..cb8bc6c52f38e9 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training.rst @@ -25,7 +25,7 @@ flows: .. note - NNCF offers a Python API, for compressing PyTorch, TensorFlow 2.x, ONNX, and OpenVINO IR + NNCF offers a Python API, for compressing PyTorch, ONNX, and OpenVINO IR model formats. OpenVINO IR offers the most comprehensive support. diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst index 75b94741339b93..0dff58d601e968 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst @@ -5,7 +5,7 @@ Basic Quantization Flow Introduction #################### -The basic quantization flow is the simplest way to apply 8-bit quantization to the model. It is available for models in the following frameworks: OpenVINO, PyTorch, TensorFlow 2.x, and ONNX. The basic quantization flow is based on the following steps: +The basic quantization flow is the simplest way to apply 8-bit quantization to the model. It is available for models in the following frameworks: OpenVINO, PyTorch, and ONNX. The basic quantization flow is based on the following steps: * Set up an environment and install dependencies. * Prepare a representative **calibration dataset** that is used to estimate quantization parameters of the activations within the model, for example, of 300 samples. @@ -56,13 +56,6 @@ The transformation function is a function that takes a sample from the dataset a :language: python :fragment: [dataset] - .. tab-item:: TensorFlow - :sync: tensorflow - - .. doxygensnippet:: docs/optimization_guide/nncf/ptq/code/ptq_tensorflow.py - :language: python - :fragment: [dataset] - .. tab-item:: TorchFX :sync: torch_fx @@ -102,13 +95,6 @@ See the `example section <#examples-of-how-to-apply-nncf-post-training-quantizat :language: python :fragment: [quantization] - .. tab-item:: TensorFlow - :sync: tensorflow - - .. doxygensnippet:: docs/optimization_guide/nncf/ptq/code/ptq_tensorflow.py - :language: python - :fragment: [quantization] - .. tab-item:: TorchFX :sync: torch_fx @@ -142,13 +128,6 @@ If you have not already installed OpenVINO developer tools, install it with ``pi :language: python :fragment: [inference] - .. tab-item:: TensorFlow - :sync: tensorflow - - .. doxygensnippet:: docs/optimization_guide/nncf/ptq/code/ptq_tensorflow.py - :language: python - :fragment: [inference] - TorchFX models can utilize OpenVINO optimizations using `torch.compile(..., backend="openvino") `__ functionality: .. tab-set:: @@ -242,5 +221,4 @@ Examples of how to apply NNCF post-training quantization: * `Post-Training Quantization of MobileNet v2 PyTorch Model `__ * `Post-Training Quantization of SSD PyTorch Model `__ * `Post-Training Quantization of MobileNet v2 ONNX Model `__ -* `Post-Training Quantization of MobileNet v2 TensorFlow Model `__ diff --git a/docs/optimization_guide/nncf/code/pruning_tf.py b/docs/optimization_guide/nncf/code/pruning_tf.py deleted file mode 100644 index 93e1b120c3f83b..00000000000000 --- a/docs/optimization_guide/nncf/code/pruning_tf.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (C) 2018-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -#! [imports] -import tensorflow as tf - -from nncf import NNCFConfig -from nncf.tensorflow import create_compressed_model, create_compression_callbacks, \ - register_default_init_args -#! [imports] - -#! [nncf_congig] -nncf_config_dict = { - "input_info": {"sample_size": [1, 3, 224, 224]}, # input shape required for model tracing - "compression": [ - { - "algorithm": "filter_pruning", - "pruning_init": 0.1, - "params": { - "pruning_target": 0.4, - "pruning_steps": 15 - } - }, - { - "algorithm": "quantization", # 8-bit quantization with default settings - }, - ] -} -nncf_config = NNCFConfig.from_dict(nncf_config_dict) -nncf_config = register_default_init_args(nncf_config, train_dataset, batch_size=1) # train_dataset is an instance of tf.data.Dataset -#! [nncf_congig] - -#! [wrap_model] -model = KerasModel() # instance of the tensorflow.keras.Model -compression_ctrl, model = create_compressed_model(model, nncf_config) -#! [wrap_model] - -#! [distributed] -compression_ctrl.distributed() # call it before the training -#! [distributed] - -#! [tune_model] -... # fine-tuning preparations, e.g. dataset, loss, optimization setup, etc. - -# create compression callbacks to control pruning parameters and dump compression statistics -# all the setting are being taked from compression_ctrl, i.e. from NNCF config -compression_callbacks = create_compression_callbacks(compression_ctrl, log_dir="./compression_log") - -# tune quantized model for 50 epochs as the baseline -model.fit(train_dataset, epochs=50, callbacks=compression_callbacks) -#! [tune_model] - -#! [export] -compression_ctrl.export_model("compressed_model.pb") #export to Frozen Graph -#! [export] - -#! [save_checkpoint] -from nncf.tensorflow.utils.state import TFCompressionState -from nncf.tensorflow.callbacks.checkpoint_callback import CheckpointManagerCallback - -checkpoint = tf.train.Checkpoint(model=model, - compression_state=TFCompressionState(compression_ctrl), - ... # the rest of the user-defined objects to save - ) -callbacks = [] -callbacks.append(CheckpointManagerCallback(checkpoint, path_to_checkpoint)) -... -model.fit(..., callbacks=callbacks) -#! [save_checkpoint] - -#! [load_checkpoint] -from nncf.tensorflow.utils.state import TFCompressionStateLoader - -checkpoint = tf.train.Checkpoint(compression_state=TFCompressionStateLoader()) -checkpoint.restore(path_to_checkpoint) -compression_state = checkpoint.compression_state.state - -compression_ctrl, model = create_compressed_model(model, nncf_config, compression_state) -checkpoint = tf.train.Checkpoint(model=model, - ...) -checkpoint.restore(path_to_checkpoint) -#! [load_checkpoint] diff --git a/docs/optimization_guide/nncf/code/qat_tf.py b/docs/optimization_guide/nncf/code/qat_tf.py deleted file mode 100644 index c65cff04dc3b98..00000000000000 --- a/docs/optimization_guide/nncf/code/qat_tf.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 2018-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -#! [quantize] -model = KerasModel() # instance of the tensorflow.keras.Model -quantized_model = nncf.quantize(model, ...) -#! [quantize] - -#! [tune_model] -... # fine-tuning preparations, e.g. dataset, loss, optimization setup, etc. - -# tune quantized model for 5 epochs the same way as the baseline -quantized_model.fit(train_dataset, epochs=5) -#! [tune_model] - -#! [save_checkpoint] -from nncf.tensorflow import ConfigState -from nncf.tensorflow import get_config -from nncf.tensorflow.callbacks.checkpoint_callback import CheckpointManagerCallback - -nncf_config = get_config(quantized_model) -checkpoint = tf.train.Checkpoint(model=quantized_model, - nncf_config_state=ConfigState(nncf_config), - ... # the rest of the user-defined objects to save - ) -callbacks = [] -callbacks.append(CheckpointManagerCallback(checkpoint, path_to_checkpoint)) -... -quantized_model.fit(..., callbacks=callbacks) -#! [save_checkpoint] - -#! [load_checkpoint] -from nncf.tensorflow import ConfigState -from nncf.tensorflow import load_from_config - -checkpoint = tf.train.Checkpoint(nncf_config_state=ConfigState()) -checkpoint.restore(path_to_checkpoint) - -quantized_model = load_from_config(model, checkpoint.nncf_config_state.config) - -checkpoint = tf.train.Checkpoint(model=quantized_model - ... # the rest of the user-defined objects to load - ) -checkpoint.restore(path_to_checkpoint) -#! [load_checkpoint] diff --git a/docs/optimization_guide/nncf/ptq/code/ptq_tensorflow.py b/docs/optimization_guide/nncf/ptq/code/ptq_tensorflow.py deleted file mode 100644 index 0d2b2f56891c51..00000000000000 --- a/docs/optimization_guide/nncf/ptq/code/ptq_tensorflow.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (C) 2018-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -#! [dataset] -import nncf -import tensorflow_datasets as tfds - -calibration_loader = tfds.load(...) - -def transform_fn(data_item): - images, _ = data_item - return images - -calibration_dataset = nncf.Dataset(calibration_loader, transform_fn) -#! [dataset] - -#! [quantization] -import tensorflow as tf -model = tf.saved_model.load("model_path") - -quantized_model = nncf.quantize(model, calibration_dataset) -#! [quantization] - -#! [inference] -import openvino as ov - -# convert TensorFlow model to OpenVINO model -ov_quantized_model = ov.convert_model(quantized_model) - -# compile the model to transform quantized operations to int8 -model_int8 = ov.compile_model(ov_quantized_model) - -input_fp32 = ... # FP32 model input -res = model_int8(input_fp32) - -# save the model -ov.save_model(ov_quantized_model, "quantized_model.xml") -#! [inference]