IREE fix problems and comments

Vladislav-Denisov · Vladislav-Denisov · commit a283d6b67e58 · 2025-11-17T20:52:41.000+03:00
diff --git a/src/inference/README.md b/src/inference/README.md
@@ -1569,4 +1569,4 @@ python3 inference_iree.py \
 [dgl]: https://www.dgl.ai/pages/start.html
 [ogb]: https://ogb.stanford.edu/
 [tensorflow-gpu]: https://www.tensorflow.org/install/pip
-[iree]: https://iree.dev/
+[iree]: https://iree.dev
diff --git a/src/inference/inference_iree.py b/src/inference/inference_iree.py
@@ -1,4 +1,5 @@
 import argparse
+import os
 import sys
 import traceback
 from pathlib import Path
@@ -25,14 +26,13 @@
 try:
     import iree.runtime as ireert  # noqa: E402
 except ImportError as e:
-    log.error(f"IREE import error: {e}")
+    log.error(f'IREE import error: {e}')
     sys.exit(1)
 
 
 def cli_argument_parser():
     parser = argparse.ArgumentParser()
 
-    
     parser.add_argument('-m', '--model',
                         help='Path to .vmfb file with compiled model or .mlir.',
                         required=True,
@@ -129,12 +129,12 @@ def cli_argument_parser():
                         nargs=3,
                         dest='channel_swap')
     parser.add_argument('-tb', '--target_backend',
-                        help='Target backend, for example "llvm-cpu" for CPU.',
+                        help='Target backend, for example `llvm-cpu` for CPU.',
                         default='llvm-cpu',
                         type=str,
                         dest='target_backend')
     parser.add_argument('--opt_level',
-                        help='The optimization level of the task extractions.',
+                        help='The optimization level of the compilation.',
                         type=int,
                         choices=[0, 1, 2, 3],
                         default=2)
@@ -149,38 +149,67 @@ def cli_argument_parser():
 
 def compile_mlir(mlir_path, target_backend, opt_level, extra_compile_args):
     try:
-        log.info(f'Starting model compilation')
+        log.info('Starting model compilation')
         return IREECompiler.compile(mlir_path, target_backend, opt_level, extra_compile_args)
     except Exception as e:
-        log.error(f"Failed to compile MLIR: {e}")
+        log.error(f'Failed to compile MLIR: {e}')
         raise
 
 
-def load_iree_model(vmfb_buffer):
+def load_model_buffer(model_path, target_backend, opt_level, extra_compile_args):
+    if not os.path.exists(model_path):
+        raise FileNotFoundError(f'Model file not found: {model_path}')
+
+    file_type = model_path.split('.')[-1]
+
+    if file_type == 'mlir':
+        if target_backend is None:
+            raise ValueError('target_backend is required for MLIR compilation')
+        vmfb_buffer = compile_mlir(model_path, target_backend, opt_level, extra_compile_args)
+    elif file_type == 'vmfb':
+        with open(model_path, 'rb') as f:
+            vmfb_buffer = f.read()
+    else:
+        raise ValueError(f'The file type {file_type} is not supported. Supported types: .mlir, .vmfb')
+
+    log.info(f'Successfully loaded model buffer from {model_path}')
+    return vmfb_buffer
+
+
+def create_iree_context_from_buffer(vmfb_buffer):
     try:
         config = ireert.Config('local-task')
-
         vm_module = ireert.VmModule.from_flatbuffer(config.vm_instance, vmfb_buffer)
         context = ireert.SystemContext(config=config)
         context.add_vm_module(vm_module)
 
-        log.info(f"Successfully loaded IREE model")
+        log.info('Successfully created IREE context from buffer')
         return context
 
     except Exception as e:
-        log.error(f"Failed to load IREE model: {e}")
+        log.error(f'Failed to create IREE context: {e}')
         raise
 
 
+def load_model(model_path, target_backend, opt_level, extra_compile_args):
+    vmfb_buffer = load_model_buffer(
+        model_path,
+        target_backend=target_backend,
+        opt_level=opt_level,
+        extra_compile_args=extra_compile_args
+    )
+    return create_iree_context_from_buffer(vmfb_buffer)
+
+
 def get_inference_function(model_context, function_name):
     try:
         main_module = model_context.modules.module
         inference_func = main_module[function_name]
-        log.info(f"Using function '{function_name}' for inference")
+        log.info(f'Using function {function_name} for inference')
         return inference_func
 
     except Exception as e:
-        log.error(f"Failed to get inference function: {e}")
+        log.error(f'Failed to get inference function: {e}')
         raise
 
 
@@ -196,7 +225,7 @@ def inference_iree(inference_func, number_iter, get_slice, test_duration):
         time_infer = loop_inference(number_iter, test_duration)(
             inference_iteration
         )(inference_func, get_slice)['time_infer']
-    
+
     log.info('Inference completed')
     return result, time_infer
 
@@ -215,7 +244,7 @@ def infer_slice(inference_func, slice_input):
     input_buffers = list()
     for input_ in slice_input:
         input_buffers.append(ireert.asdevicearray(device, input_))
-    
+
     result = inference_func(*input_buffers)
 
     if hasattr(result, 'to_host'):
@@ -230,7 +259,7 @@ def prepare_output(result, task):
     elif task == 'classification':
         if hasattr(result, 'to_host'):
             result = result.to_host()
-        
+
         # Extract tensor from dict if needed
         if isinstance(result, dict):
             result_key = next(iter(result))
@@ -239,18 +268,18 @@ def prepare_output(result, task):
         else:
             logits = np.array(result)
             output_key = 'output'
-        
+
         # Ensure correct shape (batch_size, num_classes)
         if logits.ndim == 1:
             logits = logits.reshape(1, -1)
         elif logits.ndim > 2:
             logits = logits.reshape(logits.shape[0], -1)
-        
+
         # Apply softmax
         max_logits = np.max(logits, axis=-1, keepdims=True)
         exp_logits = np.exp(logits - max_logits)
         probabilities = exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)
-        
+
         return {output_key: probabilities}
     else:
         raise ValueError(f'Unsupported task {task}')
@@ -270,7 +299,7 @@ def create_dict_for_transformer(args):
 
 def main():
     args = cli_argument_parser()
-    
+
     try:
         model_wrapper = IREEModelWrapper(args)
         data_transformer = IREETransformer(create_dict_for_transformer(args))
@@ -284,16 +313,13 @@ def main():
             target_device=args.target_backend
         )
 
-        file_type = args.model.split('.')[-1]
-        if file_type == 'mlir':
-            vmfb_buffer = compile_mlir(args.model, args.target_backend, args.opt_level, args.extra_compile_args)
-        elif file_type == 'vmfb':
-            with open(args.model, 'rb') as f:
-                vmfb_buffer = f.read()
-        else:
-            raise ValueError(f'The file type {file_type} is not supported')
-
-        model_context = load_iree_model(vmfb_buffer)
+        log.info('Loading model')
+        model_context = load_model(
+            model_path=args.model,
+            target_backend=args.target_backend,
+            opt_level=args.opt_level,
+            extra_compile_args=args.extra_compile_args
+        )
         inference_func = get_inference_function(model_context, args.function_name)
 
         log.info(f'Preparing input data: {args.input}')
@@ -309,10 +335,10 @@ def main():
 
         log.info('Computing performance metrics')
         inference_result = pp.calculate_performance_metrics_sync_mode(
-            args.batch_size, 
+            args.batch_size,
             inference_time
         )
-    
+
         report_writer.update_execution_results(**inference_result)
         report_writer.write_report(args.report_path)
 
diff --git a/src/inference/io_model_wrapper.py b/src/inference/io_model_wrapper.py
@@ -415,7 +415,7 @@ class IREEModelWrapper(IOModelWrapper):
     def __init__(self, args):
         self._input_shapes = [args.input_shape]
         self._model_path = args.model
-    
+
     def get_input_layer_names(self, model):
         return ['input']
 
diff --git a/src/model_converters/iree_converter/README.md b/src/model_converters/iree_converter/README.md
@@ -27,8 +27,8 @@ This script converts model from `<source_framework>` to the IREE MLIR format.
 - `-w / --weights` is a path to an `.pth` file with trained weights for PyTorch models.
 - `-tm / --torch_module` is a module with the model architecture for PyTorch models. Default: `torchvision.models`.
 - `-is / --input_shape` is an input shape in the format BxWxHxC, where B is a batch size, W is an input tensor width, H is an input tensor height, C is an input tensor number of channels. Required for PyTorch models.
-- `--onnx_opset_version` is the ONNX opset version for ONNX models. Default: `18`.
-- `-o / --output_mlir` is path to save the MLIR file. Required.
+- `--onnx_opset_version` is an ONNX opset version for ONNX models. Default: `18`.
+- `-o / --output_mlir` is a path to save the MLIR file. Required.
 
 ### Parameter combinations
 #### For ONNX models:
@@ -44,21 +44,21 @@ Two loading methods are supported (mutually exclusive):
 - Optional: `--weights <path/to/weights.pth>`
 
 ### Examples of usage
-ONNX model conversion:
+ONNX model conversion ([source of the model efficientnet-b0.onnx](https://github.com/onnx/models/blob/main/Computer_Vision/efficientnet_b0_Opset17_timm/efficientnet_b0_Opset17.onnx)):
 ```sh
 python3 iree_converter.py -f onnx -m efficientnet-b0.onnx \
                          --onnx_opset_version 18 \
                          -o ./output/efficientnet-b0.mlir
 ```
 
-PyTorch model from file:
+PyTorch model from file (a `.pt` file can be created by following [this tutorial](https://docs.pytorch.org/docs/main/notes/serialization.html#saving-and-loading-torch-nn-modules)):
 ```sh
 python3 iree_converter.py -f pytorch -m resnet50.pt \
                          -is 1 224 224 3 \
                          -o ./output/resnet50.mlir
 ```
 
-PyTorch model from torchvision with pretrained weights:
+PyTorch model from [torchvision](https://docs.pytorch.org/vision/main/models.html) with pretrained weights:
 ```sh
 python3 iree_converter.py -f pytorch -mn resnet50 \
                          -tm torchvision.models \
@@ -88,19 +88,19 @@ iree_compiler.py --mlir <input.mlir> \
 This script compiles model from `.mlir` format to the deployable binary format for the specified target backend.
 
 ### IREE compiler parameters
-- `-m / --mlir` - Path to an .mlir file with a model. Required.
-- `-tb / --target_backend` - Target backend for compilation. Required. Examples: `llvm-cpu`, `cuda`, `vulkan`, `vmvx`.
-- `--opt_level` - The optimization level of the compilation. Choices: `0`, `1`, `2`, `3`. Default: `2`.
-- `-o / --output_file` - Path to save the compiled model. Required.
-- `--extra_args` - Extra arguments for compilation. Optional.
+- `-m / --mlir` is a path to an .mlir file with a model. Required.
+- `-tb / --target_backend` is a target backend for compilation. Required. Examples: `llvm-cpu`, `cuda`, `vulkan`, `vmvx`.
+- `--opt_level` is an optimization level of the compilation. Choices: `0`, `1`, `2`, `3`. Default: `2`.
+- `-o / --output_file` is a path to save the compiled model. Required.
+- `--extra_args` is extra arguments for compilation. Optional.
 
 ### Supported target backends
-- `llvm-cpu` - CPU execution using LLVM
-- `cuda` - NVIDIA GPU execution using CUDA
-- `vulkan` - GPU execution using Vulkan API
-- `vmvx` - Portable VM bytecode execution
-- `metal` - Apple GPU execution using Metal
-- `rocm` - AMD GPU execution using ROCm
+- `llvm-cpu` - CPU execution using LLVM.
+- `cuda` - NVIDIA GPU execution using CUDA.
+- `vulkan` - GPU execution using Vulkan API.
+- `vmvx` - Portable VM bytecode execution.
+- `metal` - Apple GPU execution using Metal.
+- `rocm` - AMD GPU execution using ROCm.
 
 ### Examples of usage
 ```sh
diff --git a/src/model_converters/iree_converter/iree_auxiliary/onnx_format.py b/src/model_converters/iree_converter/iree_auxiliary/onnx_format.py
@@ -16,13 +16,13 @@ def source_framework(self):
 
     def _validate_arguments(self):
         if self.model_path is None or self.model_path == '':
-            raise ValueError("The model_path parameter is required for ONNX conversion.")
+            raise ValueError('The model_path parameter is required for ONNX conversion.')
 
         if not os.path.exists(self.model_path):
-            raise FileNotFoundError(f"Model file not found: {self.model_path}")
+            raise FileNotFoundError(f'Model file not found: {self.model_path}')
 
         if self.onnx_opset_version is None:
-            raise ValueError("The onnx_opset_version parameter is required for ONNX conversion.")
+            raise ValueError('The onnx_opset_version parameter is required for ONNX conversion.')
 
     def _convert_model_from_framework(self):
         if not os.path.exists(self.output_mlir):
@@ -36,5 +36,5 @@ def _convert_model_from_framework(self):
             self.output_mlir,
         ]
         import_cmd = subprocess.list2cmdline(import_args)
-        ret = subprocess.run(import_cmd, shell=True, capture_output=True)
+        subprocess.run(import_cmd, shell=True, capture_output=True)
         return
diff --git a/src/model_converters/iree_converter/iree_auxiliary/pytorch_format.py b/src/model_converters/iree_converter/iree_auxiliary/pytorch_format.py
@@ -17,36 +17,38 @@ def __init__(self, args):
     @property
     def source_framework(self):
         return 'PyTorch'
-    
+
     def _validate_arguments(self):
         if self.input_shape is None:
-            raise ValueError("The input_shape parameter is required for PyTorch conversion.")
-        
+            raise ValueError('The input_shape parameter is required for PyTorch conversion.')
+
         # Check load methods:
         # 1. model_path (load from file)
         # 2. module + model_name (load from torch module)
         has_model_path = self.model_path is not None and self.model_path != ''
-        has_module_model = (self.module is not None and self.module != '' and 
-                          self.model_name is not None and self.model_name != '')
-        
+        has_module_model = (self.module is not None
+                            and self.module != ''
+                            and self.model_name is not None
+                            and self.model_name != '')
+
         if not has_model_path and not has_module_model:
             raise ValueError(
-                "For PyTorch conversion, you must specify either model_path, "
-                "or torch_module and model_name"
+                'For PyTorch conversion, you must specify either model_path, '
+                'or torch_module and model_name'
             )
-        
+
         if has_model_path and has_module_model:
             raise ValueError(
-                "Provided incompatible parameters for PyTorch conversion (model_path and torch_module+model_name). "
-                "Please choose only one method of this."
+                'Provided incompatible parameters for PyTorch conversion (model_path and torch_module+model_name). '
+                'Please choose only one method of this.'
             )
 
         if has_model_path and not os.path.exists(self.model_path):
-            raise FileNotFoundError(f"Model file not found: {self.model_path}")
+            raise FileNotFoundError(f'Model file not found: {self.model_path}')
 
-        if (self.model_weights is not None and self.model_weights != '' and 
-            not os.path.exists(self.model_weights)):
-            raise FileNotFoundError(f"Model weights not found: {self.model_weights}")
+        if (self.model_weights is not None and self.model_weights != ''
+                and not os.path.exists(self.model_weights)):
+            raise FileNotFoundError(f'Model weights not found: {self.model_weights}')
 
     def __get_model_from_path(self):
         self.log.info(f'Loading model from path {self.model_path}')
diff --git a/src/model_converters/iree_converter/iree_compiler.py b/src/model_converters/iree_converter/iree_compiler.py
@@ -10,6 +10,7 @@
 
 log = configure_logger()
 
+
 def cli_argument_parser():
     parser = argparse.ArgumentParser()
     parser.add_argument('-m', '--mlir',