Commit 61ddcdf

Merge branch 'main' into unified-SP-attention

2 parents: 9ebcff5 + 6290fdf

File tree: 214 files changed, +24111 additions, −3411 deletions


docs/source/en/_toctree.yml

Lines changed: 20 additions & 0 deletions

````diff
@@ -349,6 +349,8 @@
   title: DiTTransformer2DModel
 - local: api/models/easyanimate_transformer3d
   title: EasyAnimateTransformer3DModel
+- local: api/models/flux2_transformer
+  title: Flux2Transformer2DModel
 - local: api/models/flux_transformer
   title: FluxTransformer2DModel
 - local: api/models/hidream_image_transformer
@@ -357,6 +359,8 @@
   title: HunyuanDiT2DModel
 - local: api/models/hunyuanimage_transformer_2d
   title: HunyuanImageTransformer2DModel
+- local: api/models/hunyuan_video15_transformer_3d
+  title: HunyuanVideo15Transformer3DModel
 - local: api/models/hunyuan_video_transformer_3d
   title: HunyuanVideoTransformer3DModel
 - local: api/models/latte_transformer3d
@@ -371,6 +375,8 @@
   title: MochiTransformer3DModel
 - local: api/models/omnigen_transformer
   title: OmniGenTransformer2DModel
+- local: api/models/ovisimage_transformer2d
+  title: OvisImageTransformer2DModel
 - local: api/models/pixart_transformer2d
   title: PixArtTransformer2DModel
 - local: api/models/prior_transformer
@@ -395,6 +401,8 @@
   title: WanAnimateTransformer3DModel
 - local: api/models/wan_transformer_3d
   title: WanTransformer3DModel
+- local: api/models/z_image_transformer2d
+  title: ZImageTransformer2DModel
   title: Transformers
 - sections:
 - local: api/models/stable_cascade_unet
@@ -431,6 +439,8 @@
   title: AutoencoderKLHunyuanImageRefiner
 - local: api/models/autoencoder_kl_hunyuan_video
   title: AutoencoderKLHunyuanVideo
+- local: api/models/autoencoder_kl_hunyuan_video15
+  title: AutoencoderKLHunyuanVideo15
 - local: api/models/autoencoderkl_ltx_video
   title: AutoencoderKLLTXVideo
 - local: api/models/autoencoderkl_magvit
@@ -525,6 +535,8 @@
   title: EasyAnimate
 - local: api/pipelines/flux
   title: Flux
+- local: api/pipelines/flux2
+  title: Flux2
 - local: api/pipelines/control_flux_inpaint
   title: FluxControlInpaint
 - local: api/pipelines/hidream
@@ -541,6 +553,8 @@
   title: Kandinsky 2.2
 - local: api/pipelines/kandinsky3
   title: Kandinsky 3
+- local: api/pipelines/kandinsky5_image
+  title: Kandinsky 5.0 Image
 - local: api/pipelines/kolors
   title: Kolors
 - local: api/pipelines/latent_consistency_models
@@ -559,6 +573,8 @@
   title: MultiDiffusion
 - local: api/pipelines/omnigen
   title: OmniGen
+- local: api/pipelines/ovis_image
+  title: Ovis-Image
 - local: api/pipelines/pag
   title: PAG
 - local: api/pipelines/paint_by_example
@@ -634,6 +650,8 @@
   title: VisualCloze
 - local: api/pipelines/wuerstchen
   title: Wuerstchen
+- local: api/pipelines/z_image
+  title: Z-Image
   title: Image
 - sections:
 - local: api/pipelines/allegro
@@ -648,6 +666,8 @@
   title: Framepack
 - local: api/pipelines/hunyuan_video
   title: HunyuanVideo
+- local: api/pipelines/hunyuan_video15
+  title: HunyuanVideo1.5
 - local: api/pipelines/i2vgenxl
   title: I2VGen-XL
 - local: api/pipelines/kandinsky5_video
````

docs/source/en/api/cache.md

Lines changed: 6 additions & 0 deletions

````diff
@@ -34,3 +34,9 @@ Cache methods speedup diffusion transformers by storing and reusing intermediate
 [[autodoc]] FirstBlockCacheConfig

 [[autodoc]] apply_first_block_cache
+
+### TaylorSeerCacheConfig
+
+[[autodoc]] TaylorSeerCacheConfig
+
+[[autodoc]] apply_taylorseer_cache
````

docs/source/en/api/loaders/lora.md

Lines changed: 11 additions & 1 deletion

````diff
@@ -30,7 +30,9 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi
 - [`CogView4LoraLoaderMixin`] provides similar functions for [CogView4](https://huggingface.co/docs/diffusers/main/en/api/pipelines/cogview4).
 - [`AmusedLoraLoaderMixin`] is for the [`AmusedPipeline`].
 - [`HiDreamImageLoraLoaderMixin`] provides similar functions for [HiDream Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/hidream)
-- [`QwenImageLoraLoaderMixin`] provides similar functions for [Qwen Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/qwen)
+- [`QwenImageLoraLoaderMixin`] provides similar functions for [Qwen Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/qwen).
+- [`ZImageLoraLoaderMixin`] provides similar functions for [Z-Image](https://huggingface.co/docs/diffusers/main/en/api/pipelines/zimage).
+- [`Flux2LoraLoaderMixin`] provides similar functions for [Flux2](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux2).
 - [`LoraBaseMixin`] provides a base class with several utility methods to fuse, unfuse, unload, LoRAs and more.

 > [!TIP]
@@ -56,6 +58,10 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi

 [[autodoc]] loaders.lora_pipeline.FluxLoraLoaderMixin

+## Flux2LoraLoaderMixin
+
+[[autodoc]] loaders.lora_pipeline.Flux2LoraLoaderMixin
+
 ## CogVideoXLoraLoaderMixin

 [[autodoc]] loaders.lora_pipeline.CogVideoXLoraLoaderMixin
@@ -107,6 +113,10 @@ LoRA is a fast and lightweight training method that inserts and trains a signifi

 [[autodoc]] loaders.lora_pipeline.QwenImageLoraLoaderMixin

+## ZImageLoraLoaderMixin
+
+[[autodoc]] loaders.lora_pipeline.ZImageLoraLoaderMixin
+
 ## KandinskyLoraLoaderMixin
 [[autodoc]] loaders.lora_pipeline.KandinskyLoraLoaderMixin
````

Lines changed: 36 additions & 0 deletions (new file)

<!-- Copyright 2025 The HuggingFace Team. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License. -->

# AutoencoderKLHunyuanVideo15

The 3D variational autoencoder (VAE) model with KL loss used in [HunyuanVideo1.5](https://github.com/Tencent/HunyuanVideo-1.5) by Tencent.

The model can be loaded with the following code snippet.

```python
import torch
from diffusers import AutoencoderKLHunyuanVideo15

vae = AutoencoderKLHunyuanVideo15.from_pretrained(
    "hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-480p_t2v", subfolder="vae", torch_dtype=torch.float32
)

# make sure to enable tiling to avoid OOM
vae.enable_tiling()
```

## AutoencoderKLHunyuanVideo15

[[autodoc]] AutoencoderKLHunyuanVideo15
- decode
- encode
- all

## DecoderOutput

[[autodoc]] models.autoencoders.vae.DecoderOutput
Lines changed: 19 additions & 0 deletions (new file, Apache-2.0 license header as above)

# Flux2Transformer2DModel

A Transformer model for image-like data from [Flux2](https://hf.co/black-forest-labs/FLUX.2-dev).

## Flux2Transformer2DModel

[[autodoc]] Flux2Transformer2DModel
Lines changed: 30 additions & 0 deletions (new file, Apache-2.0 license header as above)

# HunyuanVideo15Transformer3DModel

A Diffusion Transformer model for 3D video-like data used in [HunyuanVideo1.5](https://github.com/Tencent/HunyuanVideo-1.5).

The model can be loaded with the following code snippet.

```python
import torch
from diffusers import HunyuanVideo15Transformer3DModel

transformer = HunyuanVideo15Transformer3DModel.from_pretrained(
    "hunyuanvideo-community/HunyuanVideo-1.5-Diffusers-480p_t2v", subfolder="transformer", torch_dtype=torch.bfloat16
)
```

## HunyuanVideo15Transformer3DModel

[[autodoc]] HunyuanVideo15Transformer3DModel

## Transformer2DModelOutput

[[autodoc]] models.modeling_outputs.Transformer2DModelOutput
Lines changed: 24 additions & 0 deletions (new file, Apache-2.0 license header as above)

# OvisImageTransformer2DModel

The model can be loaded with the following code snippet.

```python
import torch
from diffusers import OvisImageTransformer2DModel

transformer = OvisImageTransformer2DModel.from_pretrained(
    "AIDC-AI/Ovis-Image-7B", subfolder="transformer", torch_dtype=torch.bfloat16
)
```

## OvisImageTransformer2DModel

[[autodoc]] OvisImageTransformer2DModel
Lines changed: 19 additions & 0 deletions (new file, Apache-2.0 license header as above)

# ZImageTransformer2DModel

A Transformer model for image-like data from [Z-Image](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo).

## ZImageTransformer2DModel

[[autodoc]] ZImageTransformer2DModel

docs/source/en/api/pipelines/bria_fibo.md

Lines changed: 6 additions & 6 deletions

````diff
@@ -21,9 +21,10 @@ With only 8 billion parameters, FIBO provides a new level of image quality, prom
 FIBO is trained exclusively on a structured prompt and will not work with freeform text prompts.
 you can use the [FIBO-VLM-prompt-to-JSON](https://huggingface.co/briaai/FIBO-VLM-prompt-to-JSON) model or the [FIBO-gemini-prompt-to-JSON](https://huggingface.co/briaai/FIBO-gemini-prompt-to-JSON) to convert your freeform text prompt to a structured JSON prompt.

-its not recommended to use freeform text prompts directly with FIBO, as it will not produce the best results.
+> [!NOTE]
+> Avoid using freeform text prompts directly with FIBO because it does not produce the best results.

-you can learn more about FIBO in [Bria Fibo Hugging Face page](https://huggingface.co/briaai/FIBO).
+Refer to the Bria Fibo Hugging Face [page](https://huggingface.co/briaai/FIBO) to learn more.


 ## Usage
@@ -37,9 +38,8 @@ hf auth login
 ```

-## BriaPipeline
+## BriaFiboPipeline

-[[autodoc]] BriaPipeline
+[[autodoc]] BriaFiboPipeline
 - all
-- __call__
-
+- __call__
````
Lines changed: 39 additions & 0 deletions (new file, Apache-2.0 license header as above)

# Flux2

<div class="flex flex-wrap space-x-1">
  <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
  <img alt="MPS" src="https://img.shields.io/badge/MPS-000000?style=flat&logo=apple&logoColor=white%22">
</div>

Flux.2 is the latest series of image generation models from Black Forest Labs, preceded by the [Flux.1](./flux.md) series. It is an entirely new model with a new architecture, pre-trained from scratch.

Original model checkpoints for Flux.2 can be found [here](https://huggingface.co/black-forest-labs). Original inference code can be found [here](https://github.com/black-forest-labs/flux2).

> [!TIP]
> Flux.2 can be quite expensive to run on consumer hardware. However, you can apply a suite of optimizations to run it faster and in a more memory-friendly manner. Check out [this section](https://huggingface.co/blog/sd3#memory-optimizations-for-sd3) for more details. Additionally, Flux.2 can benefit from quantization for memory efficiency, with a trade-off in inference latency. Refer to [this blog post](https://huggingface.co/blog/quanto-diffusers) to learn more.
>
> [Caching](../../optimization/cache) may also speed up inference by storing and reusing intermediate outputs.
## Caption upsampling

Flux.2 can potentially generate better outputs with better prompts. You can "upsample" an input prompt by setting the `caption_upsample_temperature` argument in the pipeline call. The [official implementation](https://github.com/black-forest-labs/flux2/blob/5a5d316b1b42f6b59a8c9194b77c8256be848432/src/flux2/text_encoder.py#L140) recommends setting this value to 0.15.
## Flux2Pipeline

[[autodoc]] Flux2Pipeline
- all
- __call__
