
Commit 9913ed9

change examples and data

1 parent 3aec3d2 commit 9913ed9

4 files changed: +54 −39 lines

packages/tasks/src/tasks/image-text-to-image/about.md (24 additions, 16 deletions)

````diff
@@ -39,23 +39,31 @@ Models that perform specific transformations based on text conditions, such as c
 You can use the Diffusers library to interact with image-text-to-image models.

 ```python
-from diffusers import FluxControlPipeline
-from PIL import Image
 import torch
-
-# Load the model
-pipe = FluxControlPipeline.from_pretrained(
-    "black-forest-labs/FLUX.2-dev",
-    torch_dtype=torch.bfloat16
-).to("cuda")
-
-# Load input image
-image = Image.open("input.jpg").convert("RGB")
-
-# Edit the image with a text prompt
-prompt = "Make it a snowy winter scene"
-edited_image = pipe(prompt=prompt, image=image).images[0]
-edited_image.save("edited_image.png")
+from diffusers import Flux2Pipeline
+from diffusers.utils import load_image
+
+repo_id = "black-forest-labs/FLUX.2-dev"
+device = "cuda:0"
+torch_dtype = torch.bfloat16
+
+pipe = Flux2Pipeline.from_pretrained(
+    repo_id, torch_dtype=torch_dtype
+)
+pipe.enable_model_cpu_offload()  # No need for CPU offload on >80 GB VRAM cards (H200, B200, etc.); do `pipe.to(device)` instead
+
+prompt = "Realistic macro photograph of a hermit crab using a soda can as its shell, partially emerging from the can, captured with sharp detail and natural colors, on a sunlit beach with soft shadows and a shallow depth of field, with blurred ocean waves in the background. The can has the text `BFL Diffusers` on it and it has a color gradient that starts with #FF5733 at the top and transitions to #33FF57 at the bottom."
+
+# cat_image = load_image("https://huggingface.co/spaces/zerogpu-aoti/FLUX.1-Kontext-Dev-fp8-dynamic/resolve/main/cat.png")
+image = pipe(
+    prompt=prompt,
+    # image=[cat_image],  # multi-image input
+    generator=torch.Generator(device=device).manual_seed(42),
+    num_inference_steps=50,
+    guidance_scale=4,
+).images[0]
+
+image.save("flux2_output.png")
 ```

 ## Useful Resources
````
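The commented-out `cat_image` lines in the new example hint at multi-image conditioning. A minimal sketch of how that might look if uncommented, assuming `Flux2Pipeline` accepts a list of reference images via `image=` as the comment suggests (the edit prompt below is invented for illustration):

```python
import torch
from diffusers import Flux2Pipeline
from diffusers.utils import load_image

# Reference image from the commented-out line in the example above.
cat_image = load_image(
    "https://huggingface.co/spaces/zerogpu-aoti/FLUX.1-Kontext-Dev-fp8-dynamic/resolve/main/cat.png"
)

pipe = Flux2Pipeline.from_pretrained(
    "black-forest-labs/FLUX.2-dev", torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()

image = pipe(
    prompt="Make the cat wear a tiny knitted hat",  # hypothetical prompt
    image=[cat_image],  # assumed multi-image input, per the comment above
    generator=torch.Generator(device="cuda:0").manual_seed(42),
    num_inference_steps=50,
    guidance_scale=4,
).images[0]
image.save("flux2_cat_output.png")
```

The seeded `torch.Generator` mirrors the example above and keeps the output reproducible across runs.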

packages/tasks/src/tasks/image-text-to-image/data.ts (4 additions, 4 deletions)

```diff
@@ -5,18 +5,18 @@ const taskData: TaskDataCustom = {
 	demo: {
 		inputs: [
 			{
-				filename: "image-text-to-image-input.png",
+				filename: "image-to-image-input.jpeg",
 				type: "img",
 			},
 			{
-				label: "Text Prompt",
-				content: "Make it winter, add snow",
+				label: "Input",
+				content: "A city above clouds, pastel colors, Victorian style",
 				type: "text",
 			},
 		],
 		outputs: [
 			{
-				filename: "image-text-to-image-output.png",
+				filename: "image-to-image-output.png",
 				type: "img",
 			},
 		],
```

packages/tasks/src/tasks/image-text-to-video/about.md (22 additions, 15 deletions)

````diff
@@ -39,22 +39,29 @@ Models that perform specific video transformations based on text conditions, suc
 You can use the Diffusers library to interact with image-text-to-video models.

 ```python
-from diffusers import LTXImageToVideoPipeline
-from PIL import Image
 import torch
-
-# Load the model
-pipe = LTXImageToVideoPipeline.from_pretrained(
-    "Lightricks/LTX-Video",
-    torch_dtype=torch.bfloat16
-).to("cuda")
-
-# Load input image
-image = Image.open("input.jpg").convert("RGB")
-
-# Generate video with a text prompt
-prompt = "A camera pan showing the scene in motion"
-video = pipe(prompt=prompt, image=image).frames
+from diffusers import LTXImageToVideoPipeline
+from diffusers.utils import export_to_video, load_image
+
+pipe = LTXImageToVideoPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16)
+pipe.to("cuda")
+
+image = load_image(
+    "https://huggingface.co/datasets/a-r-r-o-w/tiny-meme-dataset-captioned/resolve/main/images/8.png"
+)
+prompt = "A young girl stands calmly in the foreground, looking directly at the camera, as a house fire rages in the background. Flames engulf the structure, with smoke billowing into the air. Firefighters in protective gear rush to the scene, a fire truck labeled '38' visible behind them. The girl's neutral expression contrasts sharply with the chaos of the fire, creating a poignant and emotionally charged scene."
+negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
+
+video = pipe(
+    image=image,
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    width=704,
+    height=480,
+    num_frames=161,
+    num_inference_steps=50,
+).frames[0]
+export_to_video(video, "output.mp4", fps=24)
 ```

 ## Useful Resources
````

packages/tasks/src/tasks/image-text-to-video/data.ts (4 additions, 4 deletions)

```diff
@@ -5,18 +5,18 @@ const taskData: TaskDataCustom = {
 	demo: {
 		inputs: [
 			{
-				filename: "image-text-to-video-input.png",
+				filename: "image-to-video-input.jpg",
 				type: "img",
 			},
 			{
-				label: "Text Prompt",
-				content: "A camera pan showing the scene in motion",
+				label: "Input",
+				content: "Darth Vader is surfing on the waves.",
 				type: "text",
 			},
 		],
 		outputs: [
 			{
-				filename: "image-text-to-video-output.gif",
+				filename: "text-to-video-output.gif",
 				type: "img",
 			},
 		],
```
