
Commit 9913ed9

change examples and data

1 parent 3aec3d2 commit 9913ed9

4 files changed: +54 −39 lines

packages/tasks/src/tasks/image-text-to-image/about.md (24 additions, 16 deletions)

````diff
@@ -39,23 +39,31 @@ Models that perform specific transformations based on text conditions, such as c
 You can use the Diffusers library to interact with image-text-to-image models.

 ```python
-from diffusers import FluxControlPipeline
-from PIL import Image
 import torch
-
-# Load the model
-pipe = FluxControlPipeline.from_pretrained(
-    "black-forest-labs/FLUX.2-dev",
-    torch_dtype=torch.bfloat16
-).to("cuda")
-
-# Load input image
-image = Image.open("input.jpg").convert("RGB")
-
-# Edit the image with a text prompt
-prompt = "Make it a snowy winter scene"
-edited_image = pipe(prompt=prompt, image=image).images[0]
-edited_image.save("edited_image.png")
+from diffusers import Flux2Pipeline
+from diffusers.utils import load_image
+
+repo_id = "black-forest-labs/FLUX.2-dev"
+device = "cuda:0"
+torch_dtype = torch.bfloat16
+
+pipe = Flux2Pipeline.from_pretrained(
+    repo_id, torch_dtype=torch_dtype
+)
+pipe.enable_model_cpu_offload()  # No need for CPU offload on >80 GB VRAM cards (H200, B200, etc.); do `pipe.to(device)` instead
+
+prompt = "Realistic macro photograph of a hermit crab using a soda can as its shell, partially emerging from the can, captured with sharp detail and natural colors, on a sunlit beach with soft shadows and a shallow depth of field, with blurred ocean waves in the background. The can has the text `BFL Diffusers` on it and it has a color gradient that starts with #FF5733 at the top and transitions to #33FF57 at the bottom."
+
+# cat_image = load_image("https://huggingface.co/spaces/zerogpu-aoti/FLUX.1-Kontext-Dev-fp8-dynamic/resolve/main/cat.png")
+image = pipe(
+    prompt=prompt,
+    # image=[cat_image],  # multi-image input
+    generator=torch.Generator(device=device).manual_seed(42),
+    num_inference_steps=50,
+    guidance_scale=4,
+).images[0]
+
+image.save("flux2_output.png")
 ```

 ## Useful Resources
````
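The commented-out `cat_image` lines in the new example hint at multi-image conditioning. A minimal sketch of how that might look if uncommented, assuming `Flux2Pipeline` accepts a list of reference images via `image=` as the comment suggests (the edit prompt below is invented for illustration):

```python
import torch
from diffusers import Flux2Pipeline
from diffusers.utils import load_image

# Reference image from the commented-out line in the example above.
cat_image = load_image(
    "https://huggingface.co/spaces/zerogpu-aoti/FLUX.1-Kontext-Dev-fp8-dynamic/resolve/main/cat.png"
)

pipe = Flux2Pipeline.from_pretrained(
    "black-forest-labs/FLUX.2-dev", torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()

image = pipe(
    prompt="Make the cat wear a tiny knitted hat",  # hypothetical prompt
    image=[cat_image],  # assumed multi-image input, per the comment above
    generator=torch.Generator(device="cuda:0").manual_seed(42),
    num_inference_steps=50,
    guidance_scale=4,
).images[0]
image.save("flux2_cat_output.png")
```

The seeded `torch.Generator` mirrors the example above and keeps the output reproducible across runs.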

packages/tasks/src/tasks/image-text-to-image/data.ts (4 additions, 4 deletions)

```diff
@@ -5,18 +5,18 @@ const taskData: TaskDataCustom = {
 	demo: {
 		inputs: [
 			{
-				filename: "image-text-to-image-input.png",
+				filename: "image-to-image-input.jpeg",
 				type: "img",
 			},
 			{
-				label: "Text Prompt",
-				content: "Make it winter, add snow",
+				label: "Input",
+				content: "A city above clouds, pastel colors, Victorian style",
 				type: "text",
 			},
 		],
 		outputs: [
 			{
-				filename: "image-text-to-image-output.png",
+				filename: "image-to-image-output.png",
 				type: "img",
 			},
 		],
```

packages/tasks/src/tasks/image-text-to-video/about.md (22 additions, 15 deletions)

````diff
@@ -39,22 +39,29 @@ Models that perform specific video transformations based on text conditions, suc
 You can use the Diffusers library to interact with image-text-to-video models.

 ```python
-from diffusers import LTXImageToVideoPipeline
-from PIL import Image
 import torch
-
-# Load the model
-pipe = LTXImageToVideoPipeline.from_pretrained(
-    "Lightricks/LTX-Video",
-    torch_dtype=torch.bfloat16
-).to("cuda")
-
-# Load input image
-image = Image.open("input.jpg").convert("RGB")
-
-# Generate video with a text prompt
-prompt = "A camera pan showing the scene in motion"
-video = pipe(prompt=prompt, image=image).frames
+from diffusers import LTXImageToVideoPipeline
+from diffusers.utils import export_to_video, load_image
+
+pipe = LTXImageToVideoPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16)
+pipe.to("cuda")
+
+image = load_image(
+    "https://huggingface.co/datasets/a-r-r-o-w/tiny-meme-dataset-captioned/resolve/main/images/8.png"
+)
+prompt = "A young girl stands calmly in the foreground, looking directly at the camera, as a house fire rages in the background. Flames engulf the structure, with smoke billowing into the air. Firefighters in protective gear rush to the scene, a fire truck labeled '38' visible behind them. The girl's neutral expression contrasts sharply with the chaos of the fire, creating a poignant and emotionally charged scene."
+negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
+
+video = pipe(
+    image=image,
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    width=704,
+    height=480,
+    num_frames=161,
+    num_inference_steps=50,
+).frames[0]
+export_to_video(video, "output.mp4", fps=24)
 ```

 ## Useful Resources
````

packages/tasks/src/tasks/image-text-to-video/data.ts (4 additions, 4 deletions)

```diff
@@ -5,18 +5,18 @@ const taskData: TaskDataCustom = {
 	demo: {
 		inputs: [
 			{
-				filename: "image-text-to-video-input.png",
+				filename: "image-to-video-input.jpg",
 				type: "img",
 			},
 			{
-				label: "Text Prompt",
-				content: "A camera pan showing the scene in motion",
+				label: "Input",
+				content: "Darth Vader is surfing on the waves.",
 				type: "text",
 			},
 		],
 		outputs: [
 			{
-				filename: "image-text-to-video-output.gif",
+				filename: "text-to-video-output.gif",
 				type: "img",
 			},
 		],
```
