Spaces: Running on Zero
Prime Cai committed · dbabfb0
1 Parent(s): ec81f63

add num of images

Browse files:
- app.py +18 -6
- pipeline.py +5 -5
app.py
CHANGED
@@ -6,7 +6,7 @@ from PIL import Image
 from diffusers.utils import load_image
 from pipeline import FluxConditionalPipeline
 from transformer import FluxTransformer2DConditionalModel
-
+from recaption import enhance_prompt
 import os

 pipe = None
@@ -44,7 +44,8 @@ def generate_image(
     gemini_prompt: bool = True,
     guidance: float = 3.5,
     i_guidance: float = 1.0,
-    t_guidance: float = 1.0
+    t_guidance: float = 1.0,
+    num_images: int = 4,
 ):
     w, h, min_size = image.size[0], image.size[1], min(image.size)
     image = image.crop(
@@ -52,8 +53,13 @@ def generate_image(
     ).resize((512, 512))

     control_image = load_image(image)
+    text_list = []
+    for _ in range(num_images):
+        if gemini_prompt:
+            text = enhance_prompt(image, text.strip())
+        text_list.append(text.strip())
     result_image = pipe(
-        prompt=text,
+        prompt=text_list,
         negative_prompt="",
         num_inference_steps=28,
         height=512,
@@ -63,7 +69,7 @@ def generate_image(
         guidance_scale_real_i=i_guidance,
         guidance_scale_real_t=t_guidance,
         gemini_prompt=gemini_prompt,
-    ).images[0]
+    ).images

     return result_image

@@ -125,6 +131,10 @@ with demo:
         <a href="https://huggingface.co/datasets/primecai/dsd_data" target="_blank"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20HuggingFace%20-Data-yellow" style="display:inline-block;"></a>
         <a href="https://huggingface.co/primecai/dsd_model" target="_blank"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face%20-Model-green" style="display:inline-block;"></a>
         <a href="https://x.com/prime_cai?lang=en" target="_blank"><img src="https://img.shields.io/twitter/follow/prime_cai?style=social" style="display:inline-block;"></a>
+
+        <div style="text-align: center;">
+            The model does have randomness, from both the Gemini prompt enhancement and the initial diffusion noise. Please give it a few tries to get the best results.
+        </div>
     </div>
     """
     )
@@ -132,14 +142,16 @@ with demo:
     iface = gr.Interface(
         fn=generate_image,
         inputs=[
-            gr.Image(type="pil", width=
+            gr.Image(type="pil", width=300),
             gr.Textbox(lines=2, label="text", info="Could be something as simple as 'this character playing soccer'."),
             gr.Checkbox(label="Gemini prompt", value=True, info="Use Gemini to enhance the prompt. This is recommended for most cases, unless you have a specific prompt similar to the examples in mind."),
             gr.Slider(minimum=1.0, maximum=6.0, step=0.5, value=3.5, label="guidance scale", info="Tip: start with 3.5, then gradually increase if the consistency is consistently off"),
             gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.5, label="real guidance scale for image", info="Tip: increase if the image is not consistent"),
             gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.0, label="real guidance scale for prompt", info="Tip: increase if the prompt is not consistent"),
+            gr.Slider(minimum=1, maximum=5, step=1, value=4, label="Number of images", info="Select how many images to generate"),
         ],
-        outputs=gr.Image(type="pil"),
+        # outputs=gr.Image(type="pil"),
+        outputs=gr.Gallery(label="Generated Images", height=544),
         # examples=get_samples(),
         live=False,
     )
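In short, this commit moves the Gemini recaptioning step out of the pipeline and into the Gradio handler: the handler now builds one enhanced prompt per requested image, passes the whole list to the pipeline, and displays the returned images in a gallery. Below is a minimal sketch of that loop as a standalone helper, assuming `enhance_prompt(image, text)` from the repo's `recaption` module returns an enhanced prompt string; the helper name `build_prompt_list` is hypothetical.

```python
from recaption import enhance_prompt  # repo's Gemini-based recaptioner


def build_prompt_list(image, user_text, gemini_prompt=True, num_images=4):
    """Build one prompt per image to generate (hypothetical helper)."""
    base = user_text.strip()
    prompts = []
    for _ in range(num_images):
        # Enhance from the raw user text on each pass. Note the committed
        # loop reassigns `text`, so its later iterations re-enhance the
        # previous Gemini output rather than the original text.
        text = enhance_prompt(image, base) if gemini_prompt else base
        prompts.append(text.strip())
    return prompts
```

Because the Gemini enhancement is itself a source of randomness (as the new in-app notice says), the `num_images` prompts generally differ, which is what gives the gallery its variety; `gr.Gallery` accepts the list of PIL images that `pipe(...).images` returns.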
pipeline.py
CHANGED
@@ -39,7 +39,7 @@ from diffusers.utils import (
 )
 from diffusers.utils.torch_utils import randn_tensor
 from diffusers.pipelines.pipeline_utils import DiffusionPipeline
-from recaption import enhance_prompt
+# from recaption import enhance_prompt


 if is_torch_xla_available():
@@ -722,8 +722,8 @@ class FluxConditionalPipeline(DiffusionPipeline, SD3LoraLoaderMixin):

         device = self._execution_device

-        if gemini_prompt:
-            prompt = enhance_prompt(image, prompt)
+        # if gemini_prompt:
+        #     prompt = enhance_prompt(image, prompt)
         # if gemini_prompt:
         #     while True:
         #         try:
@@ -779,8 +779,8 @@ class FluxConditionalPipeline(DiffusionPipeline, SD3LoraLoaderMixin):
         # 3. Preprocess image
         image = self.image_processor.preprocess(image)
         # image = image[..., :512]
-        image = torch.nn.functional.interpolate(image, size=512)
-        black_image = torch.full((1, 3, 512, 512), -1.0)
+        image = torch.nn.functional.interpolate(image, size=512).repeat(batch_size, 1, 1, 1)
+        black_image = torch.full((batch_size, 3, 512, 512), -1.0)
         image = torch.cat([image, black_image], dim=3)
         latents_cond = self.vae.encode(image.to(self.vae.dtype).to(self.vae.device)).latent_dist.sample()
         latents_cond = (
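On the pipeline side, the change needed for multi-image batches is confined to the conditioning-image layout: the single preprocessed image is repeated `batch_size` times and a black canvas of the same size is concatenated along the width before VAE encoding. Here is a self-contained sketch of that layout, with the inline code factored into a hypothetical helper `make_conditioning_batch`; the `[-1, 1]` pixel range is an assumption inferred from `-1.0` being used as the black fill value.

```python
import torch


def make_conditioning_batch(image: torch.Tensor, batch_size: int) -> torch.Tensor:
    """Tile one conditioning image into a batched side-by-side layout."""
    # image: (1, 3, H, W), values assumed to lie in [-1, 1] after preprocessing
    image = torch.nn.functional.interpolate(image, size=512)  # (1, 3, 512, 512)
    image = image.repeat(batch_size, 1, 1, 1)                 # (B, 3, 512, 512)
    # -1.0 is black in [-1, 1] pixel space; the pipeline later casts the
    # result to the VAE's dtype and device before encoding.
    black = torch.full((batch_size, 3, 512, 512), -1.0)
    return torch.cat([image, black], dim=3)                   # (B, 3, 512, 1024)
```

Concatenating along `dim=3` (width) gives each sample a conditioning panel next to a blank panel, and the whole (batch_size, 3, 512, 1024) tensor is then encoded in a single VAE pass.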