text-to-image stable-diffusion

This is a Stable Diffusion model fine-tuned to generate images of people giving a thumbs up.

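How to use it (the script below generates ten images from the prompt "thumbs up" and prints their CLIP score; it requires a CUDA-capable GPU):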


from diffusers import StableDiffusionPipeline
import torch
from torchmetrics.functional.multimodal import clip_score
from functools import partial

# Load the fine-tuned checkpoint with float16 weights and move it to the CUDA device.
model_ckpt = "raghav-gaggar/stable-diffusion-thumbs-up"
sd_pipeline = StableDiffusionPipeline.from_pretrained(model_ckpt, torch_dtype=torch.float16).to("cuda")

# The same prompt repeated ten times, so one pipeline call yields ten images.
prompts = ["thumbs up"] * 10

# "np" is the output type diffusers documents for NumPy arrays; the "numpy"
# spelling is not among the documented values (`pil`, `np`, `pt`, `latent`).
images = sd_pipeline(prompts, num_images_per_prompt=1, output_type="np").images
print(images.shape)

# Bind the CLIP checkpoint once so the scorer only needs (images, prompts).
clip_score_fn = partial(clip_score, model_name_or_path="openai/clip-vit-base-patch16")

def calculate_clip_score(images, prompts):
    """Return the CLIP score of ``images`` against ``prompts``, rounded to 4 decimals.

    ``images`` is a NumPy array that is multiplied by 255 and cast to uint8
    before scoring (this assumes float pixel values in [0, 1] - TODO confirm
    against the pipeline output). ``prompts`` is passed unchanged to
    ``clip_score_fn``.
    """
    pixel_bytes = (images * 255).astype("uint8")
    # Reorder axes to (0, 3, 1, 2); presumably NHWC -> NCHW, which is what
    # the scorer is expected to consume - verify against torchmetrics.
    batch = torch.from_numpy(pixel_bytes).permute(0, 3, 1, 2)
    score = clip_score_fn(batch, prompts).detach()
    return round(float(score), 4)


# Score the generated batch against the prompts that produced it, then report it.
sd_clip_score = calculate_clip_score(images=images, prompts=prompts)
print(f"CLIP score: {sd_clip_score}")

Sample pictures of this concept:

0 1 2 3