Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/photo_maker.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@ Running PMV2 is now a two-step process:
```
python face_detect.py input_image_dir
```
An ```id_embeds.safetensors``` file will be generated in ```input_images_dir```
An ```id_embeds.bin``` file will be generated in ```input_images_dir```

**Note: this step is only needed to run once; the same ```id_embeds``` can be reused**

- Run the same command as in version 1 but replacing ```photomaker-v1.safetensors``` with ```photomaker-v2.safetensors```.

You can download ```photomaker-v2.safetensors``` from [here](https://huggingface.co/bssrdf/PhotoMakerV2)

- All the command line parameters from Version 1 remain the same for Version 2
- All the command line parameters from Version 1 remain the same for Version 2 plus one extra pointing to a valid ```id_embeds``` file: --pm-id-embed-path [path_to__id_embeds.bin]


30 changes: 18 additions & 12 deletions stable-diffusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2220,18 +2220,24 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
id_embeds = load_tensor_from_file(work_ctx, pm_params.id_embed_path);
// print_ggml_tensor(id_embeds, true, "id_embeds:");
}
id_cond.c_crossattn = sd_ctx->sd->id_encoder(work_ctx, init_img, id_cond.c_crossattn, id_embeds, class_tokens_mask);
int64_t t1 = ggml_time_ms();
LOG_INFO("Photomaker ID Stacking, taking %" PRId64 " ms", t1 - t0);
if (sd_ctx->sd->free_params_immediately) {
sd_ctx->sd->pmid_model->free_params_buffer();
}
// Encode input prompt without the trigger word for delayed conditioning
prompt_text_only = sd_ctx->sd->cond_stage_model->remove_trigger_from_prompt(work_ctx, prompt);
// printf("%s || %s \n", prompt.c_str(), prompt_text_only.c_str());
prompt = prompt_text_only; //
if (sample_steps < 50) {
LOG_WARN("It's recommended to use >= 50 steps for photo maker!");
if (pmv2 && id_embeds == nullptr) {
LOG_WARN("Provided PhotoMaker images, but NO valid ID embeds file for PM v2");
LOG_WARN("Turn off PhotoMaker");
sd_ctx->sd->stacked_id = false;
} else {
id_cond.c_crossattn = sd_ctx->sd->id_encoder(work_ctx, init_img, id_cond.c_crossattn, id_embeds, class_tokens_mask);
int64_t t1 = ggml_time_ms();
LOG_INFO("Photomaker ID Stacking, taking %" PRId64 " ms", t1 - t0);
if (sd_ctx->sd->free_params_immediately) {
sd_ctx->sd->pmid_model->free_params_buffer();
}
// Encode input prompt without the trigger word for delayed conditioning
prompt_text_only = sd_ctx->sd->cond_stage_model->remove_trigger_from_prompt(work_ctx, prompt);
// printf("%s || %s \n", prompt.c_str(), prompt_text_only.c_str());
prompt = prompt_text_only; //
if (sample_steps < 50) {
LOG_WARN("It's recommended to use >= 50 steps for photo maker!");
}
}
} else {
LOG_WARN("Provided PhotoMaker model file, but NO input ID images");
Expand Down
Loading