Enable running text_encoder on CPU (embeddings are moved to the pipeline device afterwards)

2022-09-30 21:39:23 +08:00
parent db1d7d5c48
commit dba7b01da7
4 changed files with 21 additions and 10 deletions
--- a/lama_cleaner/model/sd_pipeline.py
+++ b/lama_cleaner/model/sd_pipeline.py
@@ -236,7 +236,9 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
            truncation=True,
            return_tensors="pt",
        )
-        text_embeddings = self.text_encoder(text_input.input_ids.to(self.device))[0]
+        text_encoder_device = self.text_encoder.device
+
+        text_embeddings = self.text_encoder(text_input.input_ids.to(text_encoder_device, non_blocking=True))[0].to(self.device, non_blocking=True)

        # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
        # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
@@ -248,7 +250,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
            uncond_input = self.tokenizer(
                [""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
            )
-            uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0]
+            uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(text_encoder_device, non_blocking=True))[0].to(self.device, non_blocking=True)

            # For classifier free guidance, we need to do two forward passes.
            # Here we concatenate the unconditional and text embeddings into a single batch
@@ -269,7 +271,6 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
        for i, t in tqdm(enumerate(self.scheduler.timesteps[t_start:])):
            # expand the latents if we are doing classifier free guidance
            latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
-
            # predict the noise residual
            noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample