add paint by example

2022-12-10 22:06:15 +08:00
parent 6e9d3d8442
commit 203f2bc9c7
18 changed files with 572 additions and 82 deletions
--- a/lama_cleaner/model/base.py
+++ b/lama_cleaner/model/base.py
@@ -211,6 +211,26 @@ class InpaintModel:

        return result

+    def _apply_cropper(self, image, mask, config: Config):
+        """Cut the configured cropper rectangle out of image and mask.
+
+        The rectangle is read from config.croper_x / croper_y / croper_width /
+        croper_height and clipped to the image before slicing.
+
+        return: (crop_img, crop_mask, (l, t, r, b)); r and b are exclusive.
+        """
+        height, width = image.shape[:2]
+
+        # Floor left/top at 0 (a negative start would index from the end)
+        # and cap right/bottom at the image size.
+        left = max(config.croper_x, 0)
+        top = max(config.croper_y, 0)
+        right = min(config.croper_x + config.croper_width, width)
+        bottom = min(config.croper_y + config.croper_height, height)
+
+        crop_box = (left, top, right, bottom)
+        return image[top:bottom, left:right, :], mask[top:bottom, left:right], crop_box
+
    def _run_box(self, image, mask, box, config: Config):
        """

--- a/lama_cleaner/model/paint_by_example.py
+++ b/lama_cleaner/model/paint_by_example.py
@@ -0,0 +1,80 @@
+import random
+
+import PIL
+import PIL.Image
+import cv2
+import numpy as np
+import torch
+from diffusers import DiffusionPipeline
+from lama_cleaner.model.base import InpaintModel
+from lama_cleaner.schema import Config
+
+
+class PaintByExample(InpaintModel):
+    """Inpainting guided by an example image, via the Paint-by-Example diffusers pipeline."""
+    pad_mod = 8
+    min_size = 512
+
+    def init_model(self, device: torch.device, **kwargs):
+        """Load the pretrained pipeline onto device: fp16 on usable CUDA, else fp32."""
+        # Match on device type so indexed devices such as cuda:0 also get fp16.
+        use_gpu = torch.device(device).type == "cuda" and torch.cuda.is_available()
+        torch_dtype = torch.float16 if use_gpu else torch.float32
+        self.model = DiffusionPipeline.from_pretrained(
+            "Fantasy-Studio/Paint-by-Example", torch_dtype=torch_dtype
+        )
+        self.model.enable_attention_slicing()
+        self.model = self.model.to(device)
+
+    def forward(self, image, mask, config: Config):
+        """Input image and output image have same size
+        image: [H, W, C] RGB
+        mask: [H, W, 1] 255 means area to repaint
+        return: BGR IMAGE
+        """
+        # Seed the Python, NumPy and torch (CPU + all CUDA devices) RNGs from config.
+        seed = config.paint_by_example_seed
+        random.seed(seed)
+        np.random.seed(seed)
+        torch.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+        output = self.model(
+            image=PIL.Image.fromarray(image),
+            mask_image=PIL.Image.fromarray(mask[:, :, -1], mode="L"),
+            example_image=config.paint_by_example_example_image,
+            num_inference_steps=config.paint_by_example_steps,
+            output_type='np.array',
+        ).images[0]
+        output = (output * 255).round().astype("uint8")
+        return cv2.cvtColor(output, cv2.COLOR_RGB2BGR)
+
+    @torch.no_grad()
+    def __call__(self, image, mask, config: Config):
+        """
+        images: [H, W, C] RGB, not normalized
+        masks: [H, W]
+        return: BGR IMAGE
+        """
+        if not config.use_croper:
+            return self._pad_forward(image, mask, config)
+
+        crop_img, crop_mask, (l, t, r, b) = self._apply_cropper(image, mask, config)
+        crop_image = self._pad_forward(crop_img, crop_mask, config)
+        # Copy first: [:, :, ::-1] is a view, so the paste would clobber the input.
+        inpaint_result = image[:, :, ::-1].copy()
+        inpaint_result[t:b, l:r, :] = crop_image
+        return inpaint_result
+
+    def forward_post_process(self, result, image, mask, config):
+        """Optionally histogram-match result and Gaussian-blur mask, per config flags."""
+        if config.paint_by_example_match_histograms:
+            result = self._match_histograms(result, image[:, :, ::-1], mask)
+        if config.paint_by_example_mask_blur != 0:
+            k = 2 * config.paint_by_example_mask_blur + 1  # always an odd kernel size
+            mask = cv2.GaussianBlur(mask, (k, k), 0)
+        return result, image, mask
+
+    @staticmethod
+    def is_downloaded() -> bool:
+        # The model is downloaded at app start and can't be switched in frontend settings.
+        return True
--- a/lama_cleaner/model/sd.py
+++ b/lama_cleaner/model/sd.py
@@ -12,31 +12,6 @@ from lama_cleaner.model.base import InpaintModel
 from lama_cleaner.schema import Config, SDSampler


-#
-#
-# def preprocess_image(image):
-#     w, h = image.size
-#     w, h = map(lambda x: x - x % 32, (w, h))  # resize to integer multiple of 32
-#     image = image.resize((w, h), resample=PIL.Image.LANCZOS)
-#     image = np.array(image).astype(np.float32) / 255.0
-#     image = image[None].transpose(0, 3, 1, 2)
-#     image = torch.from_numpy(image)
-#     # [-1, 1]
-#     return 2.0 * image - 1.0
-#
-#
-# def preprocess_mask(mask):
-#     mask = mask.convert("L")
-#     w, h = mask.size
-#     w, h = map(lambda x: x - x % 32, (w, h))  # resize to integer multiple of 32
-#     mask = mask.resize((w // 8, h // 8), resample=PIL.Image.NEAREST)
-#     mask = np.array(mask).astype(np.float32) / 255.0
-#     mask = np.tile(mask, (4, 1, 1))
-#     mask = mask[None].transpose(0, 1, 2, 3)  # what does this step do?
-#     mask = 1 - mask  # repaint white, keep black
-#     mask = torch.from_numpy(mask)
-#     return mask
-
 class CPUTextEncoderWrapper:
    def __init__(self, text_encoder, torch_dtype):
        self.config = text_encoder.config
@@ -92,17 +67,6 @@ class SD(InpaintModel):
        return: BGR IMAGE
        """

-        # image = norm_img(image)  # [0, 1]
-        # image = image * 2 - 1  # [0, 1] -> [-1, 1]
-
-        # resize to latent feature map size
-        # h, w = mask.shape[:2]
-        # mask = cv2.resize(mask, (h // 8, w // 8), interpolation=cv2.INTER_AREA)
-        # mask = norm_img(mask)
-        #
-        # image = torch.from_numpy(image).unsqueeze(0).to(self.device)
-        # mask = torch.from_numpy(mask).unsqueeze(0).to(self.device)
-
        scheduler_config = self.model.scheduler.config

        if config.sd_sampler == SDSampler.ddim:
@@ -139,7 +103,6 @@ class SD(InpaintModel):
            prompt=config.prompt,
            negative_prompt=config.negative_prompt,
            mask_image=PIL.Image.fromarray(mask[:, :, -1], mode="L"),
-            strength=config.sd_strength,
            num_inference_steps=config.sd_steps,
            guidance_scale=config.sd_guidance_scale,
            output_type="np.array",
@@ -159,30 +122,10 @@ class SD(InpaintModel):
        masks: [H, W]
        return: BGR IMAGE
        """
-        img_h, img_w = image.shape[:2]
-
        # boxes = boxes_from_mask(mask)
        if config.use_croper:
-            logger.info("use croper")
-            l, t, w, h = (
-                config.croper_x,
-                config.croper_y,
-                config.croper_width,
-                config.croper_height,
-            )
-            r = l + w
-            b = t + h
-
-            l = max(l, 0)
-            r = min(r, img_w)
-            t = max(t, 0)
-            b = min(b, img_h)
-
-            crop_img = image[t:b, l:r, :]
-            crop_mask = mask[t:b, l:r]
-
+            crop_img, crop_mask, (l, t, r, b) = self._apply_cropper(image, mask, config)
            crop_image = self._pad_forward(crop_img, crop_mask, config)
-
            inpaint_result = image[:, :, ::-1]
            inpaint_result[t:b, l:r, :] = crop_image
        else: