🎨 完整的 IOPaint 项目更新

## 主要更新

- ✨ 更新所有依赖到最新稳定版本
- 📝 添加详细的项目文档和模型推荐
- 🔧 配置 VSCode Cloud Studio 预览功能
- 🐛 修复 PyTorch API 弃用警告

## 依赖更新

- diffusers: 0.27.2 → 0.35.2
- gradio: 4.21.0 → 5.46.0
- peft: 0.7.1 → 0.18.0
- Pillow: 9.5.0 → 11.3.0
- fastapi: 0.108.0 → 0.116.2

## 新增文件

- CLAUDE.md - 项目架构和开发指南
- UPGRADE_NOTES.md - 详细的升级说明
- .vscode/preview.yml - 预览配置
- .vscode/LAUNCH_GUIDE.md - 启动指南
- .gitignore - 更新的忽略规则

## 代码修复

- 修复 iopaint/model/ldm.py 中的 torch.cuda.amp.autocast() 弃用警告

## 文档更新

- README.md - 添加模型推荐和使用指南
- 完整的项目源码（iopaint/）
- Web 前端源码（web_app/）

🤖 Generated with Claude Code
2025-11-28 17:10:24 +00:00
parent 03b999e9ea
commit 1b87a98261
332 changed files with 77453 additions and 26 deletions
--- a/iopaint/model/helper/__init__.py
+++ b/iopaint/model/helper/__init__.py
--- a/iopaint/model/helper/controlnet_preprocess.py
+++ b/iopaint/model/helper/controlnet_preprocess.py
@@ -0,0 +1,68 @@
+import torch
+import PIL
+import cv2
+from PIL import Image
+import numpy as np
+
+from iopaint.helper import pad_img_to_modulo
+
+
+def make_canny_control_image(image: np.ndarray) -> Image:
+    """Build a three-channel Canny edge image from ``image``.
+
+    Edges are detected with the fixed thresholds 100 and 200, the resulting
+    edge map is replicated into three channels, and the result is wrapped in
+    a PIL image.
+    """
+    edges = cv2.Canny(image, 100, 200)
+    # Stack the same edge map three times along a new trailing channel axis.
+    edges_rgb = np.stack([edges, edges, edges], axis=2)
+    return PIL.Image.fromarray(edges_rgb)
+
+
+def make_openpose_control_image(image: np.ndarray) -> Image:
+    """Run the OpenPose detector on ``image`` with hand and face detection on.
+
+    The detector is loaded from the "lllyasviel/ControlNet" pretrained
+    weights on every call and its output is returned unchanged.
+    """
+    # Imported here so controlnet_aux is only needed when this is called.
+    from controlnet_aux import OpenposeDetector
+
+    detector = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
+    return detector(image, hand_and_face=True)
+
+
+def resize_image(input_image, resolution):
+    """Rescale ``input_image`` so its shorter side is roughly ``resolution``.
+
+    ``input_image`` must have a three-element shape (height, width,
+    channels). Both output sides are rounded to the nearest multiple of 64
+    after scaling, so the aspect ratio is only approximately preserved.
+    """
+    src_h, src_w, _ = input_image.shape
+    src_h, src_w = float(src_h), float(src_w)
+    scale = float(resolution) / min(src_h, src_w)
+
+    # Snap each scaled side to the nearest multiple of 64.
+    dst_h = int(np.round(src_h * scale / 64.0)) * 64
+    dst_w = int(np.round(src_w * scale / 64.0)) * 64
+
+    # Lanczos when enlarging, area interpolation otherwise.
+    if scale > 1:
+        method = cv2.INTER_LANCZOS4
+    else:
+        method = cv2.INTER_AREA
+    return cv2.resize(input_image, (dst_w, dst_h), interpolation=method)
+
+
+def make_depth_control_image(image: np.ndarray) -> Image:
+    """Produce a three-channel depth image for ``image`` using MiDaS.
+
+    The input is padded with ``pad_img_to_modulo`` (multiple of 64, minimum
+    size 512) before detection; the detector output is then cropped to the
+    input's original height and width and replicated into three channels.
+    """
+    # Imported here so controlnet_aux is only needed when this is called.
+    from controlnet_aux import MidasDetector
+
+    detector = MidasDetector.from_pretrained("lllyasviel/Annotators")
+
+    src_h, src_w = image.shape[:2]
+    padded = pad_img_to_modulo(image, mod=64, square=False, min_size=512)
+    # NOTE(review): assumes the detector returns an array that can be sliced
+    # by the original size -- confirm against controlnet_aux.
+    depth = detector(padded)[0:src_h, 0:src_w]
+    # Add a channel axis and repeat the depth map three times along it.
+    depth_rgb = np.repeat(depth[:, :, None], 3, axis=2)
+    return PIL.Image.fromarray(depth_rgb)
+
+
+def make_inpaint_control_image(image: np.ndarray, mask: np.ndarray) -> torch.Tensor:
+    """
+    Build the inpaint control tensor from an image and its mask.
+
+    image: [H, W, C] RGB
+    mask: [H, W, 1] 255 means area to repaint
+
+    Returns a float32 tensor of shape [1, C, H, W] with values scaled to
+    [0, 1]; every pixel whose mask value exceeds 128 is set to -1.0 in all
+    channels. The inputs are not modified.
+    """
+    # astype() copies, so the caller's array is left untouched.
+    scaled = image.astype(np.float32) / 255.0
+    repaint_area = mask[:, :, -1] > 128
+    scaled[repaint_area] = -1.0  # set as masked pixel
+    # Add a batch axis, then move channels ahead of the spatial axes.
+    batched = scaled[np.newaxis].transpose(0, 3, 1, 2)
+    return torch.from_numpy(batched)
--- a/iopaint/model/helper/cpu_text_encoder.py
+++ b/iopaint/model/helper/cpu_text_encoder.py
@@ -0,0 +1,41 @@
+import torch
+from transformers import PreTrainedModel
+
+from ..utils import torch_gc
+
+
+class CPUTextEncoderWrapper(PreTrainedModel):
+    """Wrap a text encoder so that it runs on the CPU in float32.
+
+    Outputs are moved back to the input's device and cast to
+    ``torch_dtype``. The ``device`` and ``dtype`` properties report the
+    encoder's original device and the requested dtype, not where the
+    wrapped encoder actually lives.
+    """
+
+    def __init__(self, text_encoder, torch_dtype):
+        super().__init__(text_encoder.config)
+        self.config = text_encoder.config
+        # Record the device before the encoder is moved to the CPU.
+        self._device = text_encoder.device
+        # cpu not support float16
+        cpu_encoder = text_encoder.to(torch.device("cpu"), non_blocking=True)
+        self.text_encoder = cpu_encoder.to(torch.float32, non_blocking=True)
+        self.torch_dtype = torch_dtype
+        del text_encoder
+        torch_gc()
+
+    def __call__(self, x, **kwargs):
+        """Encode ``x`` with the wrapped encoder and convert the outputs.
+
+        Each output value is moved to ``x``'s device and cast to
+        ``self.torch_dtype``; tuple values come back as lists of converted
+        tensors.
+        """
+        caller_device = x.device
+
+        def _restore(tensor):
+            # Back to the caller's device first, then to the requested dtype.
+            return tensor.to(caller_device).to(self.torch_dtype)
+
+        outputs = self.text_encoder(x.to(self.text_encoder.device), **kwargs)
+        for key, value in outputs.items():
+            if isinstance(value, tuple):
+                outputs[key] = [_restore(item) for item in value]
+            else:
+                outputs[key] = _restore(value)
+        return outputs
+
+    @property
+    def dtype(self):
+        """The dtype outputs are cast to, as passed to the constructor."""
+        return self.torch_dtype
+
+    @property
+    def device(self) -> torch.device:
+        """
+        `torch.device`: The device the wrapped encoder was on when this wrapper was created, recorded before it
+        was moved to the CPU.
+        """
+        return self._device
--- a/iopaint/model/helper/g_diffuser_bot.py
+++ b/iopaint/model/helper/g_diffuser_bot.py
@@ -0,0 +1,62 @@
+import cv2
+import numpy as np
+
+
+def expand_image(cv2_img, top: int, right: int, bottom: int, left: int):
+    """Pad an image outwards and build the matching outpainting mask.
+
+    Args:
+        cv2_img: Image array with exactly three channels ([H, W, 3]).
+        top, right, bottom, left: Number of pixels to add on each side.
+
+    Returns:
+        A ``(new_img, mask_image)`` tuple. ``new_img`` is the input extended
+        by replicating its edge pixels. ``mask_image`` is a single-channel
+        uint8 array of the same height and width, 0 over the original image
+        area and 255 over the added border. The mask edge is hard; no
+        blurring is applied.
+    """
+    assert cv2_img.shape[2] == 3
+    origin_h, origin_w = cv2_img.shape[:2]
+
+    # TODO: compare BORDER_REPLICATE against a constant white fill.
+    new_img = cv2.copyMakeBorder(
+        cv2_img, top, bottom, left, right, cv2.BORDER_REPLICATE
+    )
+
+    # Zeros over the original area, surrounded by a 255 border of the same
+    # widths used for the image above.
+    mask_image = cv2.copyMakeBorder(
+        np.zeros((origin_h, origin_w), np.uint8),
+        top,
+        bottom,
+        left,
+        right,
+        cv2.BORDER_CONSTANT,
+        value=255,
+    )
+    return new_img, mask_image
+
+
+if __name__ == "__main__":
+    # Manual smoke test: expand the sample image 100 px to the left and write
+    # the expanded image and its mask to the working directory.
+    from pathlib import Path
+
+    current_dir = Path(__file__).parent.absolute().resolve()
+    # This module lives in iopaint/model/helper/, so the sample image under
+    # iopaint/tests/ is two directories up.
+    image_path = current_dir.parent.parent / "tests" / "bunny.jpeg"
+    init_image = cv2.imread(str(image_path))
+    if init_image is None:
+        # cv2.imread returns None instead of raising when it cannot read.
+        raise FileNotFoundError(f"Could not read test image: {image_path}")
+    # expand_image only accepts the four side widths.
+    init_image, mask_image = expand_image(
+        init_image,
+        top=0,
+        right=0,
+        bottom=0,
+        left=100,
+    )
+    print(mask_image.dtype, mask_image.min(), mask_image.max())
+    print(init_image.dtype, init_image.min(), init_image.max())
+    mask_image = mask_image.astype(np.uint8)
+    init_image = init_image.astype(np.uint8)
+    cv2.imwrite("expanded_image.png", init_image)
+    cv2.imwrite("expanded_mask.png", mask_image)