🎨 完整的 IOPaint 项目更新

## 主要更新
- ⬆️ 更新所有依赖到最新稳定版本
- 📝 添加详细的项目文档和模型推荐
- 🔧 配置 VSCode Cloud Studio 预览功能
- 🐛 修复 PyTorch API 弃用警告

## 依赖更新
- diffusers: 0.27.2 → 0.35.2
- gradio: 4.21.0 → 5.46.0
- peft: 0.7.1 → 0.18.0
- Pillow: 9.5.0 → 11.3.0
- fastapi: 0.108.0 → 0.116.2

## 新增文件
- CLAUDE.md - 项目架构和开发指南
- UPGRADE_NOTES.md - 详细的升级说明
- .vscode/preview.yml - 预览配置
- .vscode/LAUNCH_GUIDE.md - 启动指南
- .gitignore - 更新的忽略规则

## 代码修复
- 修复 iopaint/model/ldm.py 中的 torch.cuda.amp.autocast() 弃用警告

## 文档更新
- README.md - 添加模型推荐和使用指南
- 完整的项目源码(iopaint/)
- Web 前端源码(web_app/)

🤖 Generated with Claude Code
This commit is contained in:
let5sne
2025-11-28 17:10:24 +00:00
parent 03b999e9ea
commit 1b87a98261
332 changed files with 77453 additions and 26 deletions

View File

View File

@@ -0,0 +1,68 @@
import torch
import PIL
import cv2
from PIL import Image
import numpy as np
from iopaint.helper import pad_img_to_modulo
def make_canny_control_image(image: np.ndarray) -> Image.Image:
    """Build a Canny edge map to use as a control image.

    Args:
        image: Input image array handed directly to ``cv2.Canny``
            (presumably 8-bit, as OpenCV's Canny expects -- confirm with
            callers).

    Returns:
        A PIL image whose three channels are identical copies of the
        single-channel edge map.
    """
    # Fixed hysteresis thresholds: 100 (lower) and 200 (upper).
    edges = cv2.Canny(image, 100, 200)
    # Add a trailing channel axis and replicate it three times so the
    # result has an RGB-like layout.
    edges_rgb = np.concatenate([edges[:, :, None]] * 3, axis=2)
    return Image.fromarray(edges_rgb)
def make_openpose_control_image(image: np.ndarray) -> Image.Image:
    """Run the OpenPose detector on ``image`` and return its output.

    Args:
        image: Input image array passed unchanged to the detector.

    Returns:
        Whatever the detector returns for ``hand_and_face=True``
        (presumably a PIL image -- confirm against the installed
        ``controlnet_aux`` version).
    """
    # Imported lazily so controlnet_aux is only needed when this
    # function is actually called.
    from controlnet_aux import OpenposeDetector

    # NOTE(review): the detector is re-created on every call.
    detector = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
    return detector(image, hand_and_face=True)
def resize_image(input_image, resolution):
    """Resize so the shorter side is about ``resolution``, snapped to 64.

    The image is scaled by ``resolution / min(height, width)`` and each
    side is then rounded to the nearest multiple of 64 (never below 64,
    so the size handed to OpenCV is always positive).

    Args:
        input_image: Array whose first two axes are height and width.
            Both 2-D and 3-D arrays are accepted.
        resolution: Target length of the shorter side before snapping.

    Returns:
        The resized array produced by ``cv2.resize``.
    """
    # Only the spatial axes matter, so grayscale (2-D) input works too.
    height, width = input_image.shape[:2]
    scale = float(resolution) / min(height, width)

    # Snap to a multiple of 64; clamp so a tiny target cannot round to 0,
    # which cv2.resize would reject.
    target_h = max(64, int(np.round(height * scale / 64.0)) * 64)
    target_w = max(64, int(np.round(width * scale / 64.0)) * 64)

    # Lanczos when enlarging, area averaging when shrinking.
    interpolation = cv2.INTER_LANCZOS4 if scale > 1 else cv2.INTER_AREA
    # cv2.resize takes the size as (width, height).
    return cv2.resize(
        input_image,
        (target_w, target_h),
        interpolation=interpolation,
    )
def make_depth_control_image(image: np.ndarray) -> Image.Image:
    """Estimate depth with MiDaS and return it as a 3-channel PIL image.

    The input is padded with ``pad_img_to_modulo`` (``mod=64``,
    ``min_size=512``) before inference, and the detector output is cropped
    back to the original height and width afterwards.

    Args:
        image: Input image array; its first two axes are taken as height
            and width.

    Returns:
        A PIL image built from the depth map replicated three times along
        a new channel axis.
    """
    # Imported lazily so controlnet_aux is only needed when this
    # function is actually called.
    from controlnet_aux import MidasDetector

    # NOTE(review): the detector is re-created on every call.
    midas = MidasDetector.from_pretrained("lllyasviel/Annotators")

    origin_height, origin_width = image.shape[:2]
    padded = pad_img_to_modulo(image, mod=64, square=False, min_size=512)

    depth = midas(padded)
    # Crop back to the pre-padding size.
    depth = depth[0:origin_height, 0:origin_width]
    # Add a channel axis and replicate it three times.
    depth_rgb = np.concatenate([depth[:, :, None]] * 3, axis=2)
    return Image.fromarray(depth_rgb)
def make_inpaint_control_image(image: np.ndarray, mask: np.ndarray) -> torch.Tensor:
    """Convert an image and mask into a batched inpaint control tensor.

    image: [H, W, C] RGB
    mask: [H, W, 1] 255 means area to repaint

    Returns a float32 tensor of shape [1, C, H, W]. Pixel values are
    scaled by 1/255, and every pixel whose mask value (last mask channel)
    is greater than 128 is set to -1.0 in all channels. The input arrays
    are not modified.
    """
    repaint_area = mask[:, :, -1] > 128

    # astype() copies, so the caller's array is left untouched.
    scaled = image.astype(np.float32) / 255.0
    scaled[repaint_area] = -1.0  # set as masked pixel

    # HWC -> 1xCxHxW
    batched = scaled[np.newaxis].transpose(0, 3, 1, 2)
    return torch.from_numpy(batched)

View File

@@ -0,0 +1,41 @@
import torch
from transformers import PreTrainedModel
from ..utils import torch_gc
class CPUTextEncoderWrapper(PreTrainedModel):
    """Wrap a text encoder so it runs on the CPU in float32.

    The wrapper remembers the device the encoder was on when it was
    wrapped and reports that device (and the requested ``torch_dtype``)
    through its ``device`` / ``dtype`` properties. Outputs are moved back
    to the input tensor's device and cast to ``torch_dtype``.
    """

    def __init__(self, text_encoder, torch_dtype):
        super().__init__(text_encoder.config)
        self.config = text_encoder.config
        # Record the device before the encoder is moved to the CPU.
        self._device = text_encoder.device
        self.torch_dtype = torch_dtype
        # cpu not support float16, so the encoder is kept in float32 there.
        self.text_encoder = text_encoder.to(
            torch.device("cpu"), non_blocking=True
        ).to(torch.float32, non_blocking=True)
        del text_encoder
        torch_gc()

    def __call__(self, x, **kwargs):
        caller_device = x.device

        def _restore(tensor):
            # Back to the caller's device first, then to the exposed dtype.
            return tensor.to(caller_device).to(self.torch_dtype)

        outputs = self.text_encoder(x.to(self.text_encoder.device), **kwargs)
        for key, value in outputs.items():
            if isinstance(value, tuple):
                # Tuple-valued entries are replaced by a list of tensors.
                outputs[key] = [_restore(item) for item in value]
            else:
                outputs[key] = _restore(value)
        return outputs

    @property
    def dtype(self):
        """The dtype requested at construction, not the float32 used on CPU."""
        return self.torch_dtype

    @property
    def device(self) -> torch.device:
        """
        `torch.device`: The device the wrapped encoder was on when this
        wrapper was created (not the CPU it actually runs on).
        """
        return self._device

View File

@@ -0,0 +1,62 @@
import cv2
import numpy as np
def expand_image(cv2_img, top: int, right: int, bottom: int, left: int):
    """Pad an image outward and build the matching outpainting mask.

    Args:
        cv2_img: 3-channel image array (asserted via ``shape[2] == 3``).
        top: Number of pixels to add above the image.
        right: Number of pixels to add to the right of the image.
        bottom: Number of pixels to add below the image.
        left: Number of pixels to add to the left of the image.

    Returns:
        ``(new_img, mask_image)`` where ``new_img`` is the input extended
        by replicating its border pixels, and ``mask_image`` is a
        single-channel ``uint8`` array of the same height and width that
        is 0 over the original image area and 255 over the added border.
    """
    assert cv2_img.shape[2] == 3
    origin_h, origin_w = cv2_img.shape[:2]

    # TODO: which is better -- replicating the border pixels (current) or
    # filling the new area with plain white?
    new_img = cv2.copyMakeBorder(
        cv2_img, top, bottom, left, right, cv2.BORDER_REPLICATE
    )

    # Zeros over the original area, 255 over the padded border. The mask
    # edge is hard; no blur is applied.
    mask_image = cv2.copyMakeBorder(
        np.zeros((origin_h, origin_w), np.uint8),
        top,
        bottom,
        left,
        right,
        cv2.BORDER_CONSTANT,
        value=255,
    )
    return new_img, mask_image
if __name__ == "__main__":
    # Manual smoke test: expand a sample image 100 px to the left and
    # write the expanded image and its mask to the working directory.
    # NOTE(review): the input path is a developer-machine absolute path;
    # adjust it before running elsewhere.
    image_path = "/Users/cwq/code/github/IOPaint/iopaint/tests/bunny.jpeg"
    init_image = cv2.imread(str(image_path))
    if init_image is None:
        # cv2.imread returns None instead of raising when it cannot read.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # expand_image accepts only the four padding sizes.
    init_image, mask_image = expand_image(
        init_image,
        top=0,
        right=0,
        bottom=0,
        left=100,
    )
    print(mask_image.dtype, mask_image.min(), mask_image.max())
    print(init_image.dtype, init_image.min(), init_image.max())
    mask_image = mask_image.astype(np.uint8)
    init_image = init_image.astype(np.uint8)
    cv2.imwrite("expanded_image.png", init_image)
    cv2.imwrite("expanded_mask.png", mask_image)