🎨 Complete IOPaint project update
## Main updates

- ✨ Update all dependencies to the latest stable versions
- 📝 Add detailed project documentation and model recommendations
- 🔧 Configure the VSCode Cloud Studio preview feature
- 🐛 Fix PyTorch API deprecation warnings

## Dependency updates

- diffusers: 0.27.2 → 0.35.2
- gradio: 4.21.0 → 5.46.0
- peft: 0.7.1 → 0.18.0
- Pillow: 9.5.0 → 11.3.0
- fastapi: 0.108.0 → 0.116.2

## New files

- CLAUDE.md - project architecture and development guide
- UPGRADE_NOTES.md - detailed upgrade notes
- .vscode/preview.yml - preview configuration
- .vscode/LAUNCH_GUIDE.md - launch guide
- .gitignore - updated ignore rules

## Code fixes

- Fix the torch.cuda.amp.autocast() deprecation warning in iopaint/model/ldm.py (a minimal sketch of the API migration is shown below)

## Documentation updates

- README.md - add model recommendations and usage guide
- Complete project source code (iopaint/)
- Web frontend source code (web_app/)

🤖 Generated with Claude Code
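The change to iopaint/model/ldm.py itself is not included in this diff; the snippet below is only a minimal sketch of the general migration pattern for the deprecated autocast API, assuming PyTorch 2.4+ and a hypothetical helper named run_model:

```python
import torch


def run_model(model: torch.nn.Module, x: torch.Tensor) -> torch.Tensor:
    # Deprecated form, emits a FutureWarning on recent PyTorch:
    #     with torch.cuda.amp.autocast():
    #         return model(x)
    # Device-agnostic replacement; disabled automatically when CUDA is absent.
    with torch.amp.autocast("cuda", enabled=torch.cuda.is_available()):
        return model(x)
```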
iopaint/model/helper/__init__.py (new file, empty)

iopaint/model/helper/controlnet_preprocess.py (new file, 68 lines)
```python
import torch
import PIL
import cv2
from PIL import Image
import numpy as np

from iopaint.helper import pad_img_to_modulo


def make_canny_control_image(image: np.ndarray) -> Image:
    canny_image = cv2.Canny(image, 100, 200)
    canny_image = canny_image[:, :, None]
    canny_image = np.concatenate([canny_image, canny_image, canny_image], axis=2)
    canny_image = PIL.Image.fromarray(canny_image)
    control_image = canny_image
    return control_image


def make_openpose_control_image(image: np.ndarray) -> Image:
    from controlnet_aux import OpenposeDetector

    processor = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
    control_image = processor(image, hand_and_face=True)
    return control_image


def resize_image(input_image, resolution):
    H, W, C = input_image.shape
    H = float(H)
    W = float(W)
    k = float(resolution) / min(H, W)
    H *= k
    W *= k
    H = int(np.round(H / 64.0)) * 64
    W = int(np.round(W / 64.0)) * 64
    img = cv2.resize(
        input_image,
        (W, H),
        interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA,
    )
    return img


def make_depth_control_image(image: np.ndarray) -> Image:
    from controlnet_aux import MidasDetector

    midas = MidasDetector.from_pretrained("lllyasviel/Annotators")

    origin_height, origin_width = image.shape[:2]
    pad_image = pad_img_to_modulo(image, mod=64, square=False, min_size=512)
    depth_image = midas(pad_image)
    depth_image = depth_image[0:origin_height, 0:origin_width]
    depth_image = depth_image[:, :, None]
    depth_image = np.concatenate([depth_image, depth_image, depth_image], axis=2)
    control_image = PIL.Image.fromarray(depth_image)
    return control_image


def make_inpaint_control_image(image: np.ndarray, mask: np.ndarray) -> torch.Tensor:
    """
    image: [H, W, C] RGB
    mask: [H, W, 1] 255 means area to repaint
    """
    image = image.astype(np.float32) / 255.0
    image[mask[:, :, -1] > 128] = -1.0  # set as masked pixel
    image = np.expand_dims(image, 0).transpose(0, 3, 1, 2)
    image = torch.from_numpy(image)
    return image
```
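Not part of the diff: a minimal usage sketch for the Canny helper above, assuming the helper receives an RGB numpy array; the file paths are placeholders.

```python
import cv2
from iopaint.model.helper.controlnet_preprocess import make_canny_control_image

bgr = cv2.imread("input.jpg")               # placeholder path
rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)  # assume the helper expects an RGB array
control = make_canny_control_image(rgb)     # 3-channel PIL image of Canny edges
control.save("canny_control.png")
```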
iopaint/model/helper/cpu_text_encoder.py (new file, 41 lines)
```python
import torch
from transformers import PreTrainedModel

from ..utils import torch_gc


class CPUTextEncoderWrapper(PreTrainedModel):
    def __init__(self, text_encoder, torch_dtype):
        super().__init__(text_encoder.config)
        self.config = text_encoder.config
        self._device = text_encoder.device
        # cpu not support float16
        self.text_encoder = text_encoder.to(torch.device("cpu"), non_blocking=True)
        self.text_encoder = self.text_encoder.to(torch.float32, non_blocking=True)
        self.torch_dtype = torch_dtype
        del text_encoder
        torch_gc()

    def __call__(self, x, **kwargs):
        input_device = x.device
        original_output = self.text_encoder(x.to(self.text_encoder.device), **kwargs)
        for k, v in original_output.items():
            if isinstance(v, tuple):
                original_output[k] = [
                    v[i].to(input_device).to(self.torch_dtype) for i in range(len(v))
                ]
            else:
                original_output[k] = v.to(input_device).to(self.torch_dtype)
        return original_output

    @property
    def dtype(self):
        return self.torch_dtype

    @property
    def device(self) -> torch.device:
        """
        `torch.device`: The device on which the module is (assuming that all the module parameters are on the same
        device).
        """
        return self._device
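A minimal sketch (not from the diff) of how such a wrapper can be attached to a diffusers pipeline; the model ID and the float16 dtype are assumptions for illustration.

```python
import torch
from diffusers import StableDiffusionInpaintPipeline
from iopaint.model.helper.cpu_text_encoder import CPUTextEncoderWrapper

pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting",  # assumed model ID for illustration
    torch_dtype=torch.float16,
)
# Prompt encoding now runs on CPU in float32; outputs are cast back to float16.
pipe.text_encoder = CPUTextEncoderWrapper(pipe.text_encoder, torch_dtype=torch.float16)
```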
iopaint/model/helper/g_diffuser_bot.py (new file, 62 lines)
```python
import cv2
import numpy as np


def expand_image(cv2_img, top: int, right: int, bottom: int, left: int):
    assert cv2_img.shape[2] == 3
    origin_h, origin_w = cv2_img.shape[:2]

    # TODO: which is better?
    # new_img = np.ones((new_height, new_width, 3), np.uint8) * 255
    new_img = cv2.copyMakeBorder(
        cv2_img, top, bottom, left, right, cv2.BORDER_REPLICATE
    )

    inner_padding_left = 0 if left > 0 else 0
    inner_padding_right = 0 if right > 0 else 0
    inner_padding_top = 0 if top > 0 else 0
    inner_padding_bottom = 0 if bottom > 0 else 0

    mask_image = np.zeros(
        (
            origin_h - inner_padding_top - inner_padding_bottom,
            origin_w - inner_padding_left - inner_padding_right,
        ),
        np.uint8,
    )
    mask_image = cv2.copyMakeBorder(
        mask_image,
        top + inner_padding_top,
        bottom + inner_padding_bottom,
        left + inner_padding_left,
        right + inner_padding_right,
        cv2.BORDER_CONSTANT,
        value=255,
    )
    # k = 2*int(min(origin_h, origin_w) // 6)+1
    # k = 7
    # mask_image = cv2.GaussianBlur(mask_image, (k, k), 0)
    return new_img, mask_image


if __name__ == "__main__":
    from pathlib import Path

    current_dir = Path(__file__).parent.absolute().resolve()
    image_path = "/Users/cwq/code/github/IOPaint/iopaint/tests/bunny.jpeg"
    init_image = cv2.imread(str(image_path))
    init_image, mask_image = expand_image(
        init_image,
        top=0,
        right=0,
        bottom=0,
        left=100,
    )
    print(mask_image.dtype, mask_image.min(), mask_image.max())
    print(init_image.dtype, init_image.min(), init_image.max())
    mask_image = mask_image.astype(np.uint8)
    init_image = init_image.astype(np.uint8)
    cv2.imwrite("expanded_image.png", init_image)
    cv2.imwrite("expanded_mask.png", mask_image)
```
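Also not part of the diff: a small outpainting-style sketch of expand_image with placeholder paths; the returned mask is 255 exactly where new content should be generated.

```python
import cv2
from iopaint.model.helper.g_diffuser_bot import expand_image

img = cv2.imread("photo.jpg")  # placeholder path, BGR image
# Grow the canvas 256 px to the right; the border is replicated into the new area.
expanded, mask = expand_image(img, top=0, right=256, bottom=0, left=0)
cv2.imwrite("expanded.png", expanded)
cv2.imwrite("expanded_mask.png", mask)
```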