wip mat float16
This commit is contained in:
@@ -52,7 +52,7 @@ class ModulatedConv2d(nn.Module):
|
|||||||
)
|
)
|
||||||
self.out_channels = out_channels
|
self.out_channels = out_channels
|
||||||
self.kernel_size = kernel_size
|
self.kernel_size = kernel_size
|
||||||
self.weight_gain = 1 / np.sqrt(in_channels * (kernel_size**2))
|
self.weight_gain = 1 / np.sqrt(in_channels * (kernel_size ** 2))
|
||||||
self.padding = self.kernel_size // 2
|
self.padding = self.kernel_size // 2
|
||||||
self.up = up
|
self.up = up
|
||||||
self.down = down
|
self.down = down
|
||||||
@@ -213,7 +213,7 @@ class DecBlockFirst(nn.Module):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
self.fc = FullyConnectedLayer(
|
self.fc = FullyConnectedLayer(
|
||||||
in_features=in_channels * 2,
|
in_features=in_channels * 2,
|
||||||
out_features=in_channels * 4**2,
|
out_features=in_channels * 4 ** 2,
|
||||||
activation=activation,
|
activation=activation,
|
||||||
)
|
)
|
||||||
self.conv = StyleConv(
|
self.conv = StyleConv(
|
||||||
@@ -312,7 +312,7 @@ class DecBlock(nn.Module):
|
|||||||
in_channels=in_channels,
|
in_channels=in_channels,
|
||||||
out_channels=out_channels,
|
out_channels=out_channels,
|
||||||
style_dim=style_dim,
|
style_dim=style_dim,
|
||||||
resolution=2**res,
|
resolution=2 ** res,
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
up=2,
|
up=2,
|
||||||
use_noise=use_noise,
|
use_noise=use_noise,
|
||||||
@@ -323,7 +323,7 @@ class DecBlock(nn.Module):
|
|||||||
in_channels=out_channels,
|
in_channels=out_channels,
|
||||||
out_channels=out_channels,
|
out_channels=out_channels,
|
||||||
style_dim=style_dim,
|
style_dim=style_dim,
|
||||||
resolution=2**res,
|
resolution=2 ** res,
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
use_noise=use_noise,
|
use_noise=use_noise,
|
||||||
activation=activation,
|
activation=activation,
|
||||||
@@ -402,9 +402,6 @@ class MappingNet(torch.nn.Module):
|
|||||||
def forward(
|
def forward(
|
||||||
self, z, c, truncation_psi=1, truncation_cutoff=None, skip_w_avg_update=False
|
self, z, c, truncation_psi=1, truncation_cutoff=None, skip_w_avg_update=False
|
||||||
):
|
):
|
||||||
import ipdb
|
|
||||||
|
|
||||||
ipdb.set_trace()
|
|
||||||
# Embed, normalize, and concat inputs.
|
# Embed, normalize, and concat inputs.
|
||||||
x = None
|
x = None
|
||||||
if self.z_dim > 0:
|
if self.z_dim > 0:
|
||||||
@@ -510,7 +507,7 @@ class Discriminator(torch.nn.Module):
|
|||||||
self.img_channels = img_channels
|
self.img_channels = img_channels
|
||||||
|
|
||||||
resolution_log2 = int(np.log2(img_resolution))
|
resolution_log2 = int(np.log2(img_resolution))
|
||||||
assert img_resolution == 2**resolution_log2 and img_resolution >= 4
|
assert img_resolution == 2 ** resolution_log2 and img_resolution >= 4
|
||||||
self.resolution_log2 = resolution_log2
|
self.resolution_log2 = resolution_log2
|
||||||
|
|
||||||
def nf(stage):
|
def nf(stage):
|
||||||
@@ -546,7 +543,7 @@ class Discriminator(torch.nn.Module):
|
|||||||
)
|
)
|
||||||
self.Dis = nn.Sequential(*Dis)
|
self.Dis = nn.Sequential(*Dis)
|
||||||
|
|
||||||
self.fc0 = FullyConnectedLayer(nf(2) * 4**2, nf(2), activation=activation)
|
self.fc0 = FullyConnectedLayer(nf(2) * 4 ** 2, nf(2), activation=activation)
|
||||||
self.fc1 = FullyConnectedLayer(nf(2), 1 if cmap_dim == 0 else cmap_dim)
|
self.fc1 = FullyConnectedLayer(nf(2), 1 if cmap_dim == 0 else cmap_dim)
|
||||||
|
|
||||||
def forward(self, images_in, masks_in, c):
|
def forward(self, images_in, masks_in, c):
|
||||||
@@ -565,7 +562,7 @@ class Discriminator(torch.nn.Module):
|
|||||||
|
|
||||||
def nf(stage, channel_base=32768, channel_decay=1.0, channel_max=512):
|
def nf(stage, channel_base=32768, channel_decay=1.0, channel_max=512):
|
||||||
NF = {512: 64, 256: 128, 128: 256, 64: 512, 32: 512, 16: 512, 8: 512, 4: 512}
|
NF = {512: 64, 256: 128, 128: 256, 64: 512, 32: 512, 16: 512, 8: 512, 4: 512}
|
||||||
return NF[2**stage]
|
return NF[2 ** stage]
|
||||||
|
|
||||||
|
|
||||||
class Mlp(nn.Module):
|
class Mlp(nn.Module):
|
||||||
@@ -662,7 +659,7 @@ class Conv2dLayerPartial(nn.Module):
|
|||||||
)
|
)
|
||||||
|
|
||||||
self.weight_maskUpdater = torch.ones(1, 1, kernel_size, kernel_size)
|
self.weight_maskUpdater = torch.ones(1, 1, kernel_size, kernel_size)
|
||||||
self.slide_winsize = kernel_size**2
|
self.slide_winsize = kernel_size ** 2
|
||||||
self.stride = down
|
self.stride = down
|
||||||
self.padding = kernel_size // 2 if kernel_size % 2 == 1 else 0
|
self.padding = kernel_size // 2 if kernel_size % 2 == 1 else 0
|
||||||
|
|
||||||
@@ -678,9 +675,9 @@ class Conv2dLayerPartial(nn.Module):
|
|||||||
stride=self.stride,
|
stride=self.stride,
|
||||||
padding=self.padding,
|
padding=self.padding,
|
||||||
)
|
)
|
||||||
mask_ratio = self.slide_winsize / (update_mask + 1e-8)
|
mask_ratio = self.slide_winsize / (update_mask.to(torch.float32) + 1e-8)
|
||||||
update_mask = torch.clamp(update_mask, 0, 1) # 0 or 1
|
update_mask = torch.clamp(update_mask, 0, 1) # 0 or 1
|
||||||
mask_ratio = torch.mul(mask_ratio, update_mask)
|
mask_ratio = torch.mul(mask_ratio, update_mask).to(x.dtype)
|
||||||
x = self.conv(x)
|
x = self.conv(x)
|
||||||
x = torch.mul(x, mask_ratio)
|
x = torch.mul(x, mask_ratio)
|
||||||
return x, update_mask
|
return x, update_mask
|
||||||
@@ -718,7 +715,7 @@ class WindowAttention(nn.Module):
|
|||||||
self.window_size = window_size # Wh, Ww
|
self.window_size = window_size # Wh, Ww
|
||||||
self.num_heads = num_heads
|
self.num_heads = num_heads
|
||||||
head_dim = dim // num_heads
|
head_dim = dim // num_heads
|
||||||
self.scale = qk_scale or head_dim**-0.5
|
self.scale = qk_scale or head_dim ** -0.5
|
||||||
|
|
||||||
self.q = FullyConnectedLayer(in_features=dim, out_features=dim)
|
self.q = FullyConnectedLayer(in_features=dim, out_features=dim)
|
||||||
self.k = FullyConnectedLayer(in_features=dim, out_features=dim)
|
self.k = FullyConnectedLayer(in_features=dim, out_features=dim)
|
||||||
@@ -734,7 +731,7 @@ class WindowAttention(nn.Module):
|
|||||||
mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
|
mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
|
||||||
"""
|
"""
|
||||||
B_, N, C = x.shape
|
B_, N, C = x.shape
|
||||||
norm_x = F.normalize(x, p=2.0, dim=-1)
|
norm_x = F.normalize(x, p=2.0, dim=-1, eps=torch.finfo(x.dtype).eps)
|
||||||
q = (
|
q = (
|
||||||
self.q(norm_x)
|
self.q(norm_x)
|
||||||
.reshape(B_, N, self.num_heads, C // self.num_heads)
|
.reshape(B_, N, self.num_heads, C // self.num_heads)
|
||||||
@@ -771,7 +768,6 @@ class WindowAttention(nn.Module):
|
|||||||
).repeat(1, N, 1)
|
).repeat(1, N, 1)
|
||||||
|
|
||||||
attn = self.softmax(attn)
|
attn = self.softmax(attn)
|
||||||
|
|
||||||
x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
|
x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
|
||||||
x = self.proj(x)
|
x = self.proj(x)
|
||||||
return x, mask_windows
|
return x, mask_windows
|
||||||
@@ -935,7 +931,9 @@ class SwinTransformerBlock(nn.Module):
|
|||||||
) # nW*B, window_size*window_size, C
|
) # nW*B, window_size*window_size, C
|
||||||
else:
|
else:
|
||||||
attn_windows, mask_windows = self.attn(
|
attn_windows, mask_windows = self.attn(
|
||||||
x_windows, mask_windows, mask=self.calculate_mask(x_size).to(x.device)
|
x_windows,
|
||||||
|
mask_windows,
|
||||||
|
mask=self.calculate_mask(x_size).to(x.dtype).to(x.device),
|
||||||
) # nW*B, window_size*window_size, C
|
) # nW*B, window_size*window_size, C
|
||||||
|
|
||||||
# merge windows
|
# merge windows
|
||||||
@@ -1213,7 +1211,7 @@ class Encoder(nn.Module):
|
|||||||
self.resolution = []
|
self.resolution = []
|
||||||
|
|
||||||
for idx, i in enumerate(range(res_log2, 3, -1)): # from input size to 16x16
|
for idx, i in enumerate(range(res_log2, 3, -1)): # from input size to 16x16
|
||||||
res = 2**i
|
res = 2 ** i
|
||||||
self.resolution.append(res)
|
self.resolution.append(res)
|
||||||
if i == res_log2:
|
if i == res_log2:
|
||||||
block = EncFromRGB(img_channels * 2 + 1, nf(i), activation)
|
block = EncFromRGB(img_channels * 2 + 1, nf(i), activation)
|
||||||
@@ -1298,7 +1296,7 @@ class DecBlockFirstV2(nn.Module):
|
|||||||
in_channels=in_channels,
|
in_channels=in_channels,
|
||||||
out_channels=out_channels,
|
out_channels=out_channels,
|
||||||
style_dim=style_dim,
|
style_dim=style_dim,
|
||||||
resolution=2**res,
|
resolution=2 ** res,
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
use_noise=use_noise,
|
use_noise=use_noise,
|
||||||
activation=activation,
|
activation=activation,
|
||||||
@@ -1343,7 +1341,7 @@ class DecBlock(nn.Module):
|
|||||||
in_channels=in_channels,
|
in_channels=in_channels,
|
||||||
out_channels=out_channels,
|
out_channels=out_channels,
|
||||||
style_dim=style_dim,
|
style_dim=style_dim,
|
||||||
resolution=2**res,
|
resolution=2 ** res,
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
up=2,
|
up=2,
|
||||||
use_noise=use_noise,
|
use_noise=use_noise,
|
||||||
@@ -1354,7 +1352,7 @@ class DecBlock(nn.Module):
|
|||||||
in_channels=out_channels,
|
in_channels=out_channels,
|
||||||
out_channels=out_channels,
|
out_channels=out_channels,
|
||||||
style_dim=style_dim,
|
style_dim=style_dim,
|
||||||
resolution=2**res,
|
resolution=2 ** res,
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
use_noise=use_noise,
|
use_noise=use_noise,
|
||||||
activation=activation,
|
activation=activation,
|
||||||
@@ -1391,7 +1389,7 @@ class Decoder(nn.Module):
|
|||||||
for res in range(5, res_log2 + 1):
|
for res in range(5, res_log2 + 1):
|
||||||
setattr(
|
setattr(
|
||||||
self,
|
self,
|
||||||
"Dec_%dx%d" % (2**res, 2**res),
|
"Dec_%dx%d" % (2 ** res, 2 ** res),
|
||||||
DecBlock(
|
DecBlock(
|
||||||
res,
|
res,
|
||||||
nf(res - 1),
|
nf(res - 1),
|
||||||
@@ -1408,7 +1406,7 @@ class Decoder(nn.Module):
|
|||||||
def forward(self, x, ws, gs, E_features, noise_mode="random"):
|
def forward(self, x, ws, gs, E_features, noise_mode="random"):
|
||||||
x, img = self.Dec_16x16(x, ws, gs, E_features, noise_mode=noise_mode)
|
x, img = self.Dec_16x16(x, ws, gs, E_features, noise_mode=noise_mode)
|
||||||
for res in range(5, self.res_log2 + 1):
|
for res in range(5, self.res_log2 + 1):
|
||||||
block = getattr(self, "Dec_%dx%d" % (2**res, 2**res))
|
block = getattr(self, "Dec_%dx%d" % (2 ** res, 2 ** res))
|
||||||
x, img = block(x, img, ws, gs, E_features, noise_mode=noise_mode)
|
x, img = block(x, img, ws, gs, E_features, noise_mode=noise_mode)
|
||||||
|
|
||||||
return img
|
return img
|
||||||
@@ -1433,7 +1431,7 @@ class DecStyleBlock(nn.Module):
|
|||||||
in_channels=in_channels,
|
in_channels=in_channels,
|
||||||
out_channels=out_channels,
|
out_channels=out_channels,
|
||||||
style_dim=style_dim,
|
style_dim=style_dim,
|
||||||
resolution=2**res,
|
resolution=2 ** res,
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
up=2,
|
up=2,
|
||||||
use_noise=use_noise,
|
use_noise=use_noise,
|
||||||
@@ -1444,7 +1442,7 @@ class DecStyleBlock(nn.Module):
|
|||||||
in_channels=out_channels,
|
in_channels=out_channels,
|
||||||
out_channels=out_channels,
|
out_channels=out_channels,
|
||||||
style_dim=style_dim,
|
style_dim=style_dim,
|
||||||
resolution=2**res,
|
resolution=2 ** res,
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
use_noise=use_noise,
|
use_noise=use_noise,
|
||||||
activation=activation,
|
activation=activation,
|
||||||
@@ -1642,7 +1640,7 @@ class SynthesisNet(nn.Module):
|
|||||||
):
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
resolution_log2 = int(np.log2(img_resolution))
|
resolution_log2 = int(np.log2(img_resolution))
|
||||||
assert img_resolution == 2**resolution_log2 and img_resolution >= 4
|
assert img_resolution == 2 ** resolution_log2 and img_resolution >= 4
|
||||||
|
|
||||||
self.num_layers = resolution_log2 * 2 - 3 * 2
|
self.num_layers = resolution_log2 * 2 - 3 * 2
|
||||||
self.img_resolution = img_resolution
|
self.img_resolution = img_resolution
|
||||||
@@ -1783,7 +1781,7 @@ class Discriminator(torch.nn.Module):
|
|||||||
self.img_channels = img_channels
|
self.img_channels = img_channels
|
||||||
|
|
||||||
resolution_log2 = int(np.log2(img_resolution))
|
resolution_log2 = int(np.log2(img_resolution))
|
||||||
assert img_resolution == 2**resolution_log2 and img_resolution >= 4
|
assert img_resolution == 2 ** resolution_log2 and img_resolution >= 4
|
||||||
self.resolution_log2 = resolution_log2
|
self.resolution_log2 = resolution_log2
|
||||||
|
|
||||||
if cmap_dim == None:
|
if cmap_dim == None:
|
||||||
@@ -1814,7 +1812,7 @@ class Discriminator(torch.nn.Module):
|
|||||||
)
|
)
|
||||||
self.Dis = nn.Sequential(*Dis)
|
self.Dis = nn.Sequential(*Dis)
|
||||||
|
|
||||||
self.fc0 = FullyConnectedLayer(nf(2) * 4**2, nf(2), activation=activation)
|
self.fc0 = FullyConnectedLayer(nf(2) * 4 ** 2, nf(2), activation=activation)
|
||||||
self.fc1 = FullyConnectedLayer(nf(2), 1 if cmap_dim == 0 else cmap_dim)
|
self.fc1 = FullyConnectedLayer(nf(2), 1 if cmap_dim == 0 else cmap_dim)
|
||||||
|
|
||||||
# for 64x64
|
# for 64x64
|
||||||
@@ -1839,7 +1837,7 @@ class Discriminator(torch.nn.Module):
|
|||||||
self.Dis_stg1 = nn.Sequential(*Dis_stg1)
|
self.Dis_stg1 = nn.Sequential(*Dis_stg1)
|
||||||
|
|
||||||
self.fc0_stg1 = FullyConnectedLayer(
|
self.fc0_stg1 = FullyConnectedLayer(
|
||||||
nf(2) // 2 * 4**2, nf(2) // 2, activation=activation
|
nf(2) // 2 * 4 ** 2, nf(2) // 2, activation=activation
|
||||||
)
|
)
|
||||||
self.fc1_stg1 = FullyConnectedLayer(
|
self.fc1_stg1 = FullyConnectedLayer(
|
||||||
nf(2) // 2, 1 if cmap_dim == 0 else cmap_dim
|
nf(2) // 2, 1 if cmap_dim == 0 else cmap_dim
|
||||||
@@ -1874,7 +1872,7 @@ MAT_MODEL_MD5 = os.environ.get("MAT_MODEL_MD5", "8ca927835fa3f5e21d65ffcb165377e
|
|||||||
|
|
||||||
class MAT(InpaintModel):
|
class MAT(InpaintModel):
|
||||||
name = "mat"
|
name = "mat"
|
||||||
min_size = 512
|
min_size = 1024
|
||||||
pad_mod = 512
|
pad_mod = 512
|
||||||
pad_to_square = True
|
pad_to_square = True
|
||||||
|
|
||||||
@@ -1890,9 +1888,9 @@ class MAT(InpaintModel):
|
|||||||
img_resolution=512,
|
img_resolution=512,
|
||||||
img_channels=3,
|
img_channels=3,
|
||||||
mapping_kwargs={"torch_dtype": self.torch_dtype},
|
mapping_kwargs={"torch_dtype": self.torch_dtype},
|
||||||
)
|
).to(self.torch_dtype)
|
||||||
# fmt: off
|
# fmt: off
|
||||||
self.model = load_model(G, MAT_MODEL_URL, device, MAT_MODEL_MD5).to(self.torch_dtype)
|
self.model = load_model(G, MAT_MODEL_URL, device, MAT_MODEL_MD5)
|
||||||
self.z = torch.from_numpy(np.random.randn(1, G.z_dim)).to(self.torch_dtype).to(device)
|
self.z = torch.from_numpy(np.random.randn(1, G.z_dim)).to(self.torch_dtype).to(device)
|
||||||
self.label = torch.zeros([1, self.model.c_dim], device=device).to(self.torch_dtype)
|
self.label = torch.zeros([1, self.model.c_dim], device=device).to(self.torch_dtype)
|
||||||
# fmt: on
|
# fmt: on
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ def make_beta_schedule(
|
|||||||
if schedule == "linear":
|
if schedule == "linear":
|
||||||
betas = (
|
betas = (
|
||||||
torch.linspace(
|
torch.linspace(
|
||||||
linear_start**0.5, linear_end**0.5, n_timestep, dtype=torch.float64
|
linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64
|
||||||
)
|
)
|
||||||
** 2
|
** 2
|
||||||
)
|
)
|
||||||
@@ -134,8 +134,10 @@ def timestep_embedding(device, timesteps, dim, max_period=10000, repeat_only=Fal
|
|||||||
###### MAT and FcF #######
|
###### MAT and FcF #######
|
||||||
|
|
||||||
|
|
||||||
def normalize_2nd_moment(x, dim=1, eps=1e-8):
|
def normalize_2nd_moment(x, dim=1):
|
||||||
return x * (x.square().mean(dim=dim, keepdim=True) + eps).rsqrt()
|
return (
|
||||||
|
x * (x.square().mean(dim=dim, keepdim=True) + torch.finfo(x.dtype).eps).rsqrt()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class EasyDict(dict):
|
class EasyDict(dict):
|
||||||
@@ -460,7 +462,7 @@ def _upfirdn2d_ref(x, f, up=1, down=1, padding=0, flip_filter=False, gain=1):
|
|||||||
if f is None:
|
if f is None:
|
||||||
f = torch.ones([1, 1], dtype=torch.float32, device=x.device)
|
f = torch.ones([1, 1], dtype=torch.float32, device=x.device)
|
||||||
assert isinstance(f, torch.Tensor) and f.ndim in [1, 2]
|
assert isinstance(f, torch.Tensor) and f.ndim in [1, 2]
|
||||||
assert f.dtype == torch.float32 and not f.requires_grad
|
assert not f.requires_grad
|
||||||
batch_size, num_channels, in_height, in_width = x.shape
|
batch_size, num_channels, in_height, in_width = x.shape
|
||||||
# upx, upy = _parse_scaling(up)
|
# upx, upy = _parse_scaling(up)
|
||||||
# downx, downy = _parse_scaling(down)
|
# downx, downy = _parse_scaling(down)
|
||||||
@@ -733,9 +735,7 @@ def conv2d_resample(
|
|||||||
# Validate arguments.
|
# Validate arguments.
|
||||||
assert isinstance(x, torch.Tensor) and (x.ndim == 4)
|
assert isinstance(x, torch.Tensor) and (x.ndim == 4)
|
||||||
assert isinstance(w, torch.Tensor) and (w.ndim == 4) and (w.dtype == x.dtype)
|
assert isinstance(w, torch.Tensor) and (w.ndim == 4) and (w.dtype == x.dtype)
|
||||||
assert f is None or (
|
assert f is None or (isinstance(f, torch.Tensor) and f.ndim in [1, 2])
|
||||||
isinstance(f, torch.Tensor) and f.ndim in [1, 2] and f.dtype == torch.float32
|
|
||||||
)
|
|
||||||
assert isinstance(up, int) and (up >= 1)
|
assert isinstance(up, int) and (up >= 1)
|
||||||
assert isinstance(down, int) and (down >= 1)
|
assert isinstance(down, int) and (down >= 1)
|
||||||
# assert isinstance(groups, int) and (groups >= 1), f"!!!!!! groups: {groups} isinstance(groups, int) {isinstance(groups, int)} {type(groups)}"
|
# assert isinstance(groups, int) and (groups >= 1), f"!!!!!! groups: {groups} isinstance(groups, int) {isinstance(groups, int)} {type(groups)}"
|
||||||
@@ -772,7 +772,7 @@ def conv2d_resample(
|
|||||||
f=f,
|
f=f,
|
||||||
up=up,
|
up=up,
|
||||||
padding=[px0, px1, py0, py1],
|
padding=[px0, px1, py0, py1],
|
||||||
gain=up**2,
|
gain=up ** 2,
|
||||||
flip_filter=flip_filter,
|
flip_filter=flip_filter,
|
||||||
)
|
)
|
||||||
return x
|
return x
|
||||||
@@ -814,7 +814,7 @@ def conv2d_resample(
|
|||||||
x=x,
|
x=x,
|
||||||
f=f,
|
f=f,
|
||||||
padding=[px0 + pxt, px1 + pxt, py0 + pyt, py1 + pyt],
|
padding=[px0 + pxt, px1 + pxt, py0 + pyt, py1 + pyt],
|
||||||
gain=up**2,
|
gain=up ** 2,
|
||||||
flip_filter=flip_filter,
|
flip_filter=flip_filter,
|
||||||
)
|
)
|
||||||
if down > 1:
|
if down > 1:
|
||||||
@@ -834,7 +834,7 @@ def conv2d_resample(
|
|||||||
f=(f if up > 1 else None),
|
f=(f if up > 1 else None),
|
||||||
up=up,
|
up=up,
|
||||||
padding=[px0, px1, py0, py1],
|
padding=[px0, px1, py0, py1],
|
||||||
gain=up**2,
|
gain=up ** 2,
|
||||||
flip_filter=flip_filter,
|
flip_filter=flip_filter,
|
||||||
)
|
)
|
||||||
x = _conv2d_wrapper(x=x, w=w, groups=groups, flip_weight=flip_weight)
|
x = _conv2d_wrapper(x=x, w=w, groups=groups, flip_weight=flip_weight)
|
||||||
@@ -870,7 +870,7 @@ class Conv2dLayer(torch.nn.Module):
|
|||||||
self.register_buffer("resample_filter", setup_filter(resample_filter))
|
self.register_buffer("resample_filter", setup_filter(resample_filter))
|
||||||
self.conv_clamp = conv_clamp
|
self.conv_clamp = conv_clamp
|
||||||
self.padding = kernel_size // 2
|
self.padding = kernel_size // 2
|
||||||
self.weight_gain = 1 / np.sqrt(in_channels * (kernel_size**2))
|
self.weight_gain = 1 / np.sqrt(in_channels * (kernel_size ** 2))
|
||||||
self.act_gain = activation_funcs[activation].def_gain
|
self.act_gain = activation_funcs[activation].def_gain
|
||||||
|
|
||||||
memory_format = (
|
memory_format = (
|
||||||
|
|||||||
@@ -9,13 +9,18 @@ from lama_cleaner.model_manager import ModelManager
|
|||||||
from lama_cleaner.schema import Config, HDStrategy, LDMSampler, SDSampler
|
from lama_cleaner.schema import Config, HDStrategy, LDMSampler, SDSampler
|
||||||
|
|
||||||
current_dir = Path(__file__).parent.absolute().resolve()
|
current_dir = Path(__file__).parent.absolute().resolve()
|
||||||
save_dir = current_dir / 'result'
|
save_dir = current_dir / "result"
|
||||||
save_dir.mkdir(exist_ok=True, parents=True)
|
save_dir.mkdir(exist_ok=True, parents=True)
|
||||||
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
device = torch.device(device)
|
device = torch.device(device)
|
||||||
|
|
||||||
|
|
||||||
def get_data(fx: float = 1, fy: float = 1.0, img_p=current_dir / "image.png", mask_p=current_dir / "mask.png"):
|
def get_data(
|
||||||
|
fx: float = 1,
|
||||||
|
fy: float = 1.0,
|
||||||
|
img_p=current_dir / "image.png",
|
||||||
|
mask_p=current_dir / "mask.png",
|
||||||
|
):
|
||||||
img = cv2.imread(str(img_p))
|
img = cv2.imread(str(img_p))
|
||||||
img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
|
img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
|
||||||
mask = cv2.imread(str(mask_p), cv2.IMREAD_GRAYSCALE)
|
mask = cv2.imread(str(mask_p), cv2.IMREAD_GRAYSCALE)
|
||||||
@@ -37,10 +42,15 @@ def get_config(strategy, **kwargs):
|
|||||||
return Config(**data)
|
return Config(**data)
|
||||||
|
|
||||||
|
|
||||||
def assert_equal(model, config, gt_name,
|
def assert_equal(
|
||||||
fx: float = 1, fy: float = 1,
|
model,
|
||||||
img_p=current_dir / "image.png",
|
config,
|
||||||
mask_p=current_dir / "mask.png"):
|
gt_name,
|
||||||
|
fx: float = 1,
|
||||||
|
fy: float = 1,
|
||||||
|
img_p=current_dir / "image.png",
|
||||||
|
mask_p=current_dir / "mask.png",
|
||||||
|
):
|
||||||
img, mask = get_data(fx=fx, fy=fy, img_p=img_p, mask_p=mask_p)
|
img, mask = get_data(fx=fx, fy=fy, img_p=img_p, mask_p=mask_p)
|
||||||
print(f"Input image shape: {img.shape}")
|
print(f"Input image shape: {img.shape}")
|
||||||
res = model(img, mask, config)
|
res = model(img, mask, config)
|
||||||
@@ -59,139 +69,13 @@ def assert_equal(model, config, gt_name,
|
|||||||
# assert np.array_equal(res, gt)
|
# assert np.array_equal(res, gt)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize("strategy", [HDStrategy.ORIGINAL])
|
||||||
"strategy", [HDStrategy.ORIGINAL, HDStrategy.RESIZE, HDStrategy.CROP]
|
|
||||||
)
|
|
||||||
def test_lama(strategy):
|
|
||||||
model = ModelManager(name="lama", device=device)
|
|
||||||
assert_equal(
|
|
||||||
model,
|
|
||||||
get_config(strategy),
|
|
||||||
f"lama_{strategy[0].upper() + strategy[1:]}_result.png",
|
|
||||||
)
|
|
||||||
|
|
||||||
fx = 1.3
|
|
||||||
assert_equal(
|
|
||||||
model,
|
|
||||||
get_config(strategy),
|
|
||||||
f"lama_{strategy[0].upper() + strategy[1:]}_fx_{fx}_result.png",
|
|
||||||
fx=1.3,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"strategy", [HDStrategy.ORIGINAL, HDStrategy.RESIZE, HDStrategy.CROP]
|
|
||||||
)
|
|
||||||
@pytest.mark.parametrize("ldm_sampler", [LDMSampler.ddim, LDMSampler.plms])
|
|
||||||
def test_ldm(strategy, ldm_sampler):
|
|
||||||
model = ModelManager(name="ldm", device=device)
|
|
||||||
cfg = get_config(strategy, ldm_sampler=ldm_sampler)
|
|
||||||
assert_equal(
|
|
||||||
model, cfg, f"ldm_{strategy[0].upper() + strategy[1:]}_{ldm_sampler}_result.png"
|
|
||||||
)
|
|
||||||
|
|
||||||
fx = 1.3
|
|
||||||
assert_equal(
|
|
||||||
model,
|
|
||||||
cfg,
|
|
||||||
f"ldm_{strategy[0].upper() + strategy[1:]}_{ldm_sampler}_fx_{fx}_result.png",
|
|
||||||
fx=fx,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"strategy", [HDStrategy.ORIGINAL, HDStrategy.RESIZE, HDStrategy.CROP]
|
|
||||||
)
|
|
||||||
@pytest.mark.parametrize("zits_wireframe", [False, True])
|
|
||||||
def test_zits(strategy, zits_wireframe):
|
|
||||||
model = ModelManager(name="zits", device=device)
|
|
||||||
cfg = get_config(strategy, zits_wireframe=zits_wireframe)
|
|
||||||
# os.environ['ZITS_DEBUG_LINE_PATH'] = str(current_dir / 'zits_debug_line.jpg')
|
|
||||||
# os.environ['ZITS_DEBUG_EDGE_PATH'] = str(current_dir / 'zits_debug_edge.jpg')
|
|
||||||
assert_equal(
|
|
||||||
model,
|
|
||||||
cfg,
|
|
||||||
f"zits_{strategy[0].upper() + strategy[1:]}_wireframe_{zits_wireframe}_result.png",
|
|
||||||
)
|
|
||||||
|
|
||||||
fx = 1.3
|
|
||||||
assert_equal(
|
|
||||||
model,
|
|
||||||
cfg,
|
|
||||||
f"zits_{strategy.capitalize()}_wireframe_{zits_wireframe}_fx_{fx}_result.png",
|
|
||||||
fx=fx,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"strategy", [HDStrategy.ORIGINAL]
|
|
||||||
)
|
|
||||||
def test_mat(strategy):
|
def test_mat(strategy):
|
||||||
model = ModelManager(name="mat", device=device)
|
model = ModelManager(name="mat", device=device)
|
||||||
cfg = get_config(strategy)
|
cfg = get_config(strategy)
|
||||||
|
|
||||||
assert_equal(
|
for _ in range(10):
|
||||||
model,
|
assert_equal(
|
||||||
cfg,
|
model, cfg, f"mat_{strategy.capitalize()}_result.png",
|
||||||
f"mat_{strategy.capitalize()}_result.png",
|
)
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"strategy", [HDStrategy.ORIGINAL]
|
|
||||||
)
|
|
||||||
def test_fcf(strategy):
|
|
||||||
model = ModelManager(name="fcf", device=device)
|
|
||||||
cfg = get_config(strategy)
|
|
||||||
|
|
||||||
assert_equal(
|
|
||||||
model,
|
|
||||||
cfg,
|
|
||||||
f"fcf_{strategy.capitalize()}_result.png",
|
|
||||||
fx=2,
|
|
||||||
fy=2
|
|
||||||
)
|
|
||||||
|
|
||||||
assert_equal(
|
|
||||||
model,
|
|
||||||
cfg,
|
|
||||||
f"fcf_{strategy.capitalize()}_result.png",
|
|
||||||
fx=3.8,
|
|
||||||
fy=2
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"strategy", [HDStrategy.ORIGINAL, HDStrategy.RESIZE, HDStrategy.CROP]
|
|
||||||
)
|
|
||||||
@pytest.mark.parametrize("cv2_flag", ['INPAINT_NS', 'INPAINT_TELEA'])
|
|
||||||
@pytest.mark.parametrize("cv2_radius", [3, 15])
|
|
||||||
def test_cv2(strategy, cv2_flag, cv2_radius):
|
|
||||||
model = ModelManager(
|
|
||||||
name="cv2",
|
|
||||||
device=torch.device(device),
|
|
||||||
)
|
|
||||||
cfg = get_config(strategy, cv2_flag=cv2_flag, cv2_radius=cv2_radius)
|
|
||||||
assert_equal(
|
|
||||||
model,
|
|
||||||
cfg,
|
|
||||||
f"sd_{strategy.capitalize()}_{cv2_flag}_{cv2_radius}.png",
|
|
||||||
img_p=current_dir / "overture-creations-5sI6fQgYIuo.png",
|
|
||||||
mask_p=current_dir / "overture-creations-5sI6fQgYIuo_mask.png",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("strategy", [HDStrategy.ORIGINAL, HDStrategy.RESIZE, HDStrategy.CROP])
|
|
||||||
def test_manga(strategy):
|
|
||||||
model = ModelManager(
|
|
||||||
name="manga",
|
|
||||||
device=torch.device(device),
|
|
||||||
)
|
|
||||||
cfg = get_config(strategy)
|
|
||||||
assert_equal(
|
|
||||||
model,
|
|
||||||
cfg,
|
|
||||||
f"sd_{strategy.capitalize()}.png",
|
|
||||||
img_p=current_dir / "overture-creations-5sI6fQgYIuo.png",
|
|
||||||
mask_p=current_dir / "overture-creations-5sI6fQgYIuo_mask.png",
|
|
||||||
)
|
|
||||||
|
|||||||
Reference in New Issue
Block a user