From eb304ba69680a5c534ca07fe713baf100436dc5b Mon Sep 17 00:00:00 2001 From: Qing Date: Sat, 25 Mar 2023 22:46:28 +0800 Subject: [PATCH] wip mat float16 --- lama_cleaner/model/mat.py | 62 ++++++------ lama_cleaner/model/utils.py | 22 ++--- lama_cleaner/tests/test_model.py | 160 +++++-------------------------- 3 files changed, 63 insertions(+), 181 deletions(-) diff --git a/lama_cleaner/model/mat.py b/lama_cleaner/model/mat.py index ec6aed4..a709660 100644 --- a/lama_cleaner/model/mat.py +++ b/lama_cleaner/model/mat.py @@ -52,7 +52,7 @@ class ModulatedConv2d(nn.Module): ) self.out_channels = out_channels self.kernel_size = kernel_size - self.weight_gain = 1 / np.sqrt(in_channels * (kernel_size**2)) + self.weight_gain = 1 / np.sqrt(in_channels * (kernel_size ** 2)) self.padding = self.kernel_size // 2 self.up = up self.down = down @@ -213,7 +213,7 @@ class DecBlockFirst(nn.Module): super().__init__() self.fc = FullyConnectedLayer( in_features=in_channels * 2, - out_features=in_channels * 4**2, + out_features=in_channels * 4 ** 2, activation=activation, ) self.conv = StyleConv( @@ -312,7 +312,7 @@ class DecBlock(nn.Module): in_channels=in_channels, out_channels=out_channels, style_dim=style_dim, - resolution=2**res, + resolution=2 ** res, kernel_size=3, up=2, use_noise=use_noise, @@ -323,7 +323,7 @@ class DecBlock(nn.Module): in_channels=out_channels, out_channels=out_channels, style_dim=style_dim, - resolution=2**res, + resolution=2 ** res, kernel_size=3, use_noise=use_noise, activation=activation, @@ -402,9 +402,6 @@ class MappingNet(torch.nn.Module): def forward( self, z, c, truncation_psi=1, truncation_cutoff=None, skip_w_avg_update=False ): - import ipdb - - ipdb.set_trace() # Embed, normalize, and concat inputs. x = None if self.z_dim > 0: @@ -510,7 +507,7 @@ class Discriminator(torch.nn.Module): self.img_channels = img_channels resolution_log2 = int(np.log2(img_resolution)) - assert img_resolution == 2**resolution_log2 and img_resolution >= 4 + assert img_resolution == 2 ** resolution_log2 and img_resolution >= 4 self.resolution_log2 = resolution_log2 def nf(stage): @@ -546,7 +543,7 @@ class Discriminator(torch.nn.Module): ) self.Dis = nn.Sequential(*Dis) - self.fc0 = FullyConnectedLayer(nf(2) * 4**2, nf(2), activation=activation) + self.fc0 = FullyConnectedLayer(nf(2) * 4 ** 2, nf(2), activation=activation) self.fc1 = FullyConnectedLayer(nf(2), 1 if cmap_dim == 0 else cmap_dim) def forward(self, images_in, masks_in, c): @@ -565,7 +562,7 @@ class Discriminator(torch.nn.Module): def nf(stage, channel_base=32768, channel_decay=1.0, channel_max=512): NF = {512: 64, 256: 128, 128: 256, 64: 512, 32: 512, 16: 512, 8: 512, 4: 512} - return NF[2**stage] + return NF[2 ** stage] class Mlp(nn.Module): @@ -662,7 +659,7 @@ class Conv2dLayerPartial(nn.Module): ) self.weight_maskUpdater = torch.ones(1, 1, kernel_size, kernel_size) - self.slide_winsize = kernel_size**2 + self.slide_winsize = kernel_size ** 2 self.stride = down self.padding = kernel_size // 2 if kernel_size % 2 == 1 else 0 @@ -678,9 +675,9 @@ class Conv2dLayerPartial(nn.Module): stride=self.stride, padding=self.padding, ) - mask_ratio = self.slide_winsize / (update_mask + 1e-8) + mask_ratio = self.slide_winsize / (update_mask.to(torch.float32) + 1e-8) update_mask = torch.clamp(update_mask, 0, 1) # 0 or 1 - mask_ratio = torch.mul(mask_ratio, update_mask) + mask_ratio = torch.mul(mask_ratio, update_mask).to(x.dtype) x = self.conv(x) x = torch.mul(x, mask_ratio) return x, update_mask @@ -718,7 +715,7 @@ class WindowAttention(nn.Module): self.window_size = window_size # Wh, Ww self.num_heads = num_heads head_dim = dim // num_heads - self.scale = qk_scale or head_dim**-0.5 + self.scale = qk_scale or head_dim ** -0.5 self.q = FullyConnectedLayer(in_features=dim, out_features=dim) self.k = FullyConnectedLayer(in_features=dim, out_features=dim) @@ -734,7 +731,7 @@ class WindowAttention(nn.Module): mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None """ B_, N, C = x.shape - norm_x = F.normalize(x, p=2.0, dim=-1) + norm_x = F.normalize(x, p=2.0, dim=-1, eps=torch.finfo(x.dtype).eps) q = ( self.q(norm_x) .reshape(B_, N, self.num_heads, C // self.num_heads) @@ -771,7 +768,6 @@ class WindowAttention(nn.Module): ).repeat(1, N, 1) attn = self.softmax(attn) - x = (attn @ v).transpose(1, 2).reshape(B_, N, C) x = self.proj(x) return x, mask_windows @@ -935,7 +931,9 @@ class SwinTransformerBlock(nn.Module): ) # nW*B, window_size*window_size, C else: attn_windows, mask_windows = self.attn( - x_windows, mask_windows, mask=self.calculate_mask(x_size).to(x.device) + x_windows, + mask_windows, + mask=self.calculate_mask(x_size).to(x.dtype).to(x.device), ) # nW*B, window_size*window_size, C # merge windows @@ -1213,7 +1211,7 @@ class Encoder(nn.Module): self.resolution = [] for idx, i in enumerate(range(res_log2, 3, -1)): # from input size to 16x16 - res = 2**i + res = 2 ** i self.resolution.append(res) if i == res_log2: block = EncFromRGB(img_channels * 2 + 1, nf(i), activation) @@ -1298,7 +1296,7 @@ class DecBlockFirstV2(nn.Module): in_channels=in_channels, out_channels=out_channels, style_dim=style_dim, - resolution=2**res, + resolution=2 ** res, kernel_size=3, use_noise=use_noise, activation=activation, @@ -1343,7 +1341,7 @@ class DecBlock(nn.Module): in_channels=in_channels, out_channels=out_channels, style_dim=style_dim, - resolution=2**res, + resolution=2 ** res, kernel_size=3, up=2, use_noise=use_noise, @@ -1354,7 +1352,7 @@ class DecBlock(nn.Module): in_channels=out_channels, out_channels=out_channels, style_dim=style_dim, - resolution=2**res, + resolution=2 ** res, kernel_size=3, use_noise=use_noise, activation=activation, @@ -1391,7 +1389,7 @@ class Decoder(nn.Module): for res in range(5, res_log2 + 1): setattr( self, - "Dec_%dx%d" % (2**res, 2**res), + "Dec_%dx%d" % (2 ** res, 2 ** res), DecBlock( res, nf(res - 1), @@ -1408,7 +1406,7 @@ class Decoder(nn.Module): def forward(self, x, ws, gs, E_features, noise_mode="random"): x, img = self.Dec_16x16(x, ws, gs, E_features, noise_mode=noise_mode) for res in range(5, self.res_log2 + 1): - block = getattr(self, "Dec_%dx%d" % (2**res, 2**res)) + block = getattr(self, "Dec_%dx%d" % (2 ** res, 2 ** res)) x, img = block(x, img, ws, gs, E_features, noise_mode=noise_mode) return img @@ -1433,7 +1431,7 @@ class DecStyleBlock(nn.Module): in_channels=in_channels, out_channels=out_channels, style_dim=style_dim, - resolution=2**res, + resolution=2 ** res, kernel_size=3, up=2, use_noise=use_noise, @@ -1444,7 +1442,7 @@ class DecStyleBlock(nn.Module): in_channels=out_channels, out_channels=out_channels, style_dim=style_dim, - resolution=2**res, + resolution=2 ** res, kernel_size=3, use_noise=use_noise, activation=activation, @@ -1642,7 +1640,7 @@ class SynthesisNet(nn.Module): ): super().__init__() resolution_log2 = int(np.log2(img_resolution)) - assert img_resolution == 2**resolution_log2 and img_resolution >= 4 + assert img_resolution == 2 ** resolution_log2 and img_resolution >= 4 self.num_layers = resolution_log2 * 2 - 3 * 2 self.img_resolution = img_resolution @@ -1783,7 +1781,7 @@ class Discriminator(torch.nn.Module): self.img_channels = img_channels resolution_log2 = int(np.log2(img_resolution)) - assert img_resolution == 2**resolution_log2 and img_resolution >= 4 + assert img_resolution == 2 ** resolution_log2 and img_resolution >= 4 self.resolution_log2 = resolution_log2 if cmap_dim == None: @@ -1814,7 +1812,7 @@ class Discriminator(torch.nn.Module): ) self.Dis = nn.Sequential(*Dis) - self.fc0 = FullyConnectedLayer(nf(2) * 4**2, nf(2), activation=activation) + self.fc0 = FullyConnectedLayer(nf(2) * 4 ** 2, nf(2), activation=activation) self.fc1 = FullyConnectedLayer(nf(2), 1 if cmap_dim == 0 else cmap_dim) # for 64x64 @@ -1839,7 +1837,7 @@ class Discriminator(torch.nn.Module): self.Dis_stg1 = nn.Sequential(*Dis_stg1) self.fc0_stg1 = FullyConnectedLayer( - nf(2) // 2 * 4**2, nf(2) // 2, activation=activation + nf(2) // 2 * 4 ** 2, nf(2) // 2, activation=activation ) self.fc1_stg1 = FullyConnectedLayer( nf(2) // 2, 1 if cmap_dim == 0 else cmap_dim @@ -1874,7 +1872,7 @@ MAT_MODEL_MD5 = os.environ.get("MAT_MODEL_MD5", "8ca927835fa3f5e21d65ffcb165377e class MAT(InpaintModel): name = "mat" - min_size = 512 + min_size = 1024 pad_mod = 512 pad_to_square = True @@ -1890,9 +1888,9 @@ class MAT(InpaintModel): img_resolution=512, img_channels=3, mapping_kwargs={"torch_dtype": self.torch_dtype}, - ) + ).to(self.torch_dtype) # fmt: off - self.model = load_model(G, MAT_MODEL_URL, device, MAT_MODEL_MD5).to(self.torch_dtype) + self.model = load_model(G, MAT_MODEL_URL, device, MAT_MODEL_MD5) self.z = torch.from_numpy(np.random.randn(1, G.z_dim)).to(self.torch_dtype).to(device) self.label = torch.zeros([1, self.model.c_dim], device=device).to(self.torch_dtype) # fmt: on diff --git a/lama_cleaner/model/utils.py b/lama_cleaner/model/utils.py index 352af04..998db43 100644 --- a/lama_cleaner/model/utils.py +++ b/lama_cleaner/model/utils.py @@ -27,7 +27,7 @@ def make_beta_schedule( if schedule == "linear": betas = ( torch.linspace( - linear_start**0.5, linear_end**0.5, n_timestep, dtype=torch.float64 + linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64 ) ** 2 ) @@ -134,8 +134,10 @@ def timestep_embedding(device, timesteps, dim, max_period=10000, repeat_only=Fal ###### MAT and FcF ####### -def normalize_2nd_moment(x, dim=1, eps=1e-8): - return x * (x.square().mean(dim=dim, keepdim=True) + eps).rsqrt() +def normalize_2nd_moment(x, dim=1): + return ( + x * (x.square().mean(dim=dim, keepdim=True) + torch.finfo(x.dtype).eps).rsqrt() + ) class EasyDict(dict): @@ -460,7 +462,7 @@ def _upfirdn2d_ref(x, f, up=1, down=1, padding=0, flip_filter=False, gain=1): if f is None: f = torch.ones([1, 1], dtype=torch.float32, device=x.device) assert isinstance(f, torch.Tensor) and f.ndim in [1, 2] - assert f.dtype == torch.float32 and not f.requires_grad + assert not f.requires_grad batch_size, num_channels, in_height, in_width = x.shape # upx, upy = _parse_scaling(up) # downx, downy = _parse_scaling(down) @@ -733,9 +735,7 @@ def conv2d_resample( # Validate arguments. assert isinstance(x, torch.Tensor) and (x.ndim == 4) assert isinstance(w, torch.Tensor) and (w.ndim == 4) and (w.dtype == x.dtype) - assert f is None or ( - isinstance(f, torch.Tensor) and f.ndim in [1, 2] and f.dtype == torch.float32 - ) + assert f is None or (isinstance(f, torch.Tensor) and f.ndim in [1, 2]) assert isinstance(up, int) and (up >= 1) assert isinstance(down, int) and (down >= 1) # assert isinstance(groups, int) and (groups >= 1), f"!!!!!! groups: {groups} isinstance(groups, int) {isinstance(groups, int)} {type(groups)}" @@ -772,7 +772,7 @@ def conv2d_resample( f=f, up=up, padding=[px0, px1, py0, py1], - gain=up**2, + gain=up ** 2, flip_filter=flip_filter, ) return x @@ -814,7 +814,7 @@ def conv2d_resample( x=x, f=f, padding=[px0 + pxt, px1 + pxt, py0 + pyt, py1 + pyt], - gain=up**2, + gain=up ** 2, flip_filter=flip_filter, ) if down > 1: @@ -834,7 +834,7 @@ def conv2d_resample( f=(f if up > 1 else None), up=up, padding=[px0, px1, py0, py1], - gain=up**2, + gain=up ** 2, flip_filter=flip_filter, ) x = _conv2d_wrapper(x=x, w=w, groups=groups, flip_weight=flip_weight) @@ -870,7 +870,7 @@ class Conv2dLayer(torch.nn.Module): self.register_buffer("resample_filter", setup_filter(resample_filter)) self.conv_clamp = conv_clamp self.padding = kernel_size // 2 - self.weight_gain = 1 / np.sqrt(in_channels * (kernel_size**2)) + self.weight_gain = 1 / np.sqrt(in_channels * (kernel_size ** 2)) self.act_gain = activation_funcs[activation].def_gain memory_format = ( diff --git a/lama_cleaner/tests/test_model.py b/lama_cleaner/tests/test_model.py index 7c3ef7d..de8a000 100644 --- a/lama_cleaner/tests/test_model.py +++ b/lama_cleaner/tests/test_model.py @@ -9,13 +9,18 @@ from lama_cleaner.model_manager import ModelManager from lama_cleaner.schema import Config, HDStrategy, LDMSampler, SDSampler current_dir = Path(__file__).parent.absolute().resolve() -save_dir = current_dir / 'result' +save_dir = current_dir / "result" save_dir.mkdir(exist_ok=True, parents=True) -device = 'cuda' if torch.cuda.is_available() else 'cpu' +device = "cuda" if torch.cuda.is_available() else "cpu" device = torch.device(device) -def get_data(fx: float = 1, fy: float = 1.0, img_p=current_dir / "image.png", mask_p=current_dir / "mask.png"): +def get_data( + fx: float = 1, + fy: float = 1.0, + img_p=current_dir / "image.png", + mask_p=current_dir / "mask.png", +): img = cv2.imread(str(img_p)) img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB) mask = cv2.imread(str(mask_p), cv2.IMREAD_GRAYSCALE) @@ -37,10 +42,15 @@ def get_config(strategy, **kwargs): return Config(**data) -def assert_equal(model, config, gt_name, - fx: float = 1, fy: float = 1, - img_p=current_dir / "image.png", - mask_p=current_dir / "mask.png"): +def assert_equal( + model, + config, + gt_name, + fx: float = 1, + fy: float = 1, + img_p=current_dir / "image.png", + mask_p=current_dir / "mask.png", +): img, mask = get_data(fx=fx, fy=fy, img_p=img_p, mask_p=mask_p) print(f"Input image shape: {img.shape}") res = model(img, mask, config) @@ -59,139 +69,13 @@ def assert_equal(model, config, gt_name, # assert np.array_equal(res, gt) -@pytest.mark.parametrize( - "strategy", [HDStrategy.ORIGINAL, HDStrategy.RESIZE, HDStrategy.CROP] -) -def test_lama(strategy): - model = ModelManager(name="lama", device=device) - assert_equal( - model, - get_config(strategy), - f"lama_{strategy[0].upper() + strategy[1:]}_result.png", - ) - - fx = 1.3 - assert_equal( - model, - get_config(strategy), - f"lama_{strategy[0].upper() + strategy[1:]}_fx_{fx}_result.png", - fx=1.3, - ) - - -@pytest.mark.parametrize( - "strategy", [HDStrategy.ORIGINAL, HDStrategy.RESIZE, HDStrategy.CROP] -) -@pytest.mark.parametrize("ldm_sampler", [LDMSampler.ddim, LDMSampler.plms]) -def test_ldm(strategy, ldm_sampler): - model = ModelManager(name="ldm", device=device) - cfg = get_config(strategy, ldm_sampler=ldm_sampler) - assert_equal( - model, cfg, f"ldm_{strategy[0].upper() + strategy[1:]}_{ldm_sampler}_result.png" - ) - - fx = 1.3 - assert_equal( - model, - cfg, - f"ldm_{strategy[0].upper() + strategy[1:]}_{ldm_sampler}_fx_{fx}_result.png", - fx=fx, - ) - - -@pytest.mark.parametrize( - "strategy", [HDStrategy.ORIGINAL, HDStrategy.RESIZE, HDStrategy.CROP] -) -@pytest.mark.parametrize("zits_wireframe", [False, True]) -def test_zits(strategy, zits_wireframe): - model = ModelManager(name="zits", device=device) - cfg = get_config(strategy, zits_wireframe=zits_wireframe) - # os.environ['ZITS_DEBUG_LINE_PATH'] = str(current_dir / 'zits_debug_line.jpg') - # os.environ['ZITS_DEBUG_EDGE_PATH'] = str(current_dir / 'zits_debug_edge.jpg') - assert_equal( - model, - cfg, - f"zits_{strategy[0].upper() + strategy[1:]}_wireframe_{zits_wireframe}_result.png", - ) - - fx = 1.3 - assert_equal( - model, - cfg, - f"zits_{strategy.capitalize()}_wireframe_{zits_wireframe}_fx_{fx}_result.png", - fx=fx, - ) - - -@pytest.mark.parametrize( - "strategy", [HDStrategy.ORIGINAL] -) +@pytest.mark.parametrize("strategy", [HDStrategy.ORIGINAL]) def test_mat(strategy): model = ModelManager(name="mat", device=device) cfg = get_config(strategy) - assert_equal( - model, - cfg, - f"mat_{strategy.capitalize()}_result.png", - ) + for _ in range(10): + assert_equal( + model, cfg, f"mat_{strategy.capitalize()}_result.png", + ) - -@pytest.mark.parametrize( - "strategy", [HDStrategy.ORIGINAL] -) -def test_fcf(strategy): - model = ModelManager(name="fcf", device=device) - cfg = get_config(strategy) - - assert_equal( - model, - cfg, - f"fcf_{strategy.capitalize()}_result.png", - fx=2, - fy=2 - ) - - assert_equal( - model, - cfg, - f"fcf_{strategy.capitalize()}_result.png", - fx=3.8, - fy=2 - ) - - -@pytest.mark.parametrize( - "strategy", [HDStrategy.ORIGINAL, HDStrategy.RESIZE, HDStrategy.CROP] -) -@pytest.mark.parametrize("cv2_flag", ['INPAINT_NS', 'INPAINT_TELEA']) -@pytest.mark.parametrize("cv2_radius", [3, 15]) -def test_cv2(strategy, cv2_flag, cv2_radius): - model = ModelManager( - name="cv2", - device=torch.device(device), - ) - cfg = get_config(strategy, cv2_flag=cv2_flag, cv2_radius=cv2_radius) - assert_equal( - model, - cfg, - f"sd_{strategy.capitalize()}_{cv2_flag}_{cv2_radius}.png", - img_p=current_dir / "overture-creations-5sI6fQgYIuo.png", - mask_p=current_dir / "overture-creations-5sI6fQgYIuo_mask.png", - ) - - -@pytest.mark.parametrize("strategy", [HDStrategy.ORIGINAL, HDStrategy.RESIZE, HDStrategy.CROP]) -def test_manga(strategy): - model = ModelManager( - name="manga", - device=torch.device(device), - ) - cfg = get_config(strategy) - assert_equal( - model, - cfg, - f"sd_{strategy.capitalize()}.png", - img_p=current_dir / "overture-creations-5sI6fQgYIuo.png", - mask_p=current_dir / "overture-creations-5sI6fQgYIuo_mask.png", - )