optimize sd/paint_by_example model VRAM usage

This commit is contained in:
Qing
2023-01-18 18:34:10 +08:00
parent 384f16dcd0
commit 148e97e8da
7 changed files with 61 additions and 24 deletions

View File

@@ -22,16 +22,30 @@ class PaintByExample(InpaintModel):
use_gpu = device == torch.device('cuda') and torch.cuda.is_available()
torch_dtype = torch.float16 if use_gpu and fp16 else torch.float32
model_kwargs = {"local_files_only": kwargs.get('local_files_only', False)}
if kwargs['disable_nsfw'] or kwargs.get('cpu_offload', False):
logger.info("Disable Paint By Example Model NSFW checker")
model_kwargs.update(dict(
safety_checker=None,
requires_safety_checker=False
))
self.model = DiffusionPipeline.from_pretrained(
"Fantasy-Studio/Paint-by-Example",
torch_dtype=torch_dtype,
**model_kwargs
)
self.model = self.model.to(device)
self.model.enable_attention_slicing()
if kwargs.get('enable_xformers', False):
self.model.enable_xformers_memory_efficient_attention()
# TODO: gpu_id
if kwargs.get('cpu_offload', False) and torch.cuda.is_available():
if kwargs.get('cpu_offload', False) and use_gpu:
self.model.image_encoder = self.model.image_encoder.to(device)
self.model.enable_sequential_cpu_offload(gpu_id=0)
else:
self.model = self.model.to(device)
def forward(self, image, mask, config: Config):
"""Input image and output image have same size

View File

@@ -37,10 +37,12 @@ class SD(InpaintModel):
fp16 = not kwargs.get('no_half', False)
model_kwargs = {"local_files_only": kwargs.get('local_files_only', kwargs['sd_run_local'])}
if kwargs['sd_disable_nsfw']:
if kwargs['disable_nsfw'] or kwargs.get('cpu_offload', False):
logger.info("Disable Stable Diffusion Model NSFW checker")
model_kwargs.update(dict(
safety_checker=None,
feature_extractor=None,
requires_safety_checker=False
))
use_gpu = device == torch.device('cuda') and torch.cuda.is_available()
@@ -52,19 +54,19 @@ class SD(InpaintModel):
use_auth_token=kwargs["hf_access_token"],
**model_kwargs
)
self.model = self.model.to(device)
# https://huggingface.co/docs/diffusers/v0.7.0/en/api/pipelines/stable_diffusion#diffusers.StableDiffusionInpaintPipeline.enable_attention_slicing
self.model.enable_attention_slicing()
# https://huggingface.co/docs/diffusers/v0.7.0/en/optimization/fp16#memory-efficient-attention
if kwargs.get('sd_enable_xformers', False):
if kwargs.get('enable_xformers', False):
self.model.enable_xformers_memory_efficient_attention()
if kwargs.get('cpu_offload', False) and torch.cuda.is_available():
if kwargs.get('cpu_offload', False) and use_gpu:
# TODO: gpu_id
logger.info("Enable sequential cpu offload")
self.model.enable_sequential_cpu_offload(gpu_id=0)
else:
self.model = self.model.to(device)
if kwargs['sd_cpu_textencoder']:
logger.info("Run Stable Diffusion TextEncoder on CPU")
self.model.text_encoder = CPUTextEncoderWrapper(self.model.text_encoder, torch_dtype)