add enable_low_mem
This commit is contained in:
@@ -13,7 +13,7 @@ from .helper.controlnet_preprocess import (
|
||||
make_inpaint_control_image,
|
||||
)
|
||||
from .helper.cpu_text_encoder import CPUTextEncoderWrapper
|
||||
from .utils import get_scheduler, handle_from_pretrained_exceptions, get_torch_dtype
|
||||
from .utils import get_scheduler, handle_from_pretrained_exceptions, get_torch_dtype, enable_low_mem
|
||||
|
||||
|
||||
class ControlNet(DiffusionInpaintModel):
|
||||
@@ -94,8 +94,7 @@ class ControlNet(DiffusionInpaintModel):
|
||||
**model_kwargs,
|
||||
)
|
||||
|
||||
if torch.backends.mps.is_available():
|
||||
self.model.enable_attention_slicing()
|
||||
enable_low_mem(self.model, kwargs.get("low_mem", False))
|
||||
|
||||
if kwargs.get("cpu_offload", False) and use_gpu:
|
||||
logger.info("Enable sequential cpu offload")
|
||||
|
||||
@@ -6,7 +6,7 @@ from loguru import logger
|
||||
|
||||
from ..base import DiffusionInpaintModel
|
||||
from ..helper.cpu_text_encoder import CPUTextEncoderWrapper
|
||||
from ..utils import handle_from_pretrained_exceptions, get_torch_dtype
|
||||
from ..utils import handle_from_pretrained_exceptions, get_torch_dtype, enable_low_mem
|
||||
from iopaint.schema import InpaintRequest
|
||||
from .powerpaint_tokenizer import add_task_to_prompt
|
||||
from ...const import POWERPAINT_NAME
|
||||
@@ -43,6 +43,8 @@ class PowerPaint(DiffusionInpaintModel):
|
||||
)
|
||||
self.model.tokenizer = PowerPaintTokenizer(self.model.tokenizer)
|
||||
|
||||
enable_low_mem(self.model, kwargs.get("low_mem", False))
|
||||
|
||||
if kwargs.get("cpu_offload", False) and use_gpu:
|
||||
logger.info("Enable sequential cpu offload")
|
||||
self.model.enable_sequential_cpu_offload(gpu_id=0)
|
||||
|
||||
@@ -1002,3 +1002,18 @@ def get_torch_dtype(device, no_half: bool):
|
||||
if device in ["cuda", "mps"] and use_fp16:
|
||||
return use_gpu, torch.float16
|
||||
return use_gpu, torch.float32
|
||||
|
||||
|
||||
def enable_low_mem(pipe, enable: bool):
    """Turn on memory-saving features of a diffusers pipeline.

    On Apple MPS devices attention slicing is always enabled — devices with
    less than 64GB of memory are recommended to use it
    (https://huggingface.co/docs/diffusers/optimization/mps) — using the
    aggressive "max" slice size when *enable* is set, the library default
    otherwise.  When *enable* is set, VAE tiling is activated as well,
    regardless of device.

    CUDA note: don't combine attention slicing with PyTorch 2.0 SDPA or
    xFormers — see
    https://huggingface.co/docs/diffusers/v0.25.0/en/api/pipelines/stable_diffusion/image_variation#diffusers.StableDiffusionImageVariationPipeline.enable_attention_slicing
    """
    if torch.backends.mps.is_available():
        # "max" slices one attention head at a time; the no-argument call
        # uses the library's default ("auto") slicing.
        slice_args = ("max",) if enable else ()
        pipe.enable_attention_slicing(*slice_args)

    if enable:
        pipe.vae.enable_tiling()
|
||||
|
||||
Reference in New Issue
Block a user