add enable_low_mem

Author: Qing
Date:   2024-01-08 23:54:20 +08:00
Parent: a71c3fbe1b
Commit: a49c3f86d3

7 changed files with 25 additions and 4 deletions

iopaint/model/controlnet.py

@@ -13,7 +13,7 @@ from .helper.controlnet_preprocess import (
     make_inpaint_control_image,
 )
 from .helper.cpu_text_encoder import CPUTextEncoderWrapper
-from .utils import get_scheduler, handle_from_pretrained_exceptions, get_torch_dtype
+from .utils import get_scheduler, handle_from_pretrained_exceptions, get_torch_dtype, enable_low_mem


 class ControlNet(DiffusionInpaintModel):
@@ -94,8 +94,7 @@ class ControlNet(DiffusionInpaintModel):
             **model_kwargs,
         )
-        if torch.backends.mps.is_available():
-            self.model.enable_attention_slicing()
+        enable_low_mem(self.model, kwargs.get("low_mem", False))

         if kwargs.get("cpu_offload", False) and use_gpu:
             logger.info("Enable sequential cpu offload")
             self.model.enable_sequential_cpu_offload(gpu_id=0)

iopaint/model/power_paint/power_paint.py

@@ -6,7 +6,7 @@ from loguru import logger
 from ..base import DiffusionInpaintModel
 from ..helper.cpu_text_encoder import CPUTextEncoderWrapper
-from ..utils import handle_from_pretrained_exceptions, get_torch_dtype
+from ..utils import handle_from_pretrained_exceptions, get_torch_dtype, enable_low_mem
 from iopaint.schema import InpaintRequest
 from .powerpaint_tokenizer import add_task_to_prompt
 from ...const import POWERPAINT_NAME
@@ -43,6 +43,8 @@ class PowerPaint(DiffusionInpaintModel):
         )
         self.model.tokenizer = PowerPaintTokenizer(self.model.tokenizer)
+        enable_low_mem(self.model, kwargs.get("low_mem", False))
+
         if kwargs.get("cpu_offload", False) and use_gpu:
             logger.info("Enable sequential cpu offload")
             self.model.enable_sequential_cpu_offload(gpu_id=0)

iopaint/model/utils.py

@@ -1002,3 +1002,18 @@ def get_torch_dtype(device, no_half: bool):
     if device in ["cuda", "mps"] and use_fp16:
         return use_gpu, torch.float16
     return use_gpu, torch.float32
+
+
+def enable_low_mem(pipe, enable: bool):
+    if torch.backends.mps.is_available():
+        # https://huggingface.co/docs/diffusers/v0.25.0/en/api/pipelines/stable_diffusion/image_variation#diffusers.StableDiffusionImageVariationPipeline.enable_attention_slicing
+        # CUDA: Don't enable attention slicing if you're already using `scaled_dot_product_attention` (SDPA) from PyTorch 2.0 or xFormers.
+        if enable:
+            pipe.enable_attention_slicing("max")
+        else:
+            # https://huggingface.co/docs/diffusers/optimization/mps
+            # Devices with less than 64GB of memory are recommended to use enable_attention_slicing
+            pipe.enable_attention_slicing()
+
+    if enable:
+        pipe.vae.enable_tiling()
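
For illustration, a minimal sketch of how the new helper could be used with a stock diffusers pipeline. The import path follows the relative imports above (the helper lives in iopaint/model/utils.py); the checkpoint name and dtype are illustrative assumptions, not part of this commit:

import torch
from diffusers import StableDiffusionInpaintPipeline

from iopaint.model.utils import enable_low_mem

# Illustrative checkpoint; any diffusers pipeline exposing a VAE works the same way.
pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting",
    torch_dtype=torch.float16,
)

# enable=True: "max" attention slicing on MPS plus VAE tiling on every backend;
# enable=False: default attention slicing on MPS only, no-op elsewhere.
enable_low_mem(pipe, enable=True)

Off MPS the helper deliberately skips attention slicing, matching the diffusers guidance quoted in its comments: slicing can slow inference when PyTorch 2.0 SDPA or xFormers is already in use, so on CUDA only VAE tiling is applied when low_mem is on.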