add enable_low_mem

This commit is contained in:
Qing
2024-01-09 22:42:48 +08:00
parent a49c3f86d3
commit 8ed969eec1
7 changed files with 147 additions and 21 deletions

View File

@@ -13,7 +13,12 @@ from .helper.controlnet_preprocess import (
make_inpaint_control_image,
)
from .helper.cpu_text_encoder import CPUTextEncoderWrapper
from .utils import get_scheduler, handle_from_pretrained_exceptions, get_torch_dtype, enable_low_mem
from .utils import (
get_scheduler,
handle_from_pretrained_exceptions,
get_torch_dtype,
enable_low_mem,
)
class ControlNet(DiffusionInpaintModel):

View File

@@ -6,7 +6,7 @@ from loguru import logger
from iopaint.const import INSTRUCT_PIX2PIX_NAME
from .base import DiffusionInpaintModel
from iopaint.schema import InpaintRequest
from .utils import get_torch_dtype
from .utils import get_torch_dtype, enable_low_mem
class InstructPix2Pix(DiffusionInpaintModel):
@@ -33,8 +33,7 @@ class InstructPix2Pix(DiffusionInpaintModel):
self.model = StableDiffusionInstructPix2PixPipeline.from_pretrained(
self.name, variant="fp16", torch_dtype=torch_dtype, **model_kwargs
)
if torch.backends.mps.is_available():
self.model.enable_attention_slicing()
enable_low_mem(self.model, kwargs.get("low_mem", False))
if kwargs.get("cpu_offload", False) and use_gpu:
logger.info("Enable sequential cpu offload")

View File

@@ -6,7 +6,7 @@ import torch
from iopaint.const import KANDINSKY22_NAME
from .base import DiffusionInpaintModel
from iopaint.schema import InpaintRequest
from .utils import get_torch_dtype
from .utils import get_torch_dtype, enable_low_mem
class Kandinsky(DiffusionInpaintModel):
@@ -25,8 +25,7 @@ class Kandinsky(DiffusionInpaintModel):
self.model = AutoPipelineForInpainting.from_pretrained(
self.name, **model_kwargs
).to(device)
if torch.backends.mps.is_available():
self.model.enable_attention_slicing()
enable_low_mem(self.model, kwargs.get("low_mem", False))
self.callback = kwargs.pop("callback", None)

View File

@@ -7,7 +7,7 @@ from loguru import logger
from iopaint.helper import decode_base64_to_image
from .base import DiffusionInpaintModel
from iopaint.schema import InpaintRequest
from .utils import get_torch_dtype
from .utils import get_torch_dtype, enable_low_mem
class PaintByExample(DiffusionInpaintModel):
@@ -30,9 +30,7 @@ class PaintByExample(DiffusionInpaintModel):
self.model = DiffusionPipeline.from_pretrained(
self.name, torch_dtype=torch_dtype, **model_kwargs
)
if torch.backends.mps.is_available():
self.model.enable_attention_slicing()
enable_low_mem(self.model, kwargs.get("low_mem", False))
# TODO: gpu_id
if kwargs.get("cpu_offload", False) and use_gpu:

View File

@@ -5,7 +5,7 @@ from loguru import logger
from .base import DiffusionInpaintModel
from .helper.cpu_text_encoder import CPUTextEncoderWrapper
from .utils import handle_from_pretrained_exceptions, get_torch_dtype
from .utils import handle_from_pretrained_exceptions, get_torch_dtype, enable_low_mem
from iopaint.schema import InpaintRequest, ModelType
@@ -48,10 +48,7 @@ class SD(DiffusionInpaintModel):
**model_kwargs,
)
if torch.backends.mps.is_available():
# MPS: attention slicing is recommended when system RAM is below 64 GB https://huggingface.co/docs/diffusers/optimization/mps
# CUDA: Don't enable attention slicing if you're already using `scaled_dot_product_attention` (SDPA) from PyTorch 2.0 or xFormers. https://huggingface.co/docs/diffusers/v0.25.0/en/api/pipelines/stable_diffusion/image_variation#diffusers.StableDiffusionImageVariationPipeline.enable_attention_slicing
self.model.enable_attention_slicing()
enable_low_mem(self.model, kwargs.get("low_mem", False))
if kwargs.get("cpu_offload", False) and use_gpu:
logger.info("Enable sequential cpu offload")

View File

@@ -9,7 +9,7 @@ from loguru import logger
from iopaint.schema import InpaintRequest, ModelType
from .base import DiffusionInpaintModel
from .utils import handle_from_pretrained_exceptions, get_torch_dtype
from .utils import handle_from_pretrained_exceptions, get_torch_dtype, enable_low_mem
class SDXL(DiffusionInpaintModel):
@@ -47,10 +47,7 @@ class SDXL(DiffusionInpaintModel):
variant="fp16",
)
if torch.backends.mps.is_available():
# MPS: attention slicing is recommended when system RAM is below 64 GB https://huggingface.co/docs/diffusers/optimization/mps
# CUDA: Don't enable attention slicing if you're already using `scaled_dot_product_attention` (SDPA) from PyTorch 2.0 or xFormers. https://huggingface.co/docs/diffusers/v0.25.0/en/api/pipelines/stable_diffusion/image_variation#diffusers.StableDiffusionImageVariationPipeline.enable_attention_slicing
self.model.enable_attention_slicing()
enable_low_mem(self.model, kwargs.get("low_mem", False))
if kwargs.get("cpu_offload", False) and use_gpu:
logger.info("Enable sequential cpu offload")