
Module Group Offloading #10503


Merged — 50 commits, merged on Feb 14, 2025
Changes from 1 commit
Commits
d1737e3
update
a-r-r-o-w Jan 9, 2025
2783669
fix
a-r-r-o-w Jan 9, 2025
6a9a3e5
non_blocking; handle parameters and buffers
a-r-r-o-w Jan 10, 2025
c426a34
update
a-r-r-o-w Jan 10, 2025
d579037
Group offloading with cuda stream prefetching (#10516)
a-r-r-o-w Jan 11, 2025
5f33621
Merge branch 'main' into groupwise-offloading
a-r-r-o-w Jan 11, 2025
a8eabd0
update
a-r-r-o-w Jan 12, 2025
deda9a3
Merge branch 'main' into groupwise-offloading
a-r-r-o-w Jan 16, 2025
80ac5a7
copy model hook implementation from pab
a-r-r-o-w Jan 16, 2025
d2a2981
update; ~very workaround based implementation but it seems to work as…
a-r-r-o-w Jan 16, 2025
01c7d22
more workarounds to make it actually work
a-r-r-o-w Jan 16, 2025
22aff34
cleanup
a-r-r-o-w Jan 16, 2025
42bc19b
rewrite
a-r-r-o-w Jan 17, 2025
8c63bf5
update
a-r-r-o-w Jan 19, 2025
e09e716
make sure to sync current stream before overwriting with pinned params
a-r-r-o-w Jan 19, 2025
bf379c1
Merge branch 'main' into groupwise-offloading
a-r-r-o-w Jan 19, 2025
0bf0baf
better check
a-r-r-o-w Jan 19, 2025
b850c75
update
a-r-r-o-w Jan 20, 2025
6ed9c2f
remove hook implementation to not deal with merge conflict
a-r-r-o-w Jan 23, 2025
13dd337
Merge branch 'main' into groupwise-offloading
a-r-r-o-w Jan 23, 2025
073d4bc
re-add hook changes
a-r-r-o-w Jan 23, 2025
8ba2bda
why use more memory when less memory do trick
a-r-r-o-w Jan 23, 2025
b2e838f
why still use slightly more memory when less memory do trick
a-r-r-o-w Jan 23, 2025
f30c55f
Merge branch 'main' into groupwise-offloading
a-r-r-o-w Jan 23, 2025
5ea3d8a
optimise
a-r-r-o-w Jan 26, 2025
db2fd3b
add model tests
a-r-r-o-w Jan 26, 2025
a0160e1
add pipeline tests
a-r-r-o-w Jan 26, 2025
aaa9a53
update docs
a-r-r-o-w Jan 26, 2025
17b2753
Merge branch 'main' into groupwise-offloading
a-r-r-o-w Jan 26, 2025
edf8103
add layernorm and groupnorm
a-r-r-o-w Jan 26, 2025
af62c93
Merge branch 'main' into groupwise-offloading
a-r-r-o-w Jan 28, 2025
f227e15
Merge branch 'main' into groupwise-offloading
a-r-r-o-w Feb 4, 2025
24f9273
address review comments
a-r-r-o-w Feb 4, 2025
8f10d05
improve tests; add docs
a-r-r-o-w Feb 4, 2025
06b411f
improve docs
a-r-r-o-w Feb 4, 2025
8bd7e3b
Merge branch 'main' into groupwise-offloading
a-r-r-o-w Feb 4, 2025
904e470
Apply suggestions from code review
a-r-r-o-w Feb 5, 2025
3172ed5
apply suggestions from code review
a-r-r-o-w Feb 5, 2025
72aa57f
Merge branch 'main' into groupwise-offloading
a-r-r-o-w Feb 5, 2025
aee24bc
update tests
a-r-r-o-w Feb 5, 2025
db125ce
Merge branch 'main' into groupwise-offloading
a-r-r-o-w Feb 6, 2025
3f20e6b
apply suggestions from review
a-r-r-o-w Feb 6, 2025
840576a
enable_group_offloading -> enable_group_offload for naming consistency
a-r-r-o-w Feb 6, 2025
8804d74
raise errors if multiple offloading strategies used; add relevant tests
a-r-r-o-w Feb 6, 2025
954bb7d
handle .to() when group offload applied
a-r-r-o-w Feb 6, 2025
ba6c4a8
Merge branch 'main' into groupwise-offloading
a-r-r-o-w Feb 6, 2025
da88c33
refactor some repeated code
a-r-r-o-w Feb 6, 2025
a872e84
remove unintentional change from merge conflict
a-r-r-o-w Feb 6, 2025
6be43b8
handle .cuda()
a-r-r-o-w Feb 6, 2025
274b84e
Merge branch 'main' into groupwise-offloading
a-r-r-o-w Feb 11, 2025
17 changes: 17 additions & 0 deletions src/diffusers/hooks/group_offloading.py
@@ -22,6 +22,7 @@


if is_accelerate_available():
    from accelerate.hooks import AlignDevicesHook, CpuOffload
    from accelerate.utils import send_to_device


@@ -341,6 +342,8 @@ def apply_group_offloading(
        else:
            raise ValueError("Using streams for data transfer requires a CUDA device.")

    _raise_error_if_accelerate_model_or_sequential_hook_present(module)

    if offload_type == "block_level":
        if num_blocks_per_group is None:
            raise ValueError("num_blocks_per_group must be provided when using offload_type='block_level'.")
@@ -645,3 +648,17 @@ def _find_parent_module_in_module_dict(name: str, module_dict: Dict[str, torch.n
            return parent_name
        atoms.pop()
    return ""


def _raise_error_if_accelerate_model_or_sequential_hook_present(module: torch.nn.Module) -> None:
    if not is_accelerate_available():
        return
    for name, submodule in module.named_modules():
        if not hasattr(submodule, "_hf_hook"):
            continue
        if isinstance(submodule._hf_hook, (AlignDevicesHook, CpuOffload)):
            raise ValueError(
                f"Cannot apply group offloading to a module that is already applying an alternative "
                f"offloading strategy from Accelerate. If you want to apply group offloading, please "
                f"disable the existing offloading strategy first. Offending module: {name} ({type(submodule)})"
            )
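
A minimal sketch (not part of the diff) of how the new guard is expected to fire. It assumes a CUDA machine with accelerate installed; the toy model is illustrative only, and the call arguments mirror the ones used in this PR's tests.

    # Attach Accelerate's CPU-offloading hooks first, then try to apply group
    # offloading on top of them: the new guard should raise a ValueError.
    import torch
    from accelerate import cpu_offload
    from diffusers.hooks.group_offloading import apply_group_offloading

    model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 8))
    cpu_offload(model, execution_device=torch.device("cuda"))  # attaches AlignDevicesHook hooks

    try:
        apply_group_offloading(
            model,
            onload_device=torch.device("cuda"),
            offload_type="block_level",
            num_blocks_per_group=1,
        )
    except ValueError as err:
        print(err)  # "Cannot apply group offloading to a module that is already applying ..."
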
22 changes: 22 additions & 0 deletions src/diffusers/pipelines/pipeline_utils.py
@@ -1075,6 +1075,8 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t
                The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
                default to "cuda".
        """
        self._check_group_offloading_inactive_or_raise_error()

        is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1
        if is_pipeline_device_mapped:
            raise ValueError(
@@ -1186,6 +1188,8 @@ def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Un
                The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
                default to "cuda".
        """
        self._check_group_offloading_inactive_or_raise_error()

        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
            from accelerate import cpu_offload
        else:
@@ -1910,6 +1914,24 @@ def from_pipe(cls, pipeline, **kwargs):

        return new_pipeline

    def _check_group_offloading_inactive_or_raise_error(self) -> None:
        from ..hooks import HookRegistry
        from ..hooks.group_offloading import _GROUP_OFFLOADING

        for name, component in self.components.items():
            if not isinstance(component, torch.nn.Module):
                continue
            for module in component.modules():
                if not hasattr(module, "_diffusers_hook"):
                    continue
                registry: HookRegistry = module._diffusers_hook
                if registry.get_hook(_GROUP_OFFLOADING) is not None:
                    raise ValueError(
                        f"You are trying to apply model/sequential CPU offloading to a pipeline that contains "
                        f"components with group offloading enabled. This is not supported. Please disable group "
                        f"offloading for the '{name}' component of the pipeline to use other offloading methods."
                    )


class StableDiffusionMixin:
r"""
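From the user's side the two checks are symmetric: whichever offloading strategy is enabled first wins and the other call raises. A minimal sketch of the expected behaviour (the checkpoint id and the "unet" component name are placeholders, not prescribed by this PR):

    # Group offloading is enabled on a component first, so the pipeline-level
    # CPU-offloading helpers should now refuse to run on top of it.
    import torch
    from diffusers import DiffusionPipeline

    pipe = DiffusionPipeline.from_pretrained("some/placeholder-checkpoint", torch_dtype=torch.float16)

    pipe.unet.enable_group_offload(
        onload_device=torch.device("cuda"),
        offload_type="block_level",
        num_blocks_per_group=1,
    )

    # Raises: "You are trying to apply model/sequential CPU offloading to a pipeline ..."
    pipe.enable_model_cpu_offload()
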
39 changes: 39 additions & 0 deletions tests/hooks/test_group_offloading.py
@@ -18,6 +18,7 @@
import torch

from diffusers.models import ModelMixin
from diffusers.pipelines.pipeline_utils import DiffusionPipeline
from diffusers.utils.testing_utils import require_torch_gpu, torch_device


@@ -56,6 +57,20 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x


class DummyPipeline(DiffusionPipeline):
    model_cpu_offload_seq = "model"

    def __init__(self, model: torch.nn.Module) -> None:
        super().__init__()

        self.register_modules(model=model)

    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        for _ in range(2):
            x = x + 0.1 * self.model(x)
        return x


@require_torch_gpu
class GroupOffloadTests(unittest.TestCase):
    in_features = 64
@@ -151,3 +166,27 @@ def test_error_raised_if_supports_group_offloading_false(self):
        self.model._supports_group_offloading = False
        with self.assertRaisesRegex(ValueError, "does not support group offloading"):
            self.model.enable_group_offload(onload_device=torch.device("cuda"))

    def test_error_raised_if_model_offloading_applied_on_group_offloaded_module(self):
        pipe = DummyPipeline(self.model)
        pipe.model.enable_group_offload(torch_device, offload_type="block_level", num_blocks_per_group=3)
        with self.assertRaisesRegex(ValueError, "You are trying to apply model/sequential CPU offloading"):
            pipe.enable_model_cpu_offload()

    def test_error_raised_if_sequential_offloading_applied_on_group_offloaded_module(self):
        pipe = DummyPipeline(self.model)
        pipe.model.enable_group_offload(torch_device, offload_type="block_level", num_blocks_per_group=3)
        with self.assertRaisesRegex(ValueError, "You are trying to apply model/sequential CPU offloading"):
            pipe.enable_sequential_cpu_offload()

    def test_error_raised_if_group_offloading_applied_on_model_offloaded_module(self):
        pipe = DummyPipeline(self.model)
        pipe.enable_model_cpu_offload()
        with self.assertRaisesRegex(ValueError, "Cannot apply group offloading"):
            pipe.model.enable_group_offload(torch_device, offload_type="block_level", num_blocks_per_group=3)

    def test_error_raised_if_group_offloading_applied_on_sequential_offloaded_module(self):
        pipe = DummyPipeline(self.model)
        pipe.enable_sequential_cpu_offload()
        with self.assertRaisesRegex(ValueError, "Cannot apply group offloading"):
            pipe.model.enable_group_offload(torch_device, offload_type="block_level", num_blocks_per_group=3)