Commit 407e0ee

Merge pull request #231 from lucidrains/residual-fsq-fix
allow for hard clamp in fsq, to ready for residual fsq pre-softclampi…
2 parents 976c3f2 + 3867f60 commit 407e0ee

File tree

4 files changed: +48 -22 lines changed

pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 [project]
 name = "vector-quantize-pytorch"
-version = "1.25.2"
+version = "1.26.0"
 description = "Vector Quantization - Pytorch"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -23,7 +23,7 @@ classifiers=[
 ]

 dependencies = [
-    "torch>=2.0",
+    "torch>=2.4",
     "einops>=0.8.0",
     "einx>=0.3.0",
 ]

tests/test_readme.py

Lines changed: 4 additions & 2 deletions
@@ -247,13 +247,15 @@ def test_directional_reparam():
     quantized, indices, _ = rq(x)

 @pytest.mark.parametrize('preserve_symmetry', (True, False))
+@pytest.mark.parametrize('bound_hard_clamp', (True, False))
 def test_fsq(
-    preserve_symmetry
+    preserve_symmetry,
+    bound_hard_clamp
 ):
     from vector_quantize_pytorch import FSQ

     levels = [8,5,5,5] # see 4.1 and A.4.1 in the paper
-    quantizer = FSQ(levels, preserve_symmetry = preserve_symmetry)
+    quantizer = FSQ(levels, preserve_symmetry = preserve_symmetry, bound_hard_clamp = bound_hard_clamp)

     x = torch.randn(1, 1024, 4) # 4 since there are 4 levels
     xhat, indices = quantizer(x)

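Outside of the pytest parametrization above, the new flag can be exercised directly. A minimal sketch, using only the constructor call and tensor shapes shown in the test (nothing else is assumed):

import torch
from vector_quantize_pytorch import FSQ

levels = [8, 5, 5, 5]                        # see 4.1 and A.4.1 in the FSQ paper
quantizer = FSQ(levels, bound_hard_clamp = True)

x = torch.randn(1, 1024, 4)                  # 4, since there are 4 levels
xhat, indices = quantizer(x)                 # xhat is snapped to the implicit codebook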
vector_quantize_pytorch/finite_scalar_quantization.py

Lines changed: 20 additions & 7 deletions
@@ -11,7 +11,7 @@
 import torch
 import torch.nn as nn
 from torch.nn import Module
-from torch import tensor, Tensor, int32
+from torch import tensor, Tensor, int32, tanh, atanh, clamp
 from torch.amp import autocast

 import einx
@@ -30,6 +30,9 @@ def default(*args):
             return arg
     return None

+def identity(t):
+    return t
+
 def maybe(fn):
     @wraps(fn)
     def inner(x, *args, **kwargs):
@@ -73,6 +76,7 @@ def __init__(
         force_quantization_f32 = True,
         preserve_symmetry = False,
         noise_dropout = 0.,
+        bound_hard_clamp = False # for residual fsq, if input is pre-softclamped to the right range
     ):
         super().__init__()

@@ -121,22 +125,31 @@ def __init__(
         self.allowed_dtypes = allowed_dtypes
         self.force_quantization_f32 = force_quantization_f32

-    def bound(self, z, eps = 1e-3):
+        # allow for a hard clamp
+
+        self.bound_hard_clamp = bound_hard_clamp
+
+    def bound(self, z, eps = 1e-3, hard_clamp = False):
         """ Bound `z`, an array of shape (..., d). """
+        maybe_tanh = tanh if not hard_clamp else partial(clamp, min = -1., max = 1.)
+        maybe_atanh = atanh if not hard_clamp else identity
+
         half_l = (self._levels - 1) * (1 + eps) / 2
         offset = torch.where(self._levels % 2 == 0, 0.5, 0.0)
-        shift = (offset / half_l).atanh()
-        bounded_z = (z + shift).tanh() * half_l - offset
+        shift = maybe_atanh(offset / half_l)
+        bounded_z = maybe_tanh(z + shift) * half_l - offset
         half_width = self._levels // 2
         return round_ste(bounded_z) / half_width

     # symmetry-preserving and noise-approximated quantization, section 3.2 in https://arxiv.org/abs/2411.19842

-    def symmetry_preserving_bound(self, z):
+    def symmetry_preserving_bound(self, z, hard_clamp = False):
         """ QL(x) = 2 / (L - 1) * [(L - 1) * (tanh(x) + 1) / 2 + 0.5] - 1 """
+        maybe_tanh = tanh if not hard_clamp else partial(clamp, min = -1., max = 1.)
+
         levels_minus_1 = (self._levels - 1)
         scale = 2. / levels_minus_1
-        bracket = (levels_minus_1 * (z.tanh() + 1) / 2.) + 0.5
+        bracket = (levels_minus_1 * (maybe_tanh(z) + 1) / 2.) + 0.5
         bracket = floor_ste(bracket)
         return scale * bracket - 1.

@@ -146,7 +159,7 @@ def quantize(self, z):
         shape, device, noise_dropout, preserve_symmetry = z.shape[0], z.device, self.noise_dropout, self.preserve_symmetry
         bound_fn = self.symmetry_preserving_bound if preserve_symmetry else self.bound

-        bounded_z = bound_fn(z)
+        bounded_z = bound_fn(z, hard_clamp = self.bound_hard_clamp)

         # determine where to add a random offset elementwise
         # if using noise dropout

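For reference, a standalone sketch of what the hard-clamp toggle changes in the bounding step. This is illustrative only and mirrors the diff above; the library's bound method additionally routes the rounding through round_ste (a straight-through round):

import torch
from functools import partial

def bound(z, levels, eps = 1e-3, hard_clamp = False):
    # hard_clamp = False: squash with tanh, as before
    # hard_clamp = True : assume the input was already soft-clamped upstream, just clip it
    maybe_tanh = torch.tanh if not hard_clamp else partial(torch.clamp, min = -1., max = 1.)
    maybe_atanh = torch.atanh if not hard_clamp else (lambda t: t)

    half_l = (levels - 1) * (1 + eps) / 2
    offset = torch.where(levels % 2 == 0, 0.5, 0.0)
    shift = maybe_atanh(offset / half_l)
    bounded_z = maybe_tanh(z + shift) * half_l - offset
    half_width = levels // 2
    return torch.round(bounded_z) / half_width  # library uses round_ste here

levels = torch.tensor([8, 5, 5, 5])
print(bound(torch.randn(2, 4), levels, hard_clamp = True))  # values land on the FSQ grid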
vector_quantize_pytorch/residual_fsq.py

Lines changed: 22 additions & 11 deletions
@@ -1,11 +1,11 @@
+from __future__ import annotations
+
 import random
 from math import ceil
 from functools import partial

-from typing import List
-
 import torch
-from torch import nn
+from torch import nn, tensor
 from torch.nn import Module, ModuleList
 import torch.nn.functional as F
 from torch.amp import autocast
@@ -52,14 +52,15 @@ class ResidualFSQ(Module):
     def __init__(
         self,
         *,
-        levels: List[int],
+        levels: list[int],
         num_quantizers,
         dim = None,
         is_channel_first = False,
         quantize_dropout = False,
         quantize_dropout_cutoff_index = 0,
         quantize_dropout_multiple_of = 1,
-        soft_clamp_input_value = None,
+        soft_clamp_input_value: float | list[float] | Tensor | None = None,
+        bound_hard_clamp = True,
         **kwargs
     ):
         super().__init__()
@@ -74,25 +75,24 @@ def __init__(
         self.is_channel_first = is_channel_first
         self.num_quantizers = num_quantizers

-        # soft clamping the input value
-
-        self.soft_clamp_input_value = soft_clamp_input_value
-
         # layers

         self.levels = levels
         self.layers = nn.ModuleList([])

-        levels_tensor = torch.Tensor(levels)
+        levels_tensor = tensor(levels)
+        assert (levels_tensor > 1).all()

         scales = []

         for ind in range(num_quantizers):
-            scales.append((levels_tensor - 1) ** -ind)
+            scales.append(levels_tensor.float() ** -ind)

         fsq = FSQ(
             levels = levels,
             dim = codebook_dim,
+            preserve_symmetry = True,
+            bound_hard_clamp = bound_hard_clamp,
             **kwargs
         )

@@ -111,6 +111,17 @@ def __init__(
         self.quantize_dropout_cutoff_index = quantize_dropout_cutoff_index
         self.quantize_dropout_multiple_of = quantize_dropout_multiple_of # encodec paper proposes structured dropout, believe this was set to 4

+        # soft clamping the input value
+
+        if bound_hard_clamp:
+            assert not exists(soft_clamp_input_value)
+            soft_clamp_input_value = 1 + (1 / (levels_tensor - 1))
+
+        if isinstance(soft_clamp_input_value, (list, float)):
+            soft_clamp_input_value = tensor(soft_clamp_input_value)
+
+        self.register_buffer('soft_clamp_input_value', soft_clamp_input_value, persistent = False)
+
     @property
     def codebooks(self):
         codebooks = [layer.implicit_codebook for layer in self.layers]

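The derived clamp range above follows from the symmetry-preserving grid, whose levels are spaced 2 / (L - 1) apart in [-1, 1]: 1 + 1 / (L - 1) sits half a grid step beyond the grid edge. A sketch of the new default behaviour; the dim and num_quantizers values are illustrative, and the printed attribute is the buffer registered in the __init__ above:

import torch
from vector_quantize_pytorch import ResidualFSQ

residual_fsq = ResidualFSQ(
    dim = 256,               # illustrative
    levels = [8, 5, 5, 5],
    num_quantizers = 4       # illustrative
)

# with bound_hard_clamp = True (the new default), the clamp range is derived as 1 + 1 / (levels - 1)
print(residual_fsq.soft_clamp_input_value)   # tensor([1.1429, 1.2500, 1.2500, 1.2500])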
0 commit comments