Skip to content

Commit 57bbb9b

Browse files
author
Rogier van Dalen
committed
Add support for masks
1 parent 557260a commit 57bbb9b

2 files changed

Lines changed: 116 additions & 32 deletions

File tree

speechbrain/processing/features.py

Lines changed: 63 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
"""
3737

3838
import math
39-
from typing import Tuple, Union
39+
from typing import Optional, Tuple, Union
4040

4141
import torch
4242
from torch.distributed import ReduceOp
@@ -996,7 +996,11 @@ def forward(self, x):
996996
return cw_x
997997

998998

999-
def gaussian_statistics(x: torch.Tensor, dim: Union[int, tuple, None] = None):
999+
def gaussian_statistics(
1000+
x: torch.Tensor,
1001+
dim: Union[int, tuple, None] = None,
1002+
mask: Optional[torch.Tensor] = None,
1003+
):
10001004
"""
10011005
Compute first- and second-order moments of data, and return them as the
10021006
count, mean, and variance of a vector over one or more dimensions.
@@ -1009,6 +1013,14 @@ def gaussian_statistics(x: torch.Tensor, dim: Union[int, tuple, None] = None):
10091013
The dimension or dimensions that the statistics should be computed over.
10101014
The other dimensions are retained in the output.
10111015
If None, then scalar-valued statistics will be returned.
1016+
mask: torch.Tensor | None
1017+
A boolean tensor with True for elements that should be considered, and
1018+
False for elements that should not be considered (e.g. that are after
1019+
the end of utterances).
1020+
This tensor should have the same number of dimensions as "x".
1021+
The dimensions indicated by "dim" should have the same size as the
1022+
matching dimensions in "x".
1023+
The other dimensions should have size 1.
10121024
10131025
Returns
10141026
-------
@@ -1021,26 +1033,54 @@ def gaussian_statistics(x: torch.Tensor, dim: Union[int, tuple, None] = None):
10211033
The variance.
10221034
"""
10231035

1024-
if dim is None:
1025-
number = math.prod(x.shape)
1026-
elif isinstance(dim, int):
1027-
number = x.shape[dim]
1028-
else:
1029-
assert isinstance(dim, tuple)
1030-
if dim == ():
1036+
def normalise_dimensions(
1037+
x: torch.Tensor, dim: Union[int, tuple, None]
1038+
) -> Tuple[tuple, tuple]:
1039+
"""Normalise "dim" and return (reduce_dimensions, keep_dimensions)."""
1040+
all_dimensions = range(len(x.shape))
1041+
if dim is None:
1042+
return (tuple(d for d in all_dimensions), ())
1043+
elif isinstance(dim, int):
1044+
return ((dim,), tuple(d for d in all_dimensions if d != dim))
1045+
else:
1046+
assert isinstance(dim, tuple)
1047+
return (dim, tuple(d for d in all_dimensions if d not in dim))
1048+
1049+
(reduce_dimensions, keep_dimensions) = normalise_dimensions(x, dim)
1050+
1051+
# Compute the number of elements that the statistics are computed over;
1052+
# the mask shape is validated just below.
1053+
1054+
# Check that the mask is shaped correctly.
1055+
if mask is not None:
1056+
assert len(mask.shape) == len(x.shape)
1057+
for d in reduce_dimensions:
1058+
assert mask.size(d) == x.size(d)
1059+
for d in keep_dimensions:
1060+
assert mask.size(d) == 1
1061+
1062+
if reduce_dimensions == ():
1063+
if mask is None:
10311064
return 1, x, torch.zeros_like(x)
1032-
number = 1
1033-
for d in dim:
1034-
number *= x.shape[d]
1065+
else:
1066+
# mask.numel() == 1 here, since every kept dimension has size 1
1067+
return int(torch.sum(mask)), mask * x, torch.zeros_like(x)
1068+
1069+
if mask is None:
1070+
number = math.prod(x.size(d) for d in reduce_dimensions)
1071+
else:
1072+
number = int(torch.sum(mask))
1073+
1074+
masked_data = x if mask is None else mask * x
10351075

10361076
# First keep the dimensions so that broadcasting works.
1037-
mean_with_dims = torch.mean(x, dim=dim, keepdim=True)
1077+
mean_with_dims = torch.mean(masked_data, dim=dim, keepdim=True)
10381078
mean = (
10391079
torch.squeeze(mean_with_dims)
10401080
if dim is None
10411081
else torch.squeeze(mean_with_dims, dim=dim)
10421082
)
1043-
variance = torch.mean(torch.square(x - mean_with_dims), dim=dim)
1083+
variance = torch.mean(torch.square(masked_data - mean_with_dims), dim=dim)
10441084

10451085
return (number, mean, variance)
10461086

@@ -1141,17 +1181,23 @@ def combine_gaussian_statistics_distributed(
11411181
return (global_count, global_mean, global_variance)
11421182

11431183

1144-
def mean_std_update(x, mask, dim, run_count, run_mean, run_std=None):
1184+
def mean_std_update(
1185+
x: torch.Tensor,
1186+
mask: Optional[torch.Tensor],
1187+
dim: Union[int, tuple, None],
1188+
run_count,
1189+
run_mean,
1190+
run_std=None,
1191+
):
11451192
"""
11461193
Update the running count, running mean, and running standard deviation
11471194
by integrating new data x from multiple processes.
11481195
"""
1149-
assert torch.all(mask), "Not implemented yet"
11501196

11511197
# TODO implement run_std is None
11521198
current_statistics = (run_count, run_mean, torch.square(run_std))
11531199
new_statistics = combine_gaussian_statistics_distributed(
1154-
gaussian_statistics(x, dim=dim)
1200+
gaussian_statistics(x, dim=dim, mask=mask)
11551201
)
11561202
(count, mean, variance) = combine_gaussian_statistics(
11571203
current_statistics, new_statistics

tests/unittests/test_input_norm.py

Lines changed: 53 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"""
33

44
import functools
5-
from typing import List, Tuple, Union
5+
from typing import List, Optional, Tuple, Union
66

77
import numpy as np
88
import pytest
@@ -15,31 +15,59 @@
1515
)
1616

1717

18+
def normalise_dimensions(
19+
dimensions: Union[int, tuple, None], num_dimensions: int
20+
):
21+
"""Ensure dimensions object is a tuple."""
22+
if isinstance(dimensions, int):
23+
return (dimensions,)
24+
elif dimensions is None:
25+
# All dimensions
26+
return tuple(range(num_dimensions))
27+
assert isinstance(dimensions, tuple)
28+
return dimensions
29+
30+
31+
def random_mask_numpy(
32+
generator: np.random.Generator,
33+
data_shape: tuple,
34+
dimensions: Union[int, tuple, None],
35+
):
36+
dimensions_set = set(normalise_dimensions(dimensions, len(data_shape)))
37+
38+
mask_shape = tuple(
39+
(data_shape[d] if d in dimensions_set else 1)
40+
for d in range(len(data_shape))
41+
)
42+
43+
return generator.integers(0, 2, size=mask_shape, dtype=bool)
44+
45+
1846
def reference_gaussian_statistics(
19-
x: np.ndarray, dimensions: Union[int, tuple, None]
47+
x: np.ndarray,
48+
dimensions: Union[int, tuple, None],
49+
mask: Optional[np.ndarray],
2050
) -> Tuple[int, np.ndarray, np.ndarray]:
2151
"""
2252
Compute reference count, mean, variance with Numpy, in the simplest way
2353
possible.
2454
"""
2555
# Ensure dimensions object is a tuple.
26-
if isinstance(dimensions, int):
27-
dimensions = (dimensions,)
28-
elif dimensions is None:
29-
# All dimensions
30-
dimensions = tuple(range(len(x.shape)))
31-
assert isinstance(dimensions, tuple)
56+
dimensions = normalise_dimensions(dimensions, len(x.shape))
3257

3358
# Start by pretending that dimensions=() and then roll them up one by one.
34-
count = 1
35-
mean = x
36-
variance_statistics = np.square(x)
59+
all_count = 1
60+
masked_data = x if mask is None else mask * x
61+
mean = masked_data
62+
variance_statistics = np.square(masked_data)
3763

3864
for dimension in sorted(dimensions, reverse=True):
39-
count *= x.shape[dimension]
65+
all_count *= x.shape[dimension]
4066
mean = np.mean(mean, axis=dimension)
4167
variance_statistics = np.mean(variance_statistics, axis=dimension)
4268

69+
count = all_count if mask is None else np.sum(mask)
70+
4371
variance = variance_statistics - np.square(mean)
4472

4573
return count, mean, variance
@@ -67,7 +95,8 @@ def reference_gaussian_statistics(
6795
(0, 1, 3),
6896
],
6997
)
70-
def test_gaussian_statistics(size, dimensions):
98+
@pytest.mark.parametrize("use_mask", [False, True])
99+
def test_gaussian_statistics(size, dimensions, use_mask: bool):
71100
if isinstance(dimensions, tuple):
72101
if any(dimension >= len(size) for dimension in dimensions):
73102
return
@@ -78,11 +107,20 @@ def test_gaussian_statistics(size, dimensions):
78107

79108
x = generator.uniform(low=-5, high=+5, size=size)
80109

110+
if use_mask:
111+
mask = random_mask_numpy(generator, size, dimensions)
112+
else:
113+
mask = None
114+
81115
reference_count, reference_mean, reference_variance = (
82-
reference_gaussian_statistics(x, dimensions=dimensions)
116+
reference_gaussian_statistics(x, dimensions=dimensions, mask=mask)
83117
)
84118

85-
count, mean, variance = gaussian_statistics(torch.tensor(x), dim=dimensions)
119+
count, mean, variance = gaussian_statistics(
120+
torch.tensor(x),
121+
dim=dimensions,
122+
mask=None if mask is None else torch.tensor(mask),
123+
)
86124

87125
assert count == reference_count
88126
assert mean.shape == reference_mean.shape

0 commit comments

Comments
 (0)