Skip to content

Commit 37bf007

Browse files
committed
Cleaning diarization.py
1 parent b2c1023 commit 37bf007

File tree

1 file changed

+4
-15
lines changed

1 file changed

+4
-15
lines changed

speechbrain/processing/diarization.py

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -970,8 +970,7 @@ def getEigenGaps(self, eig_vals):
970970
def do_spec_clustering(
971971
diary_obj, out_rttm_file, rec_id, k, pval, affinity_type, n_neighbors
972972
):
973-
"""
974-
Performs spectral clustering on embeddings. This function calls specific
973+
"""Performs spectral clustering on embeddings. This function calls specific
975974
clustering algorithms as per affinity.
976975
977976
Arguments
@@ -1040,9 +1039,7 @@ def do_spec_clustering(
10401039
def do_kmeans_clustering(
10411040
diary_obj, out_rttm_file, rec_id, k_oracle=4, p_val=0.3
10421041
):
1043-
"""
1044-
Performs spectral clustering on embeddings. This function calls specific
1045-
clustering algorithms as per affinity.
1042+
"""Performs kmeans clustering on embeddings.
10461043
10471044
Arguments
10481045
---------
@@ -1061,10 +1058,8 @@ def do_kmeans_clustering(
10611058
"""
10621059

10631060
if k_oracle is not None:
1064-
print("ORACLE SPKRs")
10651061
num_of_spk = k_oracle
10661062
else:
1067-
print("EST. using MAX Eigen gap")
10681063
# Estimate the number of speakers using the max eigen gap with the `cos` affinity matrix.
10691064
# This is just for experimentation.
10701065
# Not doing full spectral clustering. Just re-using the code till
@@ -1123,9 +1118,7 @@ def do_kmeans_clustering(
11231118

11241119

11251120
def do_AHC(diary_obj, out_rttm_file, rec_id, k_oracle=4, p_val=0.3):
1126-
"""
1127-
Performs spectral clustering on embeddings. This function calls specific
1128-
clustering algorithms as per affinity.
1121+
"""Performs Agglomerative Hierarchical Clustering on embeddings.
11291122
11301123
Arguments
11311124
---------
@@ -1145,23 +1138,20 @@ def do_AHC(diary_obj, out_rttm_file, rec_id, k_oracle=4, p_val=0.3):
11451138

11461139
from sklearn.cluster import AgglomerativeClustering
11471140

1148-
# p_val --> threshold_val (for AHC)
1141+
# p_val is the threshold_val (for AHC)
11491142

11501143
diary_obj.norm_stat1()
11511144

11521145
# processing
11531146
if k_oracle is not None:
1154-
print("ORACLE SPKRs...")
11551147
num_of_spk = k_oracle
11561148

11571149
clustering = AgglomerativeClustering(
11581150
n_clusters=num_of_spk, affinity="cosine", linkage="ward",
11591151
).fit(diary_obj.stat1)
11601152
labels = clustering.labels_
11611153

1162-
print("labels.shape (Oracle) = ", labels.shape)
11631154
else:
1164-
print("Using AHC threshold pval = ", p_val)
11651155
# Number of speakers is not given: let AHC determine it from the distance threshold (p_val).
11661156
# This is just for experimentation.
11671157
clustering = AgglomerativeClustering(
@@ -1171,7 +1161,6 @@ def do_AHC(diary_obj, out_rttm_file, rec_id, k_oracle=4, p_val=0.3):
11711161
distance_threshold=p_val,
11721162
).fit(diary_obj.stat1)
11731163
labels = clustering.labels_
1174-
print("labels.shape (Estima) = ", labels.shape)
11751164

11761165
# Convert labels to speaker boundaries
11771166
subseg_ids = diary_obj.segset

0 commit comments

Comments
 (0)