@@ -970,8 +970,7 @@ def getEigenGaps(self, eig_vals):
970970def do_spec_clustering (
971971 diary_obj , out_rttm_file , rec_id , k , pval , affinity_type , n_neighbors
972972):
973- """
974- Performs spectral clustering on embeddings. This function calls specific
973+ """Performs spectral clustering on embeddings. This function calls specific
975974 clustering algorithms as per affinity.
976975
977976 Arguments
@@ -1040,9 +1039,7 @@ def do_spec_clustering(
10401039def do_kmeans_clustering (
10411040 diary_obj , out_rttm_file , rec_id , k_oracle = 4 , p_val = 0.3
10421041):
1043- """
1044- Performs spectral clustering on embeddings. This function calls specific
1045- clustering algorithms as per affinity.
1042+ """Performs kmeans clustering on embeddings.
10461043
10471044 Arguments
10481045 ---------
@@ -1061,10 +1058,8 @@ def do_kmeans_clustering(
10611058 """
10621059
10631060 if k_oracle is not None :
1064- print ("ORACLE SPKRs" )
10651061 num_of_spk = k_oracle
10661062 else :
1067- print ("EST. using MAX Eigen gap" )
10681063 # Estimate the number of speakers using the max eigen gap with `cos` affinity matrix.
10691064 # This is just for experimentation.
10701065 # Not doing full spectral clustering. Just re-using the code till
@@ -1123,9 +1118,7 @@ def do_kmeans_clustering(
11231118
11241119
11251120def do_AHC (diary_obj , out_rttm_file , rec_id , k_oracle = 4 , p_val = 0.3 ):
1126- """
1127- Performs spectral clustering on embeddings. This function calls specific
1128- clustering algorithms as per affinity.
1121+ """Performs Agglomerative Hierarchical Clustering on embeddings.
11291122
11301123 Arguments
11311124 ---------
@@ -1145,23 +1138,20 @@ def do_AHC(diary_obj, out_rttm_file, rec_id, k_oracle=4, p_val=0.3):
11451138
11461139 from sklearn .cluster import AgglomerativeClustering
11471140
1148- # p_val --> threshold_val (for AHC)
1141+ # p_val is the threshold_val (for AHC)
11491142
11501143 diary_obj .norm_stat1 ()
11511144
11521145 # processing
11531146 if k_oracle is not None :
1154- print ("ORACLE SPKRs..." )
11551147 num_of_spk = k_oracle
11561148
11571149 clustering = AgglomerativeClustering (
11581150 n_clusters = num_of_spk , affinity = "cosine" , linkage = "ward" ,
11591151 ).fit (diary_obj .stat1 )
11601152 labels = clustering .labels_
11611153
1162- print ("labels.shape (Oracle) = " , labels .shape )
11631154 else :
1164- print ("Using AHC threshold pval = " , p_val )
11651155 # No oracle speaker count: let AHC decide the number of clusters from the distance threshold (p_val).
11661156 # This is just for experimentation.
11671157 clustering = AgglomerativeClustering (
@@ -1171,7 +1161,6 @@ def do_AHC(diary_obj, out_rttm_file, rec_id, k_oracle=4, p_val=0.3):
11711161 distance_threshold = p_val ,
11721162 ).fit (diary_obj .stat1 )
11731163 labels = clustering .labels_
1174- print ("labels.shape (Estima) = " , labels .shape )
11751164
11761165 # Convert labels to speaker boundaries
11771166 subseg_ids = diary_obj .segset
0 commit comments