Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Changed some comments and renamed variables
  • Loading branch information
amandelpie committed Jul 19, 2022
commit c112184f0ab8a0bfbc44c7faf1245c0b23fae2aa
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
package org.utbot.summary.clustering.dbscan

import org.utbot.summary.clustering.dbscan.neighbor.RangeQuery

/**
* Keeps the information about clusters produced by [DBSCANTrainer].
*
* @property [k] Number of clusters.
* @property [numberOfClusters] Number of clusters.
* @property [clusterLabels] It contains labels of clusters in the range ```[0; k)```
* or [Int.MIN_VALUE] if point could not be assigned to any cluster.
*/
data class DBSCANModel(
val k: Int = 0,
val numberOfClusters: Int = 0,
val clusterLabels: IntArray
)
Original file line number Diff line number Diff line change
Expand Up @@ -36,44 +36,53 @@ class DBSCANTrainer<T>(val eps: Float, val minSamples: Int, val metric: Metric<T
rangeQuery.metric = metric
} // TODO: could be refactored if we add some new variants of RangeQuery

val labels = IntArray(data.size) { _ -> UNDEFINED }
var k = 0
val labels = IntArray(data.size) { UNDEFINED }

// It changes in the range [0; k), where k is a final number of clusters found by DBSCAN
var clusterLabel = 0

for (i in data.indices) {
if (labels[i] == UNDEFINED) {
val neighbors = rangeQuery.findNeighbors(data[i], eps).toMutableList()
if (neighbors.size < minSamples) {
labels[i] = NOISE
} else {
labels[i] = k
expandCluster(neighbors, labels, k)
k++
labels[i] = clusterLabel
expandCluster(neighbors, labels, clusterLabel)

// If the existing cluster can not be expanded, the cluster label is incremented.
clusterLabel++
}
}
}

return DBSCANModel(k = k, clusterLabels = labels)
return DBSCANModel(numberOfClusters = clusterLabel, clusterLabels = labels)
}

private fun expandCluster(
neighbors: MutableList<Neighbor<T>>,
labels: IntArray,
k: Int
) {
neighbors.forEach { // Neighbors to expand.
// Neighbors to expand.
neighbors.forEach {
if (labels[it.index] == UNDEFINED) {
labels[it.index] = CLUSTER_PART // All neighbors of a cluster point became cluster points.
// All neighbors of a cluster point became cluster points.
labels[it.index] = CLUSTER_PART
}
}

// NOTE: the size of neighbors could grow from iteration to iteration and the classical for-loop in Kotlin could not be used
var j = 0
while (j < neighbors.count()) // Process every seed point Q.

// Process every seed point Q.
while (j < neighbors.count())
{
val q = neighbors[j]
val idx = q.index

if (labels[idx] == NOISE) { // Change Noise to border point.
// Change Noise to border point.
if (labels[idx] == NOISE) {
labels[idx] = k
}

Expand All @@ -82,7 +91,7 @@ class DBSCANTrainer<T>(val eps: Float, val minSamples: Int, val metric: Metric<T

val qNeighbors = rangeQuery.findNeighbors(q.key, eps)

if (qNeighbors.size >= minSamples) { // Density check (if Q is a core point).
if (qNeighbors.size >= minSamples) {
mergeTwoGroupsInCluster(qNeighbors, labels, neighbors)
}
}
Expand Down