Skip to content

Commit 9425ae0

Browse files
Merge pull request #3194 from graphql-java/schema-diff-optimizing
Schema diff optimizing
2 parents bf76c9f + 119999d commit 9425ae0

5 files changed

Lines changed: 100 additions & 81 deletions

File tree

src/main/java/graphql/schema/diffing/DiffImpl.java

Lines changed: 52 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,9 @@
33
import com.google.common.collect.HashMultiset;
44
import com.google.common.collect.Multiset;
55
import com.google.common.collect.Multisets;
6-
import com.google.common.util.concurrent.AtomicDoubleArray;
76
import graphql.Internal;
87

98
import java.util.ArrayList;
10-
import java.util.Arrays;
119
import java.util.Collection;
1210
import java.util.LinkedHashMap;
1311
import java.util.LinkedHashSet;
@@ -46,6 +44,11 @@ private static class MappingEntry {
4644
public boolean siblingsFinished;
4745
public LinkedBlockingQueue<MappingEntry> mappingEntriesSiblings;
4846
public int[] assignments;
47+
48+
/**
49+
* These are the available vertices, relative to the parent mapping.
50+
* Meaning the last mapped element is NOT contained in it.
51+
*/
4952
public List<Vertex> availableTargetVertices;
5053

5154
Mapping partialMapping;
@@ -112,7 +115,12 @@ OptimalEdit diffImpl(Mapping startMapping, List<Vertex> allSources, List<Vertex>
112115

113116
int fixedEditorialCost = baseEditorialCostForMapping(startMapping, completeSourceGraph, completeTargetGraph);
114117
int level = startMapping.size();
118+
119+
List<Vertex> allNonFixedTargets = new ArrayList<>(allTargets);
120+
startMapping.forEachTarget(allNonFixedTargets::remove);
121+
115122
MappingEntry firstMappingEntry = new MappingEntry(startMapping, level, fixedEditorialCost);
123+
firstMappingEntry.availableTargetVertices = allNonFixedTargets;
116124

117125
OptimalEdit optimalEdit = new OptimalEdit(completeSourceGraph, completeTargetGraph);
118126
PriorityQueue<MappingEntry> queue = new PriorityQueue<>((mappingEntry1, mappingEntry2) -> {
@@ -129,8 +137,6 @@ OptimalEdit diffImpl(Mapping startMapping, List<Vertex> allSources, List<Vertex>
129137
// System.out.println("graph size: " + this.completeSourceGraph.size() + " non mapped vertices " + (completeSourceGraph.size() - startMapping.size()));
130138
// System.out.println("start mapping at level: " + firstMappingEntry.level);
131139

132-
List<Vertex> allNonFixedTargets = new ArrayList<>(allTargets);
133-
startMapping.forEachTarget(allNonFixedTargets::remove);
134140

135141
int count = 0;
136142
while (!queue.isEmpty()) {
@@ -140,9 +146,7 @@ OptimalEdit diffImpl(Mapping startMapping, List<Vertex> allSources, List<Vertex>
140146
continue;
141147

142148
}
143-
// if (count % 100 == 0) {
144-
// System.out.println(" level: " + mappingEntry.level + " with cost: " + mappingEntry.lowerBoundCost + " queue size: " + queue.size());
145-
// }
149+
// System.out.println(mappingEntry.lowerBoundCost + " vs ged " + optimalEdit.ged + " count " + count);
146150

147151
if (mappingEntry.level > 0 && !mappingEntry.siblingsFinished) {
148152
addSiblingToQueue(
@@ -161,8 +165,7 @@ OptimalEdit diffImpl(Mapping startMapping, List<Vertex> allSources, List<Vertex>
161165
queue,
162166
optimalEdit,
163167
allSources,
164-
allTargets,
165-
allNonFixedTargets
168+
allTargets
166169
);
167170
}
168171

@@ -179,38 +182,38 @@ private void addChildToQueue(int fixedEditorialCost,
179182
PriorityQueue<MappingEntry> queue,
180183
OptimalEdit optimalEdit,
181184
List<Vertex> allSources,
182-
List<Vertex> allTargets,
183-
List<Vertex> allNonFixedTargets) {
184-
Mapping partialMapping = parentEntry.partialMapping;
185+
List<Vertex> allTargets
186+
) {
187+
Mapping parentPartialMapping = parentEntry.partialMapping;
185188
int level = parentEntry.level;
186189

187-
assertTrue(level == partialMapping.size());
190+
assertTrue(level == parentPartialMapping.size());
188191

189-
// not great: we iterate over all targets, which can be huge
190-
ArrayList<Vertex> availableTargetVertices = new ArrayList<>(allNonFixedTargets);
191-
partialMapping.forEachNonFixedTarget(availableTargetVertices::remove);
192-
assertTrue(availableTargetVertices.size() + partialMapping.size() == allTargets.size());
192+
// the available target vertices are the parent queue entry ones plus
193+
// minus the additional mapped element in parentPartialMapping
194+
ArrayList<Vertex> availableTargetVertices = new ArrayList<>(parentEntry.availableTargetVertices);
195+
availableTargetVertices.remove(parentPartialMapping.getTarget(level - 1));
196+
assertTrue(availableTargetVertices.size() + parentPartialMapping.size() == allTargets.size());
193197
Vertex v_i = allSources.get(level);
194198

199+
195200
// the cost matrix is for the non mapped vertices
196201
int costMatrixSize = allSources.size() - level;
197202

198203
// costMatrix gets modified by the hungarian algorithm ... therefore we create two of them
199-
AtomicDoubleArray[] costMatrixForHungarianAlgo = new AtomicDoubleArray[costMatrixSize];
200-
Arrays.setAll(costMatrixForHungarianAlgo, (index) -> new AtomicDoubleArray(costMatrixSize));
201-
AtomicDoubleArray[] costMatrix = new AtomicDoubleArray[costMatrixSize];
202-
Arrays.setAll(costMatrix, (index) -> new AtomicDoubleArray(costMatrixSize));
204+
double[][] costMatrixForHungarianAlgo = new double[costMatrixSize][costMatrixSize];
205+
double[][] costMatrix = new double[costMatrixSize][costMatrixSize];
203206

204207

205-
Map<Vertex, Double> deletionCostsCache = new LinkedHashMap<>();
208+
Map<Vertex, Double> isolatedVerticesCache = new LinkedHashMap<>();
206209

207210
for (int i = level; i < allSources.size(); i++) {
208211
Vertex v = allSources.get(i);
209212
int j = 0;
210213
for (Vertex u : availableTargetVertices) {
211-
double cost = calcLowerBoundMappingCost(v, u, partialMapping, deletionCostsCache);
212-
costMatrixForHungarianAlgo[i - level].set(j, cost);
213-
costMatrix[i - level].set(j, cost);
214+
double cost = calcLowerBoundMappingCost(v, u, parentPartialMapping, isolatedVerticesCache);
215+
costMatrixForHungarianAlgo[i - level][j] = cost;
216+
costMatrix[i - level][j] = cost;
214217
j++;
215218
}
216219
runningCheck.check();
@@ -219,14 +222,14 @@ private void addChildToQueue(int fixedEditorialCost,
219222
// System.out.println("size2: " + allCount + " vs sqrt of realSize:" + Math.sqrt(realSize));
220223
HungarianAlgorithm hungarianAlgorithm = new HungarianAlgorithm(costMatrixForHungarianAlgo);
221224
int[] assignments = hungarianAlgorithm.execute();
222-
int editorialCostForMapping = editorialCostForMapping(fixedEditorialCost, partialMapping, completeSourceGraph, completeTargetGraph);
225+
int editorialCostForMapping = editorialCostForMapping(fixedEditorialCost, parentPartialMapping, completeSourceGraph, completeTargetGraph);
223226
// System.out.println("editorial cost for partial mapping: " + editorialCostForMapping + " for level " + (level - (allSources.size() - allNonFixedTargets.size())));
224-
// System.out.println("last source vertex " + partialMapping.getSource(partialMapping.size() - 1) + " -> " + partialMapping.getTarget(partialMapping.size() - 1));
227+
// System.out.println("last source vertex " + parentPartialMapping.getSource(parentPartialMapping.size() - 1) + " -> " + parentPartialMapping.getTarget(parentPartialMapping.size() - 1));
225228
double costMatrixSum = getCostMatrixSum(costMatrix, assignments);
226229
double lowerBoundForPartialMapping = editorialCostForMapping + costMatrixSum;
227230
int v_i_target_IndexSibling = assignments[0];
228231
Vertex bestExtensionTargetVertexSibling = availableTargetVertices.get(v_i_target_IndexSibling);
229-
Mapping newMappingSibling = partialMapping.extendMapping(v_i, bestExtensionTargetVertexSibling);
232+
Mapping newMappingSibling = parentPartialMapping.extendMapping(v_i, bestExtensionTargetVertexSibling);
230233

231234

232235
if (lowerBoundForPartialMapping >= optimalEdit.ged) {
@@ -239,12 +242,13 @@ private void addChildToQueue(int fixedEditorialCost,
239242
newMappingEntry.availableTargetVertices = availableTargetVertices;
240243

241244
queue.add(newMappingEntry);
242-
Mapping fullMapping = partialMapping.copy();
245+
Mapping fullMapping = parentPartialMapping.copy();
243246
for (int i = 0; i < assignments.length; i++) {
244247
fullMapping.add(allSources.get(level + i), availableTargetVertices.get(assignments[i]));
245248
}
246249

247250
int costForFullMapping = editorialCostForMapping(fixedEditorialCost, fullMapping, completeSourceGraph, completeTargetGraph);
251+
assertTrue(lowerBoundForPartialMapping <= costForFullMapping);
248252
if (costForFullMapping < optimalEdit.ged) {
249253
updateOptimalEdit(optimalEdit, costForFullMapping, fullMapping);
250254
}
@@ -254,7 +258,7 @@ private void addChildToQueue(int fixedEditorialCost,
254258
hungarianAlgorithm,
255259
costMatrix,
256260
editorialCostForMapping,
257-
partialMapping,
261+
parentPartialMapping,
258262
v_i,
259263
optimalEdit.ged,
260264
level + 1,
@@ -274,20 +278,18 @@ private void updateOptimalEdit(OptimalEdit optimalEdit, int newGed, Mapping mapp
274278
// generate all children mappings and save in MappingEntry.sibling
275279
private void calculateRestOfChildren(List<Vertex> availableTargetVertices,
276280
HungarianAlgorithm hungarianAlgorithm,
277-
AtomicDoubleArray[] costMatrixCopy,
281+
double[][] costMatrixCopy,
278282
double editorialCostForMapping,
279283
Mapping partialMapping,
280284
Vertex v_i,
281285
int upperBound,
282286
int level,
283287
LinkedBlockingQueue<MappingEntry> siblings
284288
) {
285-
// System.out.println("level : " + level + " max children count calculate: " + (availableTargetVertices.size() - 1));
286289
// starting from 1 as we already generated the first one
287-
int oldSiblingsSize = siblings.size();
288290
for (int child = 1; child < availableTargetVertices.size(); child++) {
289291
int[] assignments = hungarianAlgorithm.nextChild();
290-
if (hungarianAlgorithm.costMatrix[0].get(assignments[0]) == Integer.MAX_VALUE) {
292+
if (hungarianAlgorithm.costMatrix[0][assignments[0]] == Integer.MAX_VALUE) {
291293
break;
292294
}
293295

@@ -310,7 +312,7 @@ private void calculateRestOfChildren(List<Vertex> availableTargetVertices,
310312

311313
runningCheck.check();
312314
}
313-
// System.out.println("calculated " + (siblings.size() - oldSiblingsSize) + " children / " + (availableTargetVertices.size()));
315+
// System.out.println("overall children count " + (siblings.size()+1) + " vs possible mappings " + possibleMappings.possibleMappings.get(v_i).size() );
314316
siblings.add(MappingEntry.DUMMY);
315317

316318
}
@@ -340,17 +342,18 @@ private void addSiblingToQueue(
340342
}
341343
assertTrue(fullMapping.size() == this.completeSourceGraph.size());
342344
int costForFullMapping = editorialCostForMapping(fixedEditorialCost, fullMapping, completeSourceGraph, completeTargetGraph);
345+
assertTrue(sibling.lowerBoundCost <= costForFullMapping);
343346
if (costForFullMapping < optimalEdit.ged) {
344347
updateOptimalEdit(optimalEdit, costForFullMapping, fullMapping);
345348
}
346349
}
347350
}
348351

349352

350-
private double getCostMatrixSum(AtomicDoubleArray[] costMatrix, int[] assignments) {
353+
private double getCostMatrixSum(double[][] costMatrix, int[] assignments) {
351354
double costMatrixSum = 0;
352355
for (int i = 0; i < assignments.length; i++) {
353-
costMatrixSum += costMatrix[i].get(assignments[i]);
356+
costMatrixSum += costMatrix[i][assignments[i]];
354357
}
355358
return costMatrixSum;
356359
}
@@ -387,24 +390,24 @@ private double getCostMatrixSum(AtomicDoubleArray[] costMatrix, int[] assignment
387390
private double calcLowerBoundMappingCost(Vertex v,
388391
Vertex u,
389392
Mapping partialMapping,
390-
Map<Vertex, Double> deletionCostsCache) {
393+
Map<Vertex, Double> isolatedVerticesCache) {
391394
if (!possibleMappings.mappingPossible(v, u)) {
392395
return Integer.MAX_VALUE;
393396
}
394397
if (u.isOfType(SchemaGraph.ISOLATED)) {
395-
if (deletionCostsCache.containsKey(v)) {
396-
return deletionCostsCache.get(v);
398+
if (isolatedVerticesCache.containsKey(v)) {
399+
return isolatedVerticesCache.get(v);
397400
}
398401
double result = calcLowerBoundMappingCostForIsolated(v, partialMapping, true);
399-
deletionCostsCache.put(v, result);
402+
isolatedVerticesCache.put(v, result);
400403
return result;
401404
}
402405
if (v.isOfType(SchemaGraph.ISOLATED)) {
403-
if (deletionCostsCache.containsKey(u)) {
404-
return deletionCostsCache.get(u);
406+
if (isolatedVerticesCache.containsKey(u)) {
407+
return isolatedVerticesCache.get(u);
405408
}
406409
double result = calcLowerBoundMappingCostForIsolated(u, partialMapping, false);
407-
deletionCostsCache.put(u, result);
410+
isolatedVerticesCache.put(u, result);
408411
return result;
409412
}
410413

@@ -530,28 +533,18 @@ private double calcLowerBoundMappingCostForIsolated(Vertex vertex,
530533
) {
531534
SchemaGraph schemaGraph = sourceOrTarget ? completeSourceGraph : completeTargetGraph;
532535

536+
// every adjacent edge is inserted/deleted for an isolated vertex
533537
Collection<Edge> adjacentEdges = schemaGraph.getAdjacentEdgesNonCopy(vertex);
534-
int innerEdgesCount = 0;
535-
int labeledEdgesFromAnchoredVertex = 0;
536538

537-
for (Edge edge : adjacentEdges) {
538-
if (!partialMapping.contains(edge.getTo(), sourceOrTarget)) {
539-
innerEdgesCount++;
540-
} else {
541-
if (edge.getLabel() != null) {
542-
labeledEdgesFromAnchoredVertex++;
543-
}
544-
}
545-
}
539+
// for the inverse adjacent edges we only count the anchored ones
540+
int anchoredInverseEdges = 0;
546541
Collection<Edge> adjacentEdgesInverse = schemaGraph.getAdjacentEdgesInverseNonCopy(vertex);
547542
for (Edge edge : adjacentEdgesInverse) {
548543
if (partialMapping.contains(edge.getFrom(), sourceOrTarget)) {
549-
if (edge.getLabel() != null) {
550-
labeledEdgesFromAnchoredVertex++;
551-
}
544+
anchoredInverseEdges++;
552545
}
553546
}
554-
return 1 + innerEdgesCount + labeledEdgesFromAnchoredVertex;
547+
return 1 + adjacentEdges.size() + anchoredInverseEdges;
555548
}
556549

557550

src/main/java/graphql/schema/diffing/HungarianAlgorithm.java

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package graphql.schema.diffing;
22

3-
import com.google.common.util.concurrent.AtomicDoubleArray;
43
import graphql.Internal;
54

65
import java.util.Arrays;
@@ -53,7 +52,7 @@
5352
@Internal
5453
public class HungarianAlgorithm {
5554
// changed by reduce
56-
public final AtomicDoubleArray[] costMatrix;
55+
public final double[][] costMatrix;
5756

5857
// constant always
5958
private final int rows;
@@ -85,10 +84,10 @@ public class HungarianAlgorithm {
8584
* irregular in the sense that all rows must be the same length; in
8685
* addition, all entries must be non-infinite numbers.
8786
*/
88-
public HungarianAlgorithm(AtomicDoubleArray[] costMatrix) {
89-
this.dim = Math.max(costMatrix.length, costMatrix[0].length());
87+
public HungarianAlgorithm(double[][] costMatrix) {
88+
this.dim = Math.max(costMatrix.length, costMatrix[0].length);
9089
this.rows = costMatrix.length;
91-
this.cols = costMatrix[0].length();
90+
this.cols = costMatrix[0].length;
9291
this.costMatrix = costMatrix;
9392
// for (int w = 0; w < this.dim; w++) {
9493
// if (w < costMatrix.length) {
@@ -131,8 +130,8 @@ protected void computeInitialFeasibleSolution() {
131130
}
132131
for (int w = 0; w < dim; w++) {
133132
for (int j = 0; j < dim; j++) {
134-
if (costMatrix[w].get(j) < labelByJob[j]) {
135-
labelByJob[j] = costMatrix[w].get(j);
133+
if (costMatrix[w][j] < labelByJob[j]) {
134+
labelByJob[j] = costMatrix[w][j];
136135
}
137136
}
138137
}
@@ -241,7 +240,7 @@ protected void executePhase() {
241240
committedWorkers[worker] = true;
242241
for (int j = 0; j < dim; j++) {
243242
if (parentWorkerByCommittedJob[j] == -1) {
244-
double slack = costMatrix[worker].get(j) - labelByWorker[worker]
243+
double slack = costMatrix[worker][j] - labelByWorker[worker]
245244
- labelByJob[j];
246245
if (minSlackValueByJob[j] > slack) {
247246
minSlackValueByJob[j] = slack;
@@ -274,7 +273,7 @@ protected void greedyMatch() {
274273
for (int w = 0; w < dim; w++) {
275274
for (int j = 0; j < dim; j++) {
276275
if (matchJobByWorker[w] == -1 && matchWorkerByJob[j] == -1
277-
&& costMatrix[w].get(j) - labelByWorker[w] - labelByJob[j] == 0) {
276+
&& costMatrix[w][j] - labelByWorker[w] - labelByJob[j] == 0) {
278277
match(w, j);
279278
}
280279
}
@@ -293,7 +292,7 @@ protected void initializePhase(int w) {
293292
Arrays.fill(parentWorkerByCommittedJob, -1);
294293
committedWorkers[w] = true;
295294
for (int j = 0; j < dim; j++) {
296-
minSlackValueByJob[j] = costMatrix[w].get(j) - labelByWorker[w] - labelByJob[j];
295+
minSlackValueByJob[j] = costMatrix[w][j] - labelByWorker[w] - labelByJob[j];
297296
minSlackWorkerByJob[j] = w;
298297
}
299298
}
@@ -316,12 +315,12 @@ protected void reduce() {
316315
for (int w = 0; w < dim; w++) {
317316
double min = Double.POSITIVE_INFINITY;
318317
for (int j = 0; j < dim; j++) {
319-
if (costMatrix[w].get(j) < min) {
320-
min = costMatrix[w].get(j);
318+
if (costMatrix[w][j] < min) {
319+
min = costMatrix[w][j];
321320
}
322321
}
323322
for (int j = 0; j < dim; j++) {
324-
costMatrix[w].set(j, costMatrix[w].get(j) - min);
323+
costMatrix[w][j] = costMatrix[w][j] - min;
325324
}
326325
}
327326
double[] min = new double[dim];
@@ -330,14 +329,14 @@ protected void reduce() {
330329
}
331330
for (int w = 0; w < dim; w++) {
332331
for (int j = 0; j < dim; j++) {
333-
if (costMatrix[w].get(j) < min[j]) {
334-
min[j] = costMatrix[w].get(j);
332+
if (costMatrix[w][j] < min[j]) {
333+
min[j] = costMatrix[w][j];
335334
}
336335
}
337336
}
338337
for (int w = 0; w < dim; w++) {
339338
for (int j = 0; j < dim; j++) {
340-
costMatrix[w].set(j, costMatrix[w].get(j) - min[j]);
339+
costMatrix[w][j] = costMatrix[w][j] - min[j];
341340
}
342341
}
343342
}
@@ -365,7 +364,7 @@ protected void updateLabeling(double slack) {
365364
public int[] nextChild() {
366365
int currentJobAssigned = matchJobByWorker[0];
367366
// we want to make currentJobAssigned not allowed,meaning we set the size to Infinity
368-
costMatrix[0].set(currentJobAssigned, Integer.MAX_VALUE);
367+
costMatrix[0][currentJobAssigned] = Integer.MAX_VALUE;
369368
matchWorkerByJob[currentJobAssigned] = -1;
370369
matchJobByWorker[0] = -1;
371370
minSlackValueByJob[currentJobAssigned] = Integer.MAX_VALUE;

0 commit comments

Comments
 (0)