Skip to content

Commit 730d6b4

Browse files
committed
Now identical clustering performed on 95% identical sequences
1 parent 276e5cc commit 730d6b4

File tree

3 files changed

+55
-3
lines changed

3 files changed

+55
-3
lines changed

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,4 +234,27 @@ public void testHelicalLocal() throws IOException, StructureException {
234234
assertEquals("A5", results.get(0).getStoichiometry());
235235

236236
}
237+
238+
/**
239+
* A structure with very similar entities (clustering at 95% seq id): 4DZ8. With IDENTITY clustering only, the test expects C2 symmetry and A2 stoichiometry.
240+
* @throws IOException propagated from the calls made in the test body
241+
* @throws StructureException propagated from the calls made in the test body
242+
*/
243+
@Test
244+
public void testPseudoIdentity95() throws IOException, StructureException {
245+
Structure pdb = StructureIO.getStructure("BIO:4DZ8:1"); // NOTE(review): presumably biological assembly 1 of 4DZ8 -- confirm against the StructureIO identifier syntax
246+
247+
SubunitClustererParameters cp = new SubunitClustererParameters();
248+
cp.setClustererMethod(SubunitClustererMethod.IDENTITY); // restrict the clusterer to the IDENTITY method
249+
QuatSymmetryParameters symmParams = new QuatSymmetryParameters(); // default symmetry parameters
250+
251+
QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry(
252+
pdb, symmParams, cp);
253+
254+
assertEquals("C2", symmetry.getSymmetry());
255+
assertEquals("A2", symmetry.getStoichiometry()); // a single cluster holding both subunits
256+
assertFalse(symmetry.isPseudoStoichiometric()); // the result must not be flagged as pseudo-stoichiometric
257+
assertEquals(SubunitClustererMethod.IDENTITY, symmetry.getSubunitClusters().get(0).getClustererMethod()); // first cluster must report IDENTITY as its method
258+
259+
}
237260
}

biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,29 @@ public boolean mergeIdentical(SubunitCluster other) {
143143
return true;
144144
}
145145

146+
/**
147+
* Merges the other SubunitCluster into this one if their representative
148+
* sequences pass the 0.95 sequence identity and 0.95 coverage thresholds.
149+
* On a successful merge the method of this cluster is set to IDENTITY.
150+
* <p>
151+
* The alignment is local (Smith Waterman), with the default linear {@link SimpleGapPenalty} and BLOSUM62 as scoring matrix.
152+
*
153+
* @param other
154+
* SubunitCluster to merge into this one
155+
* @return true if the SubunitClusters were merged, false otherwise
156+
* @throws CompoundNotFoundException propagated from {@code mergeSequence}
157+
*/
158+
public boolean mergeIdentity95(SubunitCluster other) throws CompoundNotFoundException {
159+
boolean merged = mergeSequence(other, 0.95, 0.95, // both thresholds (sequence identity, coverage) are 0.95
160+
PairwiseSequenceAlignerType.LOCAL, new SimpleGapPenalty(),
161+
SubstitutionMatrixHelper.getBlosum62());
162+
163+
if (merged) {
164+
this.method = SubunitClustererMethod.IDENTITY; // a 95% match is recorded as IDENTITY clustering
165+
}
166+
return merged;
167+
}
168+
146169
/**
147170
* Merges the other SubunitCluster into this one if their representatives
148171
* sequences are similar (higher sequence identity and coverage than the

biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClusterer.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,16 +68,22 @@ public static List<SubunitCluster> cluster(List<Subunit> subunits,
6868
for (Subunit s : subunits)
6969
clusters.add(new SubunitCluster(s));
7070

71-
// Now merge clusters by IDENTITY
71+
// Now merge clusters by 95% IDENTITY
7272
for (int c1 = 0; c1 < clusters.size(); c1++) {
7373
for (int c2 = clusters.size() - 1; c2 > c1; c2--) {
74-
if (clusters.get(c1).mergeIdentical(clusters.get(c2)))
75-
clusters.remove(c2);
74+
try {
75+
if (clusters.get(c1).mergeIdentity95(clusters.get(c2)))
76+
clusters.remove(c2);
77+
} catch (CompoundNotFoundException e) {
78+
logger.warn("Could not merge by Identity95. {}",
79+
e.getMessage());
80+
}
7681
}
7782
}
7883

7984
if (params.getClustererMethod() == SubunitClustererMethod.IDENTITY)
8085
return clusters;
86+
8187

8288
// Now merge clusters by SEQUENCE similarity
8389
for (int c1 = 0; c1 < clusters.size(); c1++) {

0 commit comments

Comments
 (0)