From 6ac887746b1d7f7df93e7793e15d773557913961 Mon Sep 17 00:00:00 2001
From: Jose Manuel Duarte
Date: Mon, 20 Jan 2020 15:30:56 -0800
Subject: [PATCH 01/11] Extracting method in test
---
.../structure/cluster/SubunitCluster.java | 2 +-
.../structure/cluster/TestSubunitCluster.java | 97 ++++++++-----------
2 files changed, 39 insertions(+), 60 deletions(-)
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
index 2ae2ccb83c..392a102e51 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
@@ -241,7 +241,7 @@ public boolean mergeIdentical(SubunitCluster other) {
* same Subunit. This is checked by comparing the entity identifiers of the subunits
* if one can be found.
* Thus this only makes sense when the subunits are complete chains of a
- * deposited PDB entry. I
+ * deposited PDB entry.
*
* @param other
* SubunitCluster
diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java
index 52c7e305b7..c54085fbe8 100644
--- a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java
+++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java
@@ -53,16 +53,7 @@ public class TestSubunitCluster {
public void testMergeIdentical() {
// Create an Atom Array of poly-alanine
- List atoms = new ArrayList<>(10);
- for (int i = 0; i < 10; i++) {
- Group g = new AminoAcidImpl();
- g.setPDBName("ALA");
- Atom a = new AtomImpl();
- a.setName(StructureTools.CA_ATOM_NAME);
- g.addAtom(a);
- atoms.add(a);
- }
- Atom[] reprAtoms = atoms.toArray(new Atom[atoms.size()]);
+ Atom[] reprAtoms = mockAtomArray(10, "ALA", -1, null);
// Create two identical SubunitCluster
SubunitCluster sc1 = new SubunitCluster(new Subunit(reprAtoms,
@@ -79,16 +70,7 @@ public void testMergeIdentical() {
assertEquals(sc1.length(), 10);
// Create an Atom Array of poly-glycine
- List atoms2 = new ArrayList<>(10);
- for (int i = 0; i < 10; i++) {
- Group g = new AminoAcidImpl();
- g.setPDBName("GLY");
- Atom a = new AtomImpl();
- a.setName(StructureTools.CA_ATOM_NAME);
- g.addAtom(a);
- atoms2.add(a);
- }
- Atom[] reprAtoms2 = atoms2.toArray(new Atom[atoms2.size()]);
+ Atom[] reprAtoms2 = mockAtomArray(10, "GLY", -1, null);
SubunitCluster sc3 = new SubunitCluster(new Subunit(reprAtoms2,
"subunit 1", null, null));
@@ -111,17 +93,8 @@ public void testMergeIdentical() {
@Test
public void testMergeSequence() throws CompoundNotFoundException {
- // Create an Atom Array of ploy-alanine
- List atoms = new ArrayList<>(100);
- for (int i = 0; i < 100; i++) {
- Group g = new AminoAcidImpl();
- g.setPDBName("ALA");
- Atom a = new AtomImpl();
- a.setName(StructureTools.CA_ATOM_NAME);
- g.addAtom(a);
- atoms.add(a);
- }
- Atom[] reprAtoms = atoms.toArray(new Atom[atoms.size()]);
+ // Create an Atom Array of poly-alanine
+ Atom[] reprAtoms = mockAtomArray(100, "ALA", -1, null);
// Create two identical SubunitCluster
SubunitCluster sc1 = new SubunitCluster(new Subunit(reprAtoms,
@@ -140,16 +113,7 @@ public void testMergeSequence() throws CompoundNotFoundException {
assertEquals(sc1.length(), 100);
// Create an Atom Array of poly-glycine
- List atoms2 = new ArrayList(100);
- for (int i = 0; i < 100; i++) {
- Group g = new AminoAcidImpl();
- g.setPDBName("GLY");
- Atom a = new AtomImpl();
- a.setName(StructureTools.CA_ATOM_NAME);
- g.addAtom(a);
- atoms2.add(a);
- }
- Atom[] reprAtoms2 = atoms2.toArray(new Atom[atoms2.size()]);
+ Atom[] reprAtoms2 = mockAtomArray(100, "GLY", -1, null);
SubunitCluster sc3 = new SubunitCluster(new Subunit(reprAtoms2,
"subunit 3", null, null));
@@ -163,24 +127,7 @@ public void testMergeSequence() throws CompoundNotFoundException {
assertEquals(sc1.length(), 100);
// Create an Atom Array of 9 glycine and 91 alanine
- List atoms3 = new ArrayList<>(100);
- for (int i = 0; i < 9; i++) {
- Group g = new AminoAcidImpl();
- g.setPDBName("GLY");
- Atom a = new AtomImpl();
- a.setName(StructureTools.CA_ATOM_NAME);
- g.addAtom(a);
- atoms3.add(a);
- }
- for (int i = 0; i < 91; i++) {
- Group g = new AminoAcidImpl();
- g.setPDBName("ALA");
- Atom a = new AtomImpl();
- a.setName(StructureTools.CA_ATOM_NAME);
- g.addAtom(a);
- atoms3.add(a);
- }
- Atom[] reprAtoms3 = atoms3.toArray(new Atom[atoms3.size()]);
+ Atom[] reprAtoms3 = mockAtomArray(9, "GLY", 91, "ALA");
SubunitCluster sc4 = new SubunitCluster(new Subunit(reprAtoms3,
"subunit 4", null, null));
@@ -283,4 +230,36 @@ public void testDivideInternally() throws StructureException, IOException {
assertEquals(sc1.getAlignedAtomsSubunit(0).length,
sc1.getAlignedAtomsSubunit(1).length);
}
+
+ /**
+ * Create a mock atom array, with size1 residues of type1, followed by size2 residues of type2
+ * @param size1 the number of residues of type1 to add
+ * @param type1 the 3 letter code of residue
+ * @param size2 the number of residues of type2 to add, if -1 none are added
+ * @param type2 the 3 letter code of residue, if null none are added
+ * @return the mock atom array
+ */
+ private Atom[] mockAtomArray(int size1, String type1, int size2, String type2) {
+ List atoms = new ArrayList<>(size1 + size2);
+ for (int i = 0; i < size1; i++) {
+ Group g = new AminoAcidImpl();
+ g.setPDBName(type1);
+ Atom a = new AtomImpl();
+ a.setName(StructureTools.CA_ATOM_NAME);
+ g.addAtom(a);
+ atoms.add(a);
+ }
+
+ if (size2 >= 0 && type2 !=null) {
+ for (int i = 0; i < size2; i++) {
+ Group g = new AminoAcidImpl();
+ g.setPDBName(type2);
+ Atom a = new AtomImpl();
+ a.setName(StructureTools.CA_ATOM_NAME);
+ g.addAtom(a);
+ atoms.add(a);
+ }
+ }
+ return atoms.toArray(new Atom[0]);
+ }
}
From d415e3c8ace1ef49db25429f60bd928fd5767124 Mon Sep 17 00:00:00 2001
From: Jose Manuel Duarte
Date: Mon, 20 Jan 2020 16:10:47 -0800
Subject: [PATCH 02/11] New test for SubunitCluster.mergeIdenticalByEntityId
---
.../structure/cluster/TestSubunitCluster.java | 94 ++++++++++++++++++-
1 file changed, 93 insertions(+), 1 deletion(-)
diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java
index c54085fbe8..3947372521 100644
--- a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java
+++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java
@@ -30,10 +30,14 @@
import org.biojava.nbio.structure.AminoAcidImpl;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.AtomImpl;
+import org.biojava.nbio.structure.Chain;
+import org.biojava.nbio.structure.ChainImpl;
+import org.biojava.nbio.structure.EntityInfo;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIO;
+import org.biojava.nbio.structure.StructureImpl;
import org.biojava.nbio.structure.StructureTools;
import org.junit.Test;
@@ -85,6 +89,47 @@ public void testMergeIdentical() {
}
+ @Test
+ public void testMergeIdenticalByEntityId() {
+
+ // Create 2 Atom Arrays, with same entity id
+ Atom[] reprAtoms1 = mockAtomArray("A", 1, 10, "ALA", -1, null);
+ Structure structure1 = reprAtoms1[0].getGroup().getChain().getStructure();
+
+ Atom[] reprAtoms2 = mockAtomArray("B", 1, 10, "PRO", -1, null);
+ Structure structure2 = reprAtoms2[0].getGroup().getChain().getStructure();
+
+ // Create two SubunitCluster with same entity id
+ SubunitCluster sc1 = new SubunitCluster(new Subunit(reprAtoms1,
+ "A", null, structure1));
+ SubunitCluster sc2 = new SubunitCluster(new Subunit(reprAtoms2,
+ "B", null, structure2));
+
+ boolean merged = sc1.mergeIdenticalByEntityId(sc2);
+
+ // Merged have to be true, and the merged SubunitCluster is sc1
+ assertTrue(merged);
+ assertEquals(2, sc1.size());
+ assertEquals(1, sc2.size());
+ assertEquals(10, sc1.length());
+
+ // Create an Atom Array of poly-glycine with a different entity id
+ Atom[] reprAtoms3 = mockAtomArray("A", 2, 10, "GLY", -1, null);
+ Structure structure3 = reprAtoms2[0].getGroup().getChain().getStructure();
+
+ SubunitCluster sc3 = new SubunitCluster(new Subunit(reprAtoms3,
+ "A", null, structure3));
+
+ merged = sc1.mergeIdenticalByEntityId(sc3);
+
+ // Merged have to be false, and Clusters result unmodified
+ assertFalse(merged);
+ assertEquals(2, sc1.size());
+ assertEquals(1, sc2.size());
+ assertEquals(10, sc1.length());
+
+ }
+
/**
* Test {@link SubunitCluster#mergeSequence(SubunitCluster, SubunitClustererParameters)}
*
@@ -232,7 +277,8 @@ public void testDivideInternally() throws StructureException, IOException {
}
/**
- * Create a mock atom array, with size1 residues of type1, followed by size2 residues of type2
+ * Create a mock atom array, with size1 residues of type1, followed by size2 residues of type2.
+ *
* @param size1 the number of residues of type1 to add
* @param type1 the 3 letter code of residue
* @param size2 the number of residues of type2 to add, if -1 none are added
@@ -240,10 +286,55 @@ public void testDivideInternally() throws StructureException, IOException {
* @return the mock atom array
*/
private Atom[] mockAtomArray(int size1, String type1, int size2, String type2) {
+
+ List atoms = new ArrayList<>(size1 + size2);
+ for (int i = 0; i < size1; i++) {
+ Group g = new AminoAcidImpl();
+ g.setPDBName(type1);
+ Atom a = new AtomImpl();
+ a.setName(StructureTools.CA_ATOM_NAME);
+ g.addAtom(a);
+ atoms.add(a);
+ }
+
+ if (size2 >= 0 && type2 !=null) {
+ for (int i = 0; i < size2; i++) {
+ Group g = new AminoAcidImpl();
+ g.setPDBName(type2);
+ Atom a = new AtomImpl();
+ a.setName(StructureTools.CA_ATOM_NAME);
+ g.addAtom(a);
+ atoms.add(a);
+ }
+ }
+ return atoms.toArray(new Atom[0]);
+ }
+
+ /**
+ * Create a mock atom array, with size1 residues of type1, followed by size2 residues of type2.
+ *
+ * @param chainId a chain with this chain id will be set as parent of groups
+ * @param entityId an entity with this id will be set as parent of chain
+ * @param size1 the number of residues of type1 to add
+ * @param type1 the 3 letter code of residue
+ * @param size2 the number of residues of type2 to add, if -1 none are added
+ * @param type2 the 3 letter code of residue, if null none are added
+ * @return the mock atom array
+ */
+ private Atom[] mockAtomArray(String chainId, int entityId, int size1, String type1, int size2, String type2) {
+ Chain chain = new ChainImpl();
+ Structure structure = new StructureImpl();
+ chain.setId(chainId);
+ structure.addChain(chain);
+ EntityInfo entityInfo = new EntityInfo();
+ entityInfo.setMolId(entityId);
+ chain.setEntityInfo(entityInfo);
+
List atoms = new ArrayList<>(size1 + size2);
for (int i = 0; i < size1; i++) {
Group g = new AminoAcidImpl();
g.setPDBName(type1);
+ chain.addGroup(g);
Atom a = new AtomImpl();
a.setName(StructureTools.CA_ATOM_NAME);
g.addAtom(a);
@@ -254,6 +345,7 @@ private Atom[] mockAtomArray(int size1, String type1, int size2, String type2) {
for (int i = 0; i < size2; i++) {
Group g = new AminoAcidImpl();
g.setPDBName(type2);
+ chain.addGroup(g);
Atom a = new AtomImpl();
a.setName(StructureTools.CA_ATOM_NAME);
g.addAtom(a);
From 679f84081a8afe5f0b162fd9a9ac8c3ebb971de5 Mon Sep 17 00:00:00 2001
From: Jose Manuel Duarte
Date: Mon, 20 Jan 2020 16:18:09 -0800
Subject: [PATCH 03/11] Right order of params (expected, actual) in assertEquals
---
.../structure/cluster/TestSubunitCluster.java | 44 +++++++++----------
1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java
index 3947372521..585c4f1d1c 100644
--- a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java
+++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java
@@ -69,9 +69,9 @@ public void testMergeIdentical() {
// Merged have to be true, and the merged SubunitCluster is sc1
assertTrue(merged);
- assertEquals(sc1.size(), 2);
- assertEquals(sc2.size(), 1);
- assertEquals(sc1.length(), 10);
+ assertEquals(2, sc1.size());
+ assertEquals(1, sc2.size());
+ assertEquals(10, sc1.length());
// Create an Atom Array of poly-glycine
Atom[] reprAtoms2 = mockAtomArray(10, "GLY", -1, null);
@@ -83,9 +83,9 @@ public void testMergeIdentical() {
// Merged have to be false, and Clusters result inmodified
assertFalse(merged);
- assertEquals(sc1.size(), 2);
- assertEquals(sc2.size(), 1);
- assertEquals(sc1.length(), 10);
+ assertEquals(2, sc1.size());
+ assertEquals(1, sc2.size());
+ assertEquals(10, sc1.length());
}
@@ -153,9 +153,9 @@ public void testMergeSequence() throws CompoundNotFoundException {
// Merged have to be true, and the merged SubunitCluster is sc1
assertTrue(merged);
- assertEquals(sc1.size(), 2);
- assertEquals(sc2.size(), 1);
- assertEquals(sc1.length(), 100);
+ assertEquals(2, sc1.size());
+ assertEquals(1, sc2.size());
+ assertEquals(100, sc1.length());
// Create an Atom Array of poly-glycine
Atom[] reprAtoms2 = mockAtomArray(100, "GLY", -1, null);
@@ -167,9 +167,9 @@ public void testMergeSequence() throws CompoundNotFoundException {
// Merged have to be false, and Clusters result inmodified
assertFalse(merged);
- assertEquals(sc1.size(), 2);
- assertEquals(sc2.size(), 1);
- assertEquals(sc1.length(), 100);
+ assertEquals(2, sc1.size());
+ assertEquals(1, sc2.size());
+ assertEquals(100, sc1.length());
// Create an Atom Array of 9 glycine and 91 alanine
Atom[] reprAtoms3 = mockAtomArray(9, "GLY", 91, "ALA");
@@ -181,9 +181,9 @@ public void testMergeSequence() throws CompoundNotFoundException {
// Merged have to be true, and the merged SubunitCluster is sc1
assertTrue(merged);
- assertEquals(sc1.size(), 3);
- assertEquals(sc2.size(), 1);
- assertEquals(sc1.length(), 91);
+ assertEquals(3, sc1.size());
+ assertEquals(1, sc2.size());
+ assertEquals(91, sc1.length());
}
@@ -224,10 +224,10 @@ public void testMergeStructure() throws StructureException, IOException {
// Merged have to be true, and the merged SubunitCluster is sc1
assertTrue(merged13);
assertTrue(merged24);
- assertEquals(sc1.size(), 2);
- assertEquals(sc2.size(), 2);
- assertEquals(sc1.length(), 141);
- assertEquals(sc2.length(), 146);
+ assertEquals(2, sc1.size());
+ assertEquals(2, sc2.size());
+ assertEquals(141, sc1.length());
+ assertEquals(146, sc2.length());
assertEquals(sc1.getAlignedAtomsSubunit(0).length,
sc1.getAlignedAtomsSubunit(1).length);
assertEquals(sc2.getAlignedAtomsSubunit(0).length,
@@ -237,8 +237,8 @@ public void testMergeStructure() throws StructureException, IOException {
boolean merged = sc1.mergeStructure(sc2, clustererParameters);
assertTrue(merged);
- assertEquals(sc1.size(), 4);
- assertEquals(sc1.length(), 140, 2);
+ assertEquals(4, sc1.size());
+ assertEquals(140, sc1.length(), 2);
assertEquals(sc1.getAlignedAtomsSubunit(0).length,
sc1.getAlignedAtomsSubunit(2).length);
@@ -270,7 +270,7 @@ public void testDivideInternally() throws StructureException, IOException {
// Divided has to be true, and Subunit length shorter than half
assertTrue(divided);
- assertEquals(sc1.size(), 2);
+ assertEquals(2, sc1.size());
assertTrue(sc1.length() < 178);
assertEquals(sc1.getAlignedAtomsSubunit(0).length,
sc1.getAlignedAtomsSubunit(1).length);
From 4d6a504c95e173ad9cd56a8cd44689a8c0bf6884 Mon Sep 17 00:00:00 2001
From: Jose Manuel Duarte
Date: Tue, 21 Jan 2020 15:34:48 -0800
Subject: [PATCH 04/11] Now testing for subunit clustering with entity ids
---
.../TestQuatSymmetryDetectorExamples.java | 24 ++++
.../structure/cluster/TestSubunitCluster.java | 108 +++++++++++-------
2 files changed, 89 insertions(+), 43 deletions(-)
diff --git a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
index 1b69ec3efc..5f3d0b4881 100644
--- a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
+++ b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
@@ -366,4 +366,28 @@ public void testPseudoIdentity95() throws IOException, StructureException {
assertEquals(SubunitClustererMethod.SEQUENCE, symmetry.getSubunitClusters().get(0).getClustererMethod());
}
+
+ @Test
+ public void testSymDetectionWithSubunitClusterByEntityId() throws IOException, StructureException {
+ Structure pdb = StructureIO.getStructure("BIO:1SMT:1");
+
+ SubunitClustererParameters cp = new SubunitClustererParameters();
+// cp.setOptimizeAlignment(false);
+// cp.setSequenceIdentityThreshold(0.75);
+// cp.setMinimumSequenceLength(3);
+// cp.setAbsoluteMinimumSequenceLength(3);
+// cp.setUseSequenceCoverage(false);
+// cp.setUseStructureCoverage(false);
+// cp.setUseRMSD(false);
+ cp.setUseEntityIdForSeqIdentityDetermination(true);
+ cp.setClustererMethod(SubunitClustererMethod.SEQUENCE);
+ QuatSymmetryParameters symmParams = new QuatSymmetryParameters();
+// symmParams.setOnTheFly(true);
+ QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry(
+ pdb, symmParams, cp);
+
+ // C2 symmetry, A2 stoichiometry
+ assertEquals("C2", symmetry.getSymmetry());
+ assertEquals("A2", symmetry.getStoichiometry().toString());
+ }
}
diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java
index 585c4f1d1c..4b6e55ee28 100644
--- a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java
+++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java
@@ -34,6 +34,7 @@
import org.biojava.nbio.structure.ChainImpl;
import org.biojava.nbio.structure.EntityInfo;
import org.biojava.nbio.structure.Group;
+import org.biojava.nbio.structure.ResidueNumber;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIO;
@@ -93,17 +94,15 @@ public void testMergeIdentical() {
public void testMergeIdenticalByEntityId() {
// Create 2 Atom Arrays, with same entity id
- Atom[] reprAtoms1 = mockAtomArray("A", 1, 10, "ALA", -1, null);
- Structure structure1 = reprAtoms1[0].getGroup().getChain().getStructure();
-
- Atom[] reprAtoms2 = mockAtomArray("B", 1, 10, "PRO", -1, null);
- Structure structure2 = reprAtoms2[0].getGroup().getChain().getStructure();
+ Structure structure = mockStructure();
+ Atom[] reprAtoms1 = getAtomArray(structure.getChain("A"));
+ Atom[] reprAtoms2 = getAtomArray(structure.getChain("B"));
// Create two SubunitCluster with same entity id
SubunitCluster sc1 = new SubunitCluster(new Subunit(reprAtoms1,
- "A", null, structure1));
+ "A", null, structure));
SubunitCluster sc2 = new SubunitCluster(new Subunit(reprAtoms2,
- "B", null, structure2));
+ "B", null, structure));
boolean merged = sc1.mergeIdenticalByEntityId(sc2);
@@ -111,14 +110,13 @@ public void testMergeIdenticalByEntityId() {
assertTrue(merged);
assertEquals(2, sc1.size());
assertEquals(1, sc2.size());
- assertEquals(10, sc1.length());
+ assertEquals(9, sc1.length());
// Create an Atom Array of poly-glycine with a different entity id
- Atom[] reprAtoms3 = mockAtomArray("A", 2, 10, "GLY", -1, null);
- Structure structure3 = reprAtoms2[0].getGroup().getChain().getStructure();
+ Atom[] reprAtoms3 = getAtomArray(structure.getChain("C"));
SubunitCluster sc3 = new SubunitCluster(new Subunit(reprAtoms3,
- "A", null, structure3));
+ "C", null, structure));
merged = sc1.mergeIdenticalByEntityId(sc3);
@@ -126,7 +124,7 @@ public void testMergeIdenticalByEntityId() {
assertFalse(merged);
assertEquals(2, sc1.size());
assertEquals(1, sc2.size());
- assertEquals(10, sc1.length());
+ assertEquals(9, sc1.length());
}
@@ -311,47 +309,71 @@ private Atom[] mockAtomArray(int size1, String type1, int size2, String type2) {
}
/**
- * Create a mock atom array, with size1 residues of type1, followed by size2 residues of type2.
- *
- * @param chainId a chain with this chain id will be set as parent of groups
- * @param entityId an entity with this id will be set as parent of chain
- * @param size1 the number of residues of type1 to add
- * @param type1 the 3 letter code of residue
- * @param size2 the number of residues of type2 to add, if -1 none are added
- * @param type2 the 3 letter code of residue, if null none are added
- * @return the mock atom array
+ * Create a mock structure with 2 entities 1 (chains A, B) and 2 (chain C).
+ * @return a structure
*/
- private Atom[] mockAtomArray(String chainId, int entityId, int size1, String type1, int size2, String type2) {
- Chain chain = new ChainImpl();
+ private Structure mockStructure() {
Structure structure = new StructureImpl();
- chain.setId(chainId);
- structure.addChain(chain);
- EntityInfo entityInfo = new EntityInfo();
- entityInfo.setMolId(entityId);
- chain.setEntityInfo(entityInfo);
+ EntityInfo entity1 = new EntityInfo();
+ entity1.setMolId(1);
+ EntityInfo entity2 = new EntityInfo();
+ entity2.setMolId(2);
+ structure.addEntityInfo(entity1);
+ structure.addEntityInfo(entity2);
+
+ Chain chainA = new ChainImpl();
+ chainA.setId("A");
+ Chain chainB = new ChainImpl();
+ chainB.setId("B");
+ entity1.addChain(chainA);
+ entity1.addChain(chainB);
+ Chain chainC = new ChainImpl();
+ chainC.setId("C");
+ entity2.addChain(chainC);
+
+ structure.addChain(chainA);
+ structure.addChain(chainB);
+ structure.addChain(chainC);
+
+ // entity 1: chain A 10 observed residues, chain B 9 observed residues (first unobserved)
+ List aGroups = getGroupList(10, "ALA", chainA);
+ chainA.setAtomGroups(new ArrayList<>(aGroups));
+ chainA.setSeqResGroups(aGroups);
+ chainA.setEntityInfo(entity1);
+
+ List bGroups = getGroupList(10, "ALA", chainB);
+ chainB.setAtomGroups(new ArrayList<>(bGroups.subList(1,10)));
+ chainB.setSeqResGroups(bGroups);
+ chainB.setEntityInfo(entity1);
+
+ List cGroups = getGroupList(20, "GLY", chainC);
+ chainC.setAtomGroups(new ArrayList<>(cGroups));
+ chainC.setSeqResGroups(cGroups);
+ chainC.setEntityInfo(entity2);
+
+ return structure;
+ }
- List atoms = new ArrayList<>(size1 + size2);
- for (int i = 0; i < size1; i++) {
+ private List getGroupList(int size, String type, Chain chain) {
+ List list = new ArrayList<>();
+ for (int i=0;i= 0 && type2 !=null) {
- for (int i = 0; i < size2; i++) {
- Group g = new AminoAcidImpl();
- g.setPDBName(type2);
- chain.addGroup(g);
- Atom a = new AtomImpl();
- a.setName(StructureTools.CA_ATOM_NAME);
- g.addAtom(a);
- atoms.add(a);
- }
+ private Atom[] getAtomArray(Chain chain) {
+ Atom[] atoms = new Atom[chain.getAtomGroups().size()];
+ for (int i = 0; i
Date: Tue, 21 Jan 2020 15:37:36 -0800
Subject: [PATCH 05/11] Removing comments
---
.../test/symmetry/TestQuatSymmetryDetectorExamples.java | 8 --------
1 file changed, 8 deletions(-)
diff --git a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
index 5f3d0b4881..364a9e0b42 100644
--- a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
+++ b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
@@ -372,17 +372,9 @@ public void testSymDetectionWithSubunitClusterByEntityId() throws IOException, S
Structure pdb = StructureIO.getStructure("BIO:1SMT:1");
SubunitClustererParameters cp = new SubunitClustererParameters();
-// cp.setOptimizeAlignment(false);
-// cp.setSequenceIdentityThreshold(0.75);
-// cp.setMinimumSequenceLength(3);
-// cp.setAbsoluteMinimumSequenceLength(3);
-// cp.setUseSequenceCoverage(false);
-// cp.setUseStructureCoverage(false);
-// cp.setUseRMSD(false);
cp.setUseEntityIdForSeqIdentityDetermination(true);
cp.setClustererMethod(SubunitClustererMethod.SEQUENCE);
QuatSymmetryParameters symmParams = new QuatSymmetryParameters();
-// symmParams.setOnTheFly(true);
QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry(
pdb, symmParams, cp);
From fc543afa5a69309f4a08b9a109878ca4db000e34 Mon Sep 17 00:00:00 2001
From: Jose Manuel Duarte
Date: Tue, 21 Jan 2020 15:39:39 -0800
Subject: [PATCH 06/11] Fixing the cluster by entity id alignment issue. Now
tests pass
---
.../structure/cluster/SubunitCluster.java | 47 +++++++++++++++++--
1 file changed, 43 insertions(+), 4 deletions(-)
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
index 392a102e51..e270bcfe8a 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
@@ -32,6 +32,8 @@
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Chain;
+import org.biojava.nbio.structure.EntityInfo;
+import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.align.StructureAlignment;
@@ -252,12 +254,49 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) {
if (!isIdenticalByEntityIdTo(other))
return false;
+ Subunit thisSub = this.subunits.get(this.representative);
+ Subunit otherSub = other.subunits.get(other.representative);
logger.info("SubunitClusters {}-{} belong to same entity. Assuming they are identical",
- this.subunits.get(this.representative).getName(),
- other.subunits.get(other.representative).getName());
+ thisSub.getName(),
+ otherSub.getName());
- this.subunits.addAll(other.subunits);
- this.subunitEQR.addAll(other.subunitEQR);
+ List thisAligned = new ArrayList<>();
+ List otherAligned = new ArrayList<>();
+
+ // we've merged by entity id, we can assume structure, chain and entity are available
+ Structure thisStruct = thisSub.getStructure();
+ Structure otherStruct = otherSub.getStructure();
+ String thisName = thisSub.getName();
+ String otherName = otherSub.getName();
+ Chain thisChain = thisStruct.getChain(thisName);
+ Chain otherChain = otherStruct.getChain(otherName);
+ EntityInfo entityInfo = thisChain.getEntityInfo();
+
+ // Extract the aligned residues of both Subunits
+ for (int thisIndex=0; thisIndex < thisSub.size(); thisIndex++) {
+
+ Group g = thisSub.getRepresentativeAtoms()[thisIndex].getGroup();
+
+ int seqresIndex = entityInfo.getAlignedResIndex(g, thisChain);
+
+ Group otherG = otherChain.getSeqResGroups().get(seqresIndex - 1);
+
+ if (!otherChain.getAtomGroups().contains(otherG)) {
+ // skip residues that are unobserved in other sequence ("gaps" in the entity alignment)
+ continue;
+ }
+
+ int otherIndex = otherChain.getAtomGroups().indexOf(otherG);
+
+ // Only consider residues that are part of the SubunitCluster
+ if (this.subunitEQR.get(this.representative).contains(thisIndex)
+ && other.subunitEQR.get(other.representative).contains(otherIndex)) {
+ thisAligned.add(thisIndex);
+ otherAligned.add(otherIndex);
+ }
+ }
+
+ updateEquivResidues(other, thisAligned, otherAligned);
return true;
}
From 8b76e96a7af6b0118025b8a906b64de11b0d9693 Mon Sep 17 00:00:00 2001
From: Jose Manuel Duarte
Date: Tue, 21 Jan 2020 16:01:52 -0800
Subject: [PATCH 07/11] Some improvements: structure checks in isIdenticalByEntityIdTo, Javadoc fixes
---
.../biojava/nbio/structure/EntityInfo.java | 6 +--
.../structure/cluster/SubunitCluster.java | 40 +++++++++++++------
2 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java
index b2438763f3..cc52262b12 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java
@@ -306,12 +306,12 @@ public List getChainIds() {
* used and when all chains within the entity are numbered in the same way), but
* in general they will be neither unique (because of insertion codes) nor aligned.
*
- * @param g
- * @param c
+ * @param g the group
+ * @param c the chain
* @return the aligned residue index (1 to n), if no SEQRES groups are available at all then {@link ResidueNumber#getSeqNum()}
* is returned as a fall-back, if the group is not found in the SEQRES groups then -1 is returned
* for the given group and chain
- * @throws IllegalArgumentException if the given Chain is not a member of this EnityInfo
+ * @throws IllegalArgumentException if the given Chain is not a member of this EntityInfo
* @see Chain#getSeqResGroup(int)
*/
public int getAlignedResIndex(Group g, Chain c) {
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
index e270bcfe8a..32235d0ab7 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
@@ -192,22 +192,35 @@ public boolean isIdenticalTo(SubunitCluster other) {
* @return true if the SubunitClusters are identical, false otherwise
*/
public boolean isIdenticalByEntityIdTo(SubunitCluster other) {
- Structure thisStruct = this.subunits.get(this.representative).getStructure();
- Structure otherStruct = other.subunits.get(other.representative).getStructure();
- String thisName = this.subunits.get(this.representative).getName();
- String otherName = other.subunits.get(this.representative).getName();
+ Subunit thisSub = this.subunits.get(this.representative);
+ Subunit otherSub = other.subunits.get(other.representative);
+ String thisName = thisSub.getName();
+ String otherName = otherSub.getName();
+
+ Structure thisStruct = thisSub.getStructure();
+ Structure otherStruct = otherSub.getStructure();
+ if (thisStruct == null || otherStruct == null) {
+ logger.info("SubunitClusters {}-{} have no referenced structures. Ignoring identity check by entity id",
+ thisName,
+ otherName);
+ return false;
+ }
+ if (thisStruct != otherStruct) {
+ // different object references: will not cluster even if entity id is same
+ return false;
+ }
Chain thisChain = thisStruct.getChain(thisName);
Chain otherChain = otherStruct.getChain(otherName);
if (thisChain == null || otherChain == null) {
logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id",
- this.subunits.get(this.representative).getName(),
- other.subunits.get(other.representative).getName());
+ thisName,
+ otherName);
return false;
}
if (thisChain.getEntityInfo() == null || otherChain.getEntityInfo() == null) {
logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id",
- this.subunits.get(this.representative).getName(),
- other.subunits.get(other.representative).getName());
+ thisName,
+ otherName);
return false;
}
int thisEntityId = thisChain.getEntityInfo().getMolId();
@@ -256,18 +269,19 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) {
Subunit thisSub = this.subunits.get(this.representative);
Subunit otherSub = other.subunits.get(other.representative);
+ String thisName = thisSub.getName();
+ String otherName = otherSub.getName();
+
logger.info("SubunitClusters {}-{} belong to same entity. Assuming they are identical",
- thisSub.getName(),
- otherSub.getName());
+ thisName,
+ otherName);
List thisAligned = new ArrayList<>();
List otherAligned = new ArrayList<>();
- // we've merged by entity id, we can assume structure, chain and entity are available
+ // we've merged by entity id, we can assume structure, chain and entity are available (checked in isIdenticalByEntityIdTo())
Structure thisStruct = thisSub.getStructure();
Structure otherStruct = otherSub.getStructure();
- String thisName = thisSub.getName();
- String otherName = otherSub.getName();
Chain thisChain = thisStruct.getChain(thisName);
Chain otherChain = otherStruct.getChain(otherName);
EntityInfo entityInfo = thisChain.getEntityInfo();
From d851f6078b50e7fbe5ffde3ff23d7bf16d4a211a Mon Sep 17 00:00:00 2001
From: Jose Manuel Duarte
Date: Tue, 21 Jan 2020 16:40:53 -0800
Subject: [PATCH 08/11] More checks and a warning in case no aligned atoms
found. Also a few minor fixes
---
.../TestQuatSymmetryDetectorExamples.java | 8 ++++++++
.../structure/cluster/SubunitCluster.java | 11 +++++++++-
.../symmetry/core/C2RotationSolver.java | 7 +++----
.../symmetry/core/QuatSymmetrySubunits.java | 20 +++++++++----------
4 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
index 364a9e0b42..550fb542a4 100644
--- a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
+++ b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
@@ -32,6 +32,7 @@
import org.biojava.nbio.structure.cluster.SubunitClusterer;
import org.biojava.nbio.structure.cluster.SubunitClustererMethod;
import org.biojava.nbio.structure.cluster.SubunitClustererParameters;
+import org.biojava.nbio.structure.io.FileParsingParameters;
import org.biojava.nbio.structure.symmetry.core.QuatSymmetryDetector;
import org.biojava.nbio.structure.symmetry.core.QuatSymmetryParameters;
import org.biojava.nbio.structure.symmetry.core.QuatSymmetryResults;
@@ -369,6 +370,13 @@ public void testPseudoIdentity95() throws IOException, StructureException {
@Test
public void testSymDetectionWithSubunitClusterByEntityId() throws IOException, StructureException {
+ AtomCache cache = new AtomCache();
+ cache.setUseMmtf(false);
+ cache.setUseMmCif(true);
+ FileParsingParameters params = new FileParsingParameters();
+ params.setAlignSeqRes(true);
+ cache.setFileParsingParams(params);
+ StructureIO.setAtomCache(cache);
Structure pdb = StructureIO.getStructure("BIO:1SMT:1");
SubunitClustererParameters cp = new SubunitClustererParameters();
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
index 32235d0ab7..986f092775 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
@@ -293,6 +293,11 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) {
int seqresIndex = entityInfo.getAlignedResIndex(g, thisChain);
+ if (seqresIndex == -1) {
+ // this might mean that FileParsingParameters.setAlignSeqRes() wasn't set to true during parsing
+ continue;
+ }
+
Group otherG = otherChain.getSeqResGroups().get(seqresIndex - 1);
if (!otherChain.getAtomGroups().contains(otherG)) {
@@ -310,6 +315,10 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) {
}
}
+ if (thisAligned.size() == 0 && otherAligned.size() == 0) {
+ logger.warn("No equivalent aligned atoms found between SubunitClusters {}-{} via entity seqres alignment. Is FileParsingParameters.setAlignSeqRes() set?", thisName, otherName);
+ }
+
updateEquivResidues(other, thisAligned, otherAligned);
return true;
@@ -743,7 +752,7 @@ public SubunitClustererMethod getClustererMethod() {
*/
public List getAlignedAtomsSubunits() {
- List alignedAtoms = Collections.emptyList();
+ List alignedAtoms = new ArrayList<>();
// Loop through all subunits and add the aligned positions
for (int s = 0; s < subunits.size(); s++)
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java
index 0d81d4ce5b..90b3a3f6e1 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java
@@ -39,8 +39,8 @@
* @author Peter
*/
public class C2RotationSolver implements QuatSymmetrySolver {
- private QuatSymmetrySubunits subunits = null;
- private QuatSymmetryParameters parameters = null;
+ private QuatSymmetrySubunits subunits;
+ private QuatSymmetryParameters parameters;
private Vector3d centroid = new Vector3d();
private Matrix4d centroidInverse = new Matrix4d();
@@ -132,7 +132,7 @@ private void solve() {
}
private void addEOperation() {
- List permutation = Arrays.asList(new Integer[]{0,1});
+ List permutation = Arrays.asList(0,1);
Matrix4d transformation = new Matrix4d();
transformation.setIdentity();
combineWithTranslation(transformation);
@@ -145,7 +145,6 @@ private void addEOperation() {
/**
* Adds translational component to rotation matrix
- * @param rotTrans
* @param rotation
* @return
*/
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java
index 92b2786e8c..27d16cd6fe 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java
@@ -21,6 +21,7 @@
package org.biojava.nbio.structure.symmetry.core;
import org.biojava.nbio.structure.Atom;
+import org.biojava.nbio.structure.Calc;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.cluster.SubunitCluster;
import org.biojava.nbio.structure.geometry.CalcPoint;
@@ -34,7 +35,7 @@
import java.util.stream.Collectors;
/**
- * A bean to represent information about the set of {@link Subunit} being
+ * A bean to represent information about the set of {@link org.biojava.nbio.structure.cluster.Subunit}s being
* considered for symmetry detection. This class is a helper for the
* {@link QuatSymmetryDetector} algorithm, since it calculates and caches the
* {@link MomentsOfInertia} and the centroids of each Subunit.
@@ -45,13 +46,13 @@
*/
public class QuatSymmetrySubunits {
- private List caCoords = new ArrayList();
- private List originalCenters = new ArrayList();
- private List centers = new ArrayList();
- private List unitVectors = new ArrayList();
+ private List caCoords = new ArrayList<>();
+ private List originalCenters = new ArrayList<>();
+ private List centers = new ArrayList<>();
+ private List unitVectors = new ArrayList<>();
- private List folds = new ArrayList();
- private List clusterIds = new ArrayList();
+ private List folds = new ArrayList<>();
+ private List clusterIds = new ArrayList<>();
private List clusters;
private Point3d centroid;
@@ -75,10 +76,7 @@ public QuatSymmetrySubunits(List clusters) {
clusterIds.add(c);
Atom[] atoms = clusters.get(c).getAlignedAtomsSubunit(s);
- // Convert atoms to points
- Point3d[] points = new Point3d[atoms.length];
- for (int i = 0; i < atoms.length; i++)
- points[i] = atoms[i].getCoordsAsPoint3d();
+ Point3d[] points = Calc.atomsToPoints(atoms);
caCoords.add(points);
}
From f54b62595cb799481a7e2aef3212388c74929335 Mon Sep 17 00:00:00 2001
From: Jose Manuel Duarte
Date: Tue, 21 Jan 2020 16:48:24 -0800
Subject: [PATCH 09/11] Small optimization
---
.../biojava/nbio/structure/cluster/SubunitCluster.java | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
index 986f092775..d73d747cb6 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
@@ -298,15 +298,15 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) {
continue;
}
+ // note the seqresindex is 1-based
Group otherG = otherChain.getSeqResGroups().get(seqresIndex - 1);
- if (!otherChain.getAtomGroups().contains(otherG)) {
- // skip residues that are unobserved in other sequence ("gaps" in the entity alignment)
+ int otherIndex = otherChain.getAtomGroups().indexOf(otherG);
+ if (otherIndex == -1) {
+ // skip residues that are unobserved in other sequence ("gaps" in the entity SEQRES alignment)
continue;
}
- int otherIndex = otherChain.getAtomGroups().indexOf(otherG);
-
// Only consider residues that are part of the SubunitCluster
if (this.subunitEQR.get(this.representative).contains(thisIndex)
&& other.subunitEQR.get(other.representative).contains(otherIndex)) {
@@ -316,7 +316,7 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) {
}
if (thisAligned.size() == 0 && otherAligned.size() == 0) {
- logger.warn("No equivalent aligned atoms found between SubunitClusters {}-{} via entity seqres alignment. Is FileParsingParameters.setAlignSeqRes() set?", thisName, otherName);
+ logger.warn("No equivalent aligned atoms found between SubunitClusters {}-{} via entity SEQRES alignment. Is FileParsingParameters.setAlignSeqRes() set?", thisName, otherName);
}
updateEquivResidues(other, thisAligned, otherAligned);
From ed322e387cd46344a7864ac58b60c38df7c37633 Mon Sep 17 00:00:00 2001
From: Jose Manuel Duarte
Date: Tue, 21 Jan 2020 17:12:29 -0800
Subject: [PATCH 10/11] Docs, a new StructureTools method and a new performance
test (ignored)
---
.../TestQuatSymmetryDetectorExamples.java | 41 ++++++++++++++++++-
.../nbio/structure/StructureTools.java | 30 +++++++++++---
.../cluster/SubunitClustererParameters.java | 6 ++-
3 files changed, 70 insertions(+), 7 deletions(-)
diff --git a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
index 550fb542a4..0d094a059a 100644
--- a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
+++ b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
@@ -28,15 +28,19 @@
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIO;
+import org.biojava.nbio.structure.StructureTools;
import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.cluster.SubunitClusterer;
import org.biojava.nbio.structure.cluster.SubunitClustererMethod;
import org.biojava.nbio.structure.cluster.SubunitClustererParameters;
import org.biojava.nbio.structure.io.FileParsingParameters;
+import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder;
+import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
import org.biojava.nbio.structure.symmetry.core.QuatSymmetryDetector;
import org.biojava.nbio.structure.symmetry.core.QuatSymmetryParameters;
import org.biojava.nbio.structure.symmetry.core.QuatSymmetryResults;
import org.biojava.nbio.structure.symmetry.core.Stoichiometry;
+import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -369,7 +373,7 @@ public void testPseudoIdentity95() throws IOException, StructureException {
}
@Test
- public void testSymDetectionWithSubunitClusterByEntityId() throws IOException, StructureException {
+ public void testSymDetectionWithClusteringByEntityId() throws IOException, StructureException {
AtomCache cache = new AtomCache();
cache.setUseMmtf(false);
cache.setUseMmCif(true);
@@ -390,4 +394,39 @@ public void testSymDetectionWithSubunitClusterByEntityId() throws IOException, S
assertEquals("C2", symmetry.getSymmetry());
assertEquals("A2", symmetry.getStoichiometry().toString());
}
+
+ /**
+ * A performance test that demonstrates how the SubunitClustererParameters.setUseEntityIdForSeqIdentityDetermination()
+ * has a dramatic effect in runtime versus doing alignments.
+ * This takes minutes with the parameter on, but hours without the parameter.
+ */
+ @Ignore("This is a performance test to be run manually")
+ @Test
+ public void testSymDetectionPerformanceLargeCapsid() throws IOException, StructureException {
+ AtomCache cache = new AtomCache();
+ cache.setUseMmtf(false);
+ cache.setUseMmCif(true);
+ FileParsingParameters params = new FileParsingParameters();
+ params.setAlignSeqRes(true);
+ params.setParseBioAssembly(true);
+ cache.setFileParsingParams(params);
+ StructureIO.setAtomCache(cache);
+
+ // making sure we remove all atoms but representative before we expand, otherwise memory requirements are huge
+ Structure au = StructureIO.getStructure("6NHJ");
+ StructureTools.reduceToRepresentativeAtoms(au);
+ BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();
+ List transforms = au.getPDBHeader().getBioAssemblies().get(1).getTransforms();
+ Structure pdb =builder.rebuildQuaternaryStructure(au, transforms, true, false);
+
+ SubunitClustererParameters cp = new SubunitClustererParameters();
+ cp.setUseEntityIdForSeqIdentityDetermination(true); // this is the parameter that makes this fast
+ cp.setClustererMethod(SubunitClustererMethod.SEQUENCE);
+ QuatSymmetryParameters symmParams = new QuatSymmetryParameters();
+ QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry(
+ pdb, symmParams, cp);
+
+ assertEquals("I", symmetry.getSymmetry());
+ assertEquals("A960B960C600D480E300", symmetry.getStoichiometry().toString());
+ }
}
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java
index c62180176c..9ba8170ab5 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java
@@ -1264,7 +1264,7 @@ public static final Character get1LetterCode(String groupCode3) {
* 3-character code for a group.
*
*/
- public static final boolean isNucleotide(String groupCode3) {
+ public static boolean isNucleotide(String groupCode3) {
String code = groupCode3.trim();
return nucleotides30.containsKey(code)
|| nucleotides23.containsKey(code);
@@ -1283,7 +1283,7 @@ public static final boolean isNucleotide(String groupCode3) {
* @deprecated Use {@link StructureIdentifier#reduce(Structure)} instead (v. 4.2.0)
*/
@Deprecated
- public static final Structure getReducedStructure(Structure s,
+ public static Structure getReducedStructure(Structure s,
String chainId) throws StructureException {
// since we deal here with structure alignments,
// only use Model 1...
@@ -1338,7 +1338,7 @@ public static final Structure getReducedStructure(Structure s,
return newS;
}
- public static final String convertAtomsToSeq(Atom[] atoms) {
+ public static String convertAtomsToSeq(Atom[] atoms) {
StringBuilder buf = new StringBuilder();
Group prevGroup = null;
@@ -1374,7 +1374,7 @@ public static final String convertAtomsToSeq(Atom[] atoms) {
* @throws StructureException
* if the group cannot be found.
*/
- public static final Group getGroupByPDBResidueNumber(Structure struc,
+ public static Group getGroupByPDBResidueNumber(Structure struc,
ResidueNumber pdbResNum) throws StructureException {
if (struc == null || pdbResNum == null) {
throw new IllegalArgumentException("Null argument(s).");
@@ -1447,7 +1447,7 @@ public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) {
* @param chain
* @param cutoff
* @return
- * @see {@link #getRepresentativeAtomsInContact(Chain, double)}
+ * @see #getRepresentativeAtomsInContact(Chain, double)
*/
public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) {
Grid grid = new Grid(cutoff);
@@ -1921,4 +1921,24 @@ private static String replaceFirstChar(String name, char c, char d) {
return name;
}
+ /**
+ * Remove all atoms but the representative atoms (C alphas or phosphates) from the given structure.
+ * @param structure the structure
+ * @since 5.4.0
+ */
+ public static void reduceToRepresentativeAtoms(Structure structure) {
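+ for (int modelIdx = 0; modelIdx < structure.nrModels(); modelIdx++) {
+ for (Chain c : structure.getPolyChains(modelIdx)) {
+ for (Group g : c.getAtomGroups()) {
+ List<Atom> atoms = g.getAtoms();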
+ if (g.isAminoAcid()) {
+ atoms.removeIf(a->!a.getName().equals(CA_ATOM_NAME));
+ } else if (g.isNucleotide()) {
+ atoms.removeIf(a->!a.getName().equals(NUCLEOTIDE_REPRESENTATIVE));
+ }
+ // else we keep all other atoms. We are concerned only about aminoacids and nucleotides that make up the bulk of the structures
+ }
+ }
+ }
+ }
}
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java
index f3abae6c3e..4224c76d0c 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java
@@ -511,7 +511,8 @@ public boolean isHighConfidenceScores(double sequenceIdentity, double sequenceCo
/**
* Whether to use the entity id of subunits to infer that sequences are identical.
* Only applies if the {@link SubunitClustererMethod} is a sequence based one.
- * @return
+ * @return the flag
+ * @since 5.4.0
*/
public boolean isUseEntityIdForSeqIdentityDetermination() {
return useEntityIdForSeqIdentityDetermination;
@@ -520,7 +521,10 @@ public boolean isUseEntityIdForSeqIdentityDetermination() {
/**
* Whether to use the entity id of subunits to infer that sequences are identical.
* Only applies if the {@link SubunitClustererMethod} is a sequence based one.
+ * Note this requires {@link org.biojava.nbio.structure.io.FileParsingParameters#setAlignSeqRes(boolean)} to be
+ * set to true.
* @param useEntityIdForSeqIdentityDetermination the flag to be set
+ * @since 5.4.0
*/
public void setUseEntityIdForSeqIdentityDetermination(boolean useEntityIdForSeqIdentityDetermination) {
this.useEntityIdForSeqIdentityDetermination = useEntityIdForSeqIdentityDetermination;
From f779dc3d32958ca5a717a40129f73c20acca47b7 Mon Sep 17 00:00:00 2001
From: Jose Manuel Duarte
Date: Thu, 23 Jan 2020 11:56:24 -0800
Subject: [PATCH 11/11] Docs
---
.../symmetry/TestQuatSymmetryDetectorExamples.java | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
index 0d094a059a..a5cd0a9fb4 100644
--- a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
+++ b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
@@ -398,7 +398,6 @@ public void testSymDetectionWithClusteringByEntityId() throws IOException, Struc
/**
* A performance test that demonstrates how the SubunitClustererParameters.setUseEntityIdForSeqIdentityDetermination()
* has a dramatic effect in runtime versus doing alignments.
- * This takes minutes with the parameter on, but hours without the parameter.
*/
@Ignore("This is a performance test to be run manually")
@Test
@@ -413,14 +412,20 @@ public void testSymDetectionPerformanceLargeCapsid() throws IOException, Structu
StructureIO.setAtomCache(cache);
// making sure we remove all atoms but representative before we expand, otherwise memory requirements are huge
+ // 6Q1F is another good example
Structure au = StructureIO.getStructure("6NHJ");
StructureTools.reduceToRepresentativeAtoms(au);
BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();
List transforms = au.getPDBHeader().getBioAssemblies().get(1).getTransforms();
- Structure pdb =builder.rebuildQuaternaryStructure(au, transforms, true, false);
+ Structure pdb = builder.rebuildQuaternaryStructure(au, transforms, true, false);
SubunitClustererParameters cp = new SubunitClustererParameters();
- cp.setUseEntityIdForSeqIdentityDetermination(true); // this is the parameter that makes this fast
+
+ // This is the parameter that makes this fast, set it to false to see the difference.
+ // As of git commit ed322e387cd46344a7864a, the difference in runtime is not that huge:
+ // 2 minutes with true, 10 minutes with false. I observed a much larger difference before, but can't reproduce anymore - JD 2020-01-23
+ cp.setUseEntityIdForSeqIdentityDetermination(true);
+
cp.setClustererMethod(SubunitClustererMethod.SEQUENCE);
QuatSymmetryParameters symmParams = new QuatSymmetryParameters();
QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry(