From 6ac887746b1d7f7df93e7793e15d773557913961 Mon Sep 17 00:00:00 2001 From: Jose Manuel Duarte Date: Mon, 20 Jan 2020 15:30:56 -0800 Subject: [PATCH 01/11] Extracting method in test --- .../structure/cluster/SubunitCluster.java | 2 +- .../structure/cluster/TestSubunitCluster.java | 97 ++++++++----------- 2 files changed, 39 insertions(+), 60 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java index 2ae2ccb83c..392a102e51 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java @@ -241,7 +241,7 @@ public boolean mergeIdentical(SubunitCluster other) { * same Subunit. This is checked by comparing the entity identifiers of the subunits * if one can be found. * Thus this only makes sense when the subunits are complete chains of a - * deposited PDB entry. I + * deposited PDB entry. 
* * @param other * SubunitCluster diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java index 52c7e305b7..c54085fbe8 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java @@ -53,16 +53,7 @@ public class TestSubunitCluster { public void testMergeIdentical() { // Create an Atom Array of poly-alanine - List atoms = new ArrayList<>(10); - for (int i = 0; i < 10; i++) { - Group g = new AminoAcidImpl(); - g.setPDBName("ALA"); - Atom a = new AtomImpl(); - a.setName(StructureTools.CA_ATOM_NAME); - g.addAtom(a); - atoms.add(a); - } - Atom[] reprAtoms = atoms.toArray(new Atom[atoms.size()]); + Atom[] reprAtoms = mockAtomArray(10, "ALA", -1, null); // Create two identical SubunitCluster SubunitCluster sc1 = new SubunitCluster(new Subunit(reprAtoms, @@ -79,16 +70,7 @@ public void testMergeIdentical() { assertEquals(sc1.length(), 10); // Create an Atom Array of poly-glycine - List atoms2 = new ArrayList<>(10); - for (int i = 0; i < 10; i++) { - Group g = new AminoAcidImpl(); - g.setPDBName("GLY"); - Atom a = new AtomImpl(); - a.setName(StructureTools.CA_ATOM_NAME); - g.addAtom(a); - atoms2.add(a); - } - Atom[] reprAtoms2 = atoms2.toArray(new Atom[atoms2.size()]); + Atom[] reprAtoms2 = mockAtomArray(10, "GLY", -1, null); SubunitCluster sc3 = new SubunitCluster(new Subunit(reprAtoms2, "subunit 1", null, null)); @@ -111,17 +93,8 @@ public void testMergeIdentical() { @Test public void testMergeSequence() throws CompoundNotFoundException { - // Create an Atom Array of ploy-alanine - List atoms = new ArrayList<>(100); - for (int i = 0; i < 100; i++) { - Group g = new AminoAcidImpl(); - g.setPDBName("ALA"); - Atom a = new AtomImpl(); - a.setName(StructureTools.CA_ATOM_NAME); - g.addAtom(a); - 
atoms.add(a); - } - Atom[] reprAtoms = atoms.toArray(new Atom[atoms.size()]); + // Create an Atom Array of poly-alanine + Atom[] reprAtoms = mockAtomArray(100, "ALA", -1, null); // Create two identical SubunitCluster SubunitCluster sc1 = new SubunitCluster(new Subunit(reprAtoms, @@ -140,16 +113,7 @@ public void testMergeSequence() throws CompoundNotFoundException { assertEquals(sc1.length(), 100); // Create an Atom Array of poly-glycine - List atoms2 = new ArrayList(100); - for (int i = 0; i < 100; i++) { - Group g = new AminoAcidImpl(); - g.setPDBName("GLY"); - Atom a = new AtomImpl(); - a.setName(StructureTools.CA_ATOM_NAME); - g.addAtom(a); - atoms2.add(a); - } - Atom[] reprAtoms2 = atoms2.toArray(new Atom[atoms2.size()]); + Atom[] reprAtoms2 = mockAtomArray(100, "GLY", -1, null); SubunitCluster sc3 = new SubunitCluster(new Subunit(reprAtoms2, "subunit 3", null, null)); @@ -163,24 +127,7 @@ public void testMergeSequence() throws CompoundNotFoundException { assertEquals(sc1.length(), 100); // Create an Atom Array of 9 glycine and 91 alanine - List atoms3 = new ArrayList<>(100); - for (int i = 0; i < 9; i++) { - Group g = new AminoAcidImpl(); - g.setPDBName("GLY"); - Atom a = new AtomImpl(); - a.setName(StructureTools.CA_ATOM_NAME); - g.addAtom(a); - atoms3.add(a); - } - for (int i = 0; i < 91; i++) { - Group g = new AminoAcidImpl(); - g.setPDBName("ALA"); - Atom a = new AtomImpl(); - a.setName(StructureTools.CA_ATOM_NAME); - g.addAtom(a); - atoms3.add(a); - } - Atom[] reprAtoms3 = atoms3.toArray(new Atom[atoms3.size()]); + Atom[] reprAtoms3 = mockAtomArray(9, "GLY", 91, "ALA"); SubunitCluster sc4 = new SubunitCluster(new Subunit(reprAtoms3, "subunit 4", null, null)); @@ -283,4 +230,36 @@ public void testDivideInternally() throws StructureException, IOException { assertEquals(sc1.getAlignedAtomsSubunit(0).length, sc1.getAlignedAtomsSubunit(1).length); } + + /** + * Create a mock atom array, with size1 residues of type1, followed by size2 residues of type2 + * 
@param size1 the number of residues of type1 to add + * @param type1 the 3 letter code of residue + * @param size2 the number of residues of type2 to add, if -1 none are added + * @param type2 the 3 letter code of residue, if null none are added + * @return the mock atom array + */ + private Atom[] mockAtomArray(int size1, String type1, int size2, String type2) { + List atoms = new ArrayList<>(size1 + size2); + for (int i = 0; i < size1; i++) { + Group g = new AminoAcidImpl(); + g.setPDBName(type1); + Atom a = new AtomImpl(); + a.setName(StructureTools.CA_ATOM_NAME); + g.addAtom(a); + atoms.add(a); + } + + if (size2 >= 0 && type2 !=null) { + for (int i = 0; i < size2; i++) { + Group g = new AminoAcidImpl(); + g.setPDBName(type2); + Atom a = new AtomImpl(); + a.setName(StructureTools.CA_ATOM_NAME); + g.addAtom(a); + atoms.add(a); + } + } + return atoms.toArray(new Atom[0]); + } } From d415e3c8ace1ef49db25429f60bd928fd5767124 Mon Sep 17 00:00:00 2001 From: Jose Manuel Duarte Date: Mon, 20 Jan 2020 16:10:47 -0800 Subject: [PATCH 02/11] New test --- .../structure/cluster/TestSubunitCluster.java | 94 ++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java index c54085fbe8..3947372521 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java @@ -30,10 +30,14 @@ import org.biojava.nbio.structure.AminoAcidImpl; import org.biojava.nbio.structure.Atom; import org.biojava.nbio.structure.AtomImpl; +import org.biojava.nbio.structure.Chain; +import org.biojava.nbio.structure.ChainImpl; +import org.biojava.nbio.structure.EntityInfo; import org.biojava.nbio.structure.Group; import org.biojava.nbio.structure.Structure; import 
org.biojava.nbio.structure.StructureException; import org.biojava.nbio.structure.StructureIO; +import org.biojava.nbio.structure.StructureImpl; import org.biojava.nbio.structure.StructureTools; import org.junit.Test; @@ -85,6 +89,47 @@ public void testMergeIdentical() { } + @Test + public void testMergeIdenticalByEntityId() { + + // Create 2 Atom Arrays, with same entity id + Atom[] reprAtoms1 = mockAtomArray("A", 1, 10, "ALA", -1, null); + Structure structure1 = reprAtoms1[0].getGroup().getChain().getStructure(); + + Atom[] reprAtoms2 = mockAtomArray("B", 1, 10, "PRO", -1, null); + Structure structure2 = reprAtoms2[0].getGroup().getChain().getStructure(); + + // Create two SubunitCluster with same entity id + SubunitCluster sc1 = new SubunitCluster(new Subunit(reprAtoms1, + "A", null, structure1)); + SubunitCluster sc2 = new SubunitCluster(new Subunit(reprAtoms2, + "B", null, structure2)); + + boolean merged = sc1.mergeIdenticalByEntityId(sc2); + + // Merged have to be true, and the merged SubunitCluster is sc1 + assertTrue(merged); + assertEquals(2, sc1.size()); + assertEquals(1, sc2.size()); + assertEquals(10, sc1.length()); + + // Create an Atom Array of poly-glycine with a different entity id + Atom[] reprAtoms3 = mockAtomArray("A", 2, 10, "GLY", -1, null); + Structure structure3 = reprAtoms2[0].getGroup().getChain().getStructure(); + + SubunitCluster sc3 = new SubunitCluster(new Subunit(reprAtoms3, + "A", null, structure3)); + + merged = sc1.mergeIdenticalByEntityId(sc3); + + // Merged have to be false, and Clusters result unmodified + assertFalse(merged); + assertEquals(2, sc1.size()); + assertEquals(1, sc2.size()); + assertEquals(10, sc1.length()); + + } + /** * Test {@link SubunitCluster#mergeSequence(SubunitCluster, SubunitClustererParameters)} * @@ -232,7 +277,8 @@ public void testDivideInternally() throws StructureException, IOException { } /** - * Create a mock atom array, with size1 residues of type1, followed by size2 residues of type2 + * Create a 
mock atom array, with size1 residues of type1, followed by size2 residues of type2. + * * @param size1 the number of residues of type1 to add * @param type1 the 3 letter code of residue * @param size2 the number of residues of type2 to add, if -1 none are added @@ -240,10 +286,55 @@ public void testDivideInternally() throws StructureException, IOException { * @return the mock atom array */ private Atom[] mockAtomArray(int size1, String type1, int size2, String type2) { + + List atoms = new ArrayList<>(size1 + size2); + for (int i = 0; i < size1; i++) { + Group g = new AminoAcidImpl(); + g.setPDBName(type1); + Atom a = new AtomImpl(); + a.setName(StructureTools.CA_ATOM_NAME); + g.addAtom(a); + atoms.add(a); + } + + if (size2 >= 0 && type2 !=null) { + for (int i = 0; i < size2; i++) { + Group g = new AminoAcidImpl(); + g.setPDBName(type2); + Atom a = new AtomImpl(); + a.setName(StructureTools.CA_ATOM_NAME); + g.addAtom(a); + atoms.add(a); + } + } + return atoms.toArray(new Atom[0]); + } + + /** + * Create a mock atom array, with size1 residues of type1, followed by size2 residues of type2. 
+ * + * @param chainId a chain with this chain id will be set as parent of groups + * @param entityId an entity with this id will be set as parent of chain + * @param size1 the number of residues of type1 to add + * @param type1 the 3 letter code of residue + * @param size2 the number of residues of type2 to add, if -1 none are added + * @param type2 the 3 letter code of residue, if null none are added + * @return the mock atom array + */ + private Atom[] mockAtomArray(String chainId, int entityId, int size1, String type1, int size2, String type2) { + Chain chain = new ChainImpl(); + Structure structure = new StructureImpl(); + chain.setId(chainId); + structure.addChain(chain); + EntityInfo entityInfo = new EntityInfo(); + entityInfo.setMolId(entityId); + chain.setEntityInfo(entityInfo); + List atoms = new ArrayList<>(size1 + size2); for (int i = 0; i < size1; i++) { Group g = new AminoAcidImpl(); g.setPDBName(type1); + chain.addGroup(g); Atom a = new AtomImpl(); a.setName(StructureTools.CA_ATOM_NAME); g.addAtom(a); @@ -254,6 +345,7 @@ private Atom[] mockAtomArray(int size1, String type1, int size2, String type2) { for (int i = 0; i < size2; i++) { Group g = new AminoAcidImpl(); g.setPDBName(type2); + chain.addGroup(g); Atom a = new AtomImpl(); a.setName(StructureTools.CA_ATOM_NAME); g.addAtom(a); From 679f84081a8afe5f0b162fd9a9ac8c3ebb971de5 Mon Sep 17 00:00:00 2001 From: Jose Manuel Duarte Date: Mon, 20 Jan 2020 16:18:09 -0800 Subject: [PATCH 03/11] Right order of params --- .../structure/cluster/TestSubunitCluster.java | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java index 3947372521..585c4f1d1c 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java +++ 
b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java @@ -69,9 +69,9 @@ public void testMergeIdentical() { // Merged have to be true, and the merged SubunitCluster is sc1 assertTrue(merged); - assertEquals(sc1.size(), 2); - assertEquals(sc2.size(), 1); - assertEquals(sc1.length(), 10); + assertEquals(2, sc1.size()); + assertEquals(1, sc2.size()); + assertEquals(10, sc1.length()); // Create an Atom Array of poly-glycine Atom[] reprAtoms2 = mockAtomArray(10, "GLY", -1, null); @@ -83,9 +83,9 @@ public void testMergeIdentical() { // Merged have to be false, and Clusters result inmodified assertFalse(merged); - assertEquals(sc1.size(), 2); - assertEquals(sc2.size(), 1); - assertEquals(sc1.length(), 10); + assertEquals(2, sc1.size()); + assertEquals(1, sc2.size()); + assertEquals(10, sc1.length()); } @@ -153,9 +153,9 @@ public void testMergeSequence() throws CompoundNotFoundException { // Merged have to be true, and the merged SubunitCluster is sc1 assertTrue(merged); - assertEquals(sc1.size(), 2); - assertEquals(sc2.size(), 1); - assertEquals(sc1.length(), 100); + assertEquals(2, sc1.size()); + assertEquals(1, sc2.size()); + assertEquals(100, sc1.length()); // Create an Atom Array of poly-glycine Atom[] reprAtoms2 = mockAtomArray(100, "GLY", -1, null); @@ -167,9 +167,9 @@ public void testMergeSequence() throws CompoundNotFoundException { // Merged have to be false, and Clusters result inmodified assertFalse(merged); - assertEquals(sc1.size(), 2); - assertEquals(sc2.size(), 1); - assertEquals(sc1.length(), 100); + assertEquals(2, sc1.size()); + assertEquals(1, sc2.size()); + assertEquals(100, sc1.length()); // Create an Atom Array of 9 glycine and 91 alanine Atom[] reprAtoms3 = mockAtomArray(9, "GLY", 91, "ALA"); @@ -181,9 +181,9 @@ public void testMergeSequence() throws CompoundNotFoundException { // Merged have to be true, and the merged SubunitCluster is sc1 assertTrue(merged); - assertEquals(sc1.size(), 3); - 
assertEquals(sc2.size(), 1); - assertEquals(sc1.length(), 91); + assertEquals(3, sc1.size()); + assertEquals(1, sc2.size()); + assertEquals(91, sc1.length()); } @@ -224,10 +224,10 @@ public void testMergeStructure() throws StructureException, IOException { // Merged have to be true, and the merged SubunitCluster is sc1 assertTrue(merged13); assertTrue(merged24); - assertEquals(sc1.size(), 2); - assertEquals(sc2.size(), 2); - assertEquals(sc1.length(), 141); - assertEquals(sc2.length(), 146); + assertEquals(2, sc1.size()); + assertEquals(2, sc2.size()); + assertEquals(141, sc1.length()); + assertEquals(146, sc2.length()); assertEquals(sc1.getAlignedAtomsSubunit(0).length, sc1.getAlignedAtomsSubunit(1).length); assertEquals(sc2.getAlignedAtomsSubunit(0).length, @@ -237,8 +237,8 @@ public void testMergeStructure() throws StructureException, IOException { boolean merged = sc1.mergeStructure(sc2, clustererParameters); assertTrue(merged); - assertEquals(sc1.size(), 4); - assertEquals(sc1.length(), 140, 2); + assertEquals(4, sc1.size()); + assertEquals(140, sc1.length(), 2); assertEquals(sc1.getAlignedAtomsSubunit(0).length, sc1.getAlignedAtomsSubunit(2).length); @@ -270,7 +270,7 @@ public void testDivideInternally() throws StructureException, IOException { // Divided has to be true, and Subunit length shorter than half assertTrue(divided); - assertEquals(sc1.size(), 2); + assertEquals(2, sc1.size()); assertTrue(sc1.length() < 178); assertEquals(sc1.getAlignedAtomsSubunit(0).length, sc1.getAlignedAtomsSubunit(1).length); From 4d6a504c95e173ad9cd56a8cd44689a8c0bf6884 Mon Sep 17 00:00:00 2001 From: Jose Manuel Duarte Date: Tue, 21 Jan 2020 15:34:48 -0800 Subject: [PATCH 04/11] Now testing for subunit clustering with entity ids --- .../TestQuatSymmetryDetectorExamples.java | 24 ++++ .../structure/cluster/TestSubunitCluster.java | 108 +++++++++++------- 2 files changed, 89 insertions(+), 43 deletions(-) diff --git 
a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java index 1b69ec3efc..5f3d0b4881 100644 --- a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java +++ b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java @@ -366,4 +366,28 @@ public void testPseudoIdentity95() throws IOException, StructureException { assertEquals(SubunitClustererMethod.SEQUENCE, symmetry.getSubunitClusters().get(0).getClustererMethod()); } + + @Test + public void testSymDetectionWithSubunitClusterByEntityId() throws IOException, StructureException { + Structure pdb = StructureIO.getStructure("BIO:1SMT:1"); + + SubunitClustererParameters cp = new SubunitClustererParameters(); +// cp.setOptimizeAlignment(false); +// cp.setSequenceIdentityThreshold(0.75); +// cp.setMinimumSequenceLength(3); +// cp.setAbsoluteMinimumSequenceLength(3); +// cp.setUseSequenceCoverage(false); +// cp.setUseStructureCoverage(false); +// cp.setUseRMSD(false); + cp.setUseEntityIdForSeqIdentityDetermination(true); + cp.setClustererMethod(SubunitClustererMethod.SEQUENCE); + QuatSymmetryParameters symmParams = new QuatSymmetryParameters(); +// symmParams.setOnTheFly(true); + QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry( + pdb, symmParams, cp); + + // C2 symmetry, A2 stoichiometry + assertEquals("C2", symmetry.getSymmetry()); + assertEquals("A2", symmetry.getStoichiometry().toString()); + } } diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java index 585c4f1d1c..4b6e55ee28 100644 --- 
a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java @@ -34,6 +34,7 @@ import org.biojava.nbio.structure.ChainImpl; import org.biojava.nbio.structure.EntityInfo; import org.biojava.nbio.structure.Group; +import org.biojava.nbio.structure.ResidueNumber; import org.biojava.nbio.structure.Structure; import org.biojava.nbio.structure.StructureException; import org.biojava.nbio.structure.StructureIO; @@ -93,17 +94,15 @@ public void testMergeIdentical() { public void testMergeIdenticalByEntityId() { // Create 2 Atom Arrays, with same entity id - Atom[] reprAtoms1 = mockAtomArray("A", 1, 10, "ALA", -1, null); - Structure structure1 = reprAtoms1[0].getGroup().getChain().getStructure(); - - Atom[] reprAtoms2 = mockAtomArray("B", 1, 10, "PRO", -1, null); - Structure structure2 = reprAtoms2[0].getGroup().getChain().getStructure(); + Structure structure = mockStructure(); + Atom[] reprAtoms1 = getAtomArray(structure.getChain("A")); + Atom[] reprAtoms2 = getAtomArray(structure.getChain("B")); // Create two SubunitCluster with same entity id SubunitCluster sc1 = new SubunitCluster(new Subunit(reprAtoms1, - "A", null, structure1)); + "A", null, structure)); SubunitCluster sc2 = new SubunitCluster(new Subunit(reprAtoms2, - "B", null, structure2)); + "B", null, structure)); boolean merged = sc1.mergeIdenticalByEntityId(sc2); @@ -111,14 +110,13 @@ public void testMergeIdenticalByEntityId() { assertTrue(merged); assertEquals(2, sc1.size()); assertEquals(1, sc2.size()); - assertEquals(10, sc1.length()); + assertEquals(9, sc1.length()); // Create an Atom Array of poly-glycine with a different entity id - Atom[] reprAtoms3 = mockAtomArray("A", 2, 10, "GLY", -1, null); - Structure structure3 = reprAtoms2[0].getGroup().getChain().getStructure(); + Atom[] reprAtoms3 = getAtomArray(structure.getChain("C")); SubunitCluster sc3 = new SubunitCluster(new 
Subunit(reprAtoms3, - "A", null, structure3)); + "C", null, structure)); merged = sc1.mergeIdenticalByEntityId(sc3); @@ -126,7 +124,7 @@ public void testMergeIdenticalByEntityId() { assertFalse(merged); assertEquals(2, sc1.size()); assertEquals(1, sc2.size()); - assertEquals(10, sc1.length()); + assertEquals(9, sc1.length()); } @@ -311,47 +309,71 @@ private Atom[] mockAtomArray(int size1, String type1, int size2, String type2) { } /** - * Create a mock atom array, with size1 residues of type1, followed by size2 residues of type2. - * - * @param chainId a chain with this chain id will be set as parent of groups - * @param entityId an entity with this id will be set as parent of chain - * @param size1 the number of residues of type1 to add - * @param type1 the 3 letter code of residue - * @param size2 the number of residues of type2 to add, if -1 none are added - * @param type2 the 3 letter code of residue, if null none are added - * @return the mock atom array + * Create a mock structure with 2 entities 1 (chains A, B) and 2 (chain C). 
+ * @return a structure */ - private Atom[] mockAtomArray(String chainId, int entityId, int size1, String type1, int size2, String type2) { - Chain chain = new ChainImpl(); + private Structure mockStructure() { Structure structure = new StructureImpl(); - chain.setId(chainId); - structure.addChain(chain); - EntityInfo entityInfo = new EntityInfo(); - entityInfo.setMolId(entityId); - chain.setEntityInfo(entityInfo); + EntityInfo entity1 = new EntityInfo(); + entity1.setMolId(1); + EntityInfo entity2 = new EntityInfo(); + entity2.setMolId(2); + structure.addEntityInfo(entity1); + structure.addEntityInfo(entity2); + + Chain chainA = new ChainImpl(); + chainA.setId("A"); + Chain chainB = new ChainImpl(); + chainB.setId("B"); + entity1.addChain(chainA); + entity1.addChain(chainB); + Chain chainC = new ChainImpl(); + chainC.setId("C"); + entity2.addChain(chainC); + + structure.addChain(chainA); + structure.addChain(chainB); + structure.addChain(chainC); + + // entity 1: chain A 10 observed residues, chain B 9 observed residues (first unobserved) + List aGroups = getGroupList(10, "ALA", chainA); + chainA.setAtomGroups(new ArrayList<>(aGroups)); + chainA.setSeqResGroups(aGroups); + chainA.setEntityInfo(entity1); + + List bGroups = getGroupList(10, "ALA", chainB); + chainB.setAtomGroups(new ArrayList<>(bGroups.subList(1,10))); + chainB.setSeqResGroups(bGroups); + chainB.setEntityInfo(entity1); + + List cGroups = getGroupList(20, "GLY", chainC); + chainC.setAtomGroups(new ArrayList<>(cGroups)); + chainC.setSeqResGroups(cGroups); + chainC.setEntityInfo(entity2); + + return structure; + } - List atoms = new ArrayList<>(size1 + size2); - for (int i = 0; i < size1; i++) { + private List getGroupList(int size, String type, Chain chain) { + List list = new ArrayList<>(); + for (int i=0;i= 0 && type2 !=null) { - for (int i = 0; i < size2; i++) { - Group g = new AminoAcidImpl(); - g.setPDBName(type2); - chain.addGroup(g); - Atom a = new AtomImpl(); - 
a.setName(StructureTools.CA_ATOM_NAME); - g.addAtom(a); - atoms.add(a); - } + private Atom[] getAtomArray(Chain chain) { + Atom[] atoms = new Atom[chain.getAtomGroups().size()]; + for (int i = 0; i Date: Tue, 21 Jan 2020 15:37:36 -0800 Subject: [PATCH 05/11] Removing comments --- .../test/symmetry/TestQuatSymmetryDetectorExamples.java | 8 -------- 1 file changed, 8 deletions(-) diff --git a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java index 5f3d0b4881..364a9e0b42 100644 --- a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java +++ b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java @@ -372,17 +372,9 @@ public void testSymDetectionWithSubunitClusterByEntityId() throws IOException, S Structure pdb = StructureIO.getStructure("BIO:1SMT:1"); SubunitClustererParameters cp = new SubunitClustererParameters(); -// cp.setOptimizeAlignment(false); -// cp.setSequenceIdentityThreshold(0.75); -// cp.setMinimumSequenceLength(3); -// cp.setAbsoluteMinimumSequenceLength(3); -// cp.setUseSequenceCoverage(false); -// cp.setUseStructureCoverage(false); -// cp.setUseRMSD(false); cp.setUseEntityIdForSeqIdentityDetermination(true); cp.setClustererMethod(SubunitClustererMethod.SEQUENCE); QuatSymmetryParameters symmParams = new QuatSymmetryParameters(); -// symmParams.setOnTheFly(true); QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry( pdb, symmParams, cp); From fc543afa5a69309f4a08b9a109878ca4db000e34 Mon Sep 17 00:00:00 2001 From: Jose Manuel Duarte Date: Tue, 21 Jan 2020 15:39:39 -0800 Subject: [PATCH 06/11] Fixing the cluster by entity id alignment issue. 
Now tests pass --- .../structure/cluster/SubunitCluster.java | 47 +++++++++++++++++-- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java index 392a102e51..e270bcfe8a 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java @@ -32,6 +32,8 @@ import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; import org.biojava.nbio.structure.Atom; import org.biojava.nbio.structure.Chain; +import org.biojava.nbio.structure.EntityInfo; +import org.biojava.nbio.structure.Group; import org.biojava.nbio.structure.Structure; import org.biojava.nbio.structure.StructureException; import org.biojava.nbio.structure.align.StructureAlignment; @@ -252,12 +254,49 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) { if (!isIdenticalByEntityIdTo(other)) return false; + Subunit thisSub = this.subunits.get(this.representative); + Subunit otherSub = other.subunits.get(other.representative); logger.info("SubunitClusters {}-{} belong to same entity. 
Assuming they are identical", - this.subunits.get(this.representative).getName(), - other.subunits.get(other.representative).getName()); + thisSub.getName(), + otherSub.getName()); - this.subunits.addAll(other.subunits); - this.subunitEQR.addAll(other.subunitEQR); + List thisAligned = new ArrayList<>(); + List otherAligned = new ArrayList<>(); + + // we've merged by entity id, we can assume structure, chain and entity are available + Structure thisStruct = thisSub.getStructure(); + Structure otherStruct = otherSub.getStructure(); + String thisName = thisSub.getName(); + String otherName = otherSub.getName(); + Chain thisChain = thisStruct.getChain(thisName); + Chain otherChain = otherStruct.getChain(otherName); + EntityInfo entityInfo = thisChain.getEntityInfo(); + + // Extract the aligned residues of both Subunits + for (int thisIndex=0; thisIndex < thisSub.size(); thisIndex++) { + + Group g = thisSub.getRepresentativeAtoms()[thisIndex].getGroup(); + + int seqresIndex = entityInfo.getAlignedResIndex(g, thisChain); + + Group otherG = otherChain.getSeqResGroups().get(seqresIndex - 1); + + if (!otherChain.getAtomGroups().contains(otherG)) { + // skip residues that are unobserved in other sequence ("gaps" in the entity alignment) + continue; + } + + int otherIndex = otherChain.getAtomGroups().indexOf(otherG); + + // Only consider residues that are part of the SubunitCluster + if (this.subunitEQR.get(this.representative).contains(thisIndex) + && other.subunitEQR.get(other.representative).contains(otherIndex)) { + thisAligned.add(thisIndex); + otherAligned.add(otherIndex); + } + } + + updateEquivResidues(other, thisAligned, otherAligned); return true; } From 8b76e96a7af6b0118025b8a906b64de11b0d9693 Mon Sep 17 00:00:00 2001 From: Jose Manuel Duarte Date: Tue, 21 Jan 2020 16:01:52 -0800 Subject: [PATCH 07/11] Some improvements --- .../biojava/nbio/structure/EntityInfo.java | 6 +-- .../structure/cluster/SubunitCluster.java | 40 +++++++++++++------ 2 files changed, 30 
insertions(+), 16 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java index b2438763f3..cc52262b12 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java @@ -306,12 +306,12 @@ public List getChainIds() { * used and when all chains within the entity are numbered in the same way), but * in general they will be neither unique (because of insertion codes) nor aligned. *

- * @param g - * @param c + * @param g the group + * @param c the chain * @return the aligned residue index (1 to n), if no SEQRES groups are available at all then {@link ResidueNumber#getSeqNum()} * is returned as a fall-back, if the group is not found in the SEQRES groups then -1 is returned * for the given group and chain - * @throws IllegalArgumentException if the given Chain is not a member of this EnityInfo + * @throws IllegalArgumentException if the given Chain is not a member of this EntityInfo * @see Chain#getSeqResGroup(int) */ public int getAlignedResIndex(Group g, Chain c) { diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java index e270bcfe8a..32235d0ab7 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java @@ -192,22 +192,35 @@ public boolean isIdenticalTo(SubunitCluster other) { * @return true if the SubunitClusters are identical, false otherwise */ public boolean isIdenticalByEntityIdTo(SubunitCluster other) { - Structure thisStruct = this.subunits.get(this.representative).getStructure(); - Structure otherStruct = other.subunits.get(other.representative).getStructure(); - String thisName = this.subunits.get(this.representative).getName(); - String otherName = other.subunits.get(this.representative).getName(); + Subunit thisSub = this.subunits.get(this.representative); + Subunit otherSub = other.subunits.get(other.representative); + String thisName = thisSub.getName(); + String otherName = otherSub.getName(); + + Structure thisStruct = thisSub.getStructure(); + Structure otherStruct = otherSub.getStructure(); + if (thisStruct == null || otherStruct == null) { + logger.info("SubunitClusters {}-{} have no referenced structures. 
Ignoring identity check by entity id", + thisName, + otherName); + return false; + } + if (thisStruct != otherStruct) { + // different object references: will not cluster even if entity id is same + return false; + } Chain thisChain = thisStruct.getChain(thisName); Chain otherChain = otherStruct.getChain(otherName); if (thisChain == null || otherChain == null) { logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id", - this.subunits.get(this.representative).getName(), - other.subunits.get(other.representative).getName()); + thisName, + otherName); return false; } if (thisChain.getEntityInfo() == null || otherChain.getEntityInfo() == null) { logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id", - this.subunits.get(this.representative).getName(), - other.subunits.get(other.representative).getName()); + thisName, + otherName); return false; } int thisEntityId = thisChain.getEntityInfo().getMolId(); @@ -256,18 +269,19 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) { Subunit thisSub = this.subunits.get(this.representative); Subunit otherSub = other.subunits.get(other.representative); + String thisName = thisSub.getName(); + String otherName = otherSub.getName(); + logger.info("SubunitClusters {}-{} belong to same entity. 
Assuming they are identical", - thisSub.getName(), - otherSub.getName()); + thisName, + otherName); List thisAligned = new ArrayList<>(); List otherAligned = new ArrayList<>(); - // we've merged by entity id, we can assume structure, chain and entity are available + // we've merged by entity id, we can assume structure, chain and entity are available (checked in isIdenticalByEntityIdTo()) Structure thisStruct = thisSub.getStructure(); Structure otherStruct = otherSub.getStructure(); - String thisName = thisSub.getName(); - String otherName = otherSub.getName(); Chain thisChain = thisStruct.getChain(thisName); Chain otherChain = otherStruct.getChain(otherName); EntityInfo entityInfo = thisChain.getEntityInfo(); From d851f6078b50e7fbe5ffde3ff23d7bf16d4a211a Mon Sep 17 00:00:00 2001 From: Jose Manuel Duarte Date: Tue, 21 Jan 2020 16:40:53 -0800 Subject: [PATCH 08/11] More checks and a warning in case no aligned atoms found. Also a few minor fixes --- .../TestQuatSymmetryDetectorExamples.java | 8 ++++++++ .../structure/cluster/SubunitCluster.java | 11 +++++++++- .../symmetry/core/C2RotationSolver.java | 7 +++---- .../symmetry/core/QuatSymmetrySubunits.java | 20 +++++++++---------- 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java index 364a9e0b42..550fb542a4 100644 --- a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java +++ b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java @@ -32,6 +32,7 @@ import org.biojava.nbio.structure.cluster.SubunitClusterer; import org.biojava.nbio.structure.cluster.SubunitClustererMethod; import org.biojava.nbio.structure.cluster.SubunitClustererParameters; +import 
org.biojava.nbio.structure.io.FileParsingParameters; import org.biojava.nbio.structure.symmetry.core.QuatSymmetryDetector; import org.biojava.nbio.structure.symmetry.core.QuatSymmetryParameters; import org.biojava.nbio.structure.symmetry.core.QuatSymmetryResults; @@ -369,6 +370,13 @@ public void testPseudoIdentity95() throws IOException, StructureException { @Test public void testSymDetectionWithSubunitClusterByEntityId() throws IOException, StructureException { + AtomCache cache = new AtomCache(); + cache.setUseMmtf(false); + cache.setUseMmCif(true); + FileParsingParameters params = new FileParsingParameters(); + params.setAlignSeqRes(true); + cache.setFileParsingParams(params); + StructureIO.setAtomCache(cache); Structure pdb = StructureIO.getStructure("BIO:1SMT:1"); SubunitClustererParameters cp = new SubunitClustererParameters(); diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java index 32235d0ab7..986f092775 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java @@ -293,6 +293,11 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) { int seqresIndex = entityInfo.getAlignedResIndex(g, thisChain); + if (seqresIndex == -1) { + // this might mean that FileParsingParameters.setAlignSeqRes() wasn't set to true during parsing + continue; + } + Group otherG = otherChain.getSeqResGroups().get(seqresIndex - 1); if (!otherChain.getAtomGroups().contains(otherG)) { @@ -310,6 +315,10 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) { } } + if (thisAligned.size() == 0 && otherAligned.size() == 0) { + logger.warn("No equivalent aligned atoms found between SubunitClusters {}-{} via entity seqres alignment. 
Is FileParsingParameters.setAlignSeqRes() set?", thisName, otherName); + } + updateEquivResidues(other, thisAligned, otherAligned); return true; @@ -743,7 +752,7 @@ public SubunitClustererMethod getClustererMethod() { */ public List getAlignedAtomsSubunits() { - List alignedAtoms = Collections.emptyList(); + List alignedAtoms = new ArrayList<>(); // Loop through all subunits and add the aligned positions for (int s = 0; s < subunits.size(); s++) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java index 0d81d4ce5b..90b3a3f6e1 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java @@ -39,8 +39,8 @@ * @author Peter */ public class C2RotationSolver implements QuatSymmetrySolver { - private QuatSymmetrySubunits subunits = null; - private QuatSymmetryParameters parameters = null; + private QuatSymmetrySubunits subunits; + private QuatSymmetryParameters parameters; private Vector3d centroid = new Vector3d(); private Matrix4d centroidInverse = new Matrix4d(); @@ -132,7 +132,7 @@ private void solve() { } private void addEOperation() { - List permutation = Arrays.asList(new Integer[]{0,1}); + List permutation = Arrays.asList(0,1); Matrix4d transformation = new Matrix4d(); transformation.setIdentity(); combineWithTranslation(transformation); @@ -145,7 +145,6 @@ private void addEOperation() { /** * Adds translational component to rotation matrix - * @param rotTrans * @param rotation * @return */ diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java index 92b2786e8c..27d16cd6fe 100644 --- 
a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java @@ -21,6 +21,7 @@ package org.biojava.nbio.structure.symmetry.core; import org.biojava.nbio.structure.Atom; +import org.biojava.nbio.structure.Calc; import org.biojava.nbio.structure.Chain; import org.biojava.nbio.structure.cluster.SubunitCluster; import org.biojava.nbio.structure.geometry.CalcPoint; @@ -34,7 +35,7 @@ import java.util.stream.Collectors; /** - * A bean to represent information about the set of {@link Subunit} being + * A bean to represent information about the set of {@link org.biojava.nbio.structure.cluster.Subunit}s being * considered for symmetry detection. This class is a helper for the * {@link QuatSymmetryDetector} algorithm, since it calculates and caches the * {@link MomentsOfInertia} and the centroids of each Subunit. @@ -45,13 +46,13 @@ */ public class QuatSymmetrySubunits { - private List caCoords = new ArrayList(); - private List originalCenters = new ArrayList(); - private List centers = new ArrayList(); - private List unitVectors = new ArrayList(); + private List caCoords = new ArrayList<>(); + private List originalCenters = new ArrayList<>(); + private List centers = new ArrayList<>(); + private List unitVectors = new ArrayList<>(); - private List folds = new ArrayList(); - private List clusterIds = new ArrayList(); + private List folds = new ArrayList<>(); + private List clusterIds = new ArrayList<>(); private List clusters; private Point3d centroid; @@ -75,10 +76,7 @@ public QuatSymmetrySubunits(List clusters) { clusterIds.add(c); Atom[] atoms = clusters.get(c).getAlignedAtomsSubunit(s); - // Convert atoms to points - Point3d[] points = new Point3d[atoms.length]; - for (int i = 0; i < atoms.length; i++) - points[i] = atoms[i].getCoordsAsPoint3d(); + Point3d[] points = Calc.atomsToPoints(atoms); caCoords.add(points); } From 
f54b62595cb799481a7e2aef3212388c74929335 Mon Sep 17 00:00:00 2001 From: Jose Manuel Duarte Date: Tue, 21 Jan 2020 16:48:24 -0800 Subject: [PATCH 09/11] Small optimization --- .../biojava/nbio/structure/cluster/SubunitCluster.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java index 986f092775..d73d747cb6 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java @@ -298,15 +298,15 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) { continue; } + // note the seqresindex is 1-based Group otherG = otherChain.getSeqResGroups().get(seqresIndex - 1); - if (!otherChain.getAtomGroups().contains(otherG)) { - // skip residues that are unobserved in other sequence ("gaps" in the entity alignment) + int otherIndex = otherChain.getAtomGroups().indexOf(otherG); + if (otherIndex == -1) { + // skip residues that are unobserved in other sequence ("gaps" in the entity SEQRES alignment) continue; } - int otherIndex = otherChain.getAtomGroups().indexOf(otherG); - // Only consider residues that are part of the SubunitCluster if (this.subunitEQR.get(this.representative).contains(thisIndex) && other.subunitEQR.get(other.representative).contains(otherIndex)) { @@ -316,7 +316,7 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) { } if (thisAligned.size() == 0 && otherAligned.size() == 0) { - logger.warn("No equivalent aligned atoms found between SubunitClusters {}-{} via entity seqres alignment. Is FileParsingParameters.setAlignSeqRes() set?", thisName, otherName); + logger.warn("No equivalent aligned atoms found between SubunitClusters {}-{} via entity SEQRES alignment. 
Is FileParsingParameters.setAlignSeqRes() set?", thisName, otherName); } updateEquivResidues(other, thisAligned, otherAligned); From ed322e387cd46344a7864ac58b60c38df7c37633 Mon Sep 17 00:00:00 2001 From: Jose Manuel Duarte Date: Tue, 21 Jan 2020 17:12:29 -0800 Subject: [PATCH 10/11] Docs, a new StructureTools method and a new performance test (ignored) --- .../TestQuatSymmetryDetectorExamples.java | 41 ++++++++++++++++++- .../nbio/structure/StructureTools.java | 30 +++++++++++--- .../cluster/SubunitClustererParameters.java | 6 ++- 3 files changed, 70 insertions(+), 7 deletions(-) diff --git a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java index 550fb542a4..0d094a059a 100644 --- a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java +++ b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java @@ -28,15 +28,19 @@ import org.biojava.nbio.structure.Structure; import org.biojava.nbio.structure.StructureException; import org.biojava.nbio.structure.StructureIO; +import org.biojava.nbio.structure.StructureTools; import org.biojava.nbio.structure.align.util.AtomCache; import org.biojava.nbio.structure.cluster.SubunitClusterer; import org.biojava.nbio.structure.cluster.SubunitClustererMethod; import org.biojava.nbio.structure.cluster.SubunitClustererParameters; import org.biojava.nbio.structure.io.FileParsingParameters; +import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder; +import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation; import org.biojava.nbio.structure.symmetry.core.QuatSymmetryDetector; import org.biojava.nbio.structure.symmetry.core.QuatSymmetryParameters; import 
org.biojava.nbio.structure.symmetry.core.QuatSymmetryResults; import org.biojava.nbio.structure.symmetry.core.Stoichiometry; +import org.junit.Ignore; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -369,7 +373,7 @@ public void testPseudoIdentity95() throws IOException, StructureException { } @Test - public void testSymDetectionWithSubunitClusterByEntityId() throws IOException, StructureException { + public void testSymDetectionWithClusteringByEntityId() throws IOException, StructureException { AtomCache cache = new AtomCache(); cache.setUseMmtf(false); cache.setUseMmCif(true); @@ -390,4 +394,39 @@ public void testSymDetectionWithSubunitClusterByEntityId() throws IOException, S assertEquals("C2", symmetry.getSymmetry()); assertEquals("A2", symmetry.getStoichiometry().toString()); } + + /** + * A performance test that demonstrates how the SubunitClustererParameters.setUseEntityIdForSeqIdentityDetermination() + * has a dramatic effect in runtime versus doing alignments. + * This takes minutes with the parameter on, but hours without the parameter. 
+ */ + @Ignore("This is a performance test to be run manually") + @Test + public void testSymDetectionPerformanceLargeCapsid() throws IOException, StructureException { + AtomCache cache = new AtomCache(); + cache.setUseMmtf(false); + cache.setUseMmCif(true); + FileParsingParameters params = new FileParsingParameters(); + params.setAlignSeqRes(true); + params.setParseBioAssembly(true); + cache.setFileParsingParams(params); + StructureIO.setAtomCache(cache); + + // making sure we remove all atoms but representative before we expand, otherwise memory requirements are huge + Structure au = StructureIO.getStructure("6NHJ"); + StructureTools.reduceToRepresentativeAtoms(au); + BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder(); + List transforms = au.getPDBHeader().getBioAssemblies().get(1).getTransforms(); + Structure pdb =builder.rebuildQuaternaryStructure(au, transforms, true, false); + + SubunitClustererParameters cp = new SubunitClustererParameters(); + cp.setUseEntityIdForSeqIdentityDetermination(true); // this is the parameter that makes this fast + cp.setClustererMethod(SubunitClustererMethod.SEQUENCE); + QuatSymmetryParameters symmParams = new QuatSymmetryParameters(); + QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry( + pdb, symmParams, cp); + + assertEquals("I", symmetry.getSymmetry()); + assertEquals("A960B960C600D480E300", symmetry.getStoichiometry().toString()); + } } diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java index c62180176c..9ba8170ab5 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java @@ -1264,7 +1264,7 @@ public static final Character get1LetterCode(String groupCode3) { * 3-character code for a group. 
* */ - public static final boolean isNucleotide(String groupCode3) { + public static boolean isNucleotide(String groupCode3) { String code = groupCode3.trim(); return nucleotides30.containsKey(code) || nucleotides23.containsKey(code); @@ -1283,7 +1283,7 @@ public static final boolean isNucleotide(String groupCode3) { * @deprecated Use {@link StructureIdentifier#reduce(Structure)} instead (v. 4.2.0) */ @Deprecated - public static final Structure getReducedStructure(Structure s, + public static Structure getReducedStructure(Structure s, String chainId) throws StructureException { // since we deal here with structure alignments, // only use Model 1... @@ -1338,7 +1338,7 @@ public static final Structure getReducedStructure(Structure s, return newS; } - public static final String convertAtomsToSeq(Atom[] atoms) { + public static String convertAtomsToSeq(Atom[] atoms) { StringBuilder buf = new StringBuilder(); Group prevGroup = null; @@ -1374,7 +1374,7 @@ public static final String convertAtomsToSeq(Atom[] atoms) { * @throws StructureException * if the group cannot be found. */ - public static final Group getGroupByPDBResidueNumber(Structure struc, + public static Group getGroupByPDBResidueNumber(Structure struc, ResidueNumber pdbResNum) throws StructureException { if (struc == null || pdbResNum == null) { throw new IllegalArgumentException("Null argument(s)."); @@ -1447,7 +1447,7 @@ public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) { * @param chain * @param cutoff * @return - * @see {@link #getRepresentativeAtomsInContact(Chain, double)} + * @see #getRepresentativeAtomsInContact(Chain, double) */ public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) { Grid grid = new Grid(cutoff); @@ -1921,4 +1921,24 @@ private static String replaceFirstChar(String name, char c, char d) { return name; } + /** + * Remove all atoms but the representative atoms (C alphas or phosphates) from the given structure. 
+ * @param structure the structure + * @since 5.4.0 + */ + public static void reduceToRepresentativeAtoms(Structure structure) { + for (int modelIdx = 0; modelIdx<structure.nrModels(); modelIdx++) { + for (Chain c : structure.getPolyChains(modelIdx)) { + for (Group g : c.getAtomGroups()) { + List<Atom> atoms = g.getAtoms(); + if (g.isAminoAcid()) { + atoms.removeIf(a->!a.getName().equals(CA_ATOM_NAME)); + } else if (g.isNucleotide()) { + atoms.removeIf(a->!a.getName().equals(NUCLEOTIDE_REPRESENTATIVE)); + } + // else we keep all other atoms. We are concerned only about aminoacids and nucleotides that make up the bulk of the structures + } + } + } + } } diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java index f3abae6c3e..4224c76d0c 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java @@ -511,7 +511,8 @@ public boolean isHighConfidenceScores(double sequenceIdentity, double sequenceCo /** * Whether to use the entity id of subunits to infer that sequences are identical. * Only applies if the {@link SubunitClustererMethod} is a sequence based one. - * @return + * @return the flag + * @since 5.4.0 */ public boolean isUseEntityIdForSeqIdentityDetermination() { return useEntityIdForSeqIdentityDetermination; @@ -520,7 +521,10 @@ public boolean isUseEntityIdForSeqIdentityDetermination() { /** * Whether to use the entity id of subunits to infer that sequences are identical. * Only applies if the {@link SubunitClustererMethod} is a sequence based one. + * Note this requires {@link org.biojava.nbio.structure.io.FileParsingParameters#setAlignSeqRes(boolean)} to be + * set to true. 
* @param useEntityIdForSeqIdentityDetermination the flag to be set + * @since 5.4.0 */ public void setUseEntityIdForSeqIdentityDetermination(boolean useEntityIdForSeqIdentityDetermination) { this.useEntityIdForSeqIdentityDetermination = useEntityIdForSeqIdentityDetermination; From f779dc3d32958ca5a717a40129f73c20acca47b7 Mon Sep 17 00:00:00 2001 From: Jose Manuel Duarte Date: Thu, 23 Jan 2020 11:56:24 -0800 Subject: [PATCH 11/11] Docs --- .../symmetry/TestQuatSymmetryDetectorExamples.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java index 0d094a059a..a5cd0a9fb4 100644 --- a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java +++ b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java @@ -398,7 +398,6 @@ public void testSymDetectionWithClusteringByEntityId() throws IOException, Struc /** * A performance test that demonstrates how the SubunitClustererParameters.setUseEntityIdForSeqIdentityDetermination() * has a dramatic effect in runtime versus doing alignments. - * This takes minutes with the parameter on, but hours without the parameter. 
*/ @Ignore("This is a performance test to be run manually") @Test @@ -413,14 +412,20 @@ public void testSymDetectionPerformanceLargeCapsid() throws IOException, Structu StructureIO.setAtomCache(cache); // making sure we remove all atoms but representative before we expand, otherwise memory requirements are huge + // 6Q1F is another good example Structure au = StructureIO.getStructure("6NHJ"); StructureTools.reduceToRepresentativeAtoms(au); BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder(); List transforms = au.getPDBHeader().getBioAssemblies().get(1).getTransforms(); - Structure pdb =builder.rebuildQuaternaryStructure(au, transforms, true, false); + Structure pdb = builder.rebuildQuaternaryStructure(au, transforms, true, false); SubunitClustererParameters cp = new SubunitClustererParameters(); - cp.setUseEntityIdForSeqIdentityDetermination(true); // this is the parameter that makes this fast + + // This is the parameter that makes this fast, set it to false to see the difference. + // As of git commit ed322e387cd46344a7864a, the difference in runtime is not that huge: + // 2 minutes with true, 10 minutes with false. I observed a much larger difference before, but can't reproduce anymore - JD 2020-01-23 + cp.setUseEntityIdForSeqIdentityDetermination(true); + cp.setClustererMethod(SubunitClustererMethod.SEQUENCE); QuatSymmetryParameters symmParams = new QuatSymmetryParameters(); QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry(