diff --git a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java index 1b69ec3efc..a5cd0a9fb4 100644 --- a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java +++ b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java @@ -28,14 +28,19 @@ import org.biojava.nbio.structure.Structure; import org.biojava.nbio.structure.StructureException; import org.biojava.nbio.structure.StructureIO; +import org.biojava.nbio.structure.StructureTools; import org.biojava.nbio.structure.align.util.AtomCache; import org.biojava.nbio.structure.cluster.SubunitClusterer; import org.biojava.nbio.structure.cluster.SubunitClustererMethod; import org.biojava.nbio.structure.cluster.SubunitClustererParameters; +import org.biojava.nbio.structure.io.FileParsingParameters; +import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder; +import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation; import org.biojava.nbio.structure.symmetry.core.QuatSymmetryDetector; import org.biojava.nbio.structure.symmetry.core.QuatSymmetryParameters; import org.biojava.nbio.structure.symmetry.core.QuatSymmetryResults; import org.biojava.nbio.structure.symmetry.core.Stoichiometry; +import org.junit.Ignore; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -366,4 +371,67 @@ public void testPseudoIdentity95() throws IOException, StructureException { assertEquals(SubunitClustererMethod.SEQUENCE, symmetry.getSubunitClusters().get(0).getClustererMethod()); } + + @Test + public void testSymDetectionWithClusteringByEntityId() throws IOException, StructureException { + AtomCache cache = new AtomCache(); + cache.setUseMmtf(false); + cache.setUseMmCif(true); + FileParsingParameters params = new FileParsingParameters(); + params.setAlignSeqRes(true); + cache.setFileParsingParams(params); + StructureIO.setAtomCache(cache); + Structure pdb = StructureIO.getStructure("BIO:1SMT:1"); + + SubunitClustererParameters cp = new SubunitClustererParameters(); + cp.setUseEntityIdForSeqIdentityDetermination(true); + cp.setClustererMethod(SubunitClustererMethod.SEQUENCE); + QuatSymmetryParameters symmParams = new QuatSymmetryParameters(); + QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry( + pdb, symmParams, cp); + + // C2 symmetry, A2 stoichiometry + assertEquals("C2", symmetry.getSymmetry()); + assertEquals("A2", symmetry.getStoichiometry().toString()); + } + + /** + * A performance test that demonstrates how the SubunitClustererParameters.setUseEntityIdForSeqIdentityDetermination() + * has a dramatic effect in runtime versus doing alignments. + */ + @Ignore("This is a performance test to be run manually") + @Test + public void testSymDetectionPerformanceLargeCapsid() throws IOException, StructureException { + AtomCache cache = new AtomCache(); + cache.setUseMmtf(false); + cache.setUseMmCif(true); + FileParsingParameters params = new FileParsingParameters(); + params.setAlignSeqRes(true); + params.setParseBioAssembly(true); + cache.setFileParsingParams(params); + StructureIO.setAtomCache(cache); + + // making sure we remove all atoms but representative before we expand, otherwise memory requirements are huge + // 6Q1F is another good example + Structure au = StructureIO.getStructure("6NHJ"); + StructureTools.reduceToRepresentativeAtoms(au); + BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder(); + List transforms = au.getPDBHeader().getBioAssemblies().get(1).getTransforms(); + Structure pdb = builder.rebuildQuaternaryStructure(au, transforms, true, false); + + SubunitClustererParameters cp = new SubunitClustererParameters(); + + // This is the parameter that makes this fast, set it to false to see the difference. + // As of git commit ed322e387cd46344a7864a, the difference in runtime is not that huge: + // 2 minutes with true, 10 minutes with false. I observed a much larger difference before, but can't reproduce anymore - JD 2020-01-23 + cp.setUseEntityIdForSeqIdentityDetermination(true); + + cp.setClustererMethod(SubunitClustererMethod.SEQUENCE); + QuatSymmetryParameters symmParams = new QuatSymmetryParameters(); + QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry( + pdb, symmParams, cp); + + assertEquals("I", symmetry.getSymmetry()); + assertEquals("A960B960C600D480E300", symmetry.getStoichiometry().toString()); + } } diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java index b2438763f3..cc52262b12 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java @@ -306,12 +306,12 @@ public List getChainIds() { * used and when all chains within the entity are numbered in the same way), but * in general they will be neither unique (because of insertion codes) nor aligned. *

- * @param g - * @param c + * @param g the group + * @param c the chain * @return the aligned residue index (1 to n), if no SEQRES groups are available at all then {@link ResidueNumber#getSeqNum()} * is returned as a fall-back, if the group is not found in the SEQRES groups then -1 is returned * for the given group and chain - * @throws IllegalArgumentException if the given Chain is not a member of this EnityInfo + * @throws IllegalArgumentException if the given Chain is not a member of this EntityInfo * @see Chain#getSeqResGroup(int) */ public int getAlignedResIndex(Group g, Chain c) { diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java index c62180176c..9ba8170ab5 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java @@ -1264,7 +1264,7 @@ public static final Character get1LetterCode(String groupCode3) { * 3-character code for a group. * */ - public static final boolean isNucleotide(String groupCode3) { + public static boolean isNucleotide(String groupCode3) { String code = groupCode3.trim(); return nucleotides30.containsKey(code) || nucleotides23.containsKey(code); @@ -1283,7 +1283,7 @@ public static final boolean isNucleotide(String groupCode3) { * @deprecated Use {@link StructureIdentifier#reduce(Structure)} instead (v. 4.2.0) */ @Deprecated - public static final Structure getReducedStructure(Structure s, + public static Structure getReducedStructure(Structure s, String chainId) throws StructureException { // since we deal here with structure alignments, // only use Model 1... @@ -1338,7 +1338,7 @@ public static final Structure getReducedStructure(Structure s, return newS; } - public static final String convertAtomsToSeq(Atom[] atoms) { + public static String convertAtomsToSeq(Atom[] atoms) { StringBuilder buf = new StringBuilder(); Group prevGroup = null; @@ -1374,7 +1374,7 @@ public static final String convertAtomsToSeq(Atom[] atoms) { * @throws StructureException * if the group cannot be found. */ - public static final Group getGroupByPDBResidueNumber(Structure struc, + public static Group getGroupByPDBResidueNumber(Structure struc, ResidueNumber pdbResNum) throws StructureException { if (struc == null || pdbResNum == null) { throw new IllegalArgumentException("Null argument(s)."); @@ -1447,7 +1447,7 @@ public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) { * @param chain * @param cutoff * @return - * @see {@link #getRepresentativeAtomsInContact(Chain, double)} + * @see #getRepresentativeAtomsInContact(Chain, double) */ public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) { Grid grid = new Grid(cutoff); @@ -1921,4 +1921,24 @@ private static String replaceFirstChar(String name, char c, char d) { return name; } + /** + * Remove all atoms but the representative atoms (C alphas or phosphates) from the given structure. + * @param structure the structure + * @since 5.4.0 + */ + public static void reduceToRepresentativeAtoms(Structure structure) { + for (int modelIdx = 0; modelIdx atoms = g.getAtoms(); + if (g.isAminoAcid()) { + atoms.removeIf(a->!a.getName().equals(CA_ATOM_NAME)); + } else if (g.isNucleotide()) { + atoms.removeIf(a->!a.getName().equals(NUCLEOTIDE_REPRESENTATIVE)); + } + // else we keep all other atoms. We are concerned only about aminoacids and nucleotides that make up the bulk of the structures + } + } + } + } } diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java index 2ae2ccb83c..d73d747cb6 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java @@ -32,6 +32,8 @@ import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; import org.biojava.nbio.structure.Atom; import org.biojava.nbio.structure.Chain; +import org.biojava.nbio.structure.EntityInfo; +import org.biojava.nbio.structure.Group; import org.biojava.nbio.structure.Structure; import org.biojava.nbio.structure.StructureException; import org.biojava.nbio.structure.align.StructureAlignment; @@ -190,22 +192,35 @@ public boolean isIdenticalTo(SubunitCluster other) { * @return true if the SubunitClusters are identical, false otherwise */ public boolean isIdenticalByEntityIdTo(SubunitCluster other) { - Structure thisStruct = this.subunits.get(this.representative).getStructure(); - Structure otherStruct = other.subunits.get(other.representative).getStructure(); - String thisName = this.subunits.get(this.representative).getName(); - String otherName = other.subunits.get(this.representative).getName(); + Subunit thisSub = this.subunits.get(this.representative); + Subunit otherSub = other.subunits.get(other.representative); + String thisName = thisSub.getName(); + String otherName = otherSub.getName(); + + Structure thisStruct = thisSub.getStructure(); + Structure otherStruct = otherSub.getStructure(); + if (thisStruct == null || otherStruct == null) { + logger.info("SubunitClusters {}-{} have no referenced structures. Ignoring identity check by entity id", + thisName, + otherName); + return false; + } + if (thisStruct != otherStruct) { + // different object references: will not cluster even if entity id is same + return false; + } Chain thisChain = thisStruct.getChain(thisName); Chain otherChain = otherStruct.getChain(otherName); if (thisChain == null || otherChain == null) { logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id", - this.subunits.get(this.representative).getName(), - other.subunits.get(other.representative).getName()); + thisName, + otherName); return false; } if (thisChain.getEntityInfo() == null || otherChain.getEntityInfo() == null) { logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id", - this.subunits.get(this.representative).getName(), - other.subunits.get(other.representative).getName()); + thisName, + otherName); return false; } int thisEntityId = thisChain.getEntityInfo().getMolId(); @@ -241,7 +256,7 @@ public boolean mergeIdentical(SubunitCluster other) { * same Subunit. This is checked by comparing the entity identifiers of the subunits * if one can be found. * Thus this only makes sense when the subunits are complete chains of a - * deposited PDB entry. I + * deposited PDB entry. * * @param other * SubunitCluster @@ -252,12 +267,59 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) { if (!isIdenticalByEntityIdTo(other)) return false; + Subunit thisSub = this.subunits.get(this.representative); + Subunit otherSub = other.subunits.get(other.representative); + String thisName = thisSub.getName(); + String otherName = otherSub.getName(); + logger.info("SubunitClusters {}-{} belong to same entity. Assuming they are identical", - this.subunits.get(this.representative).getName(), - other.subunits.get(other.representative).getName()); + thisName, + otherName); - this.subunits.addAll(other.subunits); - this.subunitEQR.addAll(other.subunitEQR); + List thisAligned = new ArrayList<>(); + List otherAligned = new ArrayList<>(); + + // we've merged by entity id, we can assume structure, chain and entity are available (checked in isIdenticalByEntityIdTo()) + Structure thisStruct = thisSub.getStructure(); + Structure otherStruct = otherSub.getStructure(); + Chain thisChain = thisStruct.getChain(thisName); + Chain otherChain = otherStruct.getChain(otherName); + EntityInfo entityInfo = thisChain.getEntityInfo(); + + // Extract the aligned residues of both Subunits + for (int thisIndex=0; thisIndex < thisSub.size(); thisIndex++) { + + Group g = thisSub.getRepresentativeAtoms()[thisIndex].getGroup(); + + int seqresIndex = entityInfo.getAlignedResIndex(g, thisChain); + + if (seqresIndex == -1) { + // this might mean that FileParsingParameters.setAlignSeqRes() wasn't set to true during parsing + continue; + } + + // note the seqresindex is 1-based + Group otherG = otherChain.getSeqResGroups().get(seqresIndex - 1); + + int otherIndex = otherChain.getAtomGroups().indexOf(otherG); + if (otherIndex == -1) { + // skip residues that are unobserved in other sequence ("gaps" in the entity SEQRES alignment) + continue; + } + + // Only consider residues that are part of the SubunitCluster + if (this.subunitEQR.get(this.representative).contains(thisIndex) + && other.subunitEQR.get(other.representative).contains(otherIndex)) { + thisAligned.add(thisIndex); + otherAligned.add(otherIndex); + } + } + + if (thisAligned.size() == 0 && otherAligned.size() == 0) { + logger.warn("No equivalent aligned atoms found between SubunitClusters {}-{} via entity SEQRES alignment. Is FileParsingParameters.setAlignSeqRes() set?", thisName, otherName); + } + + updateEquivResidues(other, thisAligned, otherAligned); return true; } @@ -690,7 +752,7 @@ public SubunitClustererMethod getClustererMethod() { */ public List getAlignedAtomsSubunits() { - List alignedAtoms = Collections.emptyList(); + List alignedAtoms = new ArrayList<>(); // Loop through all subunits and add the aligned positions for (int s = 0; s < subunits.size(); s++) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java index f3abae6c3e..4224c76d0c 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java @@ -511,7 +511,8 @@ public boolean isHighConfidenceScores(double sequenceIdentity, double sequenceCo /** * Whether to use the entity id of subunits to infer that sequences are identical. * Only applies if the {@link SubunitClustererMethod} is a sequence based one. - * @return + * @return the flag + * @since 5.4.0 */ public boolean isUseEntityIdForSeqIdentityDetermination() { return useEntityIdForSeqIdentityDetermination; @@ -520,7 +521,10 @@ public boolean isUseEntityIdForSeqIdentityDetermination() { /** * Whether to use the entity id of subunits to infer that sequences are identical. * Only applies if the {@link SubunitClustererMethod} is a sequence based one. + * Note this requires {@link org.biojava.nbio.structure.io.FileParsingParameters#setAlignSeqRes(boolean)} to be + * set to true. * @param useEntityIdForSeqIdentityDetermination the flag to be set + * @since 5.4.0 */ public void setUseEntityIdForSeqIdentityDetermination(boolean useEntityIdForSeqIdentityDetermination) { this.useEntityIdForSeqIdentityDetermination = useEntityIdForSeqIdentityDetermination; diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java index 0d81d4ce5b..90b3a3f6e1 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java @@ -39,8 +39,8 @@ * @author Peter */ public class C2RotationSolver implements QuatSymmetrySolver { - private QuatSymmetrySubunits subunits = null; - private QuatSymmetryParameters parameters = null; + private QuatSymmetrySubunits subunits; + private QuatSymmetryParameters parameters; private Vector3d centroid = new Vector3d(); private Matrix4d centroidInverse = new Matrix4d(); @@ -132,7 +132,7 @@ private void solve() { } private void addEOperation() { - List permutation = Arrays.asList(new Integer[]{0,1}); + List permutation = Arrays.asList(0,1); Matrix4d transformation = new Matrix4d(); transformation.setIdentity(); combineWithTranslation(transformation); @@ -145,7 +145,6 @@ private void addEOperation() { /** * Adds translational component to rotation matrix - * @param rotTrans * @param rotation * @return */ diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java index 92b2786e8c..27d16cd6fe 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java @@ -21,6 +21,7 @@ package org.biojava.nbio.structure.symmetry.core; import org.biojava.nbio.structure.Atom; +import org.biojava.nbio.structure.Calc; import org.biojava.nbio.structure.Chain; import org.biojava.nbio.structure.cluster.SubunitCluster; import org.biojava.nbio.structure.geometry.CalcPoint; @@ -34,7 +35,7 @@ import java.util.stream.Collectors; /** - * A bean to represent information about the set of {@link Subunit} being + * A bean to represent information about the set of {@link org.biojava.nbio.structure.cluster.Subunit}s being * considered for symmetry detection. This class is a helper for the * {@link QuatSymmetryDetector} algorithm, since it calculates and caches the * {@link MomentsOfInertia} and the centroids of each Subunit. @@ -45,13 +46,13 @@ */ public class QuatSymmetrySubunits { - private List caCoords = new ArrayList(); - private List originalCenters = new ArrayList(); - private List centers = new ArrayList(); - private List unitVectors = new ArrayList(); + private List caCoords = new ArrayList<>(); + private List originalCenters = new ArrayList<>(); + private List centers = new ArrayList<>(); + private List unitVectors = new ArrayList<>(); - private List folds = new ArrayList(); - private List clusterIds = new ArrayList(); + private List folds = new ArrayList<>(); + private List clusterIds = new ArrayList<>(); private List clusters; private Point3d centroid; @@ -75,10 +76,7 @@ public QuatSymmetrySubunits(List clusters) { clusterIds.add(c); Atom[] atoms = clusters.get(c).getAlignedAtomsSubunit(s); - // Convert atoms to points - Point3d[] points = new Point3d[atoms.length]; - for (int i = 0; i < atoms.length; i++) - points[i] = atoms[i].getCoordsAsPoint3d(); + Point3d[] points = Calc.atomsToPoints(atoms); caCoords.add(points); } diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java index 52c7e305b7..4b6e55ee28 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java @@ -30,10 +30,15 @@ import org.biojava.nbio.structure.AminoAcidImpl; import org.biojava.nbio.structure.Atom; import org.biojava.nbio.structure.AtomImpl; +import org.biojava.nbio.structure.Chain; +import org.biojava.nbio.structure.ChainImpl; +import org.biojava.nbio.structure.EntityInfo; import org.biojava.nbio.structure.Group; +import org.biojava.nbio.structure.ResidueNumber; import org.biojava.nbio.structure.Structure; import org.biojava.nbio.structure.StructureException; import org.biojava.nbio.structure.StructureIO; +import org.biojava.nbio.structure.StructureImpl; import org.biojava.nbio.structure.StructureTools; import org.junit.Test; @@ -53,16 +58,7 @@ public class TestSubunitCluster { public void testMergeIdentical() { // Create an Atom Array of poly-alanine - List atoms = new ArrayList<>(10); - for (int i = 0; i < 10; i++) { - Group g = new AminoAcidImpl(); - g.setPDBName("ALA"); - Atom a = new AtomImpl(); - a.setName(StructureTools.CA_ATOM_NAME); - g.addAtom(a); - atoms.add(a); - } - Atom[] reprAtoms = atoms.toArray(new Atom[atoms.size()]); + Atom[] reprAtoms = mockAtomArray(10, "ALA", -1, null); // Create two identical SubunitCluster SubunitCluster sc1 = new SubunitCluster(new Subunit(reprAtoms, @@ -74,21 +70,12 @@ public void testMergeIdentical() { // Merged have to be true, and the merged SubunitCluster is sc1 assertTrue(merged); - assertEquals(sc1.size(), 2); - assertEquals(sc2.size(), 1); - assertEquals(sc1.length(), 10); + assertEquals(2, sc1.size()); + assertEquals(1, sc2.size()); + assertEquals(10, sc1.length()); // Create an Atom Array of poly-glycine - List atoms2 = new ArrayList<>(10); - for (int i = 0; i < 10; i++) { - Group g = new AminoAcidImpl(); - g.setPDBName("GLY"); - Atom a = new AtomImpl(); - a.setName(StructureTools.CA_ATOM_NAME); - g.addAtom(a); - atoms2.add(a); - } - Atom[] reprAtoms2 = atoms2.toArray(new Atom[atoms2.size()]); + Atom[] reprAtoms2 = mockAtomArray(10, "GLY", -1, null); SubunitCluster sc3 = new SubunitCluster(new Subunit(reprAtoms2, "subunit 1", null, null)); @@ -97,9 +84,47 @@ public void testMergeIdentical() { // Merged have to be false, and Clusters result inmodified assertFalse(merged); - assertEquals(sc1.size(), 2); - assertEquals(sc2.size(), 1); - assertEquals(sc1.length(), 10); + assertEquals(2, sc1.size()); + assertEquals(1, sc2.size()); + assertEquals(10, sc1.length()); + + } + + @Test + public void testMergeIdenticalByEntityId() { + + // Create 2 Atom Arrays, with same entity id + Structure structure = mockStructure(); + Atom[] reprAtoms1 = getAtomArray(structure.getChain("A")); + Atom[] reprAtoms2 = getAtomArray(structure.getChain("B")); + + // Create two SubunitCluster with same entity id + SubunitCluster sc1 = new SubunitCluster(new Subunit(reprAtoms1, + "A", null, structure)); + SubunitCluster sc2 = new SubunitCluster(new Subunit(reprAtoms2, + "B", null, structure)); + + boolean merged = sc1.mergeIdenticalByEntityId(sc2); + + // Merged have to be true, and the merged SubunitCluster is sc1 + assertTrue(merged); + assertEquals(2, sc1.size()); + assertEquals(1, sc2.size()); + assertEquals(9, sc1.length()); + + // Create an Atom Array of poly-glycine with a different entity id + Atom[] reprAtoms3 = getAtomArray(structure.getChain("C")); + + SubunitCluster sc3 = new SubunitCluster(new Subunit(reprAtoms3, + "C", null, structure)); + + merged = sc1.mergeIdenticalByEntityId(sc3); + + // Merged have to be false, and Clusters result unmodified + assertFalse(merged); + assertEquals(2, sc1.size()); + assertEquals(1, sc2.size()); + assertEquals(9, sc1.length()); } @@ -111,17 +136,8 @@ public void testMergeIdentical() { @Test public void testMergeSequence() throws CompoundNotFoundException { - // Create an Atom Array of ploy-alanine - List atoms = new ArrayList<>(100); - for (int i = 0; i < 100; i++) { - Group g = new AminoAcidImpl(); - g.setPDBName("ALA"); - Atom a = new AtomImpl(); - a.setName(StructureTools.CA_ATOM_NAME); - g.addAtom(a); - atoms.add(a); - } - Atom[] reprAtoms = atoms.toArray(new Atom[atoms.size()]); + // Create an Atom Array of poly-alanine + Atom[] reprAtoms = mockAtomArray(100, "ALA", -1, null); // Create two identical SubunitCluster SubunitCluster sc1 = new SubunitCluster(new Subunit(reprAtoms, @@ -135,21 +151,12 @@ public void testMergeSequence() throws CompoundNotFoundException { // Merged have to be true, and the merged SubunitCluster is sc1 assertTrue(merged); - assertEquals(sc1.size(), 2); - assertEquals(sc2.size(), 1); - assertEquals(sc1.length(), 100); + assertEquals(2, sc1.size()); + assertEquals(1, sc2.size()); + assertEquals(100, sc1.length()); // Create an Atom Array of poly-glycine - List atoms2 = new ArrayList(100); - for (int i = 0; i < 100; i++) { - Group g = new AminoAcidImpl(); - g.setPDBName("GLY"); - Atom a = new AtomImpl(); - a.setName(StructureTools.CA_ATOM_NAME); - g.addAtom(a); - atoms2.add(a); - } - Atom[] reprAtoms2 = atoms2.toArray(new Atom[atoms2.size()]); + Atom[] reprAtoms2 = mockAtomArray(100, "GLY", -1, null); SubunitCluster sc3 = new SubunitCluster(new Subunit(reprAtoms2, "subunit 3", null, null)); @@ -158,29 +165,12 @@ public void testMergeSequence() throws CompoundNotFoundException { // Merged have to be false, and Clusters result inmodified assertFalse(merged); - assertEquals(sc1.size(), 2); - assertEquals(sc2.size(), 1); - assertEquals(sc1.length(), 100); + assertEquals(2, sc1.size()); + assertEquals(1, sc2.size()); + assertEquals(100, sc1.length()); // Create an Atom Array of 9 glycine and 91 alanine - List atoms3 = new ArrayList<>(100); - for (int i = 0; i < 9; i++) { - Group g = new AminoAcidImpl(); - g.setPDBName("GLY"); - Atom a = new AtomImpl(); - a.setName(StructureTools.CA_ATOM_NAME); - g.addAtom(a); - atoms3.add(a); - } - for (int i = 0; i < 91; i++) { - Group g = new AminoAcidImpl(); - g.setPDBName("ALA"); - Atom a = new AtomImpl(); - a.setName(StructureTools.CA_ATOM_NAME); - g.addAtom(a); - atoms3.add(a); - } - Atom[] reprAtoms3 = atoms3.toArray(new Atom[atoms3.size()]); + Atom[] reprAtoms3 = mockAtomArray(9, "GLY", 91, "ALA"); SubunitCluster sc4 = new SubunitCluster(new Subunit(reprAtoms3, "subunit 4", null, null)); @@ -189,9 +179,9 @@ public void testMergeSequence() throws CompoundNotFoundException { // Merged have to be true, and the merged SubunitCluster is sc1 assertTrue(merged); - assertEquals(sc1.size(), 3); - assertEquals(sc2.size(), 1); - assertEquals(sc1.length(), 91); + assertEquals(3, sc1.size()); + assertEquals(1, sc2.size()); + assertEquals(91, sc1.length()); } @@ -232,10 +222,10 @@ public void testMergeStructure() throws StructureException, IOException { // Merged have to be true, and the merged SubunitCluster is sc1 assertTrue(merged13); assertTrue(merged24); - assertEquals(sc1.size(), 2); - assertEquals(sc2.size(), 2); - assertEquals(sc1.length(), 141); - assertEquals(sc2.length(), 146); + assertEquals(2, sc1.size()); + assertEquals(2, sc2.size()); + assertEquals(141, sc1.length()); + assertEquals(146, sc2.length()); assertEquals(sc1.getAlignedAtomsSubunit(0).length, sc1.getAlignedAtomsSubunit(1).length); assertEquals(sc2.getAlignedAtomsSubunit(0).length, @@ -245,8 +235,8 @@ public void testMergeStructure() throws StructureException, IOException { boolean merged = sc1.mergeStructure(sc2, clustererParameters); assertTrue(merged); - assertEquals(sc1.size(), 4); - assertEquals(sc1.length(), 140, 2); + assertEquals(4, sc1.size()); + assertEquals(140, sc1.length(), 2); assertEquals(sc1.getAlignedAtomsSubunit(0).length, sc1.getAlignedAtomsSubunit(2).length); @@ -278,9 +268,112 @@ public void testDivideInternally() throws StructureException, IOException { // Divided has to be true, and Subunit length shorter than half assertTrue(divided); - assertEquals(sc1.size(), 2); + assertEquals(2, sc1.size()); assertTrue(sc1.length() < 178); assertEquals(sc1.getAlignedAtomsSubunit(0).length, sc1.getAlignedAtomsSubunit(1).length); } + + /** + * Create a mock atom array, with size1 residues of type1, followed by size2 residues of type2. + * + * @param size1 the number of residues of type1 to add + * @param type1 the 3 letter code of residue + * @param size2 the number of residues of type2 to add, if -1 none are added + * @param type2 the 3 letter code of residue, if null none are added + * @return the mock atom array + */ + private Atom[] mockAtomArray(int size1, String type1, int size2, String type2) { + + List atoms = new ArrayList<>(size1 + size2); + for (int i = 0; i < size1; i++) { + Group g = new AminoAcidImpl(); + g.setPDBName(type1); + Atom a = new AtomImpl(); + a.setName(StructureTools.CA_ATOM_NAME); + g.addAtom(a); + atoms.add(a); + } + + if (size2 >= 0 && type2 !=null) { + for (int i = 0; i < size2; i++) { + Group g = new AminoAcidImpl(); + g.setPDBName(type2); + Atom a = new AtomImpl(); + a.setName(StructureTools.CA_ATOM_NAME); + g.addAtom(a); + atoms.add(a); + } + } + return atoms.toArray(new Atom[0]); + } + + /** + * Create a mock structure with 2 entities 1 (chains A, B) and 2 (chain C). + * @return a structure + */ + private Structure mockStructure() { + Structure structure = new StructureImpl(); + EntityInfo entity1 = new EntityInfo(); + entity1.setMolId(1); + EntityInfo entity2 = new EntityInfo(); + entity2.setMolId(2); + structure.addEntityInfo(entity1); + structure.addEntityInfo(entity2); + + Chain chainA = new ChainImpl(); + chainA.setId("A"); + Chain chainB = new ChainImpl(); + chainB.setId("B"); + entity1.addChain(chainA); + entity1.addChain(chainB); + Chain chainC = new ChainImpl(); + chainC.setId("C"); + entity2.addChain(chainC); + + structure.addChain(chainA); + structure.addChain(chainB); + structure.addChain(chainC); + + // entity 1: chain A 10 observed residues, chain B 9 observed residues (first unobserved) + List aGroups = getGroupList(10, "ALA", chainA); + chainA.setAtomGroups(new ArrayList<>(aGroups)); + chainA.setSeqResGroups(aGroups); + chainA.setEntityInfo(entity1); + + List bGroups = getGroupList(10, "ALA", chainB); + chainB.setAtomGroups(new ArrayList<>(bGroups.subList(1,10))); + chainB.setSeqResGroups(bGroups); + chainB.setEntityInfo(entity1); + + List cGroups = getGroupList(20, "GLY", chainC); + chainC.setAtomGroups(new ArrayList<>(cGroups)); + chainC.setSeqResGroups(cGroups); + chainC.setEntityInfo(entity2); + + return structure; + } + + private List getGroupList(int size, String type, Chain chain) { + List list = new ArrayList<>(); + for (int i=0;i