diff --git a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
index 1b69ec3efc..a5cd0a9fb4 100644
--- a/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
+++ b/biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/symmetry/TestQuatSymmetryDetectorExamples.java
@@ -28,14 +28,19 @@
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIO;
+import org.biojava.nbio.structure.StructureTools;
import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.cluster.SubunitClusterer;
import org.biojava.nbio.structure.cluster.SubunitClustererMethod;
import org.biojava.nbio.structure.cluster.SubunitClustererParameters;
+import org.biojava.nbio.structure.io.FileParsingParameters;
+import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder;
+import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
import org.biojava.nbio.structure.symmetry.core.QuatSymmetryDetector;
import org.biojava.nbio.structure.symmetry.core.QuatSymmetryParameters;
import org.biojava.nbio.structure.symmetry.core.QuatSymmetryResults;
import org.biojava.nbio.structure.symmetry.core.Stoichiometry;
+import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -366,4 +371,67 @@ public void testPseudoIdentity95() throws IOException, StructureException {
assertEquals(SubunitClustererMethod.SEQUENCE, symmetry.getSubunitClusters().get(0).getClustererMethod());
}
+
+	@Test
+	public void testSymDetectionWithClusteringByEntityId() throws IOException, StructureException {
+		// Parse from mmCIF with SEQRES alignment enabled before fetching the first bioassembly of 1SMT
+		FileParsingParameters parsingParams = new FileParsingParameters();
+		parsingParams.setAlignSeqRes(true);
+		AtomCache atomCache = new AtomCache();
+		atomCache.setUseMmtf(false);
+		atomCache.setUseMmCif(true);
+		atomCache.setFileParsingParams(parsingParams);
+		StructureIO.setAtomCache(atomCache);
+		Structure assembly = StructureIO.getStructure("BIO:1SMT:1");
+
+		SubunitClustererParameters clusterParams = new SubunitClustererParameters();
+		clusterParams.setUseEntityIdForSeqIdentityDetermination(true);
+		clusterParams.setClustererMethod(SubunitClustererMethod.SEQUENCE);
+		QuatSymmetryResults result =
+				QuatSymmetryDetector.calcGlobalSymmetry(assembly, new QuatSymmetryParameters(), clusterParams);
+
+		// Expected outcome: C2 point group with two copies of a single entity (A2)
+		assertEquals("C2", result.getSymmetry());
+		assertEquals("A2", result.getStoichiometry().toString());
+	}
+
+	/**
+	 * A performance test that demonstrates how {@link SubunitClustererParameters#setUseEntityIdForSeqIdentityDetermination(boolean)}
+	 * has a dramatic effect on runtime versus doing alignments.
+	 */
+	@Ignore("This is a performance test to be run manually")
+	@Test
+	public void testSymDetectionPerformanceLargeCapsid() throws IOException, StructureException {
+		AtomCache cache = new AtomCache();
+		cache.setUseMmtf(false);
+		cache.setUseMmCif(true);
+		FileParsingParameters params = new FileParsingParameters();
+		params.setAlignSeqRes(true);
+		params.setParseBioAssembly(true);
+		cache.setFileParsingParams(params);
+		StructureIO.setAtomCache(cache);
+
+		// Reduce to representative atoms BEFORE expanding the assembly, otherwise memory requirements are huge.
+		// 6Q1F is another good example
+		Structure au = StructureIO.getStructure("6NHJ");
+		StructureTools.reduceToRepresentativeAtoms(au);
+		BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();
+		List<BiologicalAssemblyTransformation> transforms = au.getPDBHeader().getBioAssemblies().get(1).getTransforms();
+		Structure pdb = builder.rebuildQuaternaryStructure(au, transforms, true, false);
+
+		SubunitClustererParameters cp = new SubunitClustererParameters();
+
+		// This is the parameter that makes this fast, set it to false to see the difference.
+		// As of git commit ed322e387cd46344a7864a, the difference in runtime is not that huge:
+		// 2 minutes with true, 10 minutes with false. I observed a much larger difference before, but can't reproduce anymore - JD 2020-01-23
+		cp.setUseEntityIdForSeqIdentityDetermination(true);
+
+		cp.setClustererMethod(SubunitClustererMethod.SEQUENCE);
+		QuatSymmetryParameters symmParams = new QuatSymmetryParameters();
+		QuatSymmetryResults symmetry = QuatSymmetryDetector.calcGlobalSymmetry(
+				pdb, symmParams, cp);
+
+		assertEquals("I", symmetry.getSymmetry());
+		assertEquals("A960B960C600D480E300", symmetry.getStoichiometry().toString());
+	}
}
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java
index b2438763f3..cc52262b12 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java
@@ -306,12 +306,12 @@ public List getChainIds() {
* used and when all chains within the entity are numbered in the same way), but
* in general they will be neither unique (because of insertion codes) nor aligned.
*
- * @param g
- * @param c
+ * @param g the group
+ * @param c the chain
* @return the aligned residue index (1 to n), if no SEQRES groups are available at all then {@link ResidueNumber#getSeqNum()}
* is returned as a fall-back, if the group is not found in the SEQRES groups then -1 is returned
* for the given group and chain
- * @throws IllegalArgumentException if the given Chain is not a member of this EnityInfo
+ * @throws IllegalArgumentException if the given Chain is not a member of this EntityInfo
* @see Chain#getSeqResGroup(int)
*/
public int getAlignedResIndex(Group g, Chain c) {
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java
index c62180176c..9ba8170ab5 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java
@@ -1264,7 +1264,7 @@ public static final Character get1LetterCode(String groupCode3) {
* 3-character code for a group.
*
*/
- public static final boolean isNucleotide(String groupCode3) {
+ public static boolean isNucleotide(String groupCode3) {
String code = groupCode3.trim();
return nucleotides30.containsKey(code)
|| nucleotides23.containsKey(code);
@@ -1283,7 +1283,7 @@ public static final boolean isNucleotide(String groupCode3) {
* @deprecated Use {@link StructureIdentifier#reduce(Structure)} instead (v. 4.2.0)
*/
@Deprecated
- public static final Structure getReducedStructure(Structure s,
+ public static Structure getReducedStructure(Structure s,
String chainId) throws StructureException {
// since we deal here with structure alignments,
// only use Model 1...
@@ -1338,7 +1338,7 @@ public static final Structure getReducedStructure(Structure s,
return newS;
}
- public static final String convertAtomsToSeq(Atom[] atoms) {
+ public static String convertAtomsToSeq(Atom[] atoms) {
StringBuilder buf = new StringBuilder();
Group prevGroup = null;
@@ -1374,7 +1374,7 @@ public static final String convertAtomsToSeq(Atom[] atoms) {
* @throws StructureException
* if the group cannot be found.
*/
- public static final Group getGroupByPDBResidueNumber(Structure struc,
+ public static Group getGroupByPDBResidueNumber(Structure struc,
ResidueNumber pdbResNum) throws StructureException {
if (struc == null || pdbResNum == null) {
throw new IllegalArgumentException("Null argument(s).");
@@ -1447,7 +1447,7 @@ public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) {
* @param chain
* @param cutoff
* @return
- * @see {@link #getRepresentativeAtomsInContact(Chain, double)}
+ * @see #getRepresentativeAtomsInContact(Chain, double)
*/
public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) {
Grid grid = new Grid(cutoff);
@@ -1921,4 +1921,24 @@ private static String replaceFirstChar(String name, char c, char d) {
return name;
}
+	/**
+	 * Remove all atoms but the representative atoms (C alphas or phosphates) from the given structure.
+	 * @param structure the structure
+	 * @since 5.4.0
+	 */
+	public static void reduceToRepresentativeAtoms(Structure structure) {
+		for (int modelIdx = 0; modelIdx < structure.nrModels(); modelIdx++) {
+			for (Chain c : structure.getPolyChains(modelIdx)) { // NOTE(review): loop headers rebuilt from a garbled line - confirm accessor
+				for (Group g : c.getAtomGroups()) {
+					List<Atom> atoms = g.getAtoms();
+					if (g.isAminoAcid()) {
+						atoms.removeIf(a->!a.getName().equals(CA_ATOM_NAME));
+					} else if (g.isNucleotide()) {
+						atoms.removeIf(a->!a.getName().equals(NUCLEOTIDE_REPRESENTATIVE));
+					}
+					// else we keep all other atoms. We are concerned only about amino acids and nucleotides that make up the bulk of the structures
+				}
+			}
+		}
+	}
}
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
index 2ae2ccb83c..d73d747cb6 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitCluster.java
@@ -32,6 +32,8 @@
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Chain;
+import org.biojava.nbio.structure.EntityInfo;
+import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.align.StructureAlignment;
@@ -190,22 +192,35 @@ public boolean isIdenticalTo(SubunitCluster other) {
* @return true if the SubunitClusters are identical, false otherwise
*/
public boolean isIdenticalByEntityIdTo(SubunitCluster other) {
- Structure thisStruct = this.subunits.get(this.representative).getStructure();
- Structure otherStruct = other.subunits.get(other.representative).getStructure();
- String thisName = this.subunits.get(this.representative).getName();
- String otherName = other.subunits.get(this.representative).getName();
+ Subunit thisSub = this.subunits.get(this.representative);
+ Subunit otherSub = other.subunits.get(other.representative);
+ String thisName = thisSub.getName();
+ String otherName = otherSub.getName();
+
+ Structure thisStruct = thisSub.getStructure();
+ Structure otherStruct = otherSub.getStructure();
+ if (thisStruct == null || otherStruct == null) {
+ logger.info("SubunitClusters {}-{} have no referenced structures. Ignoring identity check by entity id",
+ thisName,
+ otherName);
+ return false;
+ }
+ if (thisStruct != otherStruct) {
+ // different object references: will not cluster even if entity id is same
+ return false;
+ }
Chain thisChain = thisStruct.getChain(thisName);
Chain otherChain = otherStruct.getChain(otherName);
if (thisChain == null || otherChain == null) {
logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id",
- this.subunits.get(this.representative).getName(),
- other.subunits.get(other.representative).getName());
+ thisName,
+ otherName);
return false;
}
if (thisChain.getEntityInfo() == null || otherChain.getEntityInfo() == null) {
logger.info("Can't determine entity ids of SubunitClusters {}-{}. Ignoring identity check by entity id",
- this.subunits.get(this.representative).getName(),
- other.subunits.get(other.representative).getName());
+ thisName,
+ otherName);
return false;
}
int thisEntityId = thisChain.getEntityInfo().getMolId();
@@ -241,7 +256,7 @@ public boolean mergeIdentical(SubunitCluster other) {
* same Subunit. This is checked by comparing the entity identifiers of the subunits
* if one can be found.
* Thus this only makes sense when the subunits are complete chains of a
- * deposited PDB entry. I
+ * deposited PDB entry.
*
* @param other
* SubunitCluster
@@ -252,12 +267,59 @@ public boolean mergeIdenticalByEntityId(SubunitCluster other) {
if (!isIdenticalByEntityIdTo(other))
return false;
+ Subunit thisSub = this.subunits.get(this.representative);
+ Subunit otherSub = other.subunits.get(other.representative);
+ String thisName = thisSub.getName();
+ String otherName = otherSub.getName();
+
logger.info("SubunitClusters {}-{} belong to same entity. Assuming they are identical",
- this.subunits.get(this.representative).getName(),
- other.subunits.get(other.representative).getName());
+ thisName,
+ otherName);
- this.subunits.addAll(other.subunits);
- this.subunitEQR.addAll(other.subunitEQR);
+ List thisAligned = new ArrayList<>();
+ List otherAligned = new ArrayList<>();
+
+ // we've merged by entity id, we can assume structure, chain and entity are available (checked in isIdenticalByEntityIdTo())
+ Structure thisStruct = thisSub.getStructure();
+ Structure otherStruct = otherSub.getStructure();
+ Chain thisChain = thisStruct.getChain(thisName);
+ Chain otherChain = otherStruct.getChain(otherName);
+ EntityInfo entityInfo = thisChain.getEntityInfo();
+
+ // Extract the aligned residues of both Subunits
+ for (int thisIndex=0; thisIndex < thisSub.size(); thisIndex++) {
+
+ Group g = thisSub.getRepresentativeAtoms()[thisIndex].getGroup();
+
+ int seqresIndex = entityInfo.getAlignedResIndex(g, thisChain);
+
+ if (seqresIndex == -1) {
+ // this might mean that FileParsingParameters.setAlignSeqRes() wasn't set to true during parsing
+ continue;
+ }
+
+ // note the seqresindex is 1-based
+ Group otherG = otherChain.getSeqResGroups().get(seqresIndex - 1);
+
+ int otherIndex = otherChain.getAtomGroups().indexOf(otherG);
+ if (otherIndex == -1) {
+ // skip residues that are unobserved in other sequence ("gaps" in the entity SEQRES alignment)
+ continue;
+ }
+
+ // Only consider residues that are part of the SubunitCluster
+ if (this.subunitEQR.get(this.representative).contains(thisIndex)
+ && other.subunitEQR.get(other.representative).contains(otherIndex)) {
+ thisAligned.add(thisIndex);
+ otherAligned.add(otherIndex);
+ }
+ }
+
+ if (thisAligned.size() == 0 && otherAligned.size() == 0) {
+ logger.warn("No equivalent aligned atoms found between SubunitClusters {}-{} via entity SEQRES alignment. Is FileParsingParameters.setAlignSeqRes() set?", thisName, otherName);
+ }
+
+ updateEquivResidues(other, thisAligned, otherAligned);
return true;
}
@@ -690,7 +752,7 @@ public SubunitClustererMethod getClustererMethod() {
*/
public List getAlignedAtomsSubunits() {
- List alignedAtoms = Collections.emptyList();
+ List alignedAtoms = new ArrayList<>();
// Loop through all subunits and add the aligned positions
for (int s = 0; s < subunits.size(); s++)
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java
index f3abae6c3e..4224c76d0c 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cluster/SubunitClustererParameters.java
@@ -511,7 +511,8 @@ public boolean isHighConfidenceScores(double sequenceIdentity, double sequenceCo
/**
* Whether to use the entity id of subunits to infer that sequences are identical.
* Only applies if the {@link SubunitClustererMethod} is a sequence based one.
- * @return
+ * @return the flag
+ * @since 5.4.0
*/
public boolean isUseEntityIdForSeqIdentityDetermination() {
return useEntityIdForSeqIdentityDetermination;
@@ -520,7 +521,10 @@ public boolean isUseEntityIdForSeqIdentityDetermination() {
/**
* Whether to use the entity id of subunits to infer that sequences are identical.
* Only applies if the {@link SubunitClustererMethod} is a sequence based one.
+ * Note this requires {@link org.biojava.nbio.structure.io.FileParsingParameters#setAlignSeqRes(boolean)} to be
+ * set to true.
* @param useEntityIdForSeqIdentityDetermination the flag to be set
+ * @since 5.4.0
*/
public void setUseEntityIdForSeqIdentityDetermination(boolean useEntityIdForSeqIdentityDetermination) {
this.useEntityIdForSeqIdentityDetermination = useEntityIdForSeqIdentityDetermination;
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java
index 0d81d4ce5b..90b3a3f6e1 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/C2RotationSolver.java
@@ -39,8 +39,8 @@
* @author Peter
*/
public class C2RotationSolver implements QuatSymmetrySolver {
- private QuatSymmetrySubunits subunits = null;
- private QuatSymmetryParameters parameters = null;
+ private QuatSymmetrySubunits subunits;
+ private QuatSymmetryParameters parameters;
private Vector3d centroid = new Vector3d();
private Matrix4d centroidInverse = new Matrix4d();
@@ -132,7 +132,7 @@ private void solve() {
}
private void addEOperation() {
- List permutation = Arrays.asList(new Integer[]{0,1});
+ List permutation = Arrays.asList(0,1);
Matrix4d transformation = new Matrix4d();
transformation.setIdentity();
combineWithTranslation(transformation);
@@ -145,7 +145,6 @@ private void addEOperation() {
/**
* Adds translational component to rotation matrix
- * @param rotTrans
* @param rotation
* @return
*/
diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java
index 92b2786e8c..27d16cd6fe 100644
--- a/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java
+++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/symmetry/core/QuatSymmetrySubunits.java
@@ -21,6 +21,7 @@
package org.biojava.nbio.structure.symmetry.core;
import org.biojava.nbio.structure.Atom;
+import org.biojava.nbio.structure.Calc;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.cluster.SubunitCluster;
import org.biojava.nbio.structure.geometry.CalcPoint;
@@ -34,7 +35,7 @@
import java.util.stream.Collectors;
/**
- * A bean to represent information about the set of {@link Subunit} being
+ * A bean to represent information about the set of {@link org.biojava.nbio.structure.cluster.Subunit}s being
* considered for symmetry detection. This class is a helper for the
* {@link QuatSymmetryDetector} algorithm, since it calculates and caches the
* {@link MomentsOfInertia} and the centroids of each Subunit.
@@ -45,13 +46,13 @@
*/
public class QuatSymmetrySubunits {
- private List caCoords = new ArrayList();
- private List originalCenters = new ArrayList();
- private List centers = new ArrayList();
- private List unitVectors = new ArrayList();
+ private List caCoords = new ArrayList<>();
+ private List originalCenters = new ArrayList<>();
+ private List centers = new ArrayList<>();
+ private List unitVectors = new ArrayList<>();
- private List folds = new ArrayList();
- private List clusterIds = new ArrayList();
+ private List folds = new ArrayList<>();
+ private List clusterIds = new ArrayList<>();
private List clusters;
private Point3d centroid;
@@ -75,10 +76,7 @@ public QuatSymmetrySubunits(List clusters) {
clusterIds.add(c);
Atom[] atoms = clusters.get(c).getAlignedAtomsSubunit(s);
- // Convert atoms to points
- Point3d[] points = new Point3d[atoms.length];
- for (int i = 0; i < atoms.length; i++)
- points[i] = atoms[i].getCoordsAsPoint3d();
+ Point3d[] points = Calc.atomsToPoints(atoms);
caCoords.add(points);
}
diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java
index 52c7e305b7..4b6e55ee28 100644
--- a/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java
+++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/cluster/TestSubunitCluster.java
@@ -30,10 +30,15 @@
import org.biojava.nbio.structure.AminoAcidImpl;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.AtomImpl;
+import org.biojava.nbio.structure.Chain;
+import org.biojava.nbio.structure.ChainImpl;
+import org.biojava.nbio.structure.EntityInfo;
import org.biojava.nbio.structure.Group;
+import org.biojava.nbio.structure.ResidueNumber;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIO;
+import org.biojava.nbio.structure.StructureImpl;
import org.biojava.nbio.structure.StructureTools;
import org.junit.Test;
@@ -53,16 +58,7 @@ public class TestSubunitCluster {
public void testMergeIdentical() {
// Create an Atom Array of poly-alanine
- List atoms = new ArrayList<>(10);
- for (int i = 0; i < 10; i++) {
- Group g = new AminoAcidImpl();
- g.setPDBName("ALA");
- Atom a = new AtomImpl();
- a.setName(StructureTools.CA_ATOM_NAME);
- g.addAtom(a);
- atoms.add(a);
- }
- Atom[] reprAtoms = atoms.toArray(new Atom[atoms.size()]);
+ Atom[] reprAtoms = mockAtomArray(10, "ALA", -1, null);
// Create two identical SubunitCluster
SubunitCluster sc1 = new SubunitCluster(new Subunit(reprAtoms,
@@ -74,21 +70,12 @@ public void testMergeIdentical() {
// Merged have to be true, and the merged SubunitCluster is sc1
assertTrue(merged);
- assertEquals(sc1.size(), 2);
- assertEquals(sc2.size(), 1);
- assertEquals(sc1.length(), 10);
+ assertEquals(2, sc1.size());
+ assertEquals(1, sc2.size());
+ assertEquals(10, sc1.length());
// Create an Atom Array of poly-glycine
- List atoms2 = new ArrayList<>(10);
- for (int i = 0; i < 10; i++) {
- Group g = new AminoAcidImpl();
- g.setPDBName("GLY");
- Atom a = new AtomImpl();
- a.setName(StructureTools.CA_ATOM_NAME);
- g.addAtom(a);
- atoms2.add(a);
- }
- Atom[] reprAtoms2 = atoms2.toArray(new Atom[atoms2.size()]);
+ Atom[] reprAtoms2 = mockAtomArray(10, "GLY", -1, null);
SubunitCluster sc3 = new SubunitCluster(new Subunit(reprAtoms2,
"subunit 1", null, null));
@@ -97,9 +84,47 @@ public void testMergeIdentical() {
// Merged have to be false, and Clusters result inmodified
assertFalse(merged);
- assertEquals(sc1.size(), 2);
- assertEquals(sc2.size(), 1);
- assertEquals(sc1.length(), 10);
+ assertEquals(2, sc1.size());
+ assertEquals(1, sc2.size());
+ assertEquals(10, sc1.length());
+
+ }
+
+ @Test
+ public void testMergeIdenticalByEntityId() {
+ // Clusters must merge only when their representative chains belong to the same entity
+ // Chains A and B of the mock structure share entity 1
+ Structure structure = mockStructure();
+ Atom[] reprAtoms1 = getAtomArray(structure.getChain("A"));
+ Atom[] reprAtoms2 = getAtomArray(structure.getChain("B"));
+
+ // Create two SubunitCluster with same entity id
+ SubunitCluster sc1 = new SubunitCluster(new Subunit(reprAtoms1,
+ "A", null, structure));
+ SubunitCluster sc2 = new SubunitCluster(new Subunit(reprAtoms2,
+ "B", null, structure));
+
+ boolean merged = sc1.mergeIdenticalByEntityId(sc2);
+
+ // The merge must succeed: sc1 absorbs the subunit of sc2, while sc2 itself is left as it was
+ assertTrue(merged);
+ assertEquals(2, sc1.size());
+ assertEquals(1, sc2.size());
+ assertEquals(9, sc1.length()); // chain B's first residue is unobserved in the mock, so 9 positions align
+
+ // Chain C (poly-glycine) belongs to a different entity (entity 2)
+ Atom[] reprAtoms3 = getAtomArray(structure.getChain("C"));
+
+ SubunitCluster sc3 = new SubunitCluster(new Subunit(reprAtoms3,
+ "C", null, structure));
+
+ merged = sc1.mergeIdenticalByEntityId(sc3);
+
+ // The merge must be rejected, and the clusters must remain unmodified
+ assertFalse(merged);
+ assertEquals(2, sc1.size());
+ assertEquals(1, sc2.size());
+ assertEquals(9, sc1.length());
+ }
@@ -111,17 +136,8 @@ public void testMergeIdentical() {
@Test
public void testMergeSequence() throws CompoundNotFoundException {
- // Create an Atom Array of ploy-alanine
- List atoms = new ArrayList<>(100);
- for (int i = 0; i < 100; i++) {
- Group g = new AminoAcidImpl();
- g.setPDBName("ALA");
- Atom a = new AtomImpl();
- a.setName(StructureTools.CA_ATOM_NAME);
- g.addAtom(a);
- atoms.add(a);
- }
- Atom[] reprAtoms = atoms.toArray(new Atom[atoms.size()]);
+ // Create an Atom Array of poly-alanine
+ Atom[] reprAtoms = mockAtomArray(100, "ALA", -1, null);
// Create two identical SubunitCluster
SubunitCluster sc1 = new SubunitCluster(new Subunit(reprAtoms,
@@ -135,21 +151,12 @@ public void testMergeSequence() throws CompoundNotFoundException {
// Merged have to be true, and the merged SubunitCluster is sc1
assertTrue(merged);
- assertEquals(sc1.size(), 2);
- assertEquals(sc2.size(), 1);
- assertEquals(sc1.length(), 100);
+ assertEquals(2, sc1.size());
+ assertEquals(1, sc2.size());
+ assertEquals(100, sc1.length());
// Create an Atom Array of poly-glycine
- List atoms2 = new ArrayList(100);
- for (int i = 0; i < 100; i++) {
- Group g = new AminoAcidImpl();
- g.setPDBName("GLY");
- Atom a = new AtomImpl();
- a.setName(StructureTools.CA_ATOM_NAME);
- g.addAtom(a);
- atoms2.add(a);
- }
- Atom[] reprAtoms2 = atoms2.toArray(new Atom[atoms2.size()]);
+ Atom[] reprAtoms2 = mockAtomArray(100, "GLY", -1, null);
SubunitCluster sc3 = new SubunitCluster(new Subunit(reprAtoms2,
"subunit 3", null, null));
@@ -158,29 +165,12 @@ public void testMergeSequence() throws CompoundNotFoundException {
// Merged have to be false, and Clusters result inmodified
assertFalse(merged);
- assertEquals(sc1.size(), 2);
- assertEquals(sc2.size(), 1);
- assertEquals(sc1.length(), 100);
+ assertEquals(2, sc1.size());
+ assertEquals(1, sc2.size());
+ assertEquals(100, sc1.length());
// Create an Atom Array of 9 glycine and 91 alanine
- List atoms3 = new ArrayList<>(100);
- for (int i = 0; i < 9; i++) {
- Group g = new AminoAcidImpl();
- g.setPDBName("GLY");
- Atom a = new AtomImpl();
- a.setName(StructureTools.CA_ATOM_NAME);
- g.addAtom(a);
- atoms3.add(a);
- }
- for (int i = 0; i < 91; i++) {
- Group g = new AminoAcidImpl();
- g.setPDBName("ALA");
- Atom a = new AtomImpl();
- a.setName(StructureTools.CA_ATOM_NAME);
- g.addAtom(a);
- atoms3.add(a);
- }
- Atom[] reprAtoms3 = atoms3.toArray(new Atom[atoms3.size()]);
+ Atom[] reprAtoms3 = mockAtomArray(9, "GLY", 91, "ALA");
SubunitCluster sc4 = new SubunitCluster(new Subunit(reprAtoms3,
"subunit 4", null, null));
@@ -189,9 +179,9 @@ public void testMergeSequence() throws CompoundNotFoundException {
// Merged have to be true, and the merged SubunitCluster is sc1
assertTrue(merged);
- assertEquals(sc1.size(), 3);
- assertEquals(sc2.size(), 1);
- assertEquals(sc1.length(), 91);
+ assertEquals(3, sc1.size());
+ assertEquals(1, sc2.size());
+ assertEquals(91, sc1.length());
}
@@ -232,10 +222,10 @@ public void testMergeStructure() throws StructureException, IOException {
// Merged have to be true, and the merged SubunitCluster is sc1
assertTrue(merged13);
assertTrue(merged24);
- assertEquals(sc1.size(), 2);
- assertEquals(sc2.size(), 2);
- assertEquals(sc1.length(), 141);
- assertEquals(sc2.length(), 146);
+ assertEquals(2, sc1.size());
+ assertEquals(2, sc2.size());
+ assertEquals(141, sc1.length());
+ assertEquals(146, sc2.length());
assertEquals(sc1.getAlignedAtomsSubunit(0).length,
sc1.getAlignedAtomsSubunit(1).length);
assertEquals(sc2.getAlignedAtomsSubunit(0).length,
@@ -245,8 +235,8 @@ public void testMergeStructure() throws StructureException, IOException {
boolean merged = sc1.mergeStructure(sc2, clustererParameters);
assertTrue(merged);
- assertEquals(sc1.size(), 4);
- assertEquals(sc1.length(), 140, 2);
+ assertEquals(4, sc1.size());
+ assertEquals(140, sc1.length(), 2);
assertEquals(sc1.getAlignedAtomsSubunit(0).length,
sc1.getAlignedAtomsSubunit(2).length);
@@ -278,9 +268,112 @@ public void testDivideInternally() throws StructureException, IOException {
// Divided has to be true, and Subunit length shorter than half
assertTrue(divided);
- assertEquals(sc1.size(), 2);
+ assertEquals(2, sc1.size());
assertTrue(sc1.length() < 178);
assertEquals(sc1.getAlignedAtomsSubunit(0).length,
sc1.getAlignedAtomsSubunit(1).length);
}
+
+	/**
+	 * Create a mock atom array of CA atoms, with size1 residues of type1, followed by size2 residues of type2.
+	 *
+	 * @param size1 the number of residues of type1 to add
+	 * @param type1 the 3 letter code of residue
+	 * @param size2 the number of residues of type2 to add, if -1 (or any negative value) none are added
+	 * @param type2 the 3 letter code of residue, if null none are added
+	 * @return the mock atom array, one CA atom per residue
+	 */
+	private Atom[] mockAtomArray(int size1, String type1, int size2, String type2) {
+
+		List<Atom> atoms = new ArrayList<>(size1 + Math.max(size2, 0)); // size2 may be the -1 sentinel
+		for (int i = 0; i < size1; i++) {
+			Group g = new AminoAcidImpl();
+			g.setPDBName(type1);
+			Atom a = new AtomImpl();
+			a.setName(StructureTools.CA_ATOM_NAME);
+			g.addAtom(a);
+			atoms.add(a);
+		}
+
+		if (size2 >= 0 && type2 != null) {
+			for (int i = 0; i < size2; i++) {
+				Group g = new AminoAcidImpl();
+				g.setPDBName(type2);
+				Atom a = new AtomImpl();
+				a.setName(StructureTools.CA_ATOM_NAME);
+				g.addAtom(a);
+				atoms.add(a);
+			}
+		}
+		return atoms.toArray(new Atom[0]);
+	}
+
+	/**
+	 * Create a mock structure with 2 entities: entity 1 (chains A, B) and entity 2 (chain C).
+	 * @return a structure
+	 */
+	private Structure mockStructure() {
+		Structure structure = new StructureImpl();
+		EntityInfo entity1 = new EntityInfo();
+		entity1.setMolId(1);
+		EntityInfo entity2 = new EntityInfo();
+		entity2.setMolId(2);
+		structure.addEntityInfo(entity1);
+		structure.addEntityInfo(entity2);
+
+		Chain chainA = new ChainImpl();
+		chainA.setId("A");
+		Chain chainB = new ChainImpl();
+		chainB.setId("B");
+		entity1.addChain(chainA);
+		entity1.addChain(chainB);
+		Chain chainC = new ChainImpl();
+		chainC.setId("C");
+		entity2.addChain(chainC);
+
+		structure.addChain(chainA);
+		structure.addChain(chainB);
+		structure.addChain(chainC);
+
+		// entity 1: chain A 10 observed residues, chain B 9 observed residues (first unobserved)
+		List<Group> aGroups = getGroupList(10, "ALA", chainA);
+		chainA.setAtomGroups(new ArrayList<>(aGroups));
+		chainA.setSeqResGroups(aGroups);
+		chainA.setEntityInfo(entity1);
+
+		List<Group> bGroups = getGroupList(10, "ALA", chainB);
+		chainB.setAtomGroups(new ArrayList<>(bGroups.subList(1,10)));
+		chainB.setSeqResGroups(bGroups);
+		chainB.setEntityInfo(entity1);
+
+		List<Group> cGroups = getGroupList(20, "GLY", chainC);
+		chainC.setAtomGroups(new ArrayList<>(cGroups));
+		chainC.setSeqResGroups(cGroups);
+		chainC.setEntityInfo(entity2);
+
+		return structure;
+	}
+
+ private List getGroupList(int size, String type, Chain chain) {
+ List list = new ArrayList<>();
+ for (int i=0;i