Skip to content

Commit ebabdf7

Browse files
authored
Avoid ASA calculation for redundant NCS interfaces (#823)
* WIP: towards calculating interface ASA non-redundantly for NCS structures * Working implementation * More assertions * Docs * More efficient * Not storing the ASA values in all redundant interfaces, it can take a lot of memory * Bugfix: removing also ncs-clustered interfaces in removeInterfacesBelowArea * Bugfix: now removing interfaces below area properly. Test for it * Bugfix: clustering of interfaces depended on the ids. It should work whether ids are there or not * Better solution: propagate the interface area values * Sorting with lambda and some docs * Set groupAsas as a reference to the same objects, thus using no more mem * No wildcard imports * Bugfix: order of molecules in interfaces can be inverted with respect to reference * Extracting constant * A much better solution without a regex
1 parent d622217 commit ebabdf7

5 files changed

Lines changed: 224 additions & 40 deletions

File tree

biojava-structure/src/main/java/org/biojava/nbio/structure/asa/GroupAsa.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,8 @@ public GroupAsa(Group g) {
9797
this.g = g;
9898

9999
int groupNoHSize = getGroupNoHSize();
100-
atomAsaUs = new ArrayList<Double>(groupNoHSize);
101-
atomAsaCs = new ArrayList<Double>(groupNoHSize);
100+
atomAsaUs = new ArrayList<>(groupNoHSize);
101+
atomAsaCs = new ArrayList<>(groupNoHSize);
102102
}
103103

104104
private int getGroupNoHSize() {

biojava-structure/src/main/java/org/biojava/nbio/structure/contact/StructureInterface.java

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -104,18 +104,18 @@ public StructureInterface(
104104
AtomContactSet contacts,
105105
CrystalTransform firstTransf, CrystalTransform secondTransf) {
106106

107-
this.molecules = new Pair<Atom[]>(firstMolecule, secondMolecule);
108-
this.moleculeIds = new Pair<String>(firstMoleculeId,secondMoleculeId);
107+
this.molecules = new Pair<>(firstMolecule, secondMolecule);
108+
this.moleculeIds = new Pair<>(firstMoleculeId,secondMoleculeId);
109109
this.contacts = contacts;
110-
this.transforms = new Pair<CrystalTransform>(firstTransf, secondTransf);
110+
this.transforms = new Pair<>(firstTransf, secondTransf);
111111
}
112112

113113
/**
114114
* Constructs an empty StructureInterface
115115
*/
116116
public StructureInterface() {
117-
this.groupAsas1 = new TreeMap<ResidueNumber, GroupAsa>();
118-
this.groupAsas2 = new TreeMap<ResidueNumber, GroupAsa>();
117+
this.groupAsas1 = new TreeMap<>();
118+
this.groupAsas2 = new TreeMap<>();
119119
}
120120

121121
public int getId() {
@@ -133,7 +133,7 @@ public void setId(int id) {
133133
* @return
134134
*/
135135
public Pair<String> getCrystalIds() {
136-
return new Pair<String>(
136+
return new Pair<>(
137137
moleculeIds.getFirst()+transforms.getFirst().getTransformId()+transforms.getFirst().getCrystalTranslation(),
138138
moleculeIds.getSecond()+transforms.getSecond().getTransformId()+transforms.getSecond().getCrystalTranslation());
139139
}
@@ -197,7 +197,16 @@ public void setTransforms(Pair<CrystalTransform> transforms) {
197197
this.transforms = transforms;
198198
}
199199

200-
protected void setAsas(double[] asas1, double[] asas2, int nSpherePoints, int nThreads, int cofactorSizeToUse) {
200+
/**
201+
* Set ASA annotations by passing the uncomplexed ASA values of the 2 partners.
202+
* This will calculate complexed ASA and set the ASA values in the member variables.
203+
* @param asas1 ASA values for atoms of partner 1
204+
* @param asas2 ASA values for atoms of partner 2
205+
* @param nSpherePoints the number of sphere points to be used for complexed ASA calculation
206+
* @param nThreads the number of threads to be used for complexed ASA calculation
207+
* @param cofactorSizeToUse the minimum size of cofactor molecule (non-chain HET atoms) that will be used in ASA calculation
208+
*/
209+
void setAsas(double[] asas1, double[] asas2, int nSpherePoints, int nThreads, int cofactorSizeToUse) {
201210

202211
Atom[] atoms = getAtomsForAsa(cofactorSizeToUse);
203212
AsaCalculator asaCalc = new AsaCalculator(atoms,
@@ -296,7 +305,7 @@ protected Atom[] getAtomsForAsa(int cofactorSizeToUse) {
296305
* @return
297306
*/
298307
private static final Atom[] getAllNonHAtomArray(Atom[] m, int minSizeHetAtomToInclude) {
299-
List<Atom> atoms = new ArrayList<Atom>();
308+
List<Atom> atoms = new ArrayList<>();
300309

301310
for (Atom a:m){
302311

@@ -411,6 +420,14 @@ public void setFirstGroupAsa(GroupAsa groupAsa) {
411420
groupAsas1.put(groupAsa.getGroup().getResidueNumber(), groupAsa);
412421
}
413422

423+
void setFirstGroupAsas(Map<ResidueNumber, GroupAsa> firstGroupAsas) {
424+
this.groupAsas1 = firstGroupAsas;
425+
}
426+
427+
void setSecondGroupAsas(Map<ResidueNumber, GroupAsa> secondGroupAsas) {
428+
this.groupAsas2 = secondGroupAsas;
429+
}
430+
414431
/**
415432
* Gets a map of ResidueNumbers to GroupAsas for all groups of second chain.
416433
* @return

biojava-structure/src/main/java/org/biojava/nbio/structure/contact/StructureInterfaceList.java

Lines changed: 95 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,14 @@
2121
package org.biojava.nbio.structure.contact;
2222

2323
import java.io.Serializable;
24-
import java.util.*;
24+
import java.util.ArrayList;
25+
import java.util.Collections;
26+
import java.util.Iterator;
27+
import java.util.List;
28+
import java.util.Map;
29+
import java.util.Optional;
30+
import java.util.Set;
31+
import java.util.TreeMap;
2532

2633
import org.biojava.nbio.core.util.SingleLinkageClusterer;
2734
import org.biojava.nbio.structure.Atom;
@@ -72,6 +79,8 @@ public class StructureInterfaceList implements Serializable, Iterable<StructureI
7279
private List<StructureInterfaceCluster> clusters = null;
7380
private List<StructureInterfaceCluster> clustersNcs = null;
7481

82+
private Map<String, String> chainOrigNamesMap;
83+
7584
public StructureInterfaceList() {
7685
this.list = new ArrayList<>();
7786
}
@@ -129,8 +138,20 @@ public void calcAsas(int nSpherePoints, int nThreads, int cofactorSizeToUse) {
129138
Map<String, Atom[]> uniqAsaChains = new TreeMap<>();
130139
Map<String, double[]> chainAsas = new TreeMap<>();
131140

141+
List<StructureInterface> redundancyReducedList;
142+
if (clustersNcs != null) {
143+
redundancyReducedList = new ArrayList<>();
144+
for (StructureInterfaceCluster ncsCluster : clustersNcs) {
145+
// we use the first one in list as the only one for which we calculate ASAs
146+
redundancyReducedList.add(ncsCluster.getMembers().get(0));
147+
}
148+
149+
} else {
150+
redundancyReducedList = list;
151+
}
152+
132153
// first we gather rotation-unique chains (in terms of AU id and transform id)
133-
for (StructureInterface interf:list) {
154+
for (StructureInterface interf:redundancyReducedList) {
134155
String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId();
135156
String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId();
136157

@@ -159,12 +180,12 @@ public void calcAsas(int nSpherePoints, int nThreads, int cofactorSizeToUse) {
159180

160181
logger.debug("Calculated uncomplexed ASA for {} orientation-unique chains. Time: {} s", uniqAsaChains.size(), ((end-start)/1000.0));
161182

162-
logger.debug ("Will calculate complexed ASA for {} pairwise complexes.", list.size());
183+
logger.debug ("Will calculate complexed ASA for {} pairwise complexes.", redundancyReducedList.size());
163184

164185
start = System.currentTimeMillis();
165186

166187
// now we calculate the ASAs for each of the complexes
167-
for (StructureInterface interf:list) {
188+
for (StructureInterface interf:redundancyReducedList) {
168189

169190
String molecId1 = interf.getMoleculeIds().getFirst()+interf.getTransforms().getFirst().getTransformId();
170191
String molecId2 = interf.getMoleculeIds().getSecond()+interf.getTransforms().getSecond().getTransformId();
@@ -176,15 +197,53 @@ public void calcAsas(int nSpherePoints, int nThreads, int cofactorSizeToUse) {
176197
}
177198
end = System.currentTimeMillis();
178199

179-
logger.debug("Calculated complexes ASA for {} pairwise complexes. Time: {} s", list.size(), ((end-start)/1000.0));
200+
logger.debug("Calculated complexes ASA for {} pairwise complexes. Time: {} s", redundancyReducedList.size(), ((end-start)/1000.0));
180201

202+
// now let's populate the interface area value for the NCS-redundant ones from the reference interface (first one in list)
203+
if (clustersNcs!=null) {
204+
if (chainOrigNamesMap==null) {
205+
logger.warn("No chainOrigNamesMap is set. Considering NCS interfaces in same order as reference. This is likely a bug.");
206+
}
207+
for (StructureInterfaceCluster ncsCluster : clustersNcs) {
208+
StructureInterface refInterf = ncsCluster.getMembers().get(0);
209+
String refMolecId1 = refInterf.getMoleculeIds().getFirst();
210+
for (int i=1;i<ncsCluster.getMembers().size();i++) {
211+
StructureInterface member = ncsCluster.getMembers().get(i);
212+
member.setTotalArea(refInterf.getTotalArea());
213+
String molecId1 = member.getMoleculeIds().getFirst();
214+
if (areMolecIdsSameOrder(refMolecId1, molecId1)) {
215+
// we add the reference interface GroupAsas as the GroupAsas for all other members, like that
216+
// ResidueNumbers won't match in their chain ids, but otherwise all info is there without using a lot of memory
217+
member.setFirstGroupAsas(refInterf.getFirstGroupAsas());
218+
member.setSecondGroupAsas(refInterf.getSecondGroupAsas());
219+
} else {
220+
member.setFirstGroupAsas(refInterf.getSecondGroupAsas());
221+
member.setSecondGroupAsas(refInterf.getFirstGroupAsas());
222+
}
223+
}
224+
}
225+
}
181226

182227
// finally we sort based on the ChainInterface.comparable() (based in interfaceArea)
183228
sort();
184229
}
185230

231+
private boolean areMolecIdsSameOrder(String refMolecId, String molecId) {
232+
233+
if (chainOrigNamesMap==null) {
234+
// we've already warned above
235+
return true;
236+
}
237+
238+
String refMolecIdOrig = chainOrigNamesMap.get(refMolecId);
239+
String molecIdOrig = chainOrigNamesMap.get(molecId);
240+
241+
return (refMolecIdOrig.equals(molecIdOrig));
242+
}
243+
186244
/**
187245
* Sorts the interface list and reassigns ids based on new sorting
246+
*
188247
*/
189248
public void sort() {
190249
Collections.sort(list);
@@ -251,9 +310,21 @@ public void addNcsEquivalent(StructureInterface interfaceNew, StructureInterface
251310
clustersNcs.add(newCluster);
252311
}
253312

313+
/**
314+
* Sets a map with mapping from NCS chain names to original chain names.
315+
* Necessary when {@link #addNcsEquivalent(StructureInterface, StructureInterface)} is used and NCS equivalent
316+
* interfaces exist in this list and their names need mapping when setting ASAs.
317+
* @param chainOrigNamesMap a map of NCS chain name to original chain name
318+
*/
319+
public void setChainOrigNamesMap(Map<String, String> chainOrigNamesMap) {
320+
this.chainOrigNamesMap = chainOrigNamesMap;
321+
}
322+
254323
/**
255324
* Removes from this interface list all interfaces with areas
256-
* below the default cutoff area
325+
* below the default cutoff area.
326+
* Note that this must be called after {@link #calcAsas(int, int, int)}, otherwise all areas would
327+
* be 0 and thus all removed.
257328
* @see #DEFAULT_MINIMUM_INTERFACE_AREA
258329
*/
259330
public void removeInterfacesBelowArea() {
@@ -262,17 +333,18 @@ public void removeInterfacesBelowArea() {
262333

263334
/**
264335
* Removes from this interface list all interfaces with areas
265-
* below the given cutoff area
266-
* @param area
336+
* below the given cutoff area.
337+
* Note that this must be called after {@link #calcAsas(int, int, int)}, otherwise all areas would
338+
* be 0 and thus all removed.
339+
* @param area the minimum interface buried surface area to keep. Interfaces below this value will be removed.
267340
*/
268341
public void removeInterfacesBelowArea(double area) {
269-
Iterator<StructureInterface> it = iterator();
270-
while (it.hasNext()) {
271-
StructureInterface interf = it.next();
272-
if (interf.getTotalArea()<area) {
273-
it.remove();
274-
}
275-
}
342+
343+
list.removeIf(interf -> interf.getTotalArea() < area);
344+
345+
if (clustersNcs != null) {
346+
clustersNcs.removeIf(ncsCluster -> ncsCluster.getMembers().get(0).getTotalArea() < area);
347+
}
276348
}
277349

278350
/**
@@ -291,6 +363,7 @@ public List<StructureInterfaceCluster> getClusters() {
291363
* Calculate the interface clusters for this StructureInterfaceList
292364
* using a contact overlap score to measure the similarity of interfaces.
293365
* Subsequent calls will use the cached value without recomputing the clusters.
366+
* The clusters will be assigned ids by sorting descending by {@link StructureInterfaceCluster#getTotalArea()}
294367
* @param contactOverlapScoreClusterCutoff the contact overlap score above which a pair will be
295368
* clustered
296369
* @return
@@ -300,7 +373,7 @@ public List<StructureInterfaceCluster> getClusters(double contactOverlapScoreClu
300373
return clusters;
301374
}
302375

303-
clusters = new ArrayList<StructureInterfaceCluster>();
376+
clusters = new ArrayList<>();
304377

305378
// nothing to do if we have no interfaces
306379
if (list.size()==0) return clusters;
@@ -324,9 +397,9 @@ public List<StructureInterfaceCluster> getClusters(double contactOverlapScoreClu
324397

325398
SingleLinkageClusterer slc = new SingleLinkageClusterer(matrix, true);
326399

327-
Map<Integer,Set<Integer>> clusteredIndices = slc.getClusters(contactOverlapScoreClusterCutoff);
400+
Map<Integer, Set<Integer>> clusteredIndices = slc.getClusters(contactOverlapScoreClusterCutoff);
328401
for (int clusterIdx:clusteredIndices.keySet()) {
329-
List<StructureInterface> members = new ArrayList<StructureInterface>();
402+
List<StructureInterface> members = new ArrayList<>();
330403
for (int idx:clusteredIndices.get(clusterIdx)) {
331404
members.add(list.get(idx));
332405
}
@@ -335,8 +408,9 @@ public List<StructureInterfaceCluster> getClusters(double contactOverlapScoreClu
335408
double averageScore = 0.0;
336409
int countPairs = 0;
337410
for (int i=0;i<members.size();i++) {
411+
int iIdx = list.indexOf(members.get(i));
338412
for (int j=i+1;j<members.size();j++) {
339-
averageScore += matrix[members.get(i).getId()-1][members.get(j).getId()-1];
413+
averageScore += matrix[iIdx][list.indexOf(members.get(j))];
340414
countPairs++;
341415
}
342416
}
@@ -358,19 +432,15 @@ public List<StructureInterfaceCluster> getClusters(double contactOverlapScoreClu
358432
}
359433

360434
// now we sort by areas (descending) and assign ids based on that sorting
361-
Collections.sort(clusters, new Comparator<StructureInterfaceCluster>() {
362-
@Override
363-
public int compare(StructureInterfaceCluster o1, StructureInterfaceCluster o2) {
364-
return Double.compare(o2.getTotalArea(), o1.getTotalArea()); //note we invert so that sorting is descending
365-
}
435+
clusters.sort((o1, o2) -> {
436+
return Double.compare(o2.getTotalArea(), o1.getTotalArea()); //note we invert so that sorting is descending
366437
});
367438
int id = 1;
368439
for (StructureInterfaceCluster cluster:clusters) {
369440
cluster.setId(id);
370441
id++;
371442
}
372443

373-
374444
return clusters;
375445
}
376446

biojava-structure/src/main/java/org/biojava/nbio/structure/xtal/CrystalBuilder.java

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444

4545
public class CrystalBuilder {
4646

47+
public static final String NCS_CHAINID_SUFFIX_CHAR = "n";
48+
4749
// Default number of cell neighbors to try in interface search (in 3 directions of space).
4850
// In the search, only bounding box overlaps are tried, thus there's not so much overhead in adding
4951
// more cells. We actually tested it and using numCells from 1 to 10 didn't change runtimes at all.
@@ -219,7 +221,10 @@ public StructureInterfaceList getUniqueInterfaces(double cutoff) {
219221
return set;
220222
}
221223

222-
224+
// pass the chainOrigNames map in NCS case so that StructureInterfaceList can deal with original to NCS chain names conversion
225+
if (chainOrigNames!=null) {
226+
set.setChainOrigNamesMap(chainOrigNames);
227+
}
223228

224229
// initialising the visited ArrayList for keeping track of symmetry redundancy
225230
initialiseVisited();
@@ -319,7 +324,7 @@ private void calcInterfacesCrystal(StructureInterfaceList set, double cutoff) {
319324

320325
// 3) an operator can be "self redundant" if it is the inverse of itself (involutory, e.g. all pure 2-folds with no translation)
321326
if (tt.isEquivalent(tt)) {
322-
logger.debug("Transform "+tt+" is equivalent to itself, will skip half of i-chains to j-chains comparisons");
327+
logger.debug("Transform {} is equivalent to itself, will skip half of i-chains to j-chains comparisons", tt.toString());
323328
// in this case we can't skip the operator, but we can skip half of the matrix comparisons e.g. j>i
324329
// we set a flag and do that within the loop below
325330
selfEquivalent = true;
@@ -552,7 +557,7 @@ public void translate(Matrix4d m, Vector3d translation) {
552557

553558
/**
554559
* Apply the NCS operators in the given Structure adding new chains as needed.
555-
* All chains are (re)assigned ids of the form: original_chain_id+ncs_operator_index+"n".
560+
* All chains are (re)assigned ids of the form: original_chain_id+ncs_operator_index+{@value #NCS_CHAINID_SUFFIX_CHAR}.
556561
* @param structure
557562
* the structure to expand
558563
* @param chainOrigNames
@@ -583,8 +588,8 @@ public static void expandNcsOps(Structure structure, Map<String,String> chainOri
583588
Matrix4d m = ncsOps[iOperator];
584589

585590
Chain clonedChain = (Chain)c.clone();
586-
String newChainId = cOrigId+(iOperator+1)+"n";
587-
String newChainName = cOrigName+(iOperator+1)+"n";
591+
String newChainId = cOrigId+(iOperator+1)+NCS_CHAINID_SUFFIX_CHAR;
592+
String newChainName = cOrigName+(iOperator+1)+NCS_CHAINID_SUFFIX_CHAR;
588593
clonedChain.setId(newChainId);
589594
clonedChain.setName(newChainName);
590595

0 commit comments

Comments
 (0)