Skip to content

Commit 36073a8

Browse files
authored
Merge pull request #867 from josemduarte/interface-finder
New interface finder functionality
2 parents e18bf4f + 0df3813 commit 36073a8

File tree

5 files changed

+297
-27
lines changed

5 files changed

+297
-27
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package org.biojava.nbio.structure.test.contact;
2+
3+
import org.biojava.nbio.structure.Structure;
4+
import org.biojava.nbio.structure.StructureException;
5+
import org.biojava.nbio.structure.StructureIO;
6+
import org.biojava.nbio.structure.contact.AtomContactSet;
7+
import org.biojava.nbio.structure.contact.InterfaceFinder;
8+
import org.biojava.nbio.structure.contact.Pair;
9+
import org.biojava.nbio.structure.contact.StructureInterface;
10+
import org.biojava.nbio.structure.contact.StructureInterfaceList;
11+
import org.junit.Test;
12+
13+
import java.io.IOException;
14+
import java.util.HashSet;
15+
import java.util.Set;
16+
17+
import static org.junit.Assert.assertEquals;
18+
19+
/**
* Test for {@link InterfaceFinder}: computes all interfaces of one structure and checks
* how many are found and that no pair of molecule ids is reported twice.
*/
public class TestInterfaceFinder {
20+
/**
* Loads the structure with id "3hbx" through {@link StructureIO}, runs
* {@link InterfaceFinder#getAllInterfaces()} with the default cutoff and expects 12 interfaces,
* each with a distinct pair of molecule ids.
* The elapsed time and the per-interface contact counts are printed to standard output.
* NOTE(review): StructureIO.getStructure presumably needs network access or a local
* structure cache to resolve the id - confirm for the test environment.
*/
21+
@Test
22+
public void testGetAllInterfaces() throws StructureException, IOException {
23+
Structure s = StructureIO.getStructure("3hbx");
24+
25+
// only the interface calculation is timed, not the structure loading above
long start = System.currentTimeMillis();
26+
27+
InterfaceFinder finder = new InterfaceFinder(s);
28+
StructureInterfaceList list = finder.getAllInterfaces();
29+
30+
long end = System.currentTimeMillis();
31+
System.out.println("Took " + (end-start) + " ms to calculate interfaces");
32+
33+
assertEquals(12, list.size());
34+
35+
// collects the molecule id pair of every interface; duplicates collapse in the set
Set<Pair<String>> unique = new HashSet<>();
36+
37+
for (StructureInterface interf : list) {
38+
System.out.println("Interface " + interf.getMoleculeIds());
39+
AtomContactSet set = interf.getContacts();
40+
System.out.println("Number of contacts: " + set.size());
41+
42+
unique.add(interf.getMoleculeIds());
43+
44+
}
45+
// same count as the list size asserted above, i.e. no molecule id pair occurs twice
assertEquals(12, unique.size());
46+
}
47+
}

biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,7 @@ public static List<Group> getUnalignedGroups(Atom[] ca) {
474474
* @see StructureTools#DEFAULT_LIGAND_PROXIMITY_CUTOFF
475475
*/
476476
public static List<Group> getLigandsByProximity(Collection<Group> target, Atom[] query, double cutoff) {
477-
// Geometric hashing of the reduced structure
477+
// Spatial hashing of the reduced structure
478478
Grid grid = new Grid(cutoff);
479479
grid.addAtoms(query);
480480

@@ -1387,7 +1387,7 @@ public static Group getGroupByPDBResidueNumber(Structure struc,
13871387

13881388
/**
13891389
* Returns the set of intra-chain contacts for the given chain for given
1390-
* atom names, i.e. the contact map. Uses a geometric hashing algorithm that
1390+
* atom names, i.e. the contact map. Uses a spatial hashing algorithm that
13911391
* speeds up the calculation without need of full distance matrix. The
13921392
* parsing mode {@link FileParsingParameters#setAlignSeqRes(boolean)} needs
13931393
* to be set to true for this to work.
@@ -1422,7 +1422,7 @@ public static AtomContactSet getAtomsInContact(Chain chain,
14221422

14231423
/**
14241424
* Returns the set of intra-chain contacts for the given chain for all non-H
1425-
* atoms of non-hetatoms, i.e. the contact map. Uses a geometric hashing
1425+
* atoms of non-hetatoms, i.e. the contact map. Uses a spatial hashing
14261426
* algorithm that speeds up the calculation without need of full distance
14271427
* matrix. The parsing mode
14281428
* {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
@@ -1439,7 +1439,7 @@ public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) {
14391439
/**
14401440
* Returns the set of intra-chain contacts for the given chain for C-alpha
14411441
* atoms (including non-standard amino acids appearing as HETATM groups),
1442-
* i.e. the contact map. Uses a geometric hashing algorithm that speeds up
1442+
* i.e. the contact map. Uses a spatial hashing algorithm that speeds up
14431443
* the calculation without need of full distance matrix. The parsing mode
14441444
* {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
14451445
* true for this to work.
@@ -1462,7 +1462,7 @@ public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) {
14621462
/**
14631463
* Returns the set of intra-chain contacts for the given chain for C-alpha
14641464
* or C3' atoms (including non-standard amino acids appearing as HETATM
1465-
* groups), i.e. the contact map. Uses a geometric hashing algorithm that
1465+
* groups), i.e. the contact map. Uses a spatial hashing algorithm that
14661466
* speeds up the calculation without need of full distance matrix.
14671467
*
14681468
* @param chain
@@ -1483,7 +1483,7 @@ public static AtomContactSet getRepresentativeAtomsInContact(Chain chain,
14831483

14841484
/**
14851485
* Returns the set of inter-chain contacts between the two given chains for
1486-
* the given atom names. Uses a geometric hashing algorithm that speeds up
1486+
* the given atom names. Uses a spatial hashing algorithm that speeds up
14871487
* the calculation without need of full distance matrix. The parsing mode
14881488
* {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
14891489
* true for this to work.
@@ -1518,7 +1518,7 @@ public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2,
15181518

15191519
/**
15201520
* Returns the set of inter-chain contacts between the two given chains for
1521-
* all non-H atoms. Uses a geometric hashing algorithm that speeds up the
1521+
* all non-H atoms. Uses a spatial hashing algorithm that speeds up the
15221522
* calculation without need of full distance matrix. The parsing mode
15231523
* {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
15241524
* true for this to work.
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
package org.biojava.nbio.structure.contact;
2+
3+
import org.biojava.nbio.structure.Atom;
4+
import org.biojava.nbio.structure.Calc;
5+
import org.biojava.nbio.structure.Chain;
6+
import org.biojava.nbio.structure.Structure;
7+
import org.biojava.nbio.structure.StructureTools;
8+
import org.biojava.nbio.structure.xtal.CrystalTransform;
9+
import org.biojava.nbio.structure.xtal.SpaceGroup;
10+
11+
import javax.vecmath.Point3d;
12+
import java.util.List;
13+
14+
/**
15+
* A class containing methods to find interfaces in a given structure.
* Interfaces are searched between every pair of polymer chains of the structure, using a
* contact distance cutoff that defaults to {@link #DEFAULT_CONTACT_CUTOFF}.
16+
* @author Jose Duarte
17+
* @since 5.4.0
18+
*/
19+
public class InterfaceFinder {
20+
/**
* Cutoff used when {@link #setCutoff(double)} is not called
* (a distance in Angstroms, per the documentation of that setter).
*/
21+
public static final double DEFAULT_CONTACT_CUTOFF = 6;
22+
// Transform handed to StructureInterface for both sides of every interface built by this class.
// NOTE(review): built from a null SpaceGroup, presumably yielding the identity operator - confirm in CrystalTransform.
23+
private static final CrystalTransform IDENTITY_TRANSFORM = new CrystalTransform((SpaceGroup) null);
// Flag forwarded to StructureTools.getAllNonHAtomArray and StructureTools.getAtomsInContact.
24+
private static final boolean INCLUDE_HETATOMS = true;
25+
26+
private Structure structure;
27+
private double cutoff;
28+
// One box per polymer chain, in the order of structure.getPolyChains();
// rebuilt by initBoundingBoxes() at the start of every getAllInterfaces() call.
29+
private BoundingBox[] boundingBoxes;
30+
/**
* Create a finder for the given structure, with the contact cutoff set to
* {@link #DEFAULT_CONTACT_CUTOFF}.
* @param structure the structure in which interfaces will be searched
*/
31+
public InterfaceFinder(Structure structure) {
32+
this.structure = structure;
33+
this.cutoff = DEFAULT_CONTACT_CUTOFF;
34+
}
35+
36+
/**
37+
* Set the contact distance cutoff.
38+
* @param cutoff the distance value in Angstroms
39+
*/
40+
public void setCutoff(double cutoff) {
41+
this.cutoff = cutoff;
42+
}
43+
44+
/**
45+
* Find all inter polymer-chain interfaces in the structure.
46+
* Two chains will be considered in contact if at least one pair of atoms (one from each chain) is within the
47+
* contact cutoff.
* Every unordered pair of polymer chains is examined once.
48+
* @return the list of all interfaces
49+
*/
50+
public StructureInterfaceList getAllInterfaces() {
51+
initBoundingBoxes();
52+
53+
StructureInterfaceList list = new StructureInterfaceList();
54+
55+
List<Chain> polyChains = structure.getPolyChains();
56+
for (int i = 0; i<polyChains.size(); i++) {
57+
for (int j = i + 1; j<polyChains.size(); j++) {
// pre-filter on the chain bounding boxes: the atom-level contact calculation is skipped
// when the two boxes do not overlap for the current cutoff
58+
if (! boundingBoxes[i].overlaps(boundingBoxes[j], cutoff)) {
59+
continue;
60+
}
61+
StructureInterface interf = calcInterface(polyChains.get(i), polyChains.get(j));
// calcInterface returns null when it found no atom contacts for this pair
62+
if (interf!=null) {
63+
list.add(interf);
64+
}
65+
}
66+
}
67+
return list;
68+
}
69+
/**
* (Re)compute the bounding box of every polymer chain from the atoms returned by
* StructureTools.getAllNonHAtomArray, storing them in {@link #boundingBoxes} at the
* chain's index in structure.getPolyChains().
*/
70+
private void initBoundingBoxes() {
71+
List<Chain> polyChains = structure.getPolyChains();
72+
boundingBoxes = new BoundingBox[polyChains.size()];
73+
for (int i = 0; i<polyChains.size(); i++) {
74+
Atom[] atoms = StructureTools.getAllNonHAtomArray(polyChains.get(i), INCLUDE_HETATOMS);
75+
Point3d[] points = Calc.atomsToPoints(atoms);
76+
BoundingBox bb = new BoundingBox(points);
77+
boundingBoxes[i] = bb;
78+
}
79+
}
80+
/**
* Calculate the contacts between the two given chains using the current cutoff and wrap
* them in a StructureInterface named after the two chains.
* @param chain1 the first chain
* @param chain2 the second chain
* @return the interface, or null if the contact set between the chains is empty
*/
81+
private StructureInterface calcInterface(Chain chain1, Chain chain2) {
82+
AtomContactSet graph = StructureTools.getAtomsInContact(chain1, chain2, cutoff, INCLUDE_HETATOMS);
83+
84+
StructureInterface interf = null;
85+
if (graph.size()>0) {
86+
interf = new StructureInterface(
87+
StructureTools.getAllNonHAtomArray(chain1, INCLUDE_HETATOMS), StructureTools.getAllNonHAtomArray(chain2, INCLUDE_HETATOMS),
88+
chain1.getName(), chain2.getName(),
89+
graph,
90+
IDENTITY_TRANSFORM, IDENTITY_TRANSFORM);
91+
}
92+
93+
return interf;
94+
}
95+
}

biojava-structure/src/main/java/org/biojava/nbio/structure/contact/StructureInterface.java

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ protected Atom[] getAtomsForAsa(int cofactorSizeToUse) {
304304
* non-Hydrogen atoms are not included
305305
* @return
306306
*/
307-
private static final Atom[] getAllNonHAtomArray(Atom[] m, int minSizeHetAtomToInclude) {
307+
private static Atom[] getAllNonHAtomArray(Atom[] m, int minSizeHetAtomToInclude) {
308308
List<Atom> atoms = new ArrayList<>();
309309

310310
for (Atom a:m){
@@ -348,7 +348,7 @@ private static boolean isInChain(Group g) {
348348
ChemComp chemComp = g.getChemComp();
349349

350350
if (chemComp==null) {
351-
logger.warn("Warning: can't determine PolymerType for group "+g.getResidueNumber()+" ("+g.getPDBName()+"). Will consider it as non-nucleotide/non-protein type.");
351+
logger.warn("Can't determine PolymerType for group "+g.getResidueNumber()+" ("+g.getPDBName()+"). Will consider it as non-nucleotide/non-protein type.");
352352
return false;
353353
}
354354

@@ -458,8 +458,8 @@ public GroupAsa getSecondGroupAsa(ResidueNumber resNum) {
458458
*/
459459
public Pair<List<Group>> getCoreResidues(double bsaToAsaCutoff, double minAsaForSurface) {
460460

461-
List<Group> core1 = new ArrayList<Group>();
462-
List<Group> core2 = new ArrayList<Group>();
461+
List<Group> core1 = new ArrayList<>();
462+
List<Group> core2 = new ArrayList<>();
463463

464464
for (GroupAsa groupAsa:groupAsas1.values()) {
465465

@@ -482,7 +482,7 @@ public Pair<List<Group>> getCoreResidues(double bsaToAsaCutoff, double minAsaFor
482482
}
483483
}
484484

485-
return new Pair<List<Group>>(core1, core2);
485+
return new Pair<>(core1, core2);
486486
}
487487

488488
/**
@@ -494,8 +494,8 @@ public Pair<List<Group>> getCoreResidues(double bsaToAsaCutoff, double minAsaFor
494494
*/
495495
public Pair<List<Group>> getRimResidues(double bsaToAsaCutoff, double minAsaForSurface) {
496496

497-
List<Group> rim1 = new ArrayList<Group>();
498-
List<Group> rim2 = new ArrayList<Group>();
497+
List<Group> rim1 = new ArrayList<>();
498+
List<Group> rim2 = new ArrayList<>();
499499

500500
for (GroupAsa groupAsa:groupAsas1.values()) {
501501

@@ -529,8 +529,8 @@ public Pair<List<Group>> getRimResidues(double bsaToAsaCutoff, double minAsaForS
529529
*/
530530
public Pair<List<Group>> getInterfacingResidues(double minAsaForSurface) {
531531

532-
List<Group> interf1 = new ArrayList<Group>();
533-
List<Group> interf2 = new ArrayList<Group>();
532+
List<Group> interf1 = new ArrayList<>();
533+
List<Group> interf2 = new ArrayList<>();
534534

535535
for (GroupAsa groupAsa:groupAsas1.values()) {
536536

@@ -545,7 +545,7 @@ public Pair<List<Group>> getInterfacingResidues(double minAsaForSurface) {
545545
}
546546
}
547547

548-
return new Pair<List<Group>>(interf1, interf2);
548+
return new Pair<>(interf1, interf2);
549549
}
550550

551551
/**
@@ -554,8 +554,8 @@ public Pair<List<Group>> getInterfacingResidues(double minAsaForSurface) {
554554
* @return
555555
*/
556556
public Pair<List<Group>> getSurfaceResidues(double minAsaForSurface) {
557-
List<Group> surf1 = new ArrayList<Group>();
558-
List<Group> surf2 = new ArrayList<Group>();
557+
List<Group> surf1 = new ArrayList<>();
558+
List<Group> surf2 = new ArrayList<>();
559559

560560
for (GroupAsa groupAsa:groupAsas1.values()) {
561561

@@ -570,7 +570,7 @@ public Pair<List<Group>> getSurfaceResidues(double minAsaForSurface) {
570570
}
571571
}
572572

573-
return new Pair<List<Group>>(surf1, surf2);
573+
return new Pair<>(surf1, surf2);
574574
}
575575

576576
public StructureInterfaceCluster getCluster() {
@@ -585,12 +585,12 @@ public void setCluster(StructureInterfaceCluster cluster) {
585585
* Calculates the contact overlap score between this StructureInterface and
586586
* the given one.
587587
* The two sides of the given StructureInterface need to match this StructureInterface
588-
* in the sense that they must come from the same Compound (Entity), i.e.
588+
* in the sense that they must come from the same Entity, i.e.
589589
* their residue numbers need to align with 100% identity, except for unobserved
590590
* density residues. The SEQRES indices obtained through {@link EntityInfo#getAlignedResIndex(Group, Chain)} are
591591
* used to match residues, thus if no SEQRES is present or if {@link FileParsingParameters#setAlignSeqRes(boolean)}
592592
* is not used, this calculation is not guaranteed to work properly.
593-
* @param other
593+
* @param other the interface to be compared to this one
594594
* @param invert if false the comparison will be done first-to-first and second-to-second,
595595
* if true the match will be first-to-second and second-to-first
596596
* @return the contact overlap score, range [0.0,1.0]
@@ -668,7 +668,7 @@ public GroupContactSet getGroupContacts() {
668668

669669
/**
670670
* Tell whether the interface is isologous, i.e. it is formed
671-
* by the same patches of same Compound on both sides.
671+
* by the same patches of same entity on both sides.
672672
*
673673
* @return true if isologous, false if heterologous
674674
*/
@@ -691,11 +691,11 @@ public Pair<Chain> getParentChains() {
691691
return null;
692692
}
693693

694-
return new Pair<Chain>(firstMol[0].getGroup().getChain(), secondMol[0].getGroup().getChain());
694+
return new Pair<>(firstMol[0].getGroup().getChain(), secondMol[0].getGroup().getChain());
695695
}
696696

697697
/**
698-
* Finds the parent compounds by looking up the references of first atom of each side of this interface
698+
* Finds the parent entities by looking up the references of first atom of each side of this interface
699699
* @return
700700
*/
701701
public Pair<EntityInfo> getParentCompounds() {
@@ -720,7 +720,7 @@ private Structure getParentStructure() {
720720
* Return a String representing the 2 molecules of this interface in PDB format.
721721
* If the molecule ids (i.e. chain ids) are the same for both molecules, then the second
722722
* one will be replaced by the next letter in alphabet (or A for Z)
723-
* @return
723+
* @return the PDB-formatted string
724724
*/
725725
public String toPDB() {
726726

@@ -758,7 +758,7 @@ public String toPDB() {
758758
* Return a String representing the 2 molecules of this interface in mmCIF format.
759759
* If the molecule ids (i.e. chain ids) are the same for both molecules, then the second
760760
* one will be written as chainId_operatorId (with operatorId taken from {@link #getTransforms()})
761-
* @return
761+
* @return the mmCIF-formatted string
762762
*/
763763
public String toMMCIF() {
764764
StringBuilder sb = new StringBuilder();

0 commit comments

Comments
 (0)