Skip to content

Commit 6e3ccb0

Browse files
authored
Merge pull request #961 from josemduarte/issue943
Bond parsing in PDB format now uses chain ids correctly
2 parents 0d03221 + 4e0473e commit 6e3ccb0

3 files changed

Lines changed: 90 additions & 18 deletions

File tree

biojava-integrationtest/pom.xml

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,20 @@
2323
<dependency>
2424
<groupId>junit</groupId>
2525
<artifactId>junit</artifactId>
26-
<scope>test</scope>
2726
</dependency>
27+
<!-- Junit5 dependencies, defined in parent pom. -->
28+
<dependency>
29+
<groupId>org.junit.jupiter</groupId>
30+
<artifactId>junit-jupiter-engine</artifactId>
31+
</dependency>
32+
<dependency>
33+
<groupId>org.junit.jupiter</groupId>
34+
<artifactId>junit-jupiter-params</artifactId>
35+
</dependency>
36+
<dependency>
37+
<groupId>org.junit.vintage</groupId>
38+
<artifactId>junit-vintage-engine</artifactId>
39+
</dependency>
2840
<dependency>
2941
<groupId>org.biojava</groupId>
3042
<artifactId>biojava-structure</artifactId>
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package org.biojava.nbio.structure.test.io;
2+
3+
import org.biojava.nbio.structure.Atom;
4+
import org.biojava.nbio.structure.Group;
5+
import org.biojava.nbio.structure.Structure;
6+
import org.biojava.nbio.structure.io.FileParsingParameters;
7+
import org.biojava.nbio.structure.io.PDBFileReader;
8+
import org.junit.jupiter.api.Test;
9+
10+
import static org.junit.jupiter.api.Assertions.*;
11+
12+
class TestBondParsing {
13+
14+
/**
15+
* Integration test for bond parsing in PDB-format, where author chain ids and asym ids differ and can cause
16+
* problems. See https://github.com/biojava/biojava/issues/943
17+
*/
18+
@Test
19+
public void testIssue943() throws Exception {
20+
PDBFileReader reader = new PDBFileReader();
21+
FileParsingParameters params = new FileParsingParameters();
22+
params.setCreateAtomBonds(true);
23+
reader.setFileParsingParameters(params);
24+
Structure s = reader.getStructureById("1v9i");
25+
26+
Group his95 = s.getPolyChain("A").getAtomGroup(94);
27+
Atom ne2His95 = his95.getAtom("NE2");
28+
assertEquals(3, ne2His95.getBonds().size());
29+
30+
Group zn = s.getNonPolyChain("B").getAtomGroup(0);
31+
assertEquals(3, zn.getAtom("ZN").getBonds().size());
32+
33+
}
34+
}

biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public BondMaker(Structure structure, FileParsingParameters params) {
9696
* nucleotide bonds: inferred from sequence and distances
9797
* </li>
9898
* <li>
99-
* intra-group (residue) bonds: read from the chemical component dictionary, via {@link ChemCompProvider}
99+
* intra-group (residue) bonds: read from the chemical component dictionary, via {@link org.biojava.nbio.structure.chem.ChemCompProvider}
100100
* </li>
101101
*/
102102
public void makeBonds() {
@@ -167,7 +167,7 @@ private void formIntraResidueBonds() {
167167
continue;
168168
}
169169
// Now add support for altLocGroup
170-
List<Group> totList = new ArrayList<Group>();
170+
List<Group> totList = new ArrayList<>();
171171
totList.add(mainGroup);
172172
totList.addAll(mainGroup.getAltLocs());
173173

@@ -293,10 +293,10 @@ public void formDisulfideBonds(List<SSBondImpl> disulfideBonds) {
293293

294294
private void formDisulfideBond(SSBondImpl disulfideBond) {
295295
try {
296-
Map<Integer, Atom> a = getAtomFromRecord("SG", "", "CYS",
296+
Map<Integer, Atom> a = getAtomFromRecord("SG", "",
297297
disulfideBond.getChainID1(), disulfideBond.getResnum1(),
298298
disulfideBond.getInsCode1());
299-
Map<Integer, Atom> b = getAtomFromRecord("SG", "", "CYS",
299+
Map<Integer, Atom> b = getAtomFromRecord("SG", "",
300300
disulfideBond.getChainID2(), disulfideBond.getResnum2(),
301301
disulfideBond.getInsCode2());
302302

@@ -324,7 +324,7 @@ private void formDisulfideBond(SSBondImpl disulfideBond) {
324324

325325
/**
326326
* Creates bond objects from a LinkRecord as parsed from a PDB file
327-
* @param linkRecord
327+
* @param linkRecord the PDB-format LINK record
328328
*/
329329
public void formLinkRecordBond(LinkRecord linkRecord) {
330330
// only work with atoms that aren't alternate locations
@@ -333,15 +333,24 @@ public void formLinkRecordBond(LinkRecord linkRecord) {
333333
return;
334334

335335
try {
336-
Map<Integer, Atom> a = getAtomFromRecord(linkRecord.getName1(),
337-
linkRecord.getAltLoc1(), linkRecord.getResName1(),
338-
linkRecord.getChainID1(), linkRecord.getResSeq1(),
339-
linkRecord.getiCode1());
336+
// The PDB format uses author chain ids to reference chains. But one author chain id corresponds to multiple asym ids,
337+
// thus we need to grab all the possible asym ids (poly and nonpoly) and then try to find the atoms
338+
// See issue https://github.com/biojava/biojava/issues/943
339+
String polyChainId1 = structure.getPolyChainByPDB(linkRecord.getChainID1()).getId();
340+
String polyChainId2 = structure.getPolyChainByPDB(linkRecord.getChainID2()).getId();
341+
List<Chain> nonpolyChains1 = structure.getNonPolyChainsByPDB(linkRecord.getChainID1());
342+
List<Chain> nonpolyChains2 = structure.getNonPolyChainsByPDB(linkRecord.getChainID2());
340343

341-
Map<Integer, Atom> b = getAtomFromRecord(linkRecord.getName2(),
342-
linkRecord.getAltLoc2(), linkRecord.getResName2(),
343-
linkRecord.getChainID2(), linkRecord.getResSeq2(),
344-
linkRecord.getiCode2());
344+
List<String> allChainIds1 = new ArrayList<>();
345+
List<String> allChainIds2 = new ArrayList<>();
346+
if (polyChainId1!=null) allChainIds1.add(polyChainId1);
347+
if (polyChainId2!=null) allChainIds2.add(polyChainId2);
348+
if (nonpolyChains1!=null) nonpolyChains1.forEach(npc -> allChainIds1.add(npc.getId()));
349+
if (nonpolyChains2!=null) nonpolyChains2.forEach(npc -> allChainIds2.add(npc.getId()));
350+
351+
Map<Integer, Atom> a = getAtomFromRecordTryMultipleChainIds(linkRecord.getName1(), linkRecord.getAltLoc1(), linkRecord.getResSeq1(), linkRecord.getiCode1(), allChainIds1);
352+
353+
Map<Integer, Atom> b = getAtomFromRecordTryMultipleChainIds(linkRecord.getName2(), linkRecord.getAltLoc2(), linkRecord.getResSeq2(), linkRecord.getiCode2(), allChainIds2);
345354

346355
for(int i=0; i<structure.nrModels(); i++){
347356
if(a.containsKey(i) && b.containsKey(i)){
@@ -352,7 +361,7 @@ public void formLinkRecordBond(LinkRecord linkRecord) {
352361
}
353362
}
354363
}
355-
}catch (StructureException e) {
364+
} catch (StructureException e) {
356365
// Note, in Calpha only mode the link atoms may not be present.
357366
if (! params.isParseCAOnly()) {
358367
logger.warn("Could not find atoms specified in LINK record: {}",linkRecord.toString());
@@ -363,6 +372,23 @@ public void formLinkRecordBond(LinkRecord linkRecord) {
363372
}
364373
}
365374

375+
private Map<Integer, Atom> getAtomFromRecordTryMultipleChainIds(String name, String altLoc, String resSeq, String iCode, List<String> chainIds) throws StructureException {
376+
Map<Integer, Atom> a = null;
377+
for (String chainId : chainIds) {
378+
try {
379+
a = getAtomFromRecord(name, altLoc, chainId, resSeq, iCode);
380+
// first instance that doesn't give an exception will be considered the right one. Not much more we can do here
381+
break;
382+
} catch (StructureException e) {
383+
logger.debug("Tried to get atom {} {} {} (alt loc {}) from chain id {}, but did not find it", name, resSeq, iCode, altLoc, chainId);
384+
}
385+
}
386+
if (a == null) {
387+
throw new StructureException("Could not find atom "+name+" "+resSeq+" "+iCode+" (alt loc "+altLoc+")");
388+
}
389+
return a;
390+
}
391+
366392

367393
public void formBondsFromStructConn(StructConn conn) {
368394
final String symop = "1_555"; // For now - accept bonds within origin asymmetric unit.
@@ -415,15 +441,15 @@ public void formBondsFromStructConn(StructConn conn) {
415441
Map<Integer,Atom> a2 = null;
416442

417443
try {
418-
a1 = getAtomFromRecord(atomName1, altLoc1, resName1, chainId1, seqId1, insCode1);
444+
a1 = getAtomFromRecord(atomName1, altLoc1, chainId1, seqId1, insCode1);
419445

420446
} catch (StructureException e) {
421447

422448
logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId1, insCode1, resName1, chainId1, atomName1, altLocStr1);
423449
continue;
424450
}
425451
try {
426-
a2 = getAtomFromRecord(atomName2, altLoc2, resName2, chainId2, seqId2, insCode2);
452+
a2 = getAtomFromRecord(atomName2, altLoc2, chainId2, seqId2, insCode2);
427453
} catch (StructureException e) {
428454

429455
logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId2, insCode2, resName2, chainId2, atomName2, altLocStr2);
@@ -461,7 +487,7 @@ public void formBondsFromStructConn(StructConn conn) {
461487
structure.setSSBonds(ssbonds);
462488
}
463489

464-
private Map<Integer,Atom> getAtomFromRecord(String name, String altLoc, String resName, String chainID, String resSeq, String iCode)
490+
private Map<Integer,Atom> getAtomFromRecord(String name, String altLoc, String chainID, String resSeq, String iCode)
465491
throws StructureException {
466492

467493
if (iCode==null || iCode.isEmpty()) {

0 commit comments

Comments
 (0)