Skip to content

Commit f4b201b

Browse files
committed
Fix for #929
1 parent 98eff70 commit f4b201b

File tree

2 files changed

+44
-9
lines changed

2 files changed

+44
-9
lines changed

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/io/TestBondParsing.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,27 @@ public void testIssue943() throws Exception {
3131
assertEquals(3, zn.getAtom("ZN").getBonds().size());
3232

3333
}
34+
35+
/**
36+
* Integration test for SS bond parsing in PDB-format, where author chain ids and asym ids differ and can cause
37+
* problems. See https://github.com/biojava/biojava/issues/929
38+
*/
39+
@Test
40+
public void testIssue929() throws Exception {
41+
PDBFileReader reader = new PDBFileReader();
42+
FileParsingParameters params = new FileParsingParameters();
43+
params.setCreateAtomBonds(true);
44+
reader.setFileParsingParameters(params);
45+
Structure s = reader.getStructureById("1a4w");
46+
47+
Group cysB = s.getPolyChain("B").getAtomGroup(118);
48+
Atom sgCysB = cysB.getAtom("SG");
49+
assertEquals(2, sgCysB.getBonds().size());
50+
51+
Group cysA = s.getPolyChain("A").getAtomGroup(1);
52+
Atom sgCysA = cysA.getAtom("SG");
53+
assertEquals(2, sgCysA.getBonds().size());
54+
55+
56+
}
3457
}

biojava-structure/src/main/java/org/biojava/nbio/structure/io/BondMaker.java

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,8 @@ public class BondMaker {
7979
*/
8080
private static final double MAX_NUCLEOTIDE_BOND_LENGTH = 2.1;
8181

82-
private Structure structure;
83-
private FileParsingParameters params;
82+
private final Structure structure;
83+
private final FileParsingParameters params;
8484

8585
public BondMaker(Structure structure, FileParsingParameters params) {
8686
this.structure = structure;
@@ -282,7 +282,7 @@ private void trimBondLists() {
282282

283283
/**
284284
* Creates disulfide bond objects and references in the corresponding Atom objects, given
285-
* a list of {@link SSBondImpl}s parsed from a PDB/mmCIF file.
285+
* a list of {@link SSBondImpl}s parsed from a PDB file.
286286
* @param disulfideBonds
287287
*/
288288
public void formDisulfideBonds(List<SSBondImpl> disulfideBonds) {
@@ -293,12 +293,24 @@ public void formDisulfideBonds(List<SSBondImpl> disulfideBonds) {
293293

294294
private void formDisulfideBond(SSBondImpl disulfideBond) {
295295
try {
296-
Map<Integer, Atom> a = getAtomFromRecord("SG", "",
297-
disulfideBond.getChainID1(), disulfideBond.getResnum1(),
298-
disulfideBond.getInsCode1());
299-
Map<Integer, Atom> b = getAtomFromRecord("SG", "",
300-
disulfideBond.getChainID2(), disulfideBond.getResnum2(),
301-
disulfideBond.getInsCode2());
296+
// The PDB format uses author chain ids to reference chains. But one author chain id corresponds to multiple asym ids,
297+
// thus we need to grab all the possible asym ids (poly and nonpoly) and then try to find the atoms
298+
// See issue https://github.com/biojava/biojava/issues/929
299+
String polyChainId1 = structure.getPolyChainByPDB(disulfideBond.getChainID1()).getId();
300+
String polyChainId2 = structure.getPolyChainByPDB(disulfideBond.getChainID2()).getId();
301+
List<Chain> nonpolyChains1 = structure.getNonPolyChainsByPDB(disulfideBond.getChainID1());
302+
List<Chain> nonpolyChains2 = structure.getNonPolyChainsByPDB(disulfideBond.getChainID2());
303+
304+
List<String> allChainIds1 = new ArrayList<>();
305+
List<String> allChainIds2 = new ArrayList<>();
306+
if (polyChainId1!=null) allChainIds1.add(polyChainId1);
307+
if (polyChainId2!=null) allChainIds2.add(polyChainId2);
308+
if (nonpolyChains1!=null) nonpolyChains1.forEach(npc -> allChainIds1.add(npc.getId()));
309+
if (nonpolyChains2!=null) nonpolyChains2.forEach(npc -> allChainIds2.add(npc.getId()));
310+
311+
Map<Integer, Atom> a = getAtomFromRecordTryMultipleChainIds("SG", "", disulfideBond.getResnum1(), disulfideBond.getInsCode1(), allChainIds1);
312+
313+
Map<Integer, Atom> b = getAtomFromRecordTryMultipleChainIds("SG", "", disulfideBond.getResnum2(), disulfideBond.getInsCode2(), allChainIds2);
302314

303315
for(int i=0; i<structure.nrModels(); i++){
304316
if(a.containsKey(i) && b.containsKey(i)){

0 commit comments

Comments
 (0)