Skip to content

Commit d8fddbd

Browse files
committed
Changing behavior of PDBFileParser with nonpoly-only chains
1 parent 71a6ddf commit d8fddbd

File tree

4 files changed

+86
-32
lines changed

4 files changed

+86
-32
lines changed

biojava-structure/src/main/java/org/biojava/nbio/structure/io/PDBFileParser.java

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import java.text.SimpleDateFormat;
3333
import java.util.ArrayList;
3434
import java.util.Arrays;
35+
import java.util.Collections;
3536
import java.util.Date;
3637
import java.util.HashMap;
3738
import java.util.Iterator;
@@ -2107,7 +2108,7 @@ private void pdb_CONECT_Handler(String line) {
21072108
}
21082109
}
21092110

2110-
/*
2111+
/**
21112112
Handler for
21122113
MODEL Record Format
21132114
@@ -2116,7 +2117,6 @@ private void pdb_CONECT_Handler(String line) {
21162117
1 - 6 Record name "MODEL "
21172118
11 - 14 Integer serial Model serial number.
21182119
*/
2119-
21202120
private void pdb_MODEL_Handler(String line) {
21212121

21222122
if (params.isHeaderOnly()) return;
@@ -2127,24 +2127,12 @@ private void pdb_MODEL_Handler(String line) {
21272127
current_chain.addGroup(current_group);
21282128
current_group.trimToSize();
21292129
}
2130-
//System.out.println("starting new model "+(structure.nrModels()+1));
21312130

21322131
Chain ch = isKnownChain(current_chain.getChainID(),current_model) ;
21332132
if ( ch == null ) {
21342133
current_model.add(current_chain);
21352134
}
2136-
// removing water-only chains, they don't follow the standard data modeling practices.
2137-
// We have to remove them or otherwise they can cause problems down the line,
2138-
// e.g. 3o6j has chain Z with a single water molecule
2139-
Iterator<Chain> it = current_model.iterator();
2140-
while (it.hasNext()) {
2141-
Chain c = it.next();
2142-
if (StructureTools.isChainWaterOnly(c)) {
2143-
logger.warn("Chain {} ({} atom groups) is composed of water molecules only. Removing it.",
2144-
c.getChainID(), c.getAtomGroups().size());
2145-
it.remove();
2146-
}
2147-
}
2135+
21482136
structure.addModel(current_model);
21492137
current_model = new ArrayList<Chain>();
21502138
current_chain = null;
@@ -2939,18 +2927,7 @@ private void triggerEndFileChecks(){
29392927
pdbHeader.setJournalArticle(journalArticle);
29402928
}
29412929

2942-
// removing water-only chains, they don't follow the standard data modeling practices.
2943-
// We have to remove them or otherwise they can cause problems down the line,
2944-
// e.g. 3o6j has chain Z with a single water molecule
2945-
Iterator<Chain> it = current_model.iterator();
2946-
while (it.hasNext()) {
2947-
Chain c = it.next();
2948-
if (StructureTools.isChainWaterOnly(c)) {
2949-
logger.warn("Chain {} ({} atom groups) is composed of water molecules only. Removing it.",
2950-
c.getChainID(), c.getAtomGroups().size());
2951-
it.remove();
2952-
}
2953-
}
2930+
29542931
structure.addModel(current_model);
29552932
structure.setPDBHeader(pdbHeader);
29562933
structure.setCrystallographicInfo(crystallographicInfo);
@@ -3156,6 +3133,32 @@ public void linkChains2Compound(Structure s){
31563133
}
31573134
}
31583135

3136+
// In rare cases a chain that is purely non-polymer or purely water is present and has not been linked to a compound above.
3137+
// We now assign a new compound to each such chain so that the structure is at least consistent.
3138+
// see https://github.com/biojava/biojava/pull/394
3139+
3140+
if (compounds!=null && !compounds.isEmpty()) {
3141+
for (Chain c: s.getChains()) {
3142+
if (c.getCompound() == null) {
3143+
3144+
Compound compound = new Compound();
3145+
compound.addChain(c);
3146+
compound.setMolId(findMaxCompoundId(compounds)+1);
3147+
c.setCompound(compound);
3148+
compounds.add(compound);
3149+
3150+
logger.warn("No compound (entity) found in file for chain {}. Creating new compound {} for it.", c.getChainID(), compound.getMolId());
3151+
}
3152+
}
3153+
}
3154+
}
3155+
3156+
private static int findMaxCompoundId(List<Compound> compounds) {
3157+
List<Integer> allIds = new ArrayList<Integer>(compounds.size());
3158+
for (Compound compound: compounds) {
3159+
allIds.add(compound.getMolId());
3160+
}
3161+
return Collections.max(allIds);
31593162
}
31603163

31613164
/**

biojava-structure/src/test/java/org/biojava/nbio/structure/io/TestNonDepositedFiles.java

Lines changed: 56 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,7 @@
2020
*/
2121
package org.biojava.nbio.structure.io;
2222

23-
import static org.junit.Assert.assertEquals;
24-
import static org.junit.Assert.assertFalse;
25-
import static org.junit.Assert.assertNotNull;
26-
import static org.junit.Assert.assertNull;
27-
import static org.junit.Assert.assertTrue;
23+
import static org.junit.Assert.*;
2824

2925
import java.io.BufferedReader;
3026
import java.io.IOException;
@@ -330,5 +326,60 @@ public void testNewLigandChain() throws IOException, StructureException {
330326
Chain c2 = s2.getChainByPDB("B");
331327
assertNotNull(c2);
332328
assertEquals(expectedNumLigands, c2.getAtomGroups().size());
329+
330+
// pdb and mmcif should have same number of chains
331+
assertEquals(s1.getChains().size(), s2.getChains().size());
332+
}
333+
334+
@Test
335+
public void testWaterOnlyChain() throws IOException, StructureException {
336+
337+
// following 2 files are cut-down versions of 4a10
338+
InputStream pdbStream = new GZIPInputStream(this.getClass().getResourceAsStream("/org/biojava/nbio/structure/io/4a10_short.pdb.gz"));
339+
InputStream cifStream = new GZIPInputStream(this.getClass().getResourceAsStream("/org/biojava/nbio/structure/io/4a10_short.cif.gz"));
340+
341+
PDBFileParser pdbpars = new PDBFileParser();
342+
Structure s1 = pdbpars.parsePDBFile(pdbStream) ;
343+
344+
assertEquals(2, s1.getChains().size());
345+
346+
Chain c1 = null;
347+
try {
348+
c1 = s1.getChainByPDB("F");
349+
350+
} catch (StructureException e) {
351+
fail("Got StructureException while looking for water-only chain F");
352+
}
353+
354+
// checking that compounds are linked
355+
assertNotNull(c1.getCompound());
356+
357+
// checking that the water molecule was assigned an ad-hoc compound
358+
assertEquals(2,s1.getCompounds().size());
359+
360+
361+
362+
MMcifParser mmcifpars = new SimpleMMcifParser();
363+
SimpleMMcifConsumer consumer = new SimpleMMcifConsumer();
364+
mmcifpars.addMMcifConsumer(consumer);
365+
mmcifpars.parse(cifStream) ;
366+
Structure s2 = consumer.getStructure();
367+
368+
369+
assertEquals(2, s2.getChains().size());
370+
371+
Chain c = null;
372+
try {
373+
c = s2.getChainByPDB("F");
374+
375+
} catch (StructureException e) {
376+
fail("Got StructureException while looking for water-only chain F");
377+
}
378+
379+
// checking that compounds are linked
380+
assertNotNull(c.getCompound());
381+
382+
// checking that the water molecule was assigned an ad-hoc compound
383+
assertEquals(2,s2.getCompounds().size());
333384
}
334385
}
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)