Skip to content

Commit 31b4218

Browse files
author
Matt Larson
committed
Reading non-polymeric chains in SimpleMMcifConsumer matching PDBFileParser behavior.
1 parent e06a702 commit 31b4218

File tree

4 files changed

+81
-32
lines changed

4 files changed

+81
-32
lines changed

biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -797,26 +797,21 @@ public void documentEnd() {
797797
continue;
798798
}
799799
int eId = Integer.parseInt(entityId);
800-
// We didn't add above compounds for nonpolymeric entities, thus here if a chain is nonpolymeric
801-
// its compound won't be found. In biojava Structure data model a nonpolymeric chain does not really
802-
// make much sense, since all small molecules are associated to a polymeric chain (the same data
803-
// model as PDB files).
804-
// In any case it happens in rare cases that a non-polymeric chain is not associated to any polymeric
805-
// chain, e.g.
806-
// - 2uub: asym_id X, chainId Z, entity_id 24: fully non-polymeric but still with its own chainId
807-
// - 3o6j: asym_id K, chainId Z, entity_id 6 : a single water molecule
808-
// - 1dz9: asym_id K, chainId K, entity_id 6 : a potassium ion alone
809-
// We will discard those chains here, because they don't fit into the current data model and thus
810-
// can cause problems, e.g.
811-
// a) they would not be linked to a compound and give null pointers
812-
// b) StructureTools.getAllAtoms() methods that return all atoms except waters would have
813-
// empty lists for water-only chains
800+
801+
// Compounds are not added for non-polymeric entities, so if a chain is non-polymeric its compound won't be found.
802+
// TODO: add all entities and unique compounds and add methods to directly get polymer or non-polymer
803+
// asyms (chains). Either create a unique StructureImpl or modify the existing one for a better representation of the
804+
// mmCIF internal data structures that is still compatible with the Structure interface.
814805
Compound compound = structure.getCompoundById(eId);
815806
if (compound==null) {
816-
logger.warn("Could not find a compound for entity_id {} corresponding to chain id {} (asym id {})."
817-
+ " Most likely it is a purely non-polymeric chain ({} groups). Removing it from this structure.",
818-
eId,chain.getChainID(),chain.getInternalChainID(),chain.getAtomGroups().size());
819-
it.remove();
807+
// Supports the case where the only chain members were from a non-polymeric entity that is missing.
808+
// Solved by creating a new Compound(entity) to which this chain will belong.
809+
logger.warn("Could not find a compound for entity_id {}, for chain id {}, creating a new compound.",
810+
eId, chain.getChainID());
811+
compound = new Compound();
812+
compound.setId((long)eId);
813+
compound.addChain(chain);
814+
structure.addCompound(compound);
820815
} else {
821816
logger.debug("Adding chain with chain id {} (asym id {}) to compound with entity_id {}",
822817
chain.getChainID(), chain.getInternalChainID(), eId);
@@ -1109,12 +1104,20 @@ private void addCompounds(StructAsym asym) {
11091104
// get the corresponding Entity
11101105
Compound c = structure.getCompoundById(eId);
11111106
if ( c == null){
1112-
if (e!=null && e.getType().equals("polymer")) {
1113-
c = createNewCompoundFromESG(esg, eId);
1114-
c.setMolName(e.getPdbx_description());
1115-
structure.addCompound(c);
1116-
logger.debug("Adding Compound with entity id {} from _entity_src_syn, with name: {}",eId,c.getMolName());
1117-
}
1107+
if (e!=null) {
1108+
if (e.getType().equals("polymer")) {
1109+
c = createNewCompoundFromESG(esg, eId);
1110+
c.setMolName(e.getPdbx_description());
1111+
structure.addCompound(c);
1112+
logger.debug("Adding Compound with entity id {} from _entity_src_syn, with name: {}",eId,c.getMolName());
1113+
} else if (e.getType().equals("non-solvent")) {
1114+
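// TODO handle non-polymer compounds. NOTE(review): the mmCIF _entity.type value is "non-polymer", not "non-solvent" - confirm the check above.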
1115+
} else if (e.getType().equals("water")) {
1116+
// TODO handle solvent entity.
1117+
} else {
1118+
logger.warn("Could not add entity id " + esg.getEntity_id() + " that has unknown _entity.type");
1119+
}
1120+
}
11181121
}
11191122

11201123
}

biojava-structure/src/test/java/org/biojava/nbio/structure/io/TestNonDepositedFiles.java

Lines changed: 54 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,18 @@
2020
*/
2121
package org.biojava.nbio.structure.io;
2222

23+
import static org.junit.Assert.assertEquals;
24+
import static org.junit.Assert.assertFalse;
25+
import static org.junit.Assert.assertNotNull;
26+
import static org.junit.Assert.assertNull;
27+
import static org.junit.Assert.assertTrue;
28+
29+
import java.io.BufferedReader;
30+
import java.io.IOException;
31+
import java.io.InputStream;
32+
import java.io.InputStreamReader;
33+
import java.util.zip.GZIPInputStream;
34+
2335
import org.biojava.nbio.structure.Chain;
2436
import org.biojava.nbio.structure.Structure;
2537
import org.biojava.nbio.structure.StructureException;
@@ -31,14 +43,6 @@
3143
import org.biojava.nbio.structure.xtal.CrystalCell;
3244
import org.junit.Test;
3345

34-
import java.io.BufferedReader;
35-
import java.io.IOException;
36-
import java.io.InputStream;
37-
import java.io.InputStreamReader;
38-
import java.util.zip.GZIPInputStream;
39-
40-
import static org.junit.Assert.*;
41-
4246
/**
4347
* Tests for non-deposited PDB/mmCIF files, i.e. any kind of "raw" file
4448
* lacking significant parts of the headers.
@@ -285,4 +289,46 @@ public void testRefmacPdbFile() throws IOException {
285289
assertEquals(1, s.getCompounds().size());
286290
}
287291

292+
/**
293+
* This test represents a common situation for a non-deposited structure.
294+
* When building with common crystallography software, the user often adds new
295+
* ligand (or solvent) molecules as new chains. Only prior to deposition
296+
* are they relabelled so that they belong to the same chain as the polymeric residues.
297+
*
298+
* In this case, the ligands represent valuable information and should not be discarded.
299+
*/
300+
@Test
301+
public void testNewLigandChain() throws IOException, StructureException {
302+
// Parse the same test structure from PDB and mmCIF; both parsers must keep chain B with its one group.
303+
304+
InputStream pdbStream = new GZIPInputStream(this.getClass().getResourceAsStream("/ligandTest.pdb.gz"));
305+
InputStream cifStream = new GZIPInputStream(this.getClass().getResourceAsStream("/ligandTest.cif.gz"));
306+
307+
// NOTE(review): these checks run after the streams are wrapped in GZIPInputStream, so a missing
// resource would presumably already have failed above - confirm whether they are still useful.
assertNotNull(cifStream);
308+
assertNotNull(pdbStream);
309+
310+
FileParsingParameters params = new FileParsingParameters();
311+
PDBFileParser pdbpars = new PDBFileParser();
312+
pdbpars.setFileParsingParameters(params);
313+
Structure s1 = pdbpars.parsePDBFile(pdbStream) ;
314+
315+
// PDB parser: chain B should be present with 1 ligand (HEM)
316+
Chain c1 = s1.getChainByPDB("B");
317+
assertNotNull(c1);
318+
319+
int expectedNumLigands = 1;
320+
assertEquals(expectedNumLigands, c1.getAtomGroups().size());
321+
322+
MMcifParser mmcifpars = new SimpleMMcifParser();
323+
SimpleMMcifConsumer consumer = new SimpleMMcifConsumer();
324+
consumer.setFileParsingParameters(params);
325+
mmcifpars.addMMcifConsumer(consumer);
326+
mmcifpars.parse(cifStream) ;
327+
Structure s2 = consumer.getStructure();
328+
329+
// mmCIF parser: chain B should be present with 1 ligand (HEM), same expectation as for the PDB file
330+
Chain c2 = s2.getChainByPDB("B");
331+
assertNotNull(c2);
332+
assertEquals(expectedNumLigands, c2.getAtomGroups().size());
333+
}
288334
}
29.4 KB
Binary file not shown.
23.3 KB
Binary file not shown.

0 commit comments

Comments
 (0)