Skip to content

Commit 07e1b1c

Browse files
committed
Updates to the Biojava code to include improved handling of Entity information.
A test has been added to cover entity information. The way entity information is compiled has been changed so that it is gathered from multiple sources in the mmCIF. The type "Compound" has been refactored to "EntityInfo".
1 parent 44d8c3f commit 07e1b1c

25 files changed

+285
-269
lines changed

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/StructureTest.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -119,13 +119,13 @@ public void testReadPDBFile() throws Exception {
119119
assertEquals(64, c2.getAtomGroups(GroupType.HETATM).size());
120120
assertEquals(0, c2.getAtomGroups(GroupType.NUCLEOTIDE).size());
121121

122-
List<Compound> compounds= structure.getCompounds();
122+
List<EntityInfo> compounds= structure.getEntityInformation();
123123

124124
// from Biojava 4.2 on we are creating compounds whenever an entity is found to be without an assigned compound in the file
125125
// see issues https://github.com/biojava/biojava/issues/305 and https://github.com/biojava/biojava/pull/394
126126
assertEquals(2, compounds.size());
127-
Compound mol = compounds.get(0);
128-
assertTrue(mol.getMolName().startsWith("TRYPSIN INHIBITOR"));
127+
EntityInfo mol = compounds.get(0);
128+
assertTrue(mol.getDescription().startsWith("TRYPSIN INHIBITOR"));
129129
}
130130

131131
@Test
@@ -203,14 +203,14 @@ public void testPDBHeader(){
203203
assertEquals("the technique in the Header is " + technique, techShould, technique);
204204

205205

206-
List <Compound> compounds = structure.getCompounds();
206+
List <EntityInfo> compounds = structure.getEntityInformation();
207207

208208
// from Biojava 4.2 on we are creating compounds whenever an entity is found to be without an assigned compound in the file
209209
// see issues https://github.com/biojava/biojava/issues/305 and https://github.com/biojava/biojava/pull/394
210210
assertEquals("did not find the right number of compounds! ", 2, compounds.size());
211211

212-
Compound comp = compounds.get(0);
213-
assertEquals("did not get the right compounds info",true,comp.getMolName().startsWith("TRYPSIN INHIBITOR"));
212+
EntityInfo comp = compounds.get(0);
213+
assertEquals("did not get the right compounds info",true,comp.getDescription().startsWith("TRYPSIN INHIBITOR"));
214214

215215
List<String> chainIds = comp.getChainIds();
216216
List<Chain> chains = comp.getChains();

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/io/TestLongPdbVsMmCifParsing.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -210,15 +210,15 @@ private void testStructureMethods(Structure sPdb, Structure sCif) {
210210
// compounds: there's quite some inconsistencies here between pdb and cif:
211211
// sugar polymers are not in pdb at all: we avoid them
212212
boolean canCompareCompoundsSize = true;
213-
for (Compound compound: sCif.getCompounds()) {
214-
if (compound.getMolName()==null || compound.getMolName().contains("SUGAR")) {
213+
for (EntityInfo compound: sCif.getEntityInformation()) {
214+
if (compound.getDescription()==null || compound.getDescription().contains("SUGAR")) {
215215
canCompareCompoundsSize = false;
216216
break;
217217
}
218218
}
219219

220220
if (canCompareCompoundsSize)
221-
assertEquals("failed number of Compounds pdb vs cif", sPdb.getCompounds().size(), sCif.getCompounds().size());
221+
assertEquals("failed number of Compounds pdb vs cif", sPdb.getEntityInformation().size(), sCif.getEntityInformation().size());
222222

223223

224224
// ss bonds

biojava-structure/src/main/java/demo/DemoMMCIFReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ public void loadFromDirectAccess(){
102102
System.out.println(h.getAtomSequence());
103103
System.out.println(h.getAtomGroups(GroupType.HETATM));
104104

105-
System.out.println("Compounds: " + s.getCompounds());
105+
System.out.println("Compounds: " + s.getEntityInformation());
106106

107107
} catch (Exception e) {
108108
e.printStackTrace();

biojava-structure/src/main/java/org/biojava/nbio/structure/Chain.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,15 +195,15 @@ public interface Chain {
195195
* @param compound the Compound
196196
* @see #getCompound()
197197
*/
198-
public void setCompound(Compound compound);
198+
public void setCompound(EntityInfo compound);
199199

200200
/**
201201
* Returns the Compound for this chain.
202202
*
203203
* @return the Compound object
204-
* @see #setCompound(Compound)
204+
* @see #setCompound(EntityInfo)
205205
*/
206-
public Compound getCompound();
206+
public EntityInfo getCompound();
207207

208208
/**
209209
* Sets the name of this chain (Chain id in PDB file ).

biojava-structure/src/main/java/org/biojava/nbio/structure/ChainImpl.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ public class ChainImpl implements Chain, Serializable {
6565
private List<Group> seqResGroups;
6666

6767
private Long id;
68-
private Compound mol;
68+
private EntityInfo mol;
6969
private Structure parent;
7070

7171
private Map<String, Integer> pdbResnumMap;
@@ -217,15 +217,15 @@ private static int findMathingGroupIndex(List<Group> atomGroups, Group g) {
217217
*
218218
*/
219219
@Override
220-
public void setCompound(Compound mol) {
220+
public void setCompound(EntityInfo mol) {
221221
this.mol = mol;
222222
}
223223

224224
/** {@inheritDoc}
225225
*
226226
*/
227227
@Override
228-
public Compound getCompound() {
228+
public EntityInfo getCompound() {
229229
return this.mol;
230230
}
231231

@@ -516,8 +516,8 @@ public String toString(){
516516
StringBuilder str = new StringBuilder();
517517
str.append("Chain >").append(getChainID()).append("<").append(newline);
518518
if ( mol != null ){
519-
if ( mol.getMolName() != null){
520-
str.append(mol.getMolName()).append(newline);
519+
if ( mol.getDescription() != null){
520+
str.append(mol.getDescription()).append(newline);
521521
}
522522
}
523523
str.append("total SEQRES length: ").append(getSeqResGroups().size()).append(" total ATOM length:")

biojava-structure/src/main/java/org/biojava/nbio/structure/Compound.java renamed to biojava-structure/src/main/java/org/biojava/nbio/structure/EntityInfo.java

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,12 @@
4848
*
4949
* @author Jules Jacobsen
5050
* @author Jose Duarte
51+
* @author Anthony Bradley
5152
* @since 1.5
5253
*/
53-
public class Compound implements Serializable {
54+
public class EntityInfo implements Serializable {
5455

55-
private final static Logger logger = LoggerFactory.getLogger(Compound.class);
56+
private final static Logger logger = LoggerFactory.getLogger(EntityInfo.class);
5657

5758

5859
//TODO we should consider having the data here as it is in mmCIF dictionary - JD 2014-12-11
@@ -80,21 +81,21 @@ public class Compound implements Serializable {
8081
private Map<String, Map<ResidueNumber,Integer>> chains2pdbResNums2ResSerials;
8182

8283
private String refChainId;
83-
84-
private String molName = null;
84+
private String description = null;
8585
private String title = null;
86+
/**
87+
* The type of group (polymer, non-polymer, water)
88+
*/
89+
private String type = null;
8690
private List<String> synonyms = null;
8791
private List<String> ecNums = null;
8892
private String engineered = null;
8993
private String mutation = null;
9094
private String biologicalUnit = null;
9195
private String details = null;
92-
9396
private String numRes = null;
9497
private String resNames = null;
95-
9698
private String headerVars = null;
97-
9899
private String synthetic = null;
99100
private String fragment = null;
100101
private String organismScientific = null;
@@ -130,7 +131,7 @@ public class Compound implements Serializable {
130131

131132
private Long id;
132133

133-
public Compound () {
134+
public EntityInfo () {
134135
chains = new ArrayList<Chain>();
135136
chains2pdbResNums2ResSerials = new HashMap<String, Map<ResidueNumber,Integer>>();
136137
molId = -1;
@@ -141,7 +142,7 @@ public Compound () {
141142
* but not setting the Chains
142143
* @param c
143144
*/
144-
public Compound (Compound c) {
145+
public EntityInfo (EntityInfo c) {
145146

146147
this.chains = new ArrayList<Chain>();
147148

@@ -151,7 +152,7 @@ public Compound (Compound c) {
151152

152153
this.refChainId = c.refChainId;
153154

154-
this.molName = c.molName;
155+
this.description = c.description;
155156
this.title = c.title;
156157

157158
if (c.synonyms!=null) {
@@ -213,7 +214,7 @@ public Compound (Compound c) {
213214
public String toString(){
214215
StringBuilder buf = new StringBuilder();
215216
buf.append("Compound: ").append(molId).append(" ");
216-
buf.append(molName==null?"(no name)":"("+molName+")");
217+
buf.append(description==null?"(no name)":"("+description+")");
217218
buf.append(" chains: ");
218219
if (chains!=null) {
219220
for (int i=0;i<chains.size();i++) {
@@ -292,8 +293,8 @@ public void showCompound() {
292293
}
293294
System.out.println("Chains: " + buf.toString());
294295
}
295-
if (this.molName != null) {
296-
System.out.println("Mol Name: " + this.molName);
296+
if (this.description != null) {
297+
System.out.println("Mol Name: " + this.description);
297298
}
298299
if (this.title != null) {
299300
System.out.println("Title: " + this.title);
@@ -619,12 +620,12 @@ public void setMolId(int molId) {
619620
this.molId = molId;
620621
}
621622

622-
public String getMolName() {
623-
return molName;
623+
public String getDescription() {
624+
return description;
624625
}
625626

626-
public void setMolName(String molName) {
627-
this.molName = molName;
627+
public void setDescription(String molName) {
628+
this.description = molName;
628629
}
629630

630631
public String getTitle() {
@@ -988,4 +989,18 @@ public void addChain(Chain chain){
988989
public void setChains(List<Chain> chains){
989990
this.chains = chains;
990991
}
992+
993+
/**
994+
* @return the type
995+
*/
996+
public String getType() {
997+
return type;
998+
}
999+
1000+
/**
1001+
* @param type the type to set
1002+
*/
1003+
public void setType(String type) {
1004+
this.type = type;
1005+
}
9911006
}

biojava-structure/src/main/java/org/biojava/nbio/structure/Structure.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
* <ul>
4141
* <li>{@link PDBHeader}</li>
4242
* <li>{@link DBRef}</li>
43-
* <li>{@link Compound}</li>
43+
* <li>{@link EntityInfo}</li>
4444
* </ul>
4545
*
4646
* The structure object provides access to the data from the ATOM records through
@@ -518,20 +518,20 @@ public Chain getChainByPDB(String chainId, int modelnr)
518518
*
519519
* @param molList
520520
*/
521-
public void setCompounds(List<Compound> molList);
521+
public void setCompounds(List<EntityInfo> molList);
522522

523523
/**
524524
* Get all the Compounds for this Structure.
525525
* Compounds are called Entities in mmCIF dictionary.
526526
*
527527
* @return a list of Compounds
528528
*/
529-
public List<Compound> getCompounds();
529+
public List<EntityInfo> getEntityInformation();
530530

531531
/**
532532
* Add a Compound to this Structure
533533
*/
534-
public void addCompound(Compound compound);
534+
public void addCompound(EntityInfo compound);
535535

536536
/**
537537
* Set the list of database references for this structure
@@ -553,7 +553,7 @@ public Chain getChainByPDB(String chainId, int modelnr)
553553
* @param molId
554554
* @return a compound
555555
*/
556-
public Compound getCompoundById(int molId);
556+
public EntityInfo getCompoundById(int molId);
557557

558558

559559
/**

biojava-structure/src/main/java/org/biojava/nbio/structure/StructureImpl.java

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ public class StructureImpl implements Structure, Serializable {
5757
private List<List<Chain>> models;
5858

5959
private List<Map <String,Integer>> connections ;
60-
private List<Compound> compounds;
60+
private List<EntityInfo> compounds;
6161
private List<DBRef> dbrefs;
6262
private List<Bond> ssbonds;
6363
private List<Site> sites;
@@ -79,7 +79,7 @@ public StructureImpl() {
7979
models = new ArrayList<List<Chain>>();
8080
name = "";
8181
connections = new ArrayList<Map<String,Integer>>();
82-
compounds = new ArrayList<Compound>();
82+
compounds = new ArrayList<EntityInfo>();
8383
dbrefs = new ArrayList<DBRef>();
8484
pdbHeader = new PDBHeader();
8585
ssbonds = new ArrayList<Bond>();
@@ -169,9 +169,9 @@ public Structure clone() {
169169

170170
// deep-copying of Compounds is tricky: there's cross references also in the Chains
171171
// beware: if we copy the compounds we would also need to reset the references to compounds in the individual chains
172-
List<Compound> newCompoundList = new ArrayList<Compound>();
173-
for (Compound compound:this.compounds) {
174-
Compound newCompound = new Compound(compound); // this sets everything but the chains
172+
List<EntityInfo> newCompoundList = new ArrayList<EntityInfo>();
173+
for (EntityInfo compound:this.compounds) {
174+
EntityInfo newCompound = new EntityInfo(compound); // this sets everything but the chains
175175
for (String chainId:compound.getChainIds()) {
176176

177177
for (int modelNr=0;modelNr<n.nrModels();modelNr++) {
@@ -443,8 +443,8 @@ public String toString(){
443443

444444
str.append("chain ").append(j).append(": >").append(cha.getChainID()).append("< ");
445445
if ( cha.getCompound() != null){
446-
Compound comp = cha.getCompound();
447-
String molName = comp.getMolName();
446+
EntityInfo comp = cha.getCompound();
447+
String molName = comp.getDescription();
448448
if ( molName != null){
449449
str.append(molName);
450450
}
@@ -465,7 +465,7 @@ public String toString(){
465465
str.append(dbref.toPDB()).append(newline);
466466
}
467467
str.append("Molecules: ").append(newline);
468-
for (Compound mol : compounds) {
468+
for (EntityInfo mol : compounds) {
469469
str.append(mol).append(newline);
470470
}
471471

@@ -665,19 +665,19 @@ public boolean hasChain(String chainId) {
665665

666666
/** {@inheritDoc} */
667667
@Override
668-
public void setCompounds(List<Compound> molList){
668+
public void setCompounds(List<EntityInfo> molList){
669669
this.compounds = molList;
670670
}
671671

672672
/** {@inheritDoc} */
673673
@Override
674-
public void addCompound(Compound compound) {
674+
public void addCompound(EntityInfo compound) {
675675
this.compounds.add(compound);
676676
}
677677

678678
/** {@inheritDoc} */
679679
@Override
680-
public List<Compound> getCompounds() {
680+
public List<EntityInfo> getEntityInformation() {
681681
// compounds are parsed from the PDB/mmCIF file normally
682682
// but if the file is incomplete, it won't have the Compounds information and we try
683683
// to guess it from the existing seqres/atom sequences
@@ -686,7 +686,7 @@ public List<Compound> getCompounds() {
686686
this.compounds = cf.findCompounds();
687687

688688
// now we need to set references in chains:
689-
for (Compound compound:compounds) {
689+
for (EntityInfo compound:compounds) {
690690
for (Chain c:compound.getChains()) {
691691
c.setCompound(compound);
692692
}
@@ -697,8 +697,8 @@ public List<Compound> getCompounds() {
697697

698698
/** {@inheritDoc} */
699699
@Override
700-
public Compound getCompoundById(int molId) {
701-
for (Compound mol : this.compounds){
700+
public EntityInfo getCompoundById(int molId) {
701+
for (EntityInfo mol : this.compounds){
702702
if (mol.getMolId()==molId){
703703
return mol;
704704
}

0 commit comments

Comments
 (0)