Skip to content

Commit cc19d5e

Browse files
committed
Implementing new crystallographic metadata fields.
1 parent cebd7f0 commit cc19d5e

File tree

5 files changed

+175
-72
lines changed

5 files changed

+175
-72
lines changed
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package org.biojava.nbio.structure.test.io;
2+
3+
import org.junit.Test;
4+
import static org.junit.Assert.*;
5+
6+
import org.biojava.nbio.structure.Structure;
7+
import org.biojava.nbio.structure.StructureIO;
8+
import org.biojava.nbio.structure.align.util.AtomCache;
9+
10+
/**
11+
* A test for the parsing of some crystallographic metadata: non-standard space group and non-standard coordinate frame convention.
12+
*
13+
*
14+
* For more info see https://github.com/eppic-team/owl/issues/4 and https://github.com/eppic-team/eppic/issues/37
15+
*
16+
*
17+
*
18+
* @author Jose Duarte
19+
* @since 4.2.5
20+
*/
21+
public class TestCrystallographicMetadata {
22+
23+
24+
@Test
25+
public void test4hhb() throws Exception {
26+
27+
AtomCache cache = new AtomCache();
28+
// at the moment implemented only in mmcif
29+
cache.setUseMmCif(true);
30+
StructureIO.setAtomCache(cache);
31+
32+
Structure s = StructureIO.getStructure("4hhb");
33+
34+
// 4hhb is one of the few entries that aren't in the standard coordinate frame convention
35+
assertTrue(s.getCrystallographicInfo().isNonStandardCoordFrameConvention());
36+
37+
// 4hhn has a standard SG
38+
assertFalse(s.getCrystallographicInfo().isNonStandardSg());
39+
assertNotNull(s.getCrystallographicInfo().getSpaceGroup());
40+
}
41+
42+
@Test
43+
public void test1smt() throws Exception {
44+
45+
AtomCache cache = new AtomCache();
46+
// at the moment implemented only in mmcif
47+
cache.setUseMmCif(true);
48+
StructureIO.setAtomCache(cache);
49+
50+
Structure s = StructureIO.getStructure("1smt");
51+
52+
// 1smt is a normal entry, should be standard
53+
assertFalse(s.getCrystallographicInfo().isNonStandardCoordFrameConvention());
54+
55+
// 1smt has a standard SG
56+
assertFalse(s.getCrystallographicInfo().isNonStandardSg());
57+
assertNotNull(s.getCrystallographicInfo().getSpaceGroup());
58+
59+
}
60+
61+
@Test
62+
public void test1zna() throws Exception {
63+
AtomCache cache = new AtomCache();
64+
// at the moment implemented only in mmcif
65+
cache.setUseMmCif(true);
66+
StructureIO.setAtomCache(cache);
67+
68+
Structure s = StructureIO.getStructure("1zna");
69+
70+
// 1zna is one of the few entries that has a non-standard SG
71+
assertTrue(s.getCrystallographicInfo().isNonStandardSg());
72+
assertNull(s.getCrystallographicInfo().getSpaceGroup());
73+
}
74+
75+
76+
}

biojava-structure/src/main/java/org/biojava/nbio/structure/PDBCrystallographicInfo.java

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,23 @@ public class PDBCrystallographicInfo implements Serializable {
4848
* are not stored.
4949
*/
5050
private Matrix4d[] ncsOperators;
51+
52+
/**
53+
* Whether this structure has a non-standard space group not supported
54+
* by Biojava. If this is true the sg member will be null.
55+
* @since 4.2.5
56+
*/
57+
private boolean nonStandardSg;
58+
59+
/**
60+
* Whether this structure uses a non-standard coordinate frame convention, for which our scale matrix
61+
* calculation and thus the crystal reconstruction will be incorrect.
62+
* There are ~200 old structures in the PDB affected by the non-standard frame problem; hopefully they will
63+
* be remediated in the future.
64+
* For more info see: https://github.com/eppic-team/owl/issues/4
65+
* @since 4.2.5
66+
*/
67+
private boolean nonStandardCoordFrameConvention;
5168

5269
public PDBCrystallographicInfo() {
5370

@@ -179,6 +196,45 @@ public Matrix4d[] getNcsOperators() {
179196
public void setNcsOperators(Matrix4d[] ncsOperators) {
180197
this.ncsOperators = ncsOperators;
181198
}
199+
200+
/**
201+
* Whether this structure has a non-standard space group not supported
202+
* by Biojava. If this is true {@link #getSpaceGroup()} will be null.
203+
* @since 4.2.5
204+
*/
205+
public boolean isNonStandardSg() {
206+
return nonStandardSg;
207+
}
208+
209+
/**
210+
* Set the non-standard space group field
211+
* @param nonStandardSg
212+
* @since 4.2.5
213+
*/
214+
public void setNonStandardSg(boolean nonStandardSg) {
215+
this.nonStandardSg = nonStandardSg;
216+
}
217+
218+
/**
219+
* Whether this structure is non-standard coordinate frame convention, for which our scale matrix
220+
* calculation and thus the crystal reconstruction will be incorrect.
221+
* There's ~ 200 old structures in the PDB affected by the non-standard frame problem, hopefully they will
222+
* be remediated in the future.
223+
* For more info see: https://github.com/eppic-team/owl/issues/4
224+
* @since 4.2.5
225+
*/
226+
public boolean isNonStandardCoordFrameConvention() {
227+
return nonStandardCoordFrameConvention;
228+
}
229+
230+
/**
231+
* Set the non-standard coordinate frame convention field
232+
* @param nonStandardCoordFrameConvention
233+
* @since 4.2.5
234+
*/
235+
public void setNonStandardCoordFrameConvention(boolean nonStandardCoordFrameConvention) {
236+
this.nonStandardCoordFrameConvention = nonStandardCoordFrameConvention;
237+
}
182238

183239

184240
@Override

biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import org.biojava.nbio.structure.GroupType;
4747
import org.biojava.nbio.structure.HetatomImpl;
4848
import org.biojava.nbio.structure.NucleotideImpl;
49+
import org.biojava.nbio.structure.PDBCrystallographicInfo;
4950
import org.biojava.nbio.structure.PDBHeader;
5051
import org.biojava.nbio.structure.ResidueNumber;
5152
import org.biojava.nbio.structure.SeqMisMatch;
@@ -151,7 +152,7 @@ public class SimpleMMcifConsumer implements MMcifConsumer {
151152
private List<StructRefSeqDif> sequenceDifs;
152153
private List<StructSiteGen> structSiteGens;
153154

154-
private AtomSites atomSites;
155+
private Matrix4d parsedScaleMatrix;
155156

156157

157158

@@ -846,6 +847,8 @@ public void documentEnd() {
846847
}
847848

848849
setStructNcsOps();
850+
851+
setCrystallographicInfoMetadata();
849852

850853

851854
Map<String,List<SeqMisMatch>> misMatchMap = new HashMap<String, List<SeqMisMatch>>();
@@ -1320,6 +1323,20 @@ private void setStructNcsOps() {
13201323
ncsOperators.toArray(new Matrix4d[ncsOperators.size()]));
13211324
}
13221325
}
1326+
1327+
private void setCrystallographicInfoMetadata() {
1328+
if (parsedScaleMatrix!=null) {
1329+
1330+
PDBCrystallographicInfo crystalInfo = structure.getCrystallographicInfo();
1331+
1332+
boolean nonStd = false;
1333+
if (!crystalInfo.getCrystalCell().checkScaleMatrix(parsedScaleMatrix)) {
1334+
nonStd = true;
1335+
}
1336+
1337+
crystalInfo.setNonStandardCoordFrameConvention(nonStd);
1338+
}
1339+
}
13231340

13241341

13251342
/** This method will return the parsed protein structure, once the parsing has been finished
@@ -1573,9 +1590,13 @@ public void newCell(Cell cell) {
15731590
public void newSymmetry(Symmetry symmetry) {
15741591
String spaceGroup = symmetry.getSpace_group_name_H_M();
15751592
SpaceGroup sg = SymoplibParser.getSpaceGroup(spaceGroup);
1576-
if (sg==null) logger.warn("Space group '"+spaceGroup+"' not recognised as a standard space group");
1577-
1578-
structure.getPDBHeader().getCrystallographicInfo().setSpaceGroup(sg);
1593+
if (sg==null) {
1594+
logger.warn("Space group '"+spaceGroup+"' not recognised as a standard space group");
1595+
structure.getPDBHeader().getCrystallographicInfo().setNonStandardSg(true);
1596+
} else {
1597+
structure.getPDBHeader().getCrystallographicInfo().setSpaceGroup(sg);
1598+
structure.getPDBHeader().getCrystallographicInfo().setNonStandardSg(false);
1599+
}
15791600
}
15801601

15811602
@Override
@@ -1584,7 +1605,22 @@ public void newStructNcsOper(StructNcsOper sNcsOper) {
15841605
}
15851606

15861607
public void newAtomSites(AtomSites atomSites) {
1587-
this.atomSites = atomSites;
1608+
1609+
try {
1610+
Matrix4d m = new Matrix4d(
1611+
Double.parseDouble(atomSites.getFract_transf_matrix11()), Double.parseDouble(atomSites.getFract_transf_matrix12()), Double.parseDouble(atomSites.getFract_transf_matrix13()), Double.parseDouble(atomSites.getFract_transf_vector1()),
1612+
Double.parseDouble(atomSites.getFract_transf_matrix21()), Double.parseDouble(atomSites.getFract_transf_matrix22()), Double.parseDouble(atomSites.getFract_transf_matrix23()), Double.parseDouble(atomSites.getFract_transf_vector2()),
1613+
Double.parseDouble(atomSites.getFract_transf_matrix31()), Double.parseDouble(atomSites.getFract_transf_matrix32()), Double.parseDouble(atomSites.getFract_transf_matrix33()), Double.parseDouble(atomSites.getFract_transf_vector3()),
1614+
0,0,0,1);
1615+
1616+
parsedScaleMatrix = m;
1617+
1618+
} catch (NumberFormatException e) {
1619+
logger.warn("Some values in _atom_sites.fract_transf_matrix or _atom_sites.fract_transf_vector could not be parsed as numbers. Can't check whether coordinate frame convention is correct! Error: {}", e.getMessage());
1620+
structure.getPDBHeader().getCrystallographicInfo().setNonStandardCoordFrameConvention(false);
1621+
1622+
// in this case parsedScaleMatrix stays null and can't be used in documentEnd()
1623+
}
15881624
}
15891625

15901626
@Override
@@ -1904,11 +1940,6 @@ public List<PdbxStructOperList> getStructOpers() {
19041940
return structOpers;
19051941
}
19061942

1907-
public AtomSites getAtomSites() {
1908-
return atomSites;
1909-
}
1910-
1911-
19121943
@Override
19131944
public void newPdbxStrucAssembly(PdbxStructAssembly strucAssembly) {
19141945
strucAssemblies.add(strucAssembly);

biojava-structure/src/main/java/org/biojava/nbio/structure/xtal/CrystalCell.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -547,9 +547,10 @@ public boolean checkScaleMatrixConsistency(Matrix4d scaleMatrix) {
547547
*/
548548
public boolean checkScaleMatrix(Matrix4d scaleMatrix) {
549549

550+
Matrix3d mtranspose = getMTranspose();
550551
for (int i=0;i<3;i++) {
551552
for (int j=0;j<3;j++) {
552-
if (!deltaComp(getMTranspose().getElement(i, j),scaleMatrix.getElement(i, j))) {
553+
if (!deltaComp(mtranspose.getElement(i, j),scaleMatrix.getElement(i, j))) {
553554
//System.out.println("Our value ("+i+","+j+"): "+getM().getElement(i,j));
554555
//System.out.println("Their value ("+i+","+j+"): "+scaleMatrix.getElement(i,j));
555556
return false;

biojava-structure/src/test/java/org/biojava/nbio/structure/io/mmcif/TestAtomSitesParsing.java

Lines changed: 0 additions & 61 deletions
This file was deleted.

0 commit comments

Comments
 (0)