Skip to content

Commit 19fbf7c

Browse files
committed
Fixing a few issues related to biojava#343
1) handling of alt locations; 2) handling of insertion codes; 3) not trying lookup of groups if the bond is not a disulfide; 4) PDB parser now also skips symmetry-partner disulfides; 5) better logging; 6) added a test for the number of SS bonds in TestLongPdbVsMmCifParsing
1 parent bdfc029 commit 19fbf7c

5 files changed

Lines changed: 153 additions & 48 deletions

File tree

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/io/TestLongPdbVsMmCifParsing.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ public void testVeryLongPdbVsMmCif() throws IOException, StructureException {
124124

125125
@Test
126126
public void testSingle() throws IOException, StructureException {
127-
testAll(Arrays.asList("4a10"));
127+
testAll(Arrays.asList("2h5d"));
128128
}
129129

130130
@After
@@ -221,6 +221,12 @@ private void testStructureMethods(Structure sPdb, Structure sCif) {
221221
assertEquals("failed number of Compounds pdb vs cif", sPdb.getCompounds().size(), sCif.getCompounds().size());
222222

223223

224+
// ss bonds
225+
// 4ab9 contains an error in ssbond in pdb file (misses 1 ssbond)
226+
// 2bdi also contains errors: the ss bond counts differ a lot between the pdb and cif files (80 vs 92)
227+
if (!sPdb.getPDBCode().equals("4AB9") && !sPdb.getPDBCode().equals("2BDI"))
228+
assertEquals("number of ss bonds should coincide pdb vs cif", sPdb.getSSBonds().size(), sCif.getSSBonds().size());
229+
224230
}
225231

226232
private void testHeader(Structure sPdb, Structure sCif) {

biojava-structure/src/main/java/org/biojava/nbio/structure/SSBond.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424

2525
import java.io.Serializable;
2626

27-
/** A simple bean to store disulfide bridge information, the SSBOND records in the PDB files.
27+
/**
28+
* A simple bean to store disulfide bridge information, the SSBOND records in the PDB files.
2829
*
2930
* The two residues specified here are CYS residues that form a Disulfide bridge.
3031
*
@@ -38,7 +39,7 @@ public interface SSBond extends PDBRecord, Serializable, Cloneable {
3839
public String toPDB();
3940

4041
/**
41-
* append the PDB representation of this SSBOND to the provided StringBUffer
42+
* Append the PDB representation of this SSBOND to the provided StringBuffer
4243
*
4344
* @param buf a StringBuffer to print the PDB representation to
4445
*/
@@ -54,14 +55,14 @@ public interface SSBond extends PDBRecord, Serializable, Cloneable {
5455
public void setInsCode2(String insCode2);
5556

5657
/**
57-
* set serial number of this SSBOND in PDB file
58+
* Get serial number of this SSBOND in PDB file
5859
*
5960
* @return the serial number
6061
*/
6162
public int getSerNum();
6263

6364
/**
64-
* get serial number of this SSBOND in PDB file
65+
* Set serial number of this SSBOND in PDB file
6566
*
6667
* @param serNum
6768
*/
@@ -76,7 +77,7 @@ public interface SSBond extends PDBRecord, Serializable, Cloneable {
7677
public void setChainID2(String chainID2);
7778

7879
/**
79-
* get residue number for first CYS. number and insertion code are joint
80+
* Get residue number for first CYS. number and insertion code are joined
8081
* together.
8182
*
8283
* @return the residue number of the first CYS.
@@ -87,7 +88,7 @@ public interface SSBond extends PDBRecord, Serializable, Cloneable {
8788
public void setResnum1(String resnum1);
8889

8990
/**
90-
* get residue number for second CYS. number and insertion code are joint
91+
* Get residue number for second CYS. number and insertion code are joined
9192
* together.
9293
*
9394
* @return the residue number of the second CYS.

biojava-structure/src/main/java/org/biojava/nbio/structure/SSBondImpl.java

Lines changed: 76 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525

2626
import java.io.Serializable;
2727

28-
/** A simple bean to store disulfide bridge information, the SSBOND records in the PDB files.
28+
/**
29+
* A simple bean to store disulfide bridge information, the SSBOND records in the PDB files.
2930
*
3031
* The two residues specified here are CYS residues that form a Disulfide bridge.
3132
*
@@ -38,13 +39,14 @@ public class SSBondImpl implements PDBRecord, Serializable, Cloneable, SSBond {
3839

3940
private static final long serialVersionUID = -8663681100691188647L;
4041

41-
int serNum;
42-
String chainID1;
43-
String chainID2;
44-
String resnum1;
45-
String resnum2;
46-
String insCode1;
47-
String insCode2;
42+
private int serNum;
43+
44+
private String chainID1;
45+
private String chainID2;
46+
private String resnum1;
47+
private String resnum2;
48+
private String insCode1;
49+
private String insCode2;
4850

4951
public SSBondImpl(){
5052
serNum = 0;
@@ -59,7 +61,8 @@ public String toPDB(){
5961
return buf.toString();
6062
}
6163

62-
/** append the PDB representation of this SSBOND to the provided StringBUffer
64+
/**
65+
* Append the PDB representation of this SSBOND to the provided StringBuffer
6366
*
6467
* @param buf a StringBuffer to print the PDB representation to
6568
*/
@@ -110,10 +113,7 @@ public void setInsCode2(String insCode2) {
110113
this.insCode2 = insCode2;
111114
}
112115

113-
/** set serial number of this SSBOND in PDB file
114-
*
115-
* @return the serial number
116-
*/
116+
117117
@Override
118118
public int getSerNum() {
119119
return serNum;
@@ -155,12 +155,7 @@ public void setChainID2(String chainID2) {
155155
this.chainID2 = chainID2;
156156
}
157157

158-
/** get residue number for first CYS.
159-
* number and insertion code are joint together.
160-
*
161-
* @return the residue number of the first CYS.
162-
*
163-
*/
158+
164159
@Override
165160
public String getResnum1() {
166161
return resnum1;
@@ -170,12 +165,7 @@ public void setResnum1(String resnum1) {
170165
this.resnum1 = resnum1;
171166
}
172167

173-
/** get residue number for second CYS.
174-
* number and insertion code are joint together.
175-
*
176-
* @return the residue number of the second CYS.
177-
*
178-
*/
168+
179169
@Override
180170
public String getResnum2() {
181171
return resnum2;
@@ -203,4 +193,65 @@ public String toString() {
203193

204194
return s;
205195
}
196+
197+
/* (non-Javadoc)
198+
* @see java.lang.Object#hashCode()
199+
*/
200+
@Override
201+
public int hashCode() {
202+
final int prime = 31;
203+
int result = 1;
204+
result = prime * result + ((chainID1 == null) ? 0 : chainID1.hashCode());
205+
result = prime * result + ((chainID2 == null) ? 0 : chainID2.hashCode());
206+
result = prime * result + ((insCode1 == null) ? 0 : insCode1.hashCode());
207+
result = prime * result + ((insCode2 == null) ? 0 : insCode2.hashCode());
208+
result = prime * result + ((resnum1 == null) ? 0 : resnum1.hashCode());
209+
result = prime * result + ((resnum2 == null) ? 0 : resnum2.hashCode());
210+
return result;
211+
}
212+
213+
/* (non-Javadoc)
214+
* @see java.lang.Object#equals(java.lang.Object)
215+
*/
216+
@Override
217+
public boolean equals(Object obj) {
218+
if (this == obj)
219+
return true;
220+
if (obj == null)
221+
return false;
222+
if (getClass() != obj.getClass())
223+
return false;
224+
SSBondImpl other = (SSBondImpl) obj;
225+
if (chainID1 == null) {
226+
if (other.chainID1 != null)
227+
return false;
228+
} else if (!chainID1.equals(other.chainID1))
229+
return false;
230+
if (chainID2 == null) {
231+
if (other.chainID2 != null)
232+
return false;
233+
} else if (!chainID2.equals(other.chainID2))
234+
return false;
235+
if (insCode1 == null) {
236+
if (other.insCode1 != null)
237+
return false;
238+
} else if (!insCode1.equals(other.insCode1))
239+
return false;
240+
if (insCode2 == null) {
241+
if (other.insCode2 != null)
242+
return false;
243+
} else if (!insCode2.equals(other.insCode2))
244+
return false;
245+
if (resnum1 == null) {
246+
if (other.resnum1 != null)
247+
return false;
248+
} else if (!resnum1.equals(other.resnum1))
249+
return false;
250+
if (resnum2 == null) {
251+
if (other.resnum2 != null)
252+
return false;
253+
} else if (!resnum2.equals(other.resnum2))
254+
return false;
255+
return true;
256+
}
206257
}

biojava-structure/src/main/java/org/biojava/nbio/structure/io/PDBFileParser.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@
139139
*
140140
* @author Andreas Prlic
141141
* @author Jules Jacobsen
142+
* @author Jose Duarte
142143
* @since 1.4
143144
*/
144145
public class PDBFileParser {
@@ -2252,7 +2253,8 @@ private void pdb_DBREF_Handler(String line){
22522253

22532254
//}
22542255

2255-
/* process the disulfid bond info provided by an SSBOND record
2256+
/**
2257+
* Process the disulfide bond info provided by an SSBOND record
22562258
*
22572259
*
22582260
COLUMNS DATA TYPE FIELD DEFINITION
@@ -2285,7 +2287,19 @@ private void pdb_SSBOND_Handler(String line){
22852287
String chain2 = line.substring(29,30);
22862288
String seqNum2 = line.substring(31,35).trim();
22872289
String icode2 = line.substring(35,36);
2290+
2291+
if (line.length()>=72) {
2292+
String symop1 = line.substring(59, 65).trim();
2293+
String symop2 = line.substring(66, 72).trim();
22882294

2295+
// until we implement proper treatment of symmetry in biojava (#220), we can't deal with symmetry-related partners properly, so we skip them
2296+
if (!symop1.equals("") && !symop2.equals("") && // in case the field is missing
2297+
(!symop1.equals("1555") || !symop2.equals("1555")) ) {
2298+
logger.info("Skipping ss bond between groups {} and {} belonging to different symmetry partners, because it is not supported yet", seqNum1+icode1, seqNum2+icode2);
2299+
return;
2300+
}
2301+
}
2302+
22892303
if (icode1.equals(" "))
22902304
icode1 = "";
22912305
if (icode2.equals(" "))

biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1850,23 +1850,38 @@ private void createSSBonds() {
18501850
// For SSBond equivalent, parse through the struct_conn records
18511851
int internalId = 0;
18521852
for (StructConn conn : structConn) {
1853+
1854+
if (!conn.getConn_type_id().equals("disulf")) continue;
1855+
1856+
18531857
String ptnr1_chainId = conn.getPtnr1_auth_asym_id();
1854-
String ptnr1_seqId = conn.getPtnr1_auth_seq_id();
1858+
1859+
String insCode1 = "";
1860+
if (!conn.getPdbx_ptnr1_PDB_ins_code().equals("?")) insCode1 = conn.getPdbx_ptnr1_PDB_ins_code();
1861+
String insCode2 = "";
1862+
if (!conn.getPdbx_ptnr2_PDB_ins_code().equals("?")) insCode2 = conn.getPdbx_ptnr2_PDB_ins_code();
1863+
1864+
String ptnr1_seqId = conn.getPtnr1_auth_seq_id() + insCode1 ;
18551865
String ptnr2_chainId = conn.getPtnr2_auth_asym_id();
1856-
String ptnr2_seqId = conn.getPtnr2_auth_seq_id();
1866+
String ptnr2_seqId = conn.getPtnr2_auth_seq_id() + insCode2;
18571867
// conn.getId() would equal disulf#.
18581868

18591869
// if we can find both of these residues -
18601870
Group s1 = lookupResidue(ptnr1_chainId, ptnr1_seqId);
18611871
Group s2 = lookupResidue(ptnr2_chainId, ptnr2_seqId);
1862-
1863-
// TODO: when issue 220 is implemented, add robust symmetry handling
1864-
// to allow disulfide bonds between symmetry-related molecules.
18651872

18661873
// and is SS - then we should create a new disulfide bond.
18671874
if (null != s1 && null != s2) {
1868-
if ("CYS".equals(s1.getPDBName()) && symop.equals(conn.getPtnr1_symmetry())
1869-
&& "CYS".equals(s2.getPDBName()) && symop.equals(conn.getPtnr2_symmetry())) {
1875+
if ("CYS".equals(s1.getPDBName()) && "CYS".equals(s2.getPDBName()) ) {
1876+
1877+
// TODO: when issue 220 is implemented, add robust symmetry handling
1878+
// to allow disulfide bonds between symmetry-related molecules.
1879+
if (!conn.getPtnr1_symmetry().equals(symop) || !conn.getPtnr2_symmetry().equals(symop) ) {
1880+
logger.info("Skipping ss bond between groups {} and {} belonging to different symmetry partners, because it is not supported yet", s1.getResidueNumber(), s2.getResidueNumber());
1881+
continue;
1882+
}
1883+
1884+
18701885
SSBondImpl bond = new SSBondImpl();
18711886

18721887
bond.setSerNum(internalId++); // An internal label what bond #
@@ -1882,7 +1897,18 @@ private void createSSBonds() {
18821897
conn.setPdbx_ptnr2_PDB_ins_code(null);
18831898
}
18841899
bond.setInsCode2(conn.getPdbx_ptnr2_PDB_ins_code());
1885-
bonds.add(bond);
1900+
1901+
if (bonds.contains(bond)) {
1902+
if (s1.getAltLocs().isEmpty() && s2.getAltLocs().isEmpty()) { // check BOTH partners; the original tested s1 twice and never looked at s2
1903+
// no alt locs in either group: there's something weird with the file having two ss bonds for the same pair
1904+
logger.warn("SS bond between residues {} and {} is repeated in file and neither group has alt locations, won't add it to list of SS bonds. ", s1.getResidueNumber(), s2.getResidueNumber());
1905+
} else {
1906+
// there are alt locs in at least one of the groups. This is normal: the file contains one ssbond per alt loc, e.g. 3dvf
1907+
logger.info("SS bond between residues {} and {} is repeated in file and one of the 2 groups has alt locations. Adding only 1 SS bond for it. ", s1.getResidueNumber(), s2.getResidueNumber());
1908+
}
1909+
} else {
1910+
bonds.add(bond);
1911+
}
18861912
}
18871913
}
18881914
}
@@ -1896,18 +1922,25 @@ private void createSSBonds() {
18961922
* @param seqId
18971923
* @return Successful = Group, Failure = null
18981924
*/
1899-
Group lookupResidue(String chainId, String seqId) {
1925+
private Group lookupResidue(String chainId, String seqId) {
19001926
try {
19011927
Chain chain = structure.getChainByPDB(chainId);
19021928
if (null != chain) {
1903-
try {
1904-
return chain.getGroupByPDB(new ResidueNumber(chainId, Integer.parseInt(seqId), ' '));
1905-
} catch (NumberFormatException e) {
1906-
logger.warn("Could not lookup residue : " + chainId + seqId);
1907-
}
1929+
try {
1930+
ResidueNumber resNum = null;
1931+
if (Character.isAlphabetic( seqId.charAt(seqId.length()-1 )) ) {
1932+
resNum = new ResidueNumber(chainId, Integer.parseInt(seqId.substring(0,seqId.length()-1)), seqId.charAt(seqId.length()-1));
1933+
} else {
1934+
resNum = new ResidueNumber(chainId, Integer.parseInt(seqId), ' ');
1935+
}
1936+
1937+
return chain.getGroupByPDB(resNum);
1938+
} catch (NumberFormatException e) {
1939+
logger.warn("Could not parse number for residue number {} specified in _struct_conn record", chainId + "-" + seqId);
1940+
}
19081941
}
19091942
} catch (StructureException e) {
1910-
logger.warn("Problem finding residue in site entry " + chainId + seqId + " - " + e.getMessage(), e.getMessage());
1943+
logger.warn("Problem finding residue " + chainId + "-" + seqId + " specified in _struct_conn record." );
19111944
}
19121945
// Could not find.
19131946
return null;

0 commit comments

Comments
 (0)