Skip to content

Commit 0d63b65

Browse files
committed
Adding long-running ECOD test. Must be run manually.
Also moves EcodInstallationTest to the right package
1 parent 8be282f commit 0d63b65

File tree

2 files changed

+170
-1
lines changed

2 files changed

+170
-1
lines changed

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/domain/EcodInstallationTest.java renamed to biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/ecod/EcodInstallationTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
* http://www.biojava.org/
1919
*/
2020

21-
package org.biojava.nbio.structure.test.domain;
21+
package org.biojava.nbio.structure.test.ecod;
2222

2323
import static org.junit.Assert.*;
2424

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
package org.biojava.nbio.structure.test.ecod;
2+
3+
import java.io.IOException;
4+
import java.util.Arrays;
5+
import java.util.List;
6+
7+
import org.biojava.nbio.structure.Atom;
8+
import org.biojava.nbio.structure.AtomPositionMap;
9+
import org.biojava.nbio.structure.Group;
10+
import org.biojava.nbio.structure.ResidueRange;
11+
import org.biojava.nbio.structure.ResidueRangeAndLength;
12+
import org.biojava.nbio.structure.Structure;
13+
import org.biojava.nbio.structure.StructureException;
14+
import org.biojava.nbio.structure.StructureTools;
15+
import org.biojava.nbio.structure.align.util.AtomCache;
16+
import org.biojava.nbio.structure.ecod.EcodDatabase;
17+
import org.biojava.nbio.structure.ecod.EcodDomain;
18+
import org.biojava.nbio.structure.ecod.EcodFactory;
19+
import org.biojava.nbio.structure.io.LocalPDBDirectory.ObsoleteBehavior;
20+
import org.slf4j.Logger;
21+
import org.slf4j.LoggerFactory;
22+
23+
/**
24+
* This is not a unit test.
25+
*
26+
* It is a long-running parsing test, which sequentially parses all ECOD domains.
27+
*
28+
* The most common warning is caused by residue ranges with missing terminal CA atoms,
29+
* which cause a warning to print.
30+
*
31+
* develop83 and earlier versions also had a number of invalid ranges, which cause
32+
* error messages to print.
33+
*
34+
* Filtering log4j messages to the 'error' level will filter all but the most
35+
* grievous errors.
36+
*
37+
* @author blivens
38+
*
39+
*/
40+
public class EcodParseTest {
41+
private static final Logger logger = LoggerFactory.getLogger(EcodParseTest.class);
42+
43+
public static void main(String[] args) throws IOException {
44+
String ecodVersion = "develop83";
45+
EcodDatabase ecod = EcodFactory.getEcodDatabase(ecodVersion);
46+
AtomCache cache = new AtomCache();
47+
cache.setObsoleteBehavior(ObsoleteBehavior.FETCH_OBSOLETE);
48+
List<EcodDomain> domains = ecod.getAllDomains();
49+
// domains = Arrays.asList(ecod.getDomainsById("e1yfbB2"));
50+
// domains = Arrays.asList(ecod.getDomainsById("e1w50A2"));
51+
domains = Arrays.asList(ecod.getDomainsById("e2ftlE1"));
52+
int errors = 0;
53+
for(EcodDomain d : domains) {
54+
Atom[] ca1;
55+
Structure struct;
56+
try {
57+
struct = cache.getStructure(d.getPdbId());
58+
ca1 = StructureTools.getRepresentativeAtomArray(struct);
59+
} catch (IOException e) {
60+
logger.error("Error getting structure for "+d.getDomainId(),e);
61+
errors++;
62+
continue;
63+
} catch (StructureException e) {
64+
logger.error("Error getting structure for "+d.getDomainId(),e);
65+
errors++;
66+
continue;
67+
}
68+
69+
// Test that the ranges can be parsed
70+
String rangeStr = d.getRange();
71+
AtomPositionMap map = new AtomPositionMap(ca1);
72+
List<? extends ResidueRange> ranges;
73+
try {
74+
// Parses range given in domain
75+
ranges = ResidueRange.parseMultiple(rangeStr);
76+
} catch(Exception e) {
77+
logger.error("Error parsing "+d.getPdbId()+"_"+d.getRange(),e);
78+
errors++;
79+
continue;
80+
}
81+
boolean clean = true;
82+
for(ResidueRange r : ranges) {
83+
if( r == null ) {
84+
clean = false;
85+
}
86+
}
87+
if( ! clean ) {
88+
logger.error("Empty range for {}_{}",d.getPdbId(),d.getRange());
89+
errors++;
90+
continue;
91+
}
92+
93+
94+
// Check that the ranges are valid (or at least that they have a group)
95+
for(ResidueRange range : ranges) {
96+
try {
97+
Integer start = map.getPosition(range.getStart());
98+
if(start == null) {
99+
Group g = struct.getChainByPDB(range.getStart().getChainId()).getGroupByPDB(range.getStart());
100+
if(g!=null) {
101+
logger.warn("No CA atom for starting residue "+d.getDomainId()+"_"+range);
102+
clean = false;
103+
} else {
104+
logger.error("Start doesn't exist for "+d.getDomainId()+"_"+range.toString());
105+
clean = false;
106+
}
107+
}
108+
} catch(Exception e) {
109+
logger.error("Start doesn't exist for "+d.getDomainId()+"_"+range.toString(),e);
110+
clean = false;
111+
}
112+
try {
113+
Integer end = map.getPosition(range.getEnd());
114+
if(end == null) {
115+
Group g = null;
116+
try {
117+
g = struct.getChainByPDB(range.getEnd().getChainId()).getGroupByPDB(range.getEnd());
118+
} catch(StructureException e ) {}
119+
if(g!=null) {
120+
logger.warn("No CA atom for ending residue "+d.getDomainId()+"_"+range);
121+
clean = false;
122+
} else {
123+
logger.error("End doesn't exist for "+d.getDomainId()+"_"+range.toString());
124+
clean = false;
125+
}
126+
}
127+
} catch(Exception e) {
128+
logger.error("End doesn't exist for "+d.getDomainId()+"_"+range.toString(),e);
129+
clean = false;
130+
}
131+
}
132+
133+
// Try to recover from missing residues by trimming them to the residue range
134+
try {
135+
// Parses more flexibly, giving only a warning for missing residues
136+
ranges = ResidueRangeAndLength.parseMultiple(rangeStr,map);
137+
} catch(Exception e) {
138+
logger.error("Error parsing "+d.getPdbId()+"_"+d.getRange(),e);
139+
errors++;
140+
continue;
141+
}
142+
clean = true;
143+
for(ResidueRange r : ranges) {
144+
if( r == null ) {
145+
clean = false;
146+
}
147+
}
148+
if( ! clean ) {
149+
logger.error("Empty range for {}_{}",d.getPdbId(),d.getRange());
150+
errors++;
151+
continue;
152+
}
153+
154+
// Test whether we can use it to get a structure
155+
String pdbRangeStr = String.format("%s.%s",d.getPdbId(),d.getRange());
156+
try {
157+
cache.getStructure(pdbRangeStr);
158+
} catch(Exception e) {
159+
logger.error("Can't get range "+pdbRangeStr,e);
160+
errors++;
161+
continue;
162+
}
163+
164+
//All test passed
165+
logger.info("OK "+d.getDomainId());
166+
}
167+
logger.info("Done. {} errors.",errors);
168+
}
169+
}

0 commit comments

Comments
 (0)