Skip to content

Commit cbe8c88

Browse files
committed
ECOD Improvements
- getVersion() returns the actual version parsed - filterByHierarchy() method
1 parent 750b1a3 commit cbe8c88

File tree

4 files changed

+144
-22
lines changed

4 files changed

+144
-22
lines changed

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/domain/EcodInstallationTest.java

Lines changed: 69 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,23 @@
1-
/**
2-
*
1+
/*
2+
* BioJava development code
3+
*
4+
* This code may be freely distributed and modified under the
5+
* terms of the GNU Lesser General Public Licence. This should
6+
* be distributed with the code. If you do not have a copy,
7+
* see:
8+
*
9+
* http://www.gnu.org/copyleft/lesser.html
10+
*
11+
* Copyright for this code is held jointly by the individual
12+
* authors. These should be listed in @author doc comments.
13+
*
14+
* For more information on the BioJava project and its aims,
15+
* or to join the biojava-l mailing list, visit the home page
16+
* at:
17+
*
18+
* http://www.biojava.org/
319
*/
20+
421
package org.biojava.nbio.structure.test.domain;
522

623
import static org.junit.Assert.*;
@@ -10,6 +27,7 @@
1027
import java.util.ArrayList;
1128
import java.util.Arrays;
1229
import java.util.Collections;
30+
import java.util.HashMap;
1331
import java.util.HashSet;
1432
import java.util.List;
1533
import java.util.Set;
@@ -37,17 +55,11 @@
3755
public class EcodInstallationTest {
3856

3957
private static final Logger logger = LoggerFactory.getLogger(EcodInstallationTest.class);
40-
private static EcodInstallation ecod;
4158
private static final String VERSION = "develop77";
4259

43-
// Set up static ecod singleton
44-
static {
45-
ecod = (EcodInstallation) EcodFactory.getEcodDatabase(VERSION);
46-
}
47-
4860
static {
4961
//System.setProperty("Log4jContextSelector", "org.apache.logging.log4j.core.async.AsyncLoggerContextSelector");
50-
}
62+
}
5163
@Rule
5264
public TemporaryFolder tmpFolder = new TemporaryFolder();
5365
@Test
@@ -65,14 +77,19 @@ public void testDownloads() throws IOException {
6577
assertTrue("No downloaded file at "+domainsFile.toString(),domainsFile.exists());
6678
}
6779

80+
6881
@Test
6982
public void testAllDomains() throws IOException {
83+
EcodDatabase ecod = EcodFactory.getEcodDatabase(VERSION);
84+
7085
List<EcodDomain> domains = ecod.getAllDomains();
7186
assertEquals("Wrong number of domains",423779,domains.size());
7287
}
7388

7489
@Test
7590
public void testByPDB() throws IOException {
91+
EcodDatabase ecod = EcodFactory.getEcodDatabase(VERSION);
92+
7693
String pdbId;
7794
String[] expectedDomains;
7895
List<EcodDomain> domains;
@@ -95,6 +112,8 @@ private void matchNames(String pdbId,String[] expected,List<EcodDomain> actual)
95112

96113
@Test
97114
public void testParsing() throws IOException {
115+
EcodDatabase ecod = EcodFactory.getEcodDatabase(VERSION);
116+
98117
String ecodId;
99118
EcodDomain domain,expected;
100119

@@ -113,7 +132,7 @@ public void testParsing() throws IOException {
113132
"UNK_F_TYPE", false, Collections.singleton("EPE")
114133
);
115134
assertEquals(ecodId,expected,domain);
116-
135+
117136
ecodId = "e4v4fAA1";
118137
domain = ecod.getDomainsById(ecodId);
119138
assertNotNull(ecodId,domain);
@@ -122,6 +141,7 @@ public void testParsing() throws IOException {
122141

123142
@Test
124143
public void testMultithreaded() throws IOException {
144+
final EcodInstallation ecod = (EcodInstallation) EcodFactory.getEcodDatabase(VERSION);
125145
ecod.clear();
126146
String[] ecodIds = new String[] {
127147
"e4s1gA1", "e4umoB1", "e4v0cA1", "e4v1af1", "e3j7yj1", "e4wfcA1","e4b0jP1",
@@ -162,4 +182,43 @@ public String toString() {
162182
}
163183
assertEquals(ecodIds.length, successful);
164184
}
185+
186+
@Test
187+
public void testFilterByHierarchy() throws IOException {
188+
EcodDatabase ecod = EcodFactory.getEcodDatabase(VERSION);
189+
190+
List<EcodDomain> filtered;
191+
Set<String> expected,actual;
192+
193+
expected = new HashSet<String>(Arrays.asList(
194+
"e4il6R1 e4pj0R1 e4pj0r1 e4ub6R1 e4ub8R1".split(" ") ));
195+
filtered = ecod.filterByHierarchy("6106.1.1");
196+
actual = new HashSet<String>();
197+
for(EcodDomain d : filtered) {
198+
actual.add(d.getDomainId());
199+
}
200+
assertEquals(expected,actual);
201+
202+
filtered = ecod.filterByHierarchy("6106.1");
203+
actual = new HashSet<String>();
204+
for(EcodDomain d : filtered) {
205+
actual.add(d.getDomainId());
206+
}
207+
assertEquals(expected,actual);
208+
209+
filtered = ecod.filterByHierarchy("6106");
210+
actual = new HashSet<String>();
211+
for(EcodDomain d : filtered) {
212+
actual.add(d.getDomainId());
213+
}
214+
assertEquals(expected,actual);
215+
}
216+
217+
@Test
218+
public void testVersion() throws IOException {
219+
EcodDatabase ecod3 = EcodFactory.getEcodDatabase("latest");
220+
String version = ecod3.getVersion();
221+
assertNotNull(version);
222+
assertNotEquals("latest", version);
223+
}
165224
}

biojava-structure/src/main/java/org/biojava/nbio/structure/ecod/EcodDatabase.java

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,6 @@
2323
import java.io.IOException;
2424
import java.util.List;
2525

26-
import org.biojava.nbio.structure.cath.CathCategory;
27-
import org.biojava.nbio.structure.cath.CathDomain;
28-
import org.biojava.nbio.structure.cath.CathFragment;
29-
import org.biojava.nbio.structure.cath.CathNode;
30-
3126
/** General API for interacting with ECOD.
3227
*
3328
* @author Spencer Bliven
@@ -37,8 +32,9 @@ public interface EcodDatabase {
3732
/** Return the release version.
3833
*
3934
* @return version
35+
* @throws IOException If the version cannot be determined
4036
*/
41-
public String getVersion();
37+
public String getVersion() throws IOException;
4238

4339
/**
4440
* Get a particular ECOD domain by the domain ID (e.g. "e4hhbA1")
@@ -56,6 +52,15 @@ public interface EcodDatabase {
5652
*/
5753
public List<EcodDomain> getDomainsForPdb(String pdbId) throws IOException;
5854

55+
/**
56+
* Get a list of domains within a particular level of the hierarchy
57+
* @param hierarchy A dot-separated list giving the X-group, H-group, and/or
58+
* T-group (e.g. "1.1" for all members of the RIFT-related H-group)
59+
* @return A list of the domains belonging to the specified group
60+
* @throws IOException
61+
*/
62+
public List<EcodDomain> filterByHierarchy(String hierarchy) throws IOException;
63+
5964
/**
6065
* Get all ECOD domains
6166
* @return

biojava-structure/src/main/java/org/biojava/nbio/structure/ecod/EcodFactory.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
*/
2121
package org.biojava.nbio.structure.ecod;
2222

23+
import java.io.IOException;
2324
import java.util.Collections;
2425
import java.util.HashMap;
2526
import java.util.Map;
@@ -69,8 +70,12 @@ public static EcodDatabase getEcodDatabase(String version) {
6970
versionedEcodDBs.put(version.toLowerCase(), ecod);
7071

7172
// If the parsed version differed from that requested, add that too
72-
if( ! versionedEcodDBs.containsKey(ecod.getVersion().toLowerCase()) ) {
73-
versionedEcodDBs.put(ecod.getVersion().toLowerCase(),ecod);
73+
try {
74+
if( ! versionedEcodDBs.containsKey(ecod.getVersion().toLowerCase()) ) {
75+
versionedEcodDBs.put(ecod.getVersion().toLowerCase(),ecod);
76+
}
77+
} catch (IOException e) {
78+
// For parsing errors, just use the requested version
7479
}
7580
}
7681

biojava-structure/src/main/java/org/biojava/nbio/structure/ecod/EcodInstallation.java

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,42 @@ public List<EcodDomain> getDomainsForPdb(String pdbId) throws IOException {
154154
}
155155
}
156156

157+
/**
158+
* Get a list of domains within a particular level of the hierarchy
159+
* @param hierarchy A dot-separated list giving the X-group, H-group, and/or
160+
* T-group (e.g. "1.1" for all members of the RIFT-related H-group)
161+
* @return
162+
* @throws IOException
163+
*/
164+
@Override
165+
public List<EcodDomain> filterByHierarchy(String hierarchy) throws IOException {
166+
String[] xhtGroup = hierarchy.split("\\.");
167+
Integer xGroup = xhtGroup.length>0 ? Integer.parseInt(xhtGroup[0]) : null;
168+
Integer hGroup = xhtGroup.length>1 ? Integer.parseInt(xhtGroup[1]) : null;
169+
Integer tGroup = xhtGroup.length>2 ? Integer.parseInt(xhtGroup[2]) : null;
170+
171+
List<EcodDomain> filtered = new ArrayList<EcodDomain>();
172+
for(EcodDomain d: getAllDomains()) {
173+
boolean match = true;
174+
if(xhtGroup.length>0) {
175+
match = match && xGroup.equals(d.getxGroup());
176+
}
177+
if(xhtGroup.length>1) {
178+
match = match && hGroup.equals(d.gethGroup());
179+
}
180+
if(xhtGroup.length>2) {
181+
match = match && tGroup.equals(d.gettGroup());
182+
}
183+
if(xhtGroup.length>3) {
184+
logger.warn("Ignoring unexpected additional parts of ECOD {}",hierarchy);
185+
}
186+
if(match) {
187+
filtered.add(d);
188+
}
189+
}
190+
return filtered;
191+
}
192+
157193
/**
158194
* Get a particular ECOD domain by the domain ID (e.g. "e4hhbA1")
159195
* @param ecodId
@@ -227,9 +263,12 @@ public void clear() {
227263
* Note that this may differ from the version requested in the constructor
228264
* for the special case of "latest"
229265
* @return the ECOD version
266+
* @throws IOException If an error occurs while downloading or parsing the file
230267
*/
231268
@Override
232-
public String getVersion() {
269+
public String getVersion() throws IOException {
270+
ensureDomainsFileInstalled();
271+
233272
if( parsedVersion == null) {
234273
return requestedVersion;
235274
}
@@ -353,7 +392,7 @@ private void downloadDomains() throws IOException {
353392
* @return
354393
*/
355394
private String getDomainFilename() {
356-
return String.format(DOMAINS_FILENAME_FORMAT,getVersion());
395+
return String.format(DOMAINS_FILENAME_FORMAT,requestedVersion);
357396
}
358397

359398
/**
@@ -450,8 +489,8 @@ private void parse(BufferedReader in) throws IOException {
450489
// Allocate plenty of space for ECOD as of 2015
451490
ArrayList<EcodDomain> domainsList = new ArrayList<EcodDomain>(500000);
452491

453-
Pattern versionRE = Pattern.compile("^\\s*#.*ECOD\\s*requestedVersion\\s+(\\w+)");
454-
Pattern commentRE = Pattern.compile("^\\s*#");
492+
Pattern versionRE = Pattern.compile("^\\s*#.*ECOD\\s*version\\s+(\\w+).*");
493+
Pattern commentRE = Pattern.compile("^\\s*#.*");
455494

456495
String line = in.readLine();
457496
int lineNum = 0;
@@ -564,6 +603,20 @@ public String getVersion() {
564603
}
565604

566605

606+
@Override
607+
public String toString() {
608+
String version = null;
609+
try {
610+
version = getVersion();
611+
} catch (IOException e) {
612+
// For parsing errors, use the requested version
613+
version = requestedVersion;
614+
}
615+
616+
return "EcodInstallation [cacheLocation=" + cacheLocation
617+
+ ", version=" + version + "]";
618+
}
619+
567620
public static void main(String[] args) {
568621
if( args.length!= 1) {
569622
System.out.println("usage: ecod_domains.txt");

0 commit comments

Comments
 (0)