Skip to content

Commit 97924c5

Browse files
committed
First round of fixes to ECOD format
Format was updated slightly in response to our testing, but some problems and inconsistencies remain.
1 parent 9cf551a commit 97924c5

File tree

3 files changed

+134
-59
lines changed

3 files changed

+134
-59
lines changed

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/domain/EcodInstallationTest.java

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
import java.util.ArrayList;
2828
import java.util.Arrays;
2929
import java.util.Collections;
30-
import java.util.HashMap;
3130
import java.util.HashSet;
3231
import java.util.List;
3332
import java.util.Set;
@@ -55,7 +54,7 @@
5554
public class EcodInstallationTest {
5655

5756
private static final Logger logger = LoggerFactory.getLogger(EcodInstallationTest.class);
58-
private static final String VERSION = "develop77";
57+
private static final String VERSION = "develop78";
5958

6059
static {
6160
//System.setProperty("Log4jContextSelector", "org.apache.logging.log4j.core.async.AsyncLoggerContextSelector");
@@ -80,10 +79,13 @@ public void testDownloads() throws IOException {
8079

8180
@Test
8281
public void testAllDomains() throws IOException {
82+
int expected;
8383
EcodDatabase ecod = EcodFactory.getEcodDatabase(VERSION);
8484

8585
List<EcodDomain> domains = ecod.getAllDomains();
86-
assertEquals("Wrong number of domains",423779,domains.size());
86+
expected = 423779; //version77
87+
expected = 423869; //version78
88+
assertEquals("Wrong number of domains",expected,domains.size());
8789
}
8890

8991
@Test
@@ -122,14 +124,15 @@ public void testParsing() throws IOException {
122124
expected = new EcodDomain(
123125
// Long uid, String domainId, Boolean manual,
124126
20669l, "e1lyw.1", null,
125-
// Integer xGroup, Integer hGroup, Integer tGroup, String pdbId,
126-
1,1,1,"1lyw",
127+
// Integer xGroup, Integer hGroup, Integer tGroup, Integer fGroup, String pdbId,
128+
1,1,1,2,"1lyw",
127129
// String chainId, String range, String architectureName,
128130
".", "A:3-97,B:106-346", "beta barrels",
129131
// String xGroupName, String hGroupName, String tGroupName,
130132
// String fGroupName, Boolean isAssembly, List<String> ligands
131133
"cradle loop barrel", "RIFT-related", "acid protease",
132-
"UNK_F_TYPE", false, Collections.singleton("EPE")
134+
"EF00710",//"UNK_F_TYPE",
135+
20669l, Collections.singleton("EPE")
133136
);
134137
assertEquals(ecodId,expected,domain);
135138

@@ -221,4 +224,21 @@ public void testVersion() throws IOException {
221224
assertNotNull(version);
222225
assertNotEquals("latest", version);
223226
}
227+
228+
@Test
229+
public void testAllVersions() throws IOException {
230+
int firstVersion = 45;
231+
int lastVersion = 78;
232+
List<String> versions = new ArrayList<String>();
233+
234+
for(int version = firstVersion; version<= lastVersion;version++) {
235+
versions.add("develop"+version);
236+
}
237+
versions.add("latest");
238+
for(String version : versions) {
239+
EcodInstallation ecod = (EcodInstallation)EcodFactory.getEcodDatabase(version);
240+
ecod.getAllDomains();
241+
System.out.println(version +" -> "+ ecod.getVersion());
242+
}
243+
}
224244
}

biojava-structure/src/main/java/org/biojava/nbio/structure/ecod/EcodDomain.java

Lines changed: 49 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
import java.io.Serializable;
2323
import java.util.HashSet;
24-
import java.util.List;
2524
import java.util.Set;
2625

2726
/**
@@ -34,7 +33,7 @@ public class EcodDomain implements Serializable, Cloneable {
3433
Column 1: ECOD uid - internal domain unique identifier
3534
Column 2: ECOD domain id - domain identifier
3635
Column 3: ECOD representative status - manual (curated) or automated nonrep
37-
Column 4: ECOD hierachy identifier - [X-group].[H-group].{T-group]
36+
Column 4: ECOD hierarchy identifier - [X-group].[H-group].[T-group].[F-group]
3837
Column 5: PDB identifier
3938
Column 6: Chain identifier (note: case-sensitive)
4039
Column 7: PDB residue number range
@@ -54,15 +53,14 @@ public class EcodDomain implements Serializable, Cloneable {
5453

5554
private static final long serialVersionUID = -7760082165560332048L;
5655

57-
/** String for unclassified F-groups */
58-
public static final String F_UNCLASSFIED = "F_UNCLASSIFIED";
5956

6057
private Long uid;
6158
private String domainId;
6259
private Boolean manual;
6360
private Integer xGroup;
6461
private Integer hGroup;
6562
private Integer tGroup;
63+
private Integer fGroup;
6664
private String pdbId;
6765
private String chainId;
6866
private String range;
@@ -71,23 +69,24 @@ public class EcodDomain implements Serializable, Cloneable {
7169
private String hGroupName;
7270
private String tGroupName;
7371
private String fGroupName;
74-
private Boolean isAssembly; // Maybe should be a list, according to description?
72+
private Long assemblyId; //for non-assemblies, matches the uid.
7573
private Set<String> ligands;
7674

7775
/** Default constructor with all null properties */
7876
public EcodDomain() {}
7977

8078
public EcodDomain(Long uid, String domainId, Boolean manual,
81-
Integer xGroup, Integer hGroup, Integer tGroup, String pdbId,
79+
Integer xGroup, Integer hGroup, Integer tGroup, Integer fGroup, String pdbId,
8280
String chainId, String range, String architectureName,
8381
String xGroupName, String hGroupName, String tGroupName,
84-
String fGroupName, Boolean isAssembly, Set<String> ligands) {
82+
String fGroupName, Long assemblyId, Set<String> ligands) {
8583
this.uid = uid;
8684
this.domainId = domainId;
8785
this.manual = manual;
8886
this.xGroup = xGroup;
8987
this.hGroup = hGroup;
9088
this.tGroup = tGroup;
89+
this.fGroup = fGroup;
9190
this.pdbId = pdbId;
9291
this.chainId = chainId;
9392
this.range = range;
@@ -96,7 +95,7 @@ public EcodDomain(Long uid, String domainId, Boolean manual,
9695
this.hGroupName = hGroupName;
9796
this.tGroupName = tGroupName;
9897
this.fGroupName = fGroupName;
99-
this.isAssembly = isAssembly;
98+
this.assemblyId = assemblyId;
10099
this.ligands = ligands;
101100
}
102101
public EcodDomain(String domainId) {
@@ -109,6 +108,7 @@ public EcodDomain(EcodDomain o) {
109108
this.xGroup = o.xGroup;
110109
this.hGroup = o.hGroup;
111110
this.tGroup = o.tGroup;
111+
this.fGroup = o.fGroup;
112112
this.pdbId = o.pdbId;
113113
this.chainId = o.chainId;
114114
this.range = o.range;
@@ -117,7 +117,7 @@ public EcodDomain(EcodDomain o) {
117117
this.hGroupName = o.hGroupName;
118118
this.tGroupName = o.tGroupName;
119119
this.fGroupName = o.fGroupName;
120-
this.isAssembly = o.isAssembly;
120+
this.assemblyId = o.assemblyId;
121121
this.ligands = new HashSet<String>(o.ligands);
122122
}
123123

@@ -144,24 +144,30 @@ public Boolean getManual() {
144144
public void setManual(Boolean manual) {
145145
this.manual = manual;
146146
}
147-
public Integer getxGroup() {
147+
public Integer getXGroup() {
148148
return xGroup;
149149
}
150-
public void setxGroup(Integer xGroup) {
150+
public void setXGroup(Integer xGroup) {
151151
this.xGroup = xGroup;
152152
}
153-
public Integer gethGroup() {
153+
public Integer getHGroup() {
154154
return hGroup;
155155
}
156-
public void sethGroup(Integer hGroup) {
156+
public void setHGroup(Integer hGroup) {
157157
this.hGroup = hGroup;
158158
}
159-
public Integer gettGroup() {
159+
public Integer getTGroup() {
160160
return tGroup;
161161
}
162-
public void settGroup(Integer tGroup) {
162+
public void setTGroup(Integer tGroup) {
163163
this.tGroup = tGroup;
164164
}
165+
public Integer getFGroup() {
166+
return fGroup;
167+
}
168+
public void setFGroup(Integer fGroup) {
169+
this.fGroup = fGroup;
170+
}
165171
public String getPdbId() {
166172
return pdbId;
167173
}
@@ -186,35 +192,38 @@ public String getArchitectureName() {
186192
public void setArchitectureName(String architectureName) {
187193
this.architectureName = architectureName;
188194
}
189-
public String getxGroupName() {
195+
public String getXGroupName() {
190196
return xGroupName;
191197
}
192-
public void setxGroupName(String xGroupName) {
198+
public void setXGroupName(String xGroupName) {
193199
this.xGroupName = xGroupName;
194200
}
195-
public String gethGroupName() {
201+
public String getHGroupName() {
196202
return hGroupName;
197203
}
198-
public void sethGroupName(String hGroupName) {
204+
public void setHGroupName(String hGroupName) {
199205
this.hGroupName = hGroupName;
200206
}
201-
public String gettGroupName() {
207+
public String getTGroupName() {
202208
return tGroupName;
203209
}
204-
public void settGroupName(String tGroupName) {
210+
public void setGroupName(String tGroupName) {
205211
this.tGroupName = tGroupName;
206212
}
207-
public String getfGroupName() {
213+
public String getFGroupName() {
208214
return fGroupName;
209215
}
210-
public void setfGroupName(String fGroupName) {
216+
public void setFGroupName(String fGroupName) {
211217
this.fGroupName = fGroupName;
212218
}
213-
public Boolean getIsAssembly() {
214-
return isAssembly;
219+
/**
220+
* @return The assembly ID, or the domain's own uid if it is not part of an assembly, or null if unknown.
221+
*/
222+
public Long getAssemblyId() {
223+
return assemblyId;
215224
}
216-
public void setIsAssembly(Boolean isAssembly) {
217-
this.isAssembly = isAssembly;
225+
public void setAssemblyId(Long assemblyId) {
226+
this.assemblyId = assemblyId;
218227
}
219228
public Set<String> getLigands() {
220229
return ligands;
@@ -230,12 +239,12 @@ public void setLigands(Set<String> ligands) {
230239
public String toString() {
231240
return "EcodDomain [uid=" + uid + ", domainId=" + domainId
232241
+ ", manual=" + manual + ", xGroup=" + xGroup + ", hGroup="
233-
+ hGroup + ", tGroup=" + tGroup + ", pdbId=" + pdbId
242+
+ hGroup + ", tGroup=" + tGroup + ", fGroup="+ fGroup + ", pdbId=" + pdbId
234243
+ ", chainId=" + chainId + ", range=" + range
235244
+ ", architectureName=" + architectureName + ", xGroupName="
236245
+ xGroupName + ", hGroupName=" + hGroupName + ", tGroupName="
237-
+ tGroupName + ", fGroupName=" + fGroupName + ", isAssembly="
238-
+ isAssembly + ", ligands=" + ligands + "]";
246+
+ tGroupName + ", fGroupName=" + fGroupName + ", assemblyId="
247+
+ assemblyId + ", ligands=" + ligands + "]";
239248
}
240249

241250
/* (non-Javadoc)
@@ -253,11 +262,12 @@ public int hashCode() {
253262
+ ((domainId == null) ? 0 : domainId.hashCode());
254263
result = prime * result
255264
+ ((fGroupName == null) ? 0 : fGroupName.hashCode());
265+
result = prime * result + ((fGroup == null) ? 0 : fGroup.hashCode());
256266
result = prime * result + ((hGroup == null) ? 0 : hGroup.hashCode());
257267
result = prime * result
258268
+ ((hGroupName == null) ? 0 : hGroupName.hashCode());
259269
result = prime * result
260-
+ ((isAssembly == null) ? 0 : isAssembly.hashCode());
270+
+ ((assemblyId == null) ? 0 : assemblyId.hashCode());
261271
result = prime * result + ((ligands == null) ? 0 : ligands.hashCode());
262272
result = prime * result + ((manual == null) ? 0 : manual.hashCode());
263273
result = prime * result + ((pdbId == null) ? 0 : pdbId.hashCode());
@@ -304,6 +314,11 @@ public boolean equals(Object obj) {
304314
return false;
305315
} else if (!fGroupName.equals(other.fGroupName))
306316
return false;
317+
if (fGroup == null) {
318+
if (other.fGroup != null)
319+
return false;
320+
} else if (!fGroup.equals(other.fGroup))
321+
return false;
307322
if (hGroup == null) {
308323
if (other.hGroup != null)
309324
return false;
@@ -314,10 +329,10 @@ public boolean equals(Object obj) {
314329
return false;
315330
} else if (!hGroupName.equals(other.hGroupName))
316331
return false;
317-
if (isAssembly == null) {
318-
if (other.isAssembly != null)
332+
if (assemblyId == null) {
333+
if (other.assemblyId != null)
319334
return false;
320-
} else if (!isAssembly.equals(other.isAssembly))
335+
} else if (!assemblyId.equals(other.assemblyId))
321336
return false;
322337
if (ligands == null) {
323338
if (other.ligands != null)

0 commit comments

Comments
 (0)