Skip to content

Commit dd61ec1

Browse files
committed
biojava#276 updating to latest version of HMMER API. Incorporating some feedback from Rob Finn to match more closely what is being reported at the Pfam website.
1 parent e0b0456 commit dd61ec1

File tree

3 files changed

+54
-23
lines changed

3 files changed

+54
-23
lines changed

biojava-ws/src/main/java/demo/HmmerDemo.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,17 +42,15 @@ public static void main(String[] args){
4242

4343
try {
4444
// first we get a UniProt sequence
45-
String uniProtID = "P26663";
45+
String uniProtID = "P08487";
4646
ProteinSequence seq = getUniprot(uniProtID);
4747

4848

4949
// now we submit this sequence to the Hmmer web site
50-
HmmerScan hmmer = new RemoteHmmerScan();
50+
RemoteHmmerScan hmmer = new RemoteHmmerScan();
5151

5252
SortedSet<HmmerResult> results = hmmer.scan(seq);
5353

54-
55-
5654
// and now let's print out the obtained annotations
5755

5856
System.out.println(String.format("#\t%15s\t%10s\t%s\t%s\t%8s\t%s",
@@ -68,7 +66,7 @@ public static void main(String[] args){
6866
counter,
6967
hmmerResult.getName(), domain.getHmmAcc(),
7068
domain.getSqFrom(),domain.getSqTo(),
71-
hmmerResult.getEvalue(), hmmerResult.getDesc()
69+
domain.getEvalue(), hmmerResult.getDesc()
7270
));
7371

7472
}

biojava-ws/src/main/java/org/biojava/nbio/ws/hmmer/HmmerDomain.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,15 @@ public class HmmerDomain implements Comparable<HmmerDomain>, Serializable{
4444
String hmmName;
4545
String hmmDesc;
4646
String hmmAcc;
47+
Float evalue;
48+
49+
public Float getEvalue() {
50+
return evalue;
51+
}
52+
public void setEvalue(Float evalue) {
53+
this.evalue = evalue;
54+
}
55+
4756
public Integer getSqFrom() {
4857
return sqFrom;
4958
}

biojava-ws/src/main/java/org/biojava/nbio/ws/hmmer/RemoteHmmerScan.java

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,11 @@ public class RemoteHmmerScan implements HmmerScan {
4141

4242
public static String HMMER_SERVICE = "http://www.ebi.ac.uk/Tools/hmmer/search/hmmscan";
4343

44-
// The Gathering threshold indicates to HMMER to use the threshold defined in the HMM file to be searched.
45-
// This ensures that there are no false positive results.
46-
47-
public boolean DEFAULT_SEARCH_CUT_GA = true;
48-
49-
private boolean searchWithCutGA;
5044
public RemoteHmmerScan(){
51-
searchWithCutGA = DEFAULT_SEARCH_CUT_GA;
45+
5246
}
5347

48+
5449
@Override
5550
public SortedSet<HmmerResult> scan(ProteinSequence sequence) throws IOException {
5651

@@ -73,8 +68,12 @@ public SortedSet<HmmerResult> scan(ProteinSequence sequence, URL serviceLocation
7368

7469
postContent.append("hmmdb=pfam");
7570

76-
if ( searchWithCutGA )
77-
postContent.append("&cut_ga=1");
71+
72+
// by default hmmscan runs with the HMMER3 cut_ga parameter enabled, the "gathering threshold", which depends on
73+
// the cutoffs defined in the underlying HMM files.
74+
// to request a different cutoff by e-value this could be enabled:
75+
//postContent.append("&E=1");
76+
7877

7978
postContent.append("&seq=");
8079
postContent.append(sequence.getSequenceAsString());
@@ -90,7 +89,6 @@ public SortedSet<HmmerResult> scan(ProteinSequence sequence, URL serviceLocation
9089

9190
connection.setRequestProperty("Accept:","application/json");
9291

93-
9492
connection.setRequestProperty("Content-Length", "" +
9593
Integer.toString(postContent.toString().getBytes().length));
9694

@@ -102,8 +100,6 @@ public SortedSet<HmmerResult> scan(ProteinSequence sequence, URL serviceLocation
102100
wr.close ();
103101

104102

105-
106-
107103
// Now get the redirect URL
108104
URL respUrl = new URL( connection.getHeaderField( "Location" ));
109105

@@ -158,7 +154,8 @@ public SortedSet<HmmerResult> scan(ProteinSequence sequence, URL serviceLocation
158154
} else if ( dclO instanceof Integer){
159155
dcl = (Integer) dclO;
160156
}
161-
157+
158+
162159
hmmResult.setAcc((String)hit.get("acc"));
163160
hmmResult.setDcl(dcl);
164161
hmmResult.setDesc((String)hit.get("desc"));
@@ -175,9 +172,34 @@ public SortedSet<HmmerResult> scan(ProteinSequence sequence, URL serviceLocation
175172
for ( int j= 0 ; j < hmmdomains.size() ; j++){
176173
JSONObject d = hmmdomains.getJSONObject(j);
177174
//System.out.println(d);
178-
Integer is_reported = getInteger(d.get("is_reported"));
179-
if ( is_reported != 1) {
180-
//System.out.println("excluding: " + d);
175+
Integer is_included = getInteger(d.get("is_included"));
176+
if ( is_included == 0) {
177+
// System.out.println(" excluding: " + d.get("alihmmdesc") + " " + d.get("alihmmname") + " " +
178+
// hit.get("evalue") + " " +
179+
// d.get("alisqfrom") + " " +
180+
// d.get("alisqto"));
181+
continue;
182+
}
183+
184+
185+
// this filters out multiple hits to the same clan
186+
Integer outcompeted = getInteger(d.get("outcompeted"));
187+
if ( outcompeted != null && outcompeted == 1) {
188+
// System.out.println(" outcompeted: " + d.get("alihmmdesc") + " " + d.get("alihmmname")+ " " +
189+
// hit.get("evalue") + " " +
190+
// d.get("alisqfrom") + " " +
191+
// d.get("alisqto")
192+
// );
193+
continue;
194+
}
195+
196+
Integer significant = getInteger(d.get("significant"));
197+
198+
if ( significant != 1) {
199+
// System.out.println(" not significant: " + d.get("alihmmdesc") + " " + d.get("alihmmname")+ " " +
200+
// hit.get("evalue") + " " +
201+
// d.get("alisqfrom") + " " +
202+
// d.get("alisqto"));
181203
continue;
182204
}
183205

@@ -188,13 +210,15 @@ public SortedSet<HmmerResult> scan(ProteinSequence sequence, URL serviceLocation
188210

189211
dom.setHmmFrom(getInteger(d.get("alihmmfrom")));
190212
dom.setHmmTo(getInteger(d.get("alihmmto")));
191-
dom.setSimCount((Integer)d.get("aliSimCount"));
213+
dom.setSimCount((Integer) d.get("aliSimCount"));
192214
dom.setSqFrom(getInteger(d.get("alisqfrom")));
193215
dom.setSqTo(getInteger(d.get("alisqto")));
194216
dom.setHmmName((String)d.get("alihmmname"));
217+
dom.setEvalue(Float.parseFloat((String)d.get("ievalue")));
218+
195219
domains.add(dom);
196220

197-
System.out.println(d.get("alicsline"));
221+
198222
}
199223

200224
hmmResult.setDomains(domains);

0 commit comments

Comments
 (0)