Skip to content

Commit b3edb04

Browse files
committed
related to #800
Although I couldn't reproduce the original error. I followed the method documentation and moved the close action to process() from process(int) since process(int) should never close the resource under any circumstances anyhow. Also went out of scope a little and verified that the underlying InputStream was open and closed at appropriate steps. Used streams to reduce complexity of a double nested for loop.(If that's frowned upon I'll change it back) Tweaked documentation to use links when refering to library classes.
1 parent 6f353fe commit b3edb04

File tree

2 files changed

+59
-37
lines changed

2 files changed

+59
-37
lines changed

biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankReader.java

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
3535
import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
3636
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
37-
import org.biojava.nbio.core.sequence.features.AbstractFeature;
3837
import org.biojava.nbio.core.sequence.features.DBReferenceInfo;
3938
import org.biojava.nbio.core.sequence.io.template.SequenceCreatorInterface;
4039
import org.biojava.nbio.core.sequence.io.template.SequenceHeaderParserInterface;
@@ -47,9 +46,10 @@
4746
import java.util.ArrayList;
4847
import java.util.HashMap;
4948
import java.util.LinkedHashMap;
49+
import java.util.List;
5050

5151
/**
52-
* Use GenbankReaderHelper as an example of how to use this class where GenbankReaderHelper should be the
52+
* Use {@link GenbankReaderHelper} as an example of how to use this class where {@link GenbankReaderHelper} should be the
5353
* primary class used to read Genbank files
5454
*
5555
*/
@@ -66,9 +66,9 @@ public boolean isClosed() {
6666
}
6767

6868
/**
69-
* If you are going to use FileProxyProteinSequenceCreator then do not use this constructor because we need details about
70-
* local file offsets for quick reads. InputStreams does not give you the name of the stream to access quickly via file seek. A seek in
71-
* an inputstream is forced to read all the data so you don't gain anything.
69+
* If you are going to use {@link FileProxyProteinSequenceCreator} then do not use this constructor because we need details about
70+
* local file offsets for quick reads. {@link InputStream} does not give you the name of the stream to access quickly via file seek. A seek in
71+
* an {@link InputStream} is forced to read all the data so you don't gain anything.
7272
* @param is
7373
* @param headerParser
7474
* @param sequenceCreator
@@ -107,18 +107,21 @@ public GenbankReader(
107107

108108
/**
109109
* The parsing is done in this method.<br>
110-
* This method tries to process all the available Genbank records
110+
* This method will return all the available Genbank records
111111
* in the File or InputStream, closes the underlying resource,
112112
* and return the results in {@link LinkedHashMap}.<br>
113-
* You don't need to call {@link #close()} after calling this method.
113+
* You don't need to call {@link GenbankReader#close()} after calling this method.
114114
* @see #process(int)
115115
* @return {@link HashMap} containing all the parsed Genbank records
116116
* present, starting current fileIndex onwards.
117117
* @throws IOException
118118
* @throws CompoundNotFoundException
119+
* @throws OutOfMemoryError if the input resource is larger than the allocated heap.
119120
*/
120121
public LinkedHashMap<String,S> process() throws IOException, CompoundNotFoundException {
121-
return process(-1);
122+
LinkedHashMap<String,S> result = process(-1);
123+
close();
124+
return result;
122125
}
123126

124127
/**
@@ -137,13 +140,16 @@ public LinkedHashMap<String,S> process() throws IOException, CompoundNotFoundExc
137140
* @see #process()
138141
* @author Amr AL-Hossary
139142
* @since 3.0.6
140-
* @param max maximum number of records to return, <code>-1</code> for infinity.
143+
* @param max maximum number of records to return.
141144
* @return {@link HashMap} containing maximum <code>max</code> parsed Genbank records
142145
* present, starting current fileIndex onwards.
143146
* @throws IOException
144147
* @throws CompoundNotFoundException
145148
*/
146149
public LinkedHashMap<String,S> process(final int max) throws IOException, CompoundNotFoundException {
150+
151+
if(closed) throw new IOException("Cannot perform action: resource has been closed.");
152+
147153
LinkedHashMap<String,S> sequences = new LinkedHashMap<>();
148154
@SuppressWarnings("unchecked")
149155
int i=0;
@@ -158,12 +164,9 @@ public LinkedHashMap<String,S> process(final int max) throws IOException, Compou
158164
genbankParser.getSequenceHeaderParser().parseHeader(genbankParser.getHeader(), sequence);
159165

160166
// add features to new sequence
161-
for (String k: genbankParser.getFeatures().keySet()){
162-
for (AbstractFeature f: genbankParser.getFeatures(k)){
163-
//f.getLocations().setSequence(sequence); // can't set proper sequence source to features. It is actually needed? Don't think so...
164-
sequence.addFeature(f);
165-
}
166-
}
167+
genbankParser.getFeatures().values().stream()
168+
.flatMap(List::stream)
169+
.forEach(sequence::addFeature);
167170

168171
// add taxonomy ID to new sequence
169172
ArrayList<DBReferenceInfo> dbQualifier = genbankParser.getDatabaseReferences().get("db_xref");
@@ -175,10 +178,6 @@ public LinkedHashMap<String,S> process(final int max) throws IOException, Compou
175178
sequences.put(sequence.getAccession().getID(), sequence);
176179
}
177180

178-
if (max < 0) {
179-
close();
180-
}
181-
182181
return sequences;
183182
}
184183

@@ -187,11 +186,12 @@ public void close() {
187186
bufferedReader.close();
188187
this.closed = true;
189188
} catch (IOException e) {
190-
logger.error("Couldn't close the reader. {}", e.getMessage());
189+
logger.error("Couldn't close the reader.", e);
191190
this.closed = false;
192191
}
193192
}
194193

194+
//TODO turn this into a test case?
195195
public static void main(String[] args) throws Exception {
196196
String proteinFile = "src/test/resources/BondFeature.gb";
197197
FileInputStream is = new FileInputStream(proteinFile);
@@ -206,6 +206,7 @@ public static void main(String[] args) throws Exception {
206206
LinkedHashMap<String,DNASequence> dnaSequences = dnaReader.process();
207207
System.out.println(dnaSequences);
208208

209+
//TODO restore CraftedFeature.gb or delete this code
209210
String crazyFile = "src/test/resources/CraftedFeature.gb";
210211
is = new FileInputStream(crazyFile);
211212
GenbankReader<DNASequence, NucleotideCompound> crazyReader = new GenbankReader<>(is, new GenericGenbankHeaderParser<>(), new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()));

biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,6 @@
2020
*/
2121
package org.biojava.nbio.core.sequence.io;
2222

23-
import java.io.IOException;
24-
import java.io.InputStream;
25-
import java.util.ArrayList;
26-
import java.util.LinkedHashMap;
27-
import java.util.List;
28-
import java.util.Map;
29-
3023
import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
3124
import org.biojava.nbio.core.sequence.DNASequence;
3225
import org.biojava.nbio.core.sequence.ProteinSequence;
@@ -37,15 +30,18 @@
3730
import org.biojava.nbio.core.sequence.features.FeatureInterface;
3831
import org.biojava.nbio.core.sequence.features.Qualifier;
3932
import org.biojava.nbio.core.sequence.template.AbstractSequence;
40-
import org.junit.After;
41-
import org.junit.AfterClass;
42-
import org.junit.Assert;
43-
import org.junit.Before;
44-
import org.junit.BeforeClass;
45-
import org.junit.Test;
33+
import org.junit.*;
4634
import org.slf4j.Logger;
4735
import org.slf4j.LoggerFactory;
4836

37+
import java.io.BufferedInputStream;
38+
import java.io.IOException;
39+
import java.io.InputStream;
40+
import java.util.ArrayList;
41+
import java.util.LinkedHashMap;
42+
import java.util.List;
43+
import java.util.Map;
44+
4945
import static org.hamcrest.CoreMatchers.is;
5046
import static org.junit.Assert.*;
5147

@@ -161,7 +157,7 @@ public void testProcess() throws Exception {
161157
*/
162158
@Test
163159
public void testPartialProcess() throws IOException, CompoundNotFoundException, NoSuchFieldException {
164-
InputStream inStream = this.getClass().getResourceAsStream("/two-dnaseqs.gb");
160+
CheckableInputStream inStream = new CheckableInputStream(this.getClass().getResourceAsStream("/two-dnaseqs.gb"));
165161

166162
GenbankReader<DNASequence, NucleotideCompound> genbankDNA
167163
= new GenbankReader<>(
@@ -173,27 +169,29 @@ public void testPartialProcess() throws IOException, CompoundNotFoundException,
173169
// First call to process(1) returns the first sequence
174170
LinkedHashMap<String, DNASequence> dnaSequences = genbankDNA.process(1);
175171

172+
assertFalse(inStream.isclosed());
176173
assertNotNull(dnaSequences);
177174
assertEquals(1, dnaSequences.size());
178175
assertNotNull(dnaSequences.get("vPetite"));
179176

180177
// Second call to process(1) returns the second sequence
181178
dnaSequences = genbankDNA.process(1);
179+
assertFalse(inStream.isclosed());
182180
assertNotNull(dnaSequences);
183181
assertEquals(1, dnaSequences.size());
184182
assertNotNull(dnaSequences.get("sbFDR"));
185183

186184
assertFalse(genbankDNA.isClosed());
187185
genbankDNA.close();
188186
assertTrue(genbankDNA.isClosed());
189-
187+
assertTrue(inStream.isclosed());
190188
}
191189

192190
@Test
193191
public void CDStest() throws Exception {
194192
logger.info("CDS Test");
195193

196-
InputStream inStream = this.getClass().getResourceAsStream("/BondFeature.gb");
194+
CheckableInputStream inStream = new CheckableInputStream(this.getClass().getResourceAsStream("/BondFeature.gb"));
197195
assertNotNull(inStream);
198196

199197
GenbankReader<ProteinSequence, AminoAcidCompound> GenbankProtein
@@ -203,7 +201,7 @@ public void CDStest() throws Exception {
203201
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet())
204202
);
205203
LinkedHashMap<String, ProteinSequence> proteinSequences = GenbankProtein.process();
206-
inStream.close();
204+
assertTrue(inStream.isclosed());
207205

208206

209207
Assert.assertTrue(proteinSequences.size() == 1);
@@ -260,4 +258,27 @@ public void testNcbiExpandedAccessionFormats() throws Exception {
260258
DNASequence header2 = readGenbankResource("/empty_header2.gb");
261259
assertEquals("AZZZAA02123456789 10000000000 bp DNA linear PRI 15-OCT-2018", header2.getOriginalHeader());
262260
}
261+
262+
/**
263+
* Helper class to be able to verify the closed state of the input stream.
264+
*/
265+
private class CheckableInputStream extends BufferedInputStream {
266+
267+
private boolean closed;
268+
269+
CheckableInputStream(InputStream in) {
270+
super(in);
271+
closed = false;
272+
}
273+
274+
@Override
275+
public void close() throws IOException {
276+
super.close();
277+
closed = true;
278+
}
279+
280+
boolean isclosed(){
281+
return closed;
282+
}
283+
}
263284
}

0 commit comments

Comments
 (0)