Skip to content

Commit 2c61dcc

Browse files
committed
Confirm Genbank parser handles new accession formats.
1 parent 1ef9a4a commit 2c61dcc

File tree

4 files changed

+64
-0
lines changed

4 files changed

+64
-0
lines changed

biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,4 +223,44 @@ public void CDStest() throws Exception {
223223

224224
}
225225

226+
private DNASequence readGenbankResource(final String resource) throws Exception {
227+
DNASequence sequence = null;
228+
InputStream inputStream = null;
229+
try {
230+
inputStream = getClass().getResourceAsStream(resource);
231+
232+
GenbankReader<DNASequence, NucleotideCompound> genbankDNA
233+
= new GenbankReader<>(
234+
inputStream,
235+
new GenericGenbankHeaderParser<>(),
236+
new DNASequenceCreator(DNACompoundSet.getDNACompoundSet())
237+
);
238+
LinkedHashMap<String, DNASequence> dnaSequences = genbankDNA.process();
239+
sequence = dnaSequences.values().iterator().next();
240+
}
241+
catch (Exception e) {
242+
fail(e.getMessage());
243+
}
244+
finally {
245+
try {
246+
inputStream.close();
247+
}
248+
catch (Exception e) {
249+
// ignore
250+
}
251+
}
252+
return sequence;
253+
}
254+
255+
@Test
256+
public void testNcbiExpandedAccessionFormats() throws Exception {
257+
DNASequence header0 = readGenbankResource("/empty_header0.gb");
258+
assertEquals("CP032762 5868661 bp DNA circular BCT 15-OCT-2018", header0.getOriginalHeader());
259+
260+
DNASequence header1 = readGenbankResource("/empty_header1.gb");
261+
assertEquals("AZZZAA02123456789 9999999999 bp DNA linear PRI 15-OCT-2018", header1.getOriginalHeader());
262+
263+
DNASequence header2 = readGenbankResource("/empty_header2.gb");
264+
assertEquals("AZZZAA02123456789 10000000000 bp DNA linear PRI 15-OCT-2018", header2.getOriginalHeader());
265+
}
226266
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
LOCUS CP032762 5868661 bp DNA circular BCT 15-OCT-2018
2+
DEFINITION no sequence
3+
ACCESSION
4+
VERSION .0
5+
KEYWORDS .
6+
FEATURES Location/Qualifiers
7+
ORIGIN
8+
//
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
LOCUS AZZZAA02123456789 9999999999 bp DNA linear PRI 15-OCT-2018
2+
DEFINITION no sequence
3+
ACCESSION
4+
VERSION .0
5+
KEYWORDS .
6+
FEATURES Location/Qualifiers
7+
ORIGIN
8+
//
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
LOCUS AZZZAA02123456789 10000000000 bp DNA linear PRI 15-OCT-2018
2+
DEFINITION no sequence
3+
ACCESSION
4+
VERSION .0
5+
KEYWORDS .
6+
FEATURES Location/Qualifiers
7+
ORIGIN
8+
//

0 commit comments

Comments
 (0)