Skip to content

Commit 9de8e36

Browse files
committed
Dak’s new unit test that demonstrates a problem when parsing FASTA files
1 parent 906eb6e commit 9de8e36

File tree

3 files changed

+97
-0
lines changed

3 files changed

+97
-0
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package org.biojava.nbio.core.fasta;
2+
3+
import java.io.InputStream;
4+
import java.util.LinkedHashMap;
5+
6+
import static org.junit.Assert.* ;
7+
import static org.hamcrest.CoreMatchers.* ;
8+
9+
import org.biojava.nbio.core.sequence.ProteinSequence;
10+
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
11+
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
12+
import org.biojava.nbio.core.sequence.io.FastaReader;
13+
import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
14+
import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator;
15+
import org.biojava.nbio.core.sequence.io.util.ClasspathResource;
16+
import org.junit.Test;
17+
18+
19+
/**
 * JUnit tests that drive {@code FastaReader} over FASTA resources loaded from the
 * classpath, using {@code GenericFastaHeaderParser} and {@code ProteinSequenceCreator}
 * to build {@code ProteinSequence} objects.
 *
 * Covers both the read-everything call ({@code process()}) and the bounded call
 * ({@code process(1)}) invoked repeatedly on the same reader.
 *
 * NOTE(review): the numbered "N+" lines interleaved with the code are diff-gutter
 * markers from the commit view this text was captured from, not Java source.
 */
public class TestFASTAReader {
20+
21+
/**
 * Reads the P02768.fasta resource with a single {@code process()} call and asserts
 * that the returned map is non-null and contains exactly one entry.
 */
@Test
22+
public void testProcessAll() throws Exception {
23+
ClasspathResource r = new ClasspathResource("org/biojava/nbio/core/fasta/P02768.fasta");
24+
// Declared outside the try so the finally block below can still see it.
FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = null ;
25+
// The input stream is closed by try-with-resources; the reader is closed separately in finally.
try( InputStream inStream = r.getInputStream() ) {
26+
fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
27+
inStream,
28+
new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
29+
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
30+
LinkedHashMap<String, ProteinSequence> sequences = fastaReader.process();
31+
assertThat(sequences,is(notNullValue()));
32+
// Presumably P02768.fasta holds a single record (one '>' header) - confirm against the fixture.
assertThat(sequences.size(),is(1));
33+
} finally {
34+
// fastaReader is still null if getInputStream() or the constructor threw.
if(fastaReader != null) fastaReader.close();
35+
}
36+
}
37+
/**
 * Calls {@code process(1)} twice on a reader over P02768.fasta: the first call must
 * return a non-null result and the second must return {@code null}.
 *
 * NOTE(review): the second assertion presumably encodes "no more records left";
 * this relies on the fixture containing exactly one sequence - confirm.
 */
@Test
38+
public void testProcess1() throws Exception {
39+
ClasspathResource r = new ClasspathResource("org/biojava/nbio/core/fasta/P02768.fasta");
40+
// Declared outside the try so the finally block below can still see it.
FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = null ;
41+
try( InputStream inStream = r.getInputStream() ) {
42+
fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
43+
inStream,
44+
new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
45+
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
46+
// First bounded read: expected to yield a result.
assertThat(fastaReader.process(1),is(notNullValue()));
47+
// Second bounded read on the same reader: expected to yield null.
assertThat(fastaReader.process(1),is(nullValue()));
48+
} finally {
49+
// fastaReader is still null if getInputStream() or the constructor threw.
if(fastaReader != null) fastaReader.close();
50+
}
51+
}
52+
/**
 * Same shape as {@link #testProcess1()} but over TwoSequences.fasta: the first two
 * {@code process(1)} calls must return non-null results and the third must return
 * {@code null}.
 *
 * NOTE(review): presumably TwoSequences.fasta contains exactly two records, as the
 * name suggests - confirm against the fixture.
 */
@Test
53+
public void testProcess1v2() throws Exception {
54+
ClasspathResource r = new ClasspathResource("org/biojava/nbio/core/fasta/TwoSequences.fasta");
55+
// Declared outside the try so the finally block below can still see it.
FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = null ;
56+
try( InputStream inStream = r.getInputStream() ) {
57+
fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
58+
inStream,
59+
new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
60+
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
61+
// First and second bounded reads: each expected to yield a result.
assertThat(fastaReader.process(1),is(notNullValue()));
62+
assertThat(fastaReader.process(1),is(notNullValue()));
63+
// Third bounded read on the same reader: expected to yield null.
assertThat(fastaReader.process(1),is(nullValue()));
64+
} finally {
65+
// fastaReader is still null if getInputStream() or the constructor threw.
if(fastaReader != null) fastaReader.close();
66+
}
67+
}
68+
69+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
>sp|P02768|ALBU_HUMAN Serum albumin OS=Homo sapiens GN=ALB PE=1 SV=2
2+
MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGEENFKALVLIAFAQYLQQCPF
3+
EDHVKLVNEVTEFAKTCVADESAENCDKSLHTLFGDKLCTVATLRETYGEMADCCAKQEP
4+
ERNECFLQHKDDNPNLPRLVRPEVDVMCTAFHDNEETFLKKYLYEIARRHPYFYAPELLF
5+
FAKRYKAAFTECCQAADKAACLLPKLDELRDEGKASSAKQRLKCASLQKFGERAFKAWAV
6+
ARLSQRFPKAEFAEVSKLVTDLTKVHTECCHGDLLECADDRADLAKYICENQDSISSKLK
7+
ECCEKPLLEKSHCIAEVENDEMPADLPSLAADFVESKDVCKNYAEAKDVFLGMFLYEYAR
8+
RHPDYSVVLLLRLAKTYETTLEKCCAAADPHECYAKVFDEFKPLVEEPQNLIKQNCELFE
9+
QLGEYKFQNALLVRYTKKVPQVSTPTLVEVSRNLGKVGSKCCKHPEAKRMPCAEDYLSVV
10+
LNQLCVLHEKTPVSDRVTKCCTESLVNRRPCFSALEVDETYVPKEFNAETFTFHADICTL
11+
SEKERQIKKQTALVELVKHKPKATKEQLKAVMDDFAAFVEKCCKADDKETCFAEEGKKLV
12+
AASQAALGL
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
>sp|P02768|ALBU_HUMAN Serum albumin OS=Homo sapiens GN=ALB PE=1 SV=2
2+
MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGEENFKALVLIAFAQYLQQCPF
3+
EDHVKLVNEVTEFAKTCVADESAENCDKSLHTLFGDKLCTVATLRETYGEMADCCAKQEP
4+
ERNECFLQHKDDNPNLPRLVRPEVDVMCTAFHDNEETFLKKYLYEIARRHPYFYAPELLF
5+
FAKRYKAAFTECCQAADKAACLLPKLDELRDEGKASSAKQRLKCASLQKFGERAFKAWAV
6+
ARLSQRFPKAEFAEVSKLVTDLTKVHTECCHGDLLECADDRADLAKYICENQDSISSKLK
7+
ECCEKPLLEKSHCIAEVENDEMPADLPSLAADFVESKDVCKNYAEAKDVFLGMFLYEYAR
8+
RHPDYSVVLLLRLAKTYETTLEKCCAAADPHECYAKVFDEFKPLVEEPQNLIKQNCELFE
9+
QLGEYKFQNALLVRYTKKVPQVSTPTLVEVSRNLGKVGSKCCKHPEAKRMPCAEDYLSVV
10+
LNQLCVLHEKTPVSDRVTKCCTESLVNRRPCFSALEVDETYVPKEFNAETFTFHADICTL
11+
SEKERQIKKQTALVELVKHKPKATKEQLKAVMDDFAAFVEKCCKADDKETCFAEEGKKLV
12+
AASQAALGL
13+
>sp|P00698|LYSC_CHICK Lysozyme C OS=Gallus gallus GN=LYZ PE=1 SV=1
14+
MRSLLILVLCFLPLAALGKVFGRCELAAAMKRHGLDNYRGYSLGNWVCAAKFESNFNTQA
15+
TNRNTDGSTDYGILQINSRWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDG
16+
NGMNAWVAWRNRCKGTDVQAWIRGCRL

0 commit comments

Comments
 (0)