Skip to content

Commit 5ed645d

Browse files
committed
Adding example for parsing large FASTA files
1 parent ad1dfab commit 5ed645d

File tree

1 file changed

+64
-0
lines changed

1 file changed

+64
-0
lines changed

core/readwrite.md

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
Reading and Writing Basic Sequence File Formats
2+
==================================================
3+
4+
5+
TODO: needs more examples
6+
7+
8+
## FASTA
9+
10+
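BioJava can be used to parse large FASTA files. The example below can parse a 1 GB (compressed) version of UniProt TrEMBL with standard memory settings, because it reads the sequences in small batches instead of loading the whole file at once.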
11+
12+
13+
```java
14+
15+
16+
17+
/** Download a large file, e.g. ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz
18+
* and pass in path to local location of file
19+
*
20+
* @param args
21+
*/
22+
public static void main(String[] args) {
23+
24+
if ( args.length < 1) {
25+
System.err.println("First argument needs to be path to fasta file");
26+
return;
27+
}
28+
29+
File f = new File(args[0]);
30+
31+
if ( ! f.exists()) {
32+
System.err.println("File does not exist " + args[0]);
33+
return;
34+
}
35+
36+
try {
37+
38+
// automatically uncompresses files using InputStreamProvider
39+
InputStreamProvider isp = new InputStreamProvider();
40+
41+
InputStream inStream = isp.getInputStream(f);
42+
43+
FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
44+
inStream,
45+
new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
46+
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
47+
48+
LinkedHashMap<String, ProteinSequence> b;
49+
50+
51+
int nrSeq = 0;
52+
53+
while ((b = fastaReader.process(10)) != null) {
54+
for (String key : b.keySet()) {
55+
nrSeq++;
56+
System.out.println(nrSeq + " : " + key + " " + b.get(key));
57+
}
58+
59+
}
60+
} catch (Exception ex) {
61+
Logger.getLogger(ParseFastaFileDemo.class.getName()).log(Level.SEVERE, null, ex);
62+
}
63+
}
64+
```

0 commit comments

Comments
 (0)