2424import java .io .File ;
2525import java .io .InputStream ;
2626import java .util .LinkedHashMap ;
27- import java .util .logging .Level ;
28- import java .util .logging .Logger ;
2927import org .biojava .nbio .core .sequence .ProteinSequence ;
3028import org .biojava .nbio .core .sequence .compound .AminoAcidCompound ;
3129import org .biojava .nbio .core .sequence .compound .AminoAcidCompoundSet ;
@@ -44,81 +42,77 @@ public class ParseFastaFileDemo {
4442 public ParseFastaFileDemo (){
4543
4644
47- }
45+ }
4846
49- /** e.g. download ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz
47+ /**
48+ * e.g. download ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz
5049 * and pass in path to local location of file
5150 *
5251 * @param args
5352 */
54- public static void main (String [] args ) {
55-
56- int mb = 1024 *1024 ;
53+ public static void main (String [] args ) throws Exception {
5754
58- //Getting the runtime reference from system
59- Runtime runtime = Runtime .getRuntime ();
55+ int mb = 1024 *1024 ;
6056
61- System .out .println ("##### Heap utilization statistics [MB] #####" );
57+ //Getting the runtime reference from system
58+ Runtime runtime = Runtime .getRuntime ();
6259
63- //Print used memory
64- System .out .println ("Used Memory:"
65- + (runtime .totalMemory () - runtime .freeMemory ()) / mb );
60+ System .out .println ("##### Heap utilization statistics [MB] #####" );
6661
67- //Print free memory
68- System .out .println ("Free Memory:"
69- + runtime .freeMemory () / mb );
62+ //Print used memory
63+ System .out .println ("Used Memory:"
64+ + ( runtime .totalMemory () - runtime . freeMemory () ) / mb );
7065
71- //Print total available memory
72- System .out .println ("Total Memory:" + runtime .totalMemory () / mb );
66+ //Print free memory
67+ System .out .println ("Free Memory:"
68+ + runtime .freeMemory () / mb );
7369
74- //Print Maximum available memory
75- System .out .println ("Max Memory:" + runtime .maxMemory () / mb );
70+ //Print total available memory
71+ System .out .println ("Total Memory:" + runtime .totalMemory () / mb );
7672
73+ //Print Maximum available memory
74+ System .out .println ("Max Memory:" + runtime .maxMemory () / mb );
7775
78- if ( args .length < 1 ) {
79- System .err .println ("First argument needs to be path to fasta file" );
80- return ;
81- }
82-
83- File f = new File (args [0 ]);
8476
85- if ( ! f . exists () ) {
86- System .err .println ("File does not exist " + args [ 0 ] );
87- return ;
88- }
77+ if ( args . length < 1 ) {
78+ System .err .println ("First argument needs to be path to fasta file" );
79+ return ;
80+ }
8981
90- long timeS = System . currentTimeMillis ( );
82+ File f = new File ( args [ 0 ] );
9183
92- try {
84+ if ( ! f .exists ()) {
85+ System .err .println ("File does not exist " + args [0 ]);
86+ return ;
87+ }
9388
94- // automatically uncompress files using InputStreamProvider
95- InputStreamProvider isp = new InputStreamProvider ();
89+ long timeS = System .currentTimeMillis ();
9690
97- InputStream inStream = isp .getInputStream (f );
91+ // automatically uncompress files using InputStreamProvider
92+ InputStreamProvider isp = new InputStreamProvider ();
9893
94+ InputStream inStream = isp .getInputStream (f );
9995
100- FastaReader <ProteinSequence , AminoAcidCompound > fastaReader = new FastaReader <ProteinSequence , AminoAcidCompound >(
101- inStream ,
102- new GenericFastaHeaderParser <ProteinSequence , AminoAcidCompound >(),
103- new ProteinSequenceCreator (AminoAcidCompoundSet .getAminoAcidCompoundSet ()));
10496
105- LinkedHashMap <String , ProteinSequence > b ;
97+ FastaReader <ProteinSequence , AminoAcidCompound > fastaReader = new FastaReader <ProteinSequence , AminoAcidCompound >(
98+ inStream ,
99+ new GenericFastaHeaderParser <ProteinSequence , AminoAcidCompound >(),
100+ new ProteinSequenceCreator (AminoAcidCompoundSet .getAminoAcidCompoundSet ()));
106101
107- int nrSeq = 0 ;
102+ LinkedHashMap < String , ProteinSequence > b ;
108103
109- while ((b = fastaReader .process (100 )) != null ) {
110- for (String key : b .keySet ()) {
111- nrSeq ++;
112- //System.out.println(nrSeq + " : " + key + " " + b.get(key));
113- if ( nrSeq % 100000 == 0 )
114- System .out .println (nrSeq );
115- }
104+ int nrSeq = 0 ;
116105
117- }
118- long timeE = System .currentTimeMillis ();
119- System .out .println ("parsed a total of " + nrSeq + " TREMBL sequences! in " + (timeE - timeS ));
120- } catch (Exception ex ) {
121- Logger .getLogger (ParseFastaFileDemo .class .getName ()).log (Level .SEVERE , null , ex );
106+ while ((b = fastaReader .process (100 )) != null ) {
107+ for (String key : b .keySet ()) {
108+ nrSeq ++;
109+ System .out .println (nrSeq + " : " + key + " " + b .get (key ));
110+ if ( nrSeq % 100000 == 0 )
111+ System .out .println (nrSeq );
122112 }
113+
123114 }
115+ long timeE = System .currentTimeMillis ();
116+ System .out .println ("parsed a total of " + nrSeq + " TREMBL sequences! in " + (timeE - timeS ));
117+ }
124118}
0 commit comments