2727import java .io .InputStream ;
2828import java .util .Map ;
2929import java .util .Scanner ;
30+ import java .util .Vector ;
3031
3132import org .biojava3 .alignment .io .StockholmFileAnnotation .StockholmFileAnnotationReference ;
3233import org .biojava3 .core .exceptions .ParserException ;
@@ -246,54 +247,117 @@ public class StockholmFileParser {
246247 private static final int STATUS_IN_SEQUENCE = 20 ;
247248
// Current parser state; a fresh parser starts out as STATUS_OUTSIDE_FILE.
248249 private int status =STATUS_OUTSIDE_FILE ;
// Scanner wrapped around cashedInputStream. It is kept between calls so that
// parseNext(int) can resume reading where the previous parse call stopped.
250+ Scanner internalScanner = null ;
// Last stream handed to parse(InputStream, int).
// NOTE(review): "cashed" is presumably a typo for "cached" -- confirm before renaming.
251+ private InputStream cashedInputStream ;
249252
250253
251254 /**
252- * Parses a Stockholm file and returns a {@link StockholmStructure} object with its content
255+ * Parses a Stockholm file and returns a {@link StockholmStructure} object with its content.<br>
256+ * This function is meant to be used for single access to specific
257+ * file and it closes the file after doing its assigned job. Any subsequent call
258+ * to {@link #parseNext(int)} will throw an exception or will function with unpredicted behavior.
253259 *
254260 * @param filename complete(?) path to the file from where to read the content
255261 * @return stockholm file content
256- * @throws Exception
262+ * @throws IOException when an exception occurred while opening/reading/closing the file+
263+ * @throws ParserException if unexpected format is encountered
264+ */
265+ public StockholmStructure parse (String filename ) throws IOException ,ParserException {
266+ InputStream inStream = new InputStreamProvider ().getInputStream (filename );
267+ StockholmStructure structure = parse (inStream );
268+ inStream .close ();
269+ return structure ;
270+ }
271+ /**
272+ * Parses a Stockholm file and returns a {@link StockholmStructure} object with its content.<br>
273+ * This function doesn't close the file after doing its assigned job; to allow for further calls of {@link #parseNext(int)}.
274+ * @see #parseNext(int)
275+ *
276+ * @param filename complete(?) path to the file from where to read the content
277+ * @param max maximum number of files to read, <code>-1</code> for all
278+ * @return a vector of {@link StockholmStructure} containing parsed structures.
279+ * @throws IOException when an exception occurred while opening/reading/closing the file+
280+ * @throws ParserException if unexpected format is encountered
257281 */
258- public StockholmStructure parseFile (String filename ) throws Exception {
282+ public Vector < StockholmStructure > parse (String filename , int max ) throws IOException , ParserException {
259283 InputStreamProvider isp = new InputStreamProvider ();
260- InputStream inStream = null ;
261- try {
262- inStream = isp .getInputStream (filename );
263- } catch (Exception e ) {
264- // something is wrong with the file!
265- e .printStackTrace ();
266- throw new IOException ("Error reading the file" );
267- }
268-
269- return parseFile (inStream );
284+ InputStream inStream = isp .getInputStream (filename );
285+ Vector <StockholmStructure > structures = parse (inStream , max );
286+ return structures ;
270287 }
271288
272- /**parses {@link InputStream} and returns {@link StockholmStructure} object containing its contents .
289+ /**parses {@link InputStream} and returns a the first contained alignment in a {@link StockholmStructure} object.
273290 * Used mainly for multiple files within the same input stream, (e.g. when
274291 * reading from Pfam flat files. <br>
275- * TODO This method should leave the stream unclosed.
292+ * This method leaves the stream open for further calls of {@link #parseNext(int)}.
293+ * @see #parseNext(int)
276294 * @param inStream the {@link InputStream} containing the file to read.
277295 * @return a {@link StockholmStructure} object representing file contents.
278- * @throws IOException
279- * @throws Exception
296+ * @throws IOException
297+ * @throws ParserException
280298 */
281- public StockholmStructure parseFile (InputStream inStream ) throws Exception {
282- Scanner scanner = new Scanner (inStream );
283- return parseFile (scanner );
299+ public StockholmStructure parse (InputStream inStream ) throws ParserException , IOException {
300+ return parse (inStream ,1 ).firstElement ();
284301 }
285302
303+ /**parses an {@link InputStream} and returns maximum <code>max</code> object contained in
304+ * that file.<br>
305+ * This method leaves the stream open for further calls of {@link #parseNext(int)}.
306+ *
307+ * @see #parseNext(int)
308+ * @param inStream the stream to parse
309+ * @param max maximum number of structures to try to parse
310+ * @return a {@link Vector} of {@link StockholmStructure} objects.
311+ * @throws IOException in case an I/O Exception occurred.
312+ */
313+ public Vector <StockholmStructure > parse (InputStream inStream , int max ) throws IOException {
314+ if (inStream != this .cashedInputStream ) {
315+ this .cashedInputStream =inStream ;
316+ this .internalScanner =null ;
317+ }
318+
319+ if (internalScanner == null ) {
320+ internalScanner = new Scanner (inStream );
321+ }
322+ Vector <StockholmStructure > structures = new Vector <StockholmStructure >();
323+ while (max != -1 && max -- >0 ) {
324+ StockholmStructure structure = parse (internalScanner );
325+ if (structure != null ){
326+ structures .add (structure );
327+ }
328+ }
329+ return structures ;
330+ }
331+
332+ /**Tries to parse and return as maximum as <code>max</code> structures in the last used file or input stream.<br>
333+ * Please consider calling either {@link #parse(InputStream)},
334+ * {@link #parse(InputStream, int)}, or {@link #parse(String, int)} before calling this function.
335+ * @param max
336+ * @return
337+ * @throws IOException
338+ */
339+ public Vector <StockholmStructure > parseNext (int max ) throws IOException {
340+ return parse (this .cashedInputStream , max );
341+ }
286342
287343 /**
288- * Parses a stockholm file and returns a {@link StockholmStructure} object with its content
344+ * Parses a Stockholm file and returns a {@link StockholmStructure} object with its content.
345+ * This method returns just after reaching the end of structure delimiter line ("//"), leaving any remaining empty lines unconsumed.
289346 *
290347 * @param scanner from where to read the file content
291- * @return stockholm file content
348+ * @return Stockholm file content
292349 * @throws IOException
293350 * @throws Exception
294351 */
295- public StockholmStructure parseFile (Scanner scanner ) throws ParserException , IOException {
296- stockholmStructure = new StockholmStructure ();
352+ StockholmStructure parse (Scanner scanner ) throws ParserException , IOException {
353+ if (scanner == null ) {
354+ if (internalScanner != null ){
355+ scanner = internalScanner ;
356+ }else {
357+ throw new IllegalArgumentException ("No Scanner defined" );
358+ }
359+ }
360+ this .stockholmStructure = new StockholmStructure ();
297361 String line = null ;
298362 int linesCount = 0 ;
299363 try {
@@ -332,7 +396,7 @@ public StockholmStructure parseFile(Scanner scanner) throws ParserException, IOE
332396 // #=GS <seqname> <featurename> <generic per-sequence annotation, free text>
333397 int index1 =line .indexOf (' ' , 5 );
334398 String seqName =line .substring (5 , index1 );
335- while (line .charAt (++index1 )== ' ' )
399+ while (line .charAt (++index1 )<= ' ' )//i.e. white space
336400 ;//keep advancing
337401 int index2 =line .indexOf (' ' , index1 );
338402 String featureName =line .substring (index1 , index2 );
@@ -360,14 +424,14 @@ public StockholmStructure parseFile(Scanner scanner) throws ParserException, IOE
360424 }
361425 } else if (line .trim ().equals ("//" )) {
362426 status =STATUS_OUTSIDE_FILE ;
363- break ;//TODO should we just break immediately or jump next empty lines?
427+ break ;//should we just break immediately or jump next empty lines?
364428 } else /*if (!line.startsWith("#")) */ {
365429 if (status == STATUS_IN_SEQUENCE ) {
366430 // This line corresponds to a sequence. Something like:
367431 // O83071/192-246 MTCRAQLIAVPRASSLAEAIACAQKMRVSRVPVYERS
368432 handleSequenceLine (line );
369- }else if (status ==STATUS_OUTSIDE_FILE ) {//TODO change this condition to enable reading multiple MSA in single file.
370- throw new ParserException ("The end of file character was allready reached but there are still sequence lines" );
433+ // }else if (status==STATUS_OUTSIDE_FILE) {
434+ // throw new ParserException("The end of file character was allready reached but there are still sequence lines");
371435 }else {
372436 System .err .println ("Error: Unknown or unexpected line [" +line +"].\n Please contact the Biojava team." );
373437 throw new ParserException ("Error: Unknown or unexpected line [" +line +"]." );
@@ -393,7 +457,7 @@ public StockholmStructure parseFile(Scanner scanner) throws ParserException, IOE
393457 }
394458 }
395459
396- return stockholmStructure ;
460+ return this . stockholmStructure ;
397461 }
398462
399463 /**
@@ -455,8 +519,8 @@ private void handleFileAnnotation(String featureName, String value) {
455519 stockholmStructure .getFileAnnotation ().setGFNumSequences (value );
456520 } else if (featureName .equals (GF_DB_COMMENT )) {
457521 stockholmStructure .getFileAnnotation ().setGFDBComment (value );
458- } else if (featureName .equals (GF_DB_REFERENCE )) {
459- stockholmStructure .getFileAnnotation ().addDBReference (value );
522+ // } else if (featureName.equals(GF_DB_REFERENCE)) {
523+ // stockholmStructure.getFileAnnotation().addDBReference(value);
460524 } else if (featureName .equals (GF_REFERENCE_COMMENT )) {
461525 stockholmStructure .getFileAnnotation ().setGFRefComment (value );
462526 } else if (featureName .equals (GF_REFERENCE_NUMBER )) {
@@ -498,7 +562,7 @@ private void handleFileAnnotation(String featureName, String value) {
498562
499563 /**usually a single line of:<br>
500564 * #=GC <feature> <Generic per-Column annotation, exactly 1 char per column>
501- * @param featureName TODO
565+ * @param featureName the feature name :)
502566 * @param value the line to be parsed.
503567 */
504568 private void handleConsensusAnnotation (String featureName , String value ) {
@@ -592,15 +656,22 @@ private void handleResidueAnnotation(String seqName, String featureName,String v
592656 //TODO implement toString()
593657
594658
595- public static void main (String [] args ) throws Exception {
659+ // public static void main(String[] args) throws Exception {
596660// StockholmFileParser fileParser = new StockholmFileParser();
597- // StockholmStructure parsedFile = fileParser.parseFile(ClassLoader.getSystemClassLoader().getResourceAsStream("longTest(Ankyrin repeat).sto"));
598- //
599- // Map<String, StringBuffer> sequences = parsedFile.getSequences();
600- // Set<String> keySet = sequences.keySet();
601- // for (String key: keySet) {
602- // System.out.println("seq: "+key);
603- // System.out.println("\t\t\t"+sequences.get(key));
661+ // Vector<StockholmStructure> structures = fileParser.parse("D:\\BII-PhD\\Research\\Pfam23.0\\Pfam-A.seed.gz",5);
662+ // displaySequences(structures);
663+ // structures= fileParser.parseNext(5);
664+ // displaySequences(structures);
665+ // }
666+ // public static void displaySequences(Vector<StockholmStructure> structures) {
667+ // for (StockholmStructure structure : structures) {
668+ // System.out.println("----------------- Structure "+structure.getFileAnnotation().getIdentification()+" -----------");
669+ // Map<String, StringBuffer> sequences = structure.getSequences();
670+ // Set<String> keySet = sequences.keySet();
671+ // for (String key: keySet) {
672+ // System.out.println("seq: "+key);
673+ // System.out.println("\t\t\t"+sequences.get(key));
674+ // }
604675// }
605- }
676+ // }
606677}
0 commit comments