9292 */
9393public class StockholmFileParser {
9494
95+ /** Indicates reading as much as possible, without limits. */
96+ public static final int INFINITY = -1 ;
9597 /** #=GF <feature> <Generic per-File annotation, free text> */
9698 private static final String GENERIC_PER_FILE_ANNOTATION = "GF" ;
9799 /** #=GC <feature> <Generic per-Column annotation, exactly 1 char per column> */
@@ -274,10 +276,10 @@ public StockholmStructure parse(String filename) throws IOException,ParserExcept
274276 * This function doesn't close the file after doing its assigned job; to allow for further calls of {@link #parseNext(int)}.
275277 * @see #parseNext(int)
276278 *
277- * @param filename complete(?) path to the file from where to read the content
278- * @param max maximum number of files to read, <code>-1</code> for all
279+ * @param filename file from where to read the content. see {@link InputStreamProvider} for more details.
280+ * @param max maximum number of files to read, {@link #INFINITY} for all.
279281 * @return a vector of {@link StockholmStructure} containing parsed structures.
280- * @throws IOException when an exception occurred while opening/reading/closing the file+
282+ * @throws IOException when an exception occurred while opening/reading/closing the file.
281283 * @throws ParserException if unexpected format is encountered
282284 */
283285 public List <StockholmStructure > parse (String filename , int max ) throws IOException ,ParserException {
@@ -300,17 +302,22 @@ public StockholmStructure parse(InputStream inStream) throws ParserException, IO
300302 return parse (inStream ,1 ).get (0 );
301303 }
302304
303- /**parses an {@link InputStream} and returns maximum <code>max</code> object contained in
305+ /** Parses an {@link InputStream} and returns at most <code>max</code> objects contained in
304306 * that file.<br>
305307 * This method leaves the stream open for further calls of {@link #parse(InputStream, int)} (same function) or {@link #parseNext(int)}.
306308 *
307309 * @see #parseNext(int)
308310 * @param inStream the stream to parse
309- * @param max maximum number of structures to try to parse
310- * @return a {@link List} of {@link StockholmStructure} objects.
311+ * @param max maximum number of structures to try to
312+ * parse, {@link #INFINITY} to try to obtain as many as possible.
313+ * @return a {@link List} of {@link StockholmStructure} objects. If there are no more
314+ * structures, an empty list is returned.
311315 * @throws IOException in case an I/O Exception occurred.
312316 */
313317 public List <StockholmStructure > parse (InputStream inStream , int max ) throws IOException {
318+ if (max < INFINITY ) {
319+ throw new IllegalArgumentException ("max can't be -ve value " +max );
320+ }
314321 if (inStream != this .cashedInputStream ) {
315322 this .cashedInputStream =inStream ;
316323 this .internalScanner =null ;
@@ -320,10 +327,12 @@ public List<StockholmStructure> parse(InputStream inStream, int max) throws IOEx
320327 internalScanner = new Scanner (inStream );
321328 }
322329 ArrayList <StockholmStructure > structures = new ArrayList <StockholmStructure >();
323- while (max != - 1 && max -- >0 ) {
330+ while (max != INFINITY && max -- >0 ) {
324331 StockholmStructure structure = parse (internalScanner );
325332 if (structure != null ){
326333 structures .add (structure );
334+ }else {
335+ break ;
327336 }
328337 }
329338 return structures ;
@@ -345,7 +354,7 @@ public List<StockholmStructure> parseNext(int max) throws IOException {
345354 * This method returns just after reaching the end of structure delimiter line ("//"), leaving any remaining empty lines unconsumed.
346355 *
347356 * @param scanner from where to read the file content
348- * @return Stockholm file content
357+ * @return Stockholm file content, or <code>null</code> if no structure could be read (e.g. there are no more structures).
349358 * @throws IOException
350359 * @throws Exception
351360 */
@@ -357,11 +366,10 @@ StockholmStructure parse(Scanner scanner) throws ParserException, IOException {
357366 throw new IllegalArgumentException ("No Scanner defined" );
358367 }
359368 }
360- this .stockholmStructure = new StockholmStructure ();
361369 String line = null ;
362370 int linesCount = 0 ;
363371 try {
364- do {
372+ while ( scanner . hasNextLine ()) {
365373 line = scanner .nextLine ();
366374 // if the file is empty
367375 // this condition will not happen; it is left in case we decide to go back to BufferedReader for performance purposes.
@@ -417,8 +425,9 @@ StockholmStructure parse(Scanner scanner) throws ParserException, IOException {
417425 if (status == STATUS_OUTSIDE_FILE ) {
418426 status = STATUS_INSIDE_FILE ;
419427 String [] header = line .split ("\\ s+" );
420- stockholmStructure .getFileAnnotation ().setFormat (header [1 ]);
421- stockholmStructure .getFileAnnotation ().setVersion (header [2 ]);
428+ this .stockholmStructure = new StockholmStructure ();
429+ this .stockholmStructure .getFileAnnotation ().setFormat (header [1 ]);
430+ this .stockholmStructure .getFileAnnotation ().setVersion (header [2 ]);
422431 } else {
423432 throw new ParserException ("Uexpected Format line: [" +line +"]" );
424433 }
@@ -437,34 +446,35 @@ StockholmStructure parse(Scanner scanner) throws ParserException, IOException {
437446 throw new ParserException ("Error: Unknown or unexpected line [" +line +"]." );
438447 }
439448 }
440-
441449 linesCount ++;
442- } while (scanner .hasNextLine ());
443-
450+ }
444451 } catch (IOException e ) {
445452 e .printStackTrace ();
446453 throw new IOException ("Error parsing Stockholm file" );
447454 }
448-
449- int length = -1 ;
450- Map <String , StringBuffer > sequences = stockholmStructure .getSequences ();
451- for (String sequencename : sequences .keySet ()) {
452- StringBuffer sequence = sequences .get (sequencename );
453- if (length == -1 ) {
454- length = sequence .length ();
455- } else if (length != sequence .length ()) {
456- throw new RuntimeException ("Sequences have different lengths" );
455+ StockholmStructure structure = this .stockholmStructure ;
456+ this .stockholmStructure =null ;
457+ if (structure != null ) {
458+ int length = -1 ;
459+ Map <String , StringBuffer > sequences = structure .getSequences ();
460+ for (String sequencename : sequences .keySet ()) {
461+ StringBuffer sequence = sequences .get (sequencename );
462+ if (length == -1 ) {
463+ length = sequence .length ();
464+ } else if (length != sequence .length ()) {
465+ throw new RuntimeException (
466+ "Sequences have different lengths" );
467+ }
457468 }
458469 }
459-
460- return this .stockholmStructure ;
470+ return structure ;
461471 }
462472
463473 /**
464474 * Handles a line that corresponds to a sequence. <br>
465475 * e.g.: COATB_BPIKE/30-81
466- * AEPNAATNYATEAMDSLKTQAIDLISQTWPVVTTVVVAGLVIRLFKKFSSKA
467- * <b>Warning : This function seems to fail when dealing with sequence with intrinsic space</b>
476+ * AEPNAATNYATEAMDSLKTQAIDLISQTWPVVTTVVVAGLVIRLFKKFSSKA<br>
477+ * N.B. : This function can't tolerate sequences with intrinsic white space.
468478 * @param line
469479 * the line to be parsed
470480 * @throws Exception
@@ -479,7 +489,7 @@ private void handleSequenceLine(String line) throws ParserException {
479489 }
480490
481491 /**
482- * #=GF < feature> < Generic per-File annotation, free text>
492+ * #=GF < feature> < Generic per-File annotation, free text>
483493 * @param featureName
484494 * @param value the line to be parsed
485495 */
@@ -621,8 +631,7 @@ private void handleSequenceAnnotation(String seqName, String featureName,String
621631 }
622632
623633 /**
624- * #=GR <seqname> <feature> <Generic per-Residue annotation, exactly 1 char
625- * per residue>
634+ * #=GR <seqname> <feature> <Generic per-Residue annotation, exactly 1 char per residue>
626635 *
627636 * @param line
628637 * the line to be parsed
0 commit comments