Skip to content

Commit cd294f6

Browse files
committed
Proper fixation of the bug, adding INFINITY as a constant, and documentation
git-svn-id: http://code.open-bio.org/repos/biojava/biojava-live/trunk@9914 7c6358e6-4a41-0410-a743-a5b2a554c398
1 parent 5f405b7 commit cd294f6

1 file changed

Lines changed: 40 additions & 31 deletions

File tree

biojava3-alignment/src/main/java/org/biojava3/alignment/io/StockholmFileParser.java

Lines changed: 40 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@
9292
*/
9393
public class StockholmFileParser {
9494

95+
/** Indicates reading as much as possible, without limits. */
96+
public static final int INFINITY = -1;
9597
/** #=GF <feature> <Generic per-File annotation, free text> */
9698
private static final String GENERIC_PER_FILE_ANNOTATION = "GF";
9799
/** #=GC <feature> <Generic per-Column annotation, exactly 1 char per column> */
@@ -274,10 +276,10 @@ public StockholmStructure parse(String filename) throws IOException,ParserExcept
274276
* This function doesn't close the file after doing its assigned job, so that further calls of {@link #parseNext(int)} remain possible.
275277
* @see #parseNext(int)
276278
*
277-
* @param filename complete(?) path to the file from where to read the content
278-
* @param max maximum number of files to read, <code>-1</code> for all
279+
* @param filename file from where to read the content. see {@link InputStreamProvider} for more details.
280+
* @param max maximum number of files to read, {@link #INFINITY} for all.
279281
* @return a list of {@link StockholmStructure} containing parsed structures.
280-
* @throws IOException when an exception occurred while opening/reading/closing the file+
282+
* @throws IOException when an exception occurred while opening/reading/closing the file.
281283
* @throws ParserException if unexpected format is encountered
282284
*/
283285
public List<StockholmStructure> parse(String filename, int max) throws IOException,ParserException{
@@ -300,17 +302,22 @@ public StockholmStructure parse(InputStream inStream) throws ParserException, IO
300302
return parse(inStream,1).get(0);
301303
}
302304

303-
/**parses an {@link InputStream} and returns maximum <code>max</code> object contained in
305+
/** Parses an {@link InputStream} and returns at most <code>max</code> objects contained in
304306
* that file.<br>
305307
* This method leaves the stream open for further calls of {@link #parse(InputStream, int)} (same function) or {@link #parseNext(int)}.
306308
*
307309
* @see #parseNext(int)
308310
* @param inStream the stream to parse
309-
* @param max maximum number of structures to try to parse
310-
* @return a {@link List} of {@link StockholmStructure} objects.
311+
* @param max maximum number of structures to try to
312+
* parse, {@link #INFINITY} to try to obtain as many as possible.
313+
* @return a {@link List} of {@link StockholmStructure} objects. If there are no more
314+
* structures, an empty list is returned.
311315
* @throws IOException in case an I/O Exception occurred.
312316
*/
313317
public List<StockholmStructure> parse(InputStream inStream, int max) throws IOException {
318+
if (max < INFINITY) {
319+
throw new IllegalArgumentException("max can't be -ve value "+max);
320+
}
314321
if (inStream != this.cashedInputStream) {
315322
this.cashedInputStream=inStream;
316323
this.internalScanner=null;
@@ -320,10 +327,12 @@ public List<StockholmStructure> parse(InputStream inStream, int max) throws IOEx
320327
internalScanner= new Scanner(inStream);
321328
}
322329
ArrayList<StockholmStructure> structures= new ArrayList<StockholmStructure>();
323-
while (max != -1 && max-- >0) {
330+
while (max != INFINITY && max-- >0) {
324331
StockholmStructure structure = parse(internalScanner);
325332
if(structure != null){
326333
structures.add(structure);
334+
}else {
335+
break;
327336
}
328337
}
329338
return structures;
@@ -345,7 +354,7 @@ public List<StockholmStructure> parseNext(int max) throws IOException {
345354
* This method returns just after reaching the end of structure delimiter line ("//"), leaving any remaining empty lines unconsumed.
346355
*
347356
* @param scanner from where to read the file content
348-
* @return Stockholm file content
357+
* @return Stockholm file content, or <code>null</code> if no structure could be read (for example, when there are no more structures).
349358
* @throws IOException
350359
* @throws Exception
351360
*/
@@ -357,11 +366,10 @@ StockholmStructure parse(Scanner scanner) throws ParserException, IOException {
357366
throw new IllegalArgumentException("No Scanner defined");
358367
}
359368
}
360-
this.stockholmStructure = new StockholmStructure();
361369
String line = null;
362370
int linesCount = 0;
363371
try {
364-
do {
372+
while(scanner.hasNextLine()){
365373
line = scanner.nextLine();
366374
// if the file is empty
367375
// this condition will not happen; it is kept in case we decide to go back to BufferedReader for performance reasons.
@@ -417,8 +425,9 @@ StockholmStructure parse(Scanner scanner) throws ParserException, IOException {
417425
if (status== STATUS_OUTSIDE_FILE) {
418426
status= STATUS_INSIDE_FILE;
419427
String[] header = line.split("\\s+");
420-
stockholmStructure.getFileAnnotation().setFormat(header[1]);
421-
stockholmStructure.getFileAnnotation().setVersion(header[2]);
428+
this.stockholmStructure = new StockholmStructure();
429+
this.stockholmStructure.getFileAnnotation().setFormat(header[1]);
430+
this.stockholmStructure.getFileAnnotation().setVersion(header[2]);
422431
} else {
423432
throw new ParserException("Uexpected Format line: ["+line+"]");
424433
}
@@ -437,34 +446,35 @@ StockholmStructure parse(Scanner scanner) throws ParserException, IOException {
437446
throw new ParserException("Error: Unknown or unexpected line [" +line+"].");
438447
}
439448
}
440-
441449
linesCount++;
442-
} while (scanner.hasNextLine());
443-
450+
}
444451
} catch (IOException e) {
445452
e.printStackTrace();
446453
throw new IOException("Error parsing Stockholm file");
447454
}
448-
449-
int length = -1;
450-
Map<String, StringBuffer> sequences = stockholmStructure.getSequences();
451-
for (String sequencename : sequences.keySet()) {
452-
StringBuffer sequence = sequences.get(sequencename);
453-
if (length == -1) {
454-
length = sequence.length();
455-
} else if (length != sequence.length()) {
456-
throw new RuntimeException("Sequences have different lengths");
455+
StockholmStructure structure = this.stockholmStructure;
456+
this.stockholmStructure=null;
457+
if (structure != null) {
458+
int length = -1;
459+
Map<String, StringBuffer> sequences = structure.getSequences();
460+
for (String sequencename : sequences.keySet()) {
461+
StringBuffer sequence = sequences.get(sequencename);
462+
if (length == -1) {
463+
length = sequence.length();
464+
} else if (length != sequence.length()) {
465+
throw new RuntimeException(
466+
"Sequences have different lengths");
467+
}
457468
}
458469
}
459-
460-
return this.stockholmStructure;
470+
return structure;
461471
}
462472

463473
/**
464474
* Handles a line that corresponds to a sequence. <br>
465475
* e.g.: COATB_BPIKE/30-81
466-
* AEPNAATNYATEAMDSLKTQAIDLISQTWPVVTTVVVAGLVIRLFKKFSSKA
467-
* <b>Warning: This function seems to fail when dealing with sequence with intrinsic space</b>
476+
* AEPNAATNYATEAMDSLKTQAIDLISQTWPVVTTVVVAGLVIRLFKKFSSKA<br>
477+
* N.B.: This function can't tolerate sequences with intrinsic white space.
468478
* @param line
469479
* the line to be parsed
470480
* @throws Exception
@@ -479,7 +489,7 @@ private void handleSequenceLine(String line) throws ParserException {
479489
}
480490

481491
/**
482-
* #=GF <feature> <Generic per-File annotation, free text>
492+
* #=GF &lt;feature&gt; &lt;Generic per-File annotation, free text&gt;
483493
* @param featureName
484494
* @param value the line to be parsed
485495
*/
@@ -621,8 +631,7 @@ private void handleSequenceAnnotation(String seqName, String featureName,String
621631
}
622632

623633
/**
624-
* #=GR <seqname> <feature> <Generic per-Residue annotation, exactly 1 char
625-
* per residue>
634+
* #=GR &lt;seqname&gt; &lt;feature&gt; &lt;Generic per-Residue annotation, exactly 1 char per residue&gt;
626635
*
627636
* @param line
628637
* the line to be parsed

0 commit comments

Comments
 (0)