Skip to content

Commit e842124

Browse files
committed
Changing parseFile(xxx) function set into parse(xxx) and adding parseNext(int)
git-svn-id: http://code.open-bio.org/repos/biojava/biojava-live/trunk@9832 7c6358e6-4a41-0410-a743-a5b2a554c398
1 parent 6eb094b commit e842124

File tree

1 file changed

+112
-41
lines changed

1 file changed

+112
-41
lines changed

biojava3-alignment/src/main/java/org/biojava3/alignment/io/StockholmFileParser.java

Lines changed: 112 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import java.io.InputStream;
2828
import java.util.Map;
2929
import java.util.Scanner;
30+
import java.util.Vector;
3031

3132
import org.biojava3.alignment.io.StockholmFileAnnotation.StockholmFileAnnotationReference;
3233
import org.biojava3.core.exceptions.ParserException;
@@ -246,54 +247,117 @@ public class StockholmFileParser {
246247
private static final int STATUS_IN_SEQUENCE = 20;
247248

248249
private int status=STATUS_OUTSIDE_FILE;
250+
Scanner internalScanner= null;
251+
private InputStream cashedInputStream;
249252

250253

251254
/**
252-
* Parses a Stockholm file and returns a {@link StockholmStructure} object with its content
255+
* Parses a Stockholm file and returns a {@link StockholmStructure} object with its content.<br>
256+
* This function is meant to be used for single access to specific
257+
* file and it closes the file after doing its assigned job. Any subsequent call
258+
* to {@link #parseNext(int)} will either throw an exception or behave unpredictably.
253259
*
254260
* @param filename complete(?) path to the file from where to read the content
255261
* @return stockholm file content
256-
* @throws Exception
262+
* @throws IOException when an exception occurred while opening/reading/closing the file
263+
* @throws ParserException if unexpected format is encountered
264+
*/
265+
public StockholmStructure parse(String filename) throws IOException,ParserException{
266+
InputStream inStream = new InputStreamProvider().getInputStream(filename);
267+
StockholmStructure structure = parse(inStream);
268+
inStream.close();
269+
return structure;
270+
}
271+
/**
272+
* Parses a Stockholm file and returns a {@link StockholmStructure} object with its content.<br>
273+
* This function doesn't close the file after doing its assigned job; to allow for further calls of {@link #parseNext(int)}.
274+
* @see #parseNext(int)
275+
*
276+
* @param filename complete(?) path to the file from where to read the content
277+
* @param max maximum number of structures to read, <code>-1</code> for all
278+
* @return a vector of {@link StockholmStructure} containing parsed structures.
279+
* @throws IOException when an exception occurred while opening/reading/closing the file
280+
* @throws ParserException if unexpected format is encountered
257281
*/
258-
public StockholmStructure parseFile(String filename) throws Exception {
282+
public Vector<StockholmStructure> parse(String filename, int max) throws IOException,ParserException{
259283
InputStreamProvider isp = new InputStreamProvider();
260-
InputStream inStream = null;
261-
try {
262-
inStream = isp.getInputStream(filename);
263-
} catch (Exception e) {
264-
// something is wrong with the file!
265-
e.printStackTrace();
266-
throw new IOException("Error reading the file");
267-
}
268-
269-
return parseFile(inStream);
284+
InputStream inStream = isp.getInputStream(filename);
285+
Vector<StockholmStructure> structures = parse(inStream, max);
286+
return structures;
270287
}
271288

272-
/**parses {@link InputStream} and returns {@link StockholmStructure} object containing its contents.
289+
/**Parses an {@link InputStream} and returns the first contained alignment in a {@link StockholmStructure} object.
273290
* Used mainly for multiple files within the same input stream, (e.g. when
274291
* reading from Pfam flat files). <br>
275-
* TODO This method should leave the stream unclosed.
292+
* This method leaves the stream open for further calls of {@link #parseNext(int)}.
293+
* @see #parseNext(int)
276294
* @param inStream the {@link InputStream} containing the file to read.
277295
* @return a {@link StockholmStructure} object representing file contents.
278-
* @throws IOException
279-
* @throws Exception
296+
* @throws IOException
297+
* @throws ParserException
280298
*/
281-
public StockholmStructure parseFile(InputStream inStream) throws Exception {
282-
Scanner scanner= new Scanner(inStream);
283-
return parseFile(scanner);
299+
public StockholmStructure parse(InputStream inStream) throws ParserException, IOException {
300+
return parse(inStream,1).firstElement();
284301
}
285302

303+
/**parses an {@link InputStream} and returns maximum <code>max</code> object contained in
304+
* that file.<br>
305+
* This method leaves the stream open for further calls of {@link #parseNext(int)}.
306+
*
307+
* @see #parseNext(int)
308+
* @param inStream the stream to parse
309+
* @param max maximum number of structures to try to parse
310+
* @return a {@link Vector} of {@link StockholmStructure} objects.
311+
* @throws IOException in case an I/O Exception occurred.
312+
*/
313+
public Vector<StockholmStructure> parse(InputStream inStream, int max) throws IOException {
314+
if (inStream != this.cashedInputStream) {
315+
this.cashedInputStream=inStream;
316+
this.internalScanner=null;
317+
}
318+
319+
if (internalScanner == null) {
320+
internalScanner= new Scanner(inStream);
321+
}
322+
Vector<StockholmStructure> structures= new Vector<StockholmStructure>();
323+
while (max != -1 && max-- >0) {
324+
StockholmStructure structure = parse(internalScanner);
325+
if(structure != null){
326+
structures.add(structure);
327+
}
328+
}
329+
return structures;
330+
}
331+
332+
/**Tries to parse and return as maximum as <code>max</code> structures in the last used file or input stream.<br>
333+
* Please consider calling either {@link #parse(InputStream)},
334+
* {@link #parse(InputStream, int)}, or {@link #parse(String, int)} before calling this function.
335+
* @param max
336+
* @return
337+
* @throws IOException
338+
*/
339+
public Vector<StockholmStructure> parseNext(int max) throws IOException {
340+
return parse(this.cashedInputStream, max);
341+
}
286342

287343
/**
288-
* Parses a stockholm file and returns a {@link StockholmStructure} object with its content
344+
* Parses a Stockholm file and returns a {@link StockholmStructure} object with its content.
345+
* This method returns just after reaching the end of structure delimiter line ("//"), leaving any remaining empty lines unconsumed.
289346
*
290347
* @param scanner from where to read the file content
291-
* @return stockholm file content
348+
* @return Stockholm file content
292349
* @throws IOException
293350
* @throws ParserException if an unknown or unexpected line is encountered
294351
*/
295-
public StockholmStructure parseFile(Scanner scanner) throws ParserException, IOException {
296-
stockholmStructure = new StockholmStructure();
352+
StockholmStructure parse(Scanner scanner) throws ParserException, IOException {
353+
if (scanner == null) {
354+
if(internalScanner != null){
355+
scanner = internalScanner;
356+
}else {
357+
throw new IllegalArgumentException("No Scanner defined");
358+
}
359+
}
360+
this.stockholmStructure = new StockholmStructure();
297361
String line = null;
298362
int linesCount = 0;
299363
try {
@@ -332,7 +396,7 @@ public StockholmStructure parseFile(Scanner scanner) throws ParserException, IOE
332396
// #=GS <seqname> <featurename> <generic per-sequence annotation, free text>
333397
int index1=line.indexOf(' ', 5);
334398
String seqName=line.substring(5, index1);
335-
while (line.charAt(++index1)== ' ')
399+
while (line.charAt(++index1)<= ' ')//i.e. white space
336400
;//keep advancing
337401
int index2=line.indexOf(' ', index1);
338402
String featureName=line.substring(index1, index2);
@@ -360,14 +424,14 @@ public StockholmStructure parseFile(Scanner scanner) throws ParserException, IOE
360424
}
361425
} else if (line.trim().equals("//")) {
362426
status=STATUS_OUTSIDE_FILE;
363-
break;//TODO should we just break immediately or jump next empty lines?
427+
break;//should we just break immediately or jump next empty lines?
364428
} else /*if (!line.startsWith("#")) */{
365429
if (status == STATUS_IN_SEQUENCE) {
366430
// This line corresponds to a sequence. Something like:
367431
// O83071/192-246 MTCRAQLIAVPRASSLAEAIACAQKMRVSRVPVYERS
368432
handleSequenceLine(line);
369-
}else if (status==STATUS_OUTSIDE_FILE) {//TODO change this condition to enable reading multiple MSA in single file.
370-
throw new ParserException("The end of file character was allready reached but there are still sequence lines");
433+
// }else if (status==STATUS_OUTSIDE_FILE) {
434+
// throw new ParserException("The end of file character was allready reached but there are still sequence lines");
371435
}else {
372436
System.err.println("Error: Unknown or unexpected line [" +line+"].\nPlease contact the Biojava team.");
373437
throw new ParserException("Error: Unknown or unexpected line [" +line+"].");
@@ -393,7 +457,7 @@ public StockholmStructure parseFile(Scanner scanner) throws ParserException, IOE
393457
}
394458
}
395459

396-
return stockholmStructure;
460+
return this.stockholmStructure;
397461
}
398462

399463
/**
@@ -455,8 +519,8 @@ private void handleFileAnnotation(String featureName, String value) {
455519
stockholmStructure.getFileAnnotation().setGFNumSequences(value);
456520
} else if (featureName.equals(GF_DB_COMMENT)) {
457521
stockholmStructure.getFileAnnotation().setGFDBComment(value);
458-
} else if (featureName.equals(GF_DB_REFERENCE)) {
459-
stockholmStructure.getFileAnnotation().addDBReference(value);
522+
// } else if (featureName.equals(GF_DB_REFERENCE)) {
523+
// stockholmStructure.getFileAnnotation().addDBReference(value);
460524
} else if (featureName.equals(GF_REFERENCE_COMMENT)) {
461525
stockholmStructure.getFileAnnotation().setGFRefComment(value);
462526
} else if (featureName.equals(GF_REFERENCE_NUMBER)) {
@@ -498,7 +562,7 @@ private void handleFileAnnotation(String featureName, String value) {
498562

499563
/**usually a single line of:<br>
500564
* #=GC &lt;feature&gt; &lt;Generic per-Column annotation, exactly 1 char per column&gt;
501-
* @param featureName TODO
565+
* @param featureName the name of the per-column feature (the &lt;feature&gt; field of the #=GC line)
502566
* @param value the line to be parsed.
503567
*/
504568
private void handleConsensusAnnotation(String featureName, String value) {
@@ -592,15 +656,22 @@ private void handleResidueAnnotation(String seqName, String featureName,String v
592656
//TODO implement toString()
593657

594658

595-
public static void main(String[] args) throws Exception {
659+
// public static void main(String[] args) throws Exception {
596660
// StockholmFileParser fileParser = new StockholmFileParser();
597-
// StockholmStructure parsedFile = fileParser.parseFile(ClassLoader.getSystemClassLoader().getResourceAsStream("longTest(Ankyrin repeat).sto"));
598-
//
599-
// Map<String, StringBuffer> sequences = parsedFile.getSequences();
600-
// Set<String> keySet = sequences.keySet();
601-
// for (String key: keySet) {
602-
// System.out.println("seq: "+key);
603-
// System.out.println("\t\t\t"+sequences.get(key));
661+
// Vector<StockholmStructure> structures = fileParser.parse("D:\\BII-PhD\\Research\\Pfam23.0\\Pfam-A.seed.gz",5);
662+
// displaySequences(structures);
663+
// structures= fileParser.parseNext(5);
664+
// displaySequences(structures);
665+
// }
666+
// public static void displaySequences(Vector<StockholmStructure> structures) {
667+
// for (StockholmStructure structure : structures) {
668+
// System.out.println("----------------- Structure "+structure.getFileAnnotation().getIdentification()+" -----------");
669+
// Map<String, StringBuffer> sequences = structure.getSequences();
670+
// Set<String> keySet = sequences.keySet();
671+
// for (String key: keySet) {
672+
// System.out.println("seq: "+key);
673+
// System.out.println("\t\t\t"+sequences.get(key));
674+
// }
604675
// }
605-
}
676+
// }
606677
}

0 commit comments

Comments
 (0)