|
| 1 | +/* |
| 2 | + * BioJava development code |
| 3 | + * |
| 4 | + * This code may be freely distributed and modified under the |
| 5 | + * terms of the GNU Lesser General Public Licence. This should |
| 6 | + * be distributed with the code. If you do not have a copy, |
| 7 | + * see: |
| 8 | + * |
| 9 | + * http://www.gnu.org/copyleft/lesser.html |
| 10 | + * |
| 11 | + * Copyright for this code is held jointly by the individual |
| 12 | + * authors. These should be listed in @author doc comments. |
| 13 | + * |
| 14 | + * For more information on the BioJava project and its aims, |
| 15 | + * or to join the biojava-l mailing list, visit the home page |
| 16 | + * at: |
| 17 | + * |
| 18 | + * http://www.biojava.org/ |
| 19 | + * |
| 20 | + * Created on 01-21-2010 |
| 21 | + */ |
| 22 | +package org.biojava.nbio.core.sequence.io; |
| 23 | + |
| 24 | +import org.biojava.nbio.core.exceptions.CompoundNotFoundException; |
| 25 | +import org.biojava.nbio.core.sequence.RNASequence; |
| 26 | +import org.biojava.nbio.core.sequence.compound.NucleotideCompound; |
| 27 | +import org.biojava.nbio.core.sequence.io.template.SequenceCreatorInterface; |
| 28 | +import org.biojava.nbio.core.sequence.io.template.SequenceParserInterface; |
| 29 | +import org.biojava.nbio.core.sequence.loader.SequenceFileProxyLoader; |
| 30 | +import org.biojava.nbio.core.sequence.template.AbstractSequence; |
| 31 | +import org.biojava.nbio.core.sequence.template.CompoundSet; |
| 32 | +import org.biojava.nbio.core.sequence.template.ProxySequenceReader; |
| 33 | + |
| 34 | +import java.io.File; |
| 35 | +import java.io.IOException; |
| 36 | +import java.util.List; |
| 37 | + |
| 38 | +/** |
| 39 | + * This class is a good example of using the SequenceCreatorInterface where during parsing of the stream |
| 40 | + * the sequence and the offset index are passed to create a Protein sequence that will be loaded in lazily. |
| 41 | + * This way you can load very large fasta files and store accession id and delay loading the sequence to save |
| 42 | + * memory. The index is the file stream offset so when a RNASequence has a call to getSequence() the |
| 43 | + * SequenceFileProxyLoader will open the file and offset to the index and retrieve the sequence. |
| 44 | + * |
| 45 | + * Same approach can be used for genome sequence data stored in a local fasta file, in a database or via http |
| 46 | + * interface to a remote server |
| 47 | + * |
| 48 | + * @author Scooter Willis <willishf at gmail dot com> |
| 49 | + */ |
| 50 | +public class FileProxyRNASequenceCreator implements |
| 51 | + SequenceCreatorInterface<NucleotideCompound> { |
| 52 | + |
| 53 | + CompoundSet<NucleotideCompound> compoundSet = null; |
| 54 | + File file = null; |
| 55 | + SequenceParserInterface sequenceParser; |
| 56 | + |
| 57 | + /** |
| 58 | + * Need File so that we can store full path name in SequenceFileProxyLoader for Random File access as a quick read |
| 59 | + * @param fastaFile |
| 60 | + * @param compoundSet |
| 61 | + */ |
| 62 | + public FileProxyRNASequenceCreator(File file, |
| 63 | + CompoundSet<NucleotideCompound> compoundSet, |
| 64 | + SequenceParserInterface sequenceParser) { |
| 65 | + this.compoundSet = compoundSet; |
| 66 | + this.file = file; |
| 67 | + this.sequenceParser = sequenceParser; |
| 68 | + } |
| 69 | + |
| 70 | + /** |
| 71 | + * Even though we are passing in the sequence we really only care about the length of the sequence and the offset |
| 72 | + * index in the fasta file. |
| 73 | + * @param sequence |
| 74 | + * @param index |
| 75 | + * @return |
| 76 | + * @throws CompoundNotFoundException |
| 77 | + * @throws IOException |
| 78 | + */ |
| 79 | + @Override |
| 80 | + public AbstractSequence<NucleotideCompound> getSequence(String sequence, long index ) throws CompoundNotFoundException, IOException { |
| 81 | + SequenceFileProxyLoader<NucleotideCompound> sequenceFileProxyLoader = new SequenceFileProxyLoader<NucleotideCompound>( |
| 82 | + file, |
| 83 | + sequenceParser, |
| 84 | + index, |
| 85 | + sequence.length(), |
| 86 | + compoundSet); |
| 87 | + return new RNASequence(sequenceFileProxyLoader, compoundSet); |
| 88 | + } |
| 89 | + |
| 90 | + /** |
| 91 | + * Should be able to extend the same concept to a remote URL call or database connection. Not supported yet |
| 92 | + * @param proxyLoader |
| 93 | + * @param index |
| 94 | + * @return |
| 95 | + */ |
| 96 | + @Override |
| 97 | + public AbstractSequence<NucleotideCompound> getSequence( |
| 98 | + ProxySequenceReader<NucleotideCompound> proxyLoader, long index) { |
| 99 | + throw new UnsupportedOperationException("Not supported yet."); |
| 100 | + } |
| 101 | + |
| 102 | + /** |
| 103 | + * Not sure of use case and currently not supported |
| 104 | + * @param list |
| 105 | + * @return |
| 106 | + */ |
| 107 | + @Override |
| 108 | + public AbstractSequence<NucleotideCompound> getSequence( |
| 109 | + List<NucleotideCompound> list) { |
| 110 | + throw new UnsupportedOperationException("Not supported yet."); |
| 111 | + } |
| 112 | +} |
0 commit comments