There are several ways to read a Genbank file.

```java
// Load the protein record NP_000257 through a proxy reader. "/tmp" is the first
// constructor argument (presumably the local cache directory -- confirm against the API docs).
GenbankProxySequenceReader<AminoAcidCompound> proteinProxy =
        new GenbankProxySequenceReader<AminoAcidCompound>(
                "/tmp", "NP_000257", AminoAcidCompoundSet.getAminoAcidCompoundSet());
ProteinSequence protein = new ProteinSequence(proteinProxy);
// Feed the reader's header through the reader's own header parser into the sequence object.
proteinProxy.getHeaderParser().parseHeader(proteinProxy.getHeader(), protein);
String proteinLabel = "Sequence" + "(" + protein.getAccession() + "," + protein.getLength() + ")=";
// Only the first 10 characters of the sequence are printed.
String proteinPreview = protein.getSequenceAsString().substring(0, 10) + "...";
System.out.println(proteinLabel + proteinPreview);

// Same steps for the nucleotide record NM_001126, using the DNA compound set.
GenbankProxySequenceReader<NucleotideCompound> dnaProxy =
        new GenbankProxySequenceReader<NucleotideCompound>(
                "/tmp", "NM_001126", DNACompoundSet.getDNACompoundSet());
DNASequence dna = new DNASequence(dnaProxy);
dnaProxy.getHeaderParser().parseHeader(dnaProxy.getHeader(), dna);
String dnaLabel = "Sequence" + "(" + dna.getAccession() + "," + dna.getLength() + ")=";
String dnaPreview = dna.getSequenceAsString().substring(0, 10) + "...";
System.out.println(dnaLabel + dnaPreview);

// Local Genbank file with DNA data, read by the file-based examples.
File dnaFile = new File("src/test/resources/NM_000266.gb");
// Local Genbank file with protein data.
File protFile = new File("src/test/resources/BondFeature.gb");

// Read each file with a single helper call and print every sequence string.
LinkedHashMap<String, DNASequence> dnaSequences = GenbankReaderHelper.readGenbankDNASequence( dnaFile );
for (DNASequence sequence : dnaSequences.values()) {
    System.out.println( sequence.getSequenceAsString() );
}

LinkedHashMap<String, ProteinSequence> protSequences = GenbankReaderHelper.readGenbankProteinSequence(protFile);
for (ProteinSequence sequence : protSequences.values()) {
    System.out.println( sequence.getSequenceAsString() );
}

// Alternatively, build a GenbankReader from an input stream, a header parser and a
// sequence creator. try-with-resources closes the stream even when the reader's
// constructor or process() throws, so the file handle cannot leak.
try (FileInputStream is = new FileInputStream(dnaFile)) {
    GenbankReader<DNASequence, NucleotideCompound> dnaReader = new GenbankReader<DNASequence, NucleotideCompound>(
            is,
            new GenericGenbankHeaderParser<DNASequence,NucleotideCompound>(),
            new DNASequenceCreator(DNACompoundSet.getDNACompoundSet())
    );
    dnaSequences = dnaReader.process();
}
System.out.println(dnaSequences);

// Same approach for the protein file, with the amino acid compound set.
try (FileInputStream is = new FileInputStream(protFile)) {
    GenbankReader<ProteinSequence, AminoAcidCompound> protReader = new GenbankReader<ProteinSequence, AminoAcidCompound>(
            is,
            new GenericGenbankHeaderParser<ProteinSequence,AminoAcidCompound>(),
            new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet())
    );
    protSequences = protReader.process();
}
System.out.println(protSequences);
```
# Write a Genbank file
Use the GenbankWriterHelper to write DNA sequences into a Genbank file.
```java
// First, read some DNA sequences from a Genbank file
File dnaFile = new File("src/test/resources/NM_000266.gb");
LinkedHashMap<String, DNASequence> dnaSequences = GenbankReaderHelper.readGenbankDNASequence( dnaFile );

// Copy the parsed sequences into a list, keeping the map's iteration order
ArrayList<DNASequence> sequencesToWrite = new ArrayList<DNASequence>(dnaSequences.values());

// Now that we have some DNA sequence data, the next step is to write it out
ByteArrayOutputStream fragwriter = new ByteArrayOutputStream();
GenbankWriterHelper.writeNucleotideSequence(
        fragwriter,
        sequencesToWrite,
        GenbankWriterHelper.LINEAR_DNA);

// fragwriter now holds the sequences in Genbank format;
// you could write its contents into a file
// or print them to the console
System.out.println(fragwriter.toString());
```

Navigation: Home | Book 4: The Genomics Module | Chapter 5 : Genebank