Skip to content

Commit 4d1cf58

Browse files
authored
GitHub issue 843 - GenBank parser (#919)
* Github issue 843 * Changes * Add transl_except to parser * Test * Test changes
1 parent 15ce47d commit 4d1cf58

File tree

3 files changed

+60
-5
lines changed

3 files changed

+60
-5
lines changed

biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankSequenceParser.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,8 @@ private void parseFeatureTag(List<String[]> section) {
222222
Qualifier q = new Qualifier(key, val.replace('\n', ' '));
223223
gbFeature.addQualifier(key, q);
224224
} else {
225-
if (key.equalsIgnoreCase("translation")) {
225+
if (key.equalsIgnoreCase("translation") || key.equals("anticodon")
226+
|| key.equals("transl_except")) {
226227
// strip spaces from sequence
227228
val = val.replaceAll("\\s+", "");
228229
Qualifier q = new Qualifier(key, val);

biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,7 @@
3636
import org.slf4j.LoggerFactory;
3737

3838
import java.io.*;
39-
import java.util.ArrayList;
40-
import java.util.LinkedHashMap;
41-
import java.util.List;
42-
import java.util.Map;
39+
import java.util.*;
4340

4441
import static org.hamcrest.CoreMatchers.is;
4542
import static org.junit.Assert.*;
@@ -333,6 +330,35 @@ public void readSequenceWithZeroSpanFeature() throws IOException, CompoundNotFou
333330
assertEquals(Strand.NEGATIVE, fLocation.getStrand());
334331
}
335332

333+
/**
334+
* Biojava fails to parse anticodon and transl_except feature qualifiers when they line wrap.
335+
* https://github.com/biojava/biojava/issues/843
336+
*/
337+
@Test
338+
public void testGithub843() throws Exception {
339+
CheckableInputStream inStream = new CheckableInputStream(this.getClass().getResourceAsStream("/NC_018080.gb"));
340+
assertNotNull(inStream);
341+
342+
GenbankReader<DNASequence, NucleotideCompound> genbankDNA
343+
= new GenbankReader<>(
344+
inStream,
345+
new GenericGenbankHeaderParser<>(),
346+
new DNASequenceCreator(DNACompoundSet.getDNACompoundSet())
347+
);
348+
349+
LinkedHashMap<String, DNASequence> dnaSequences = genbankDNA.process();
350+
assertNotNull(dnaSequences);
351+
352+
DNASequence dna = new ArrayList<>(dnaSequences.values()).get(0);
353+
assertNotNull(dna);
354+
355+
FeatureInterface<AbstractSequence<NucleotideCompound>, NucleotideCompound> tRNAFeature = dna.getFeaturesByType("tRNA").get(0);
356+
String anticodon = tRNAFeature.getQualifiers().get("anticodon").get(0).getValue();
357+
assertEquals("(pos:complement(1123552..1123554),aa:Leu,seq:caa)", anticodon);
358+
String transl_except = tRNAFeature.getQualifiers().get("transl_except").get(0).getValue();
359+
assertEquals("(pos:complement(1123552..1123554),aa:Leu)",transl_except);
360+
}
361+
336362
/**
337363
* Helper class to be able to verify the closed state of the input stream.
338364
*/
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
LOCUS NC_018080 6402658 bp DNA circular CON 27-OCT-2020
2+
DEFINITION Pseudomonas aeruginosa DK2
3+
ACCESSION
4+
VERSION .0
5+
KEYWORDS .
6+
FEATURES Location/Qualifiers
7+
source 1..6402658
8+
/organism="Pseudomonas aeruginosa DK2"
9+
/mol_type="genomic DNA"
10+
/strain="DK2"
11+
/db_xref="taxon:1093787"
12+
gene complement(1123502..1123588)
13+
/locus_tag="PADK2_RS05265"
14+
/old_locus_tag="PADK2_t29613"
15+
tRNA complement(1123502..1123588)
16+
/locus_tag="PADK2_RS05265"
17+
/old_locus_tag="PADK2_t29613"
18+
/product="tRNA-Leu"
19+
/inference="COORDINATES: profile:tRNAscan-SE:2.0.6"
20+
/note="Derived by automated computational analysis using
21+
gene prediction method: tRNAscan-SE."
22+
/anticodon=(pos:complement(1123552..1123554),aa:Leu,
23+
seq:caa)
24+
/transl_except=(pos:complement(1123552..1123554),
25+
aa:Leu)
26+
ORIGIN
27+
1 tttaaagaga ccggcgattc tagtgaaatc gaacgggcag gtcaatttcc aaccagcgat
28+
//

0 commit comments

Comments
 (0)