diff --git a/.gitattributes b/.gitattributes
index a90ac4bbb2..9e293e1595 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -37,7 +37,8 @@
*.sto text
*.tsv text
*.txt text
-*.xml text
+# Force LF for XML: the decompression test fails when org/biojava/nbio/core/util/build.xml is checked out with CRLF line endings.
+*.xml text eol=lf
*.xsd text
*.yml text
diff --git a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankReader.java b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankReader.java
index fb575f8ab1..951cce40c0 100644
--- a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankReader.java
+++ b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankReader.java
@@ -26,15 +26,8 @@
package org.biojava.nbio.core.sequence.io;
import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
-import org.biojava.nbio.core.sequence.DNASequence;
import org.biojava.nbio.core.sequence.DataSource;
-import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.TaxonomyID;
-import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
-import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
-import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
-import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
-import org.biojava.nbio.core.sequence.features.AbstractFeature;
import org.biojava.nbio.core.sequence.features.DBReferenceInfo;
import org.biojava.nbio.core.sequence.io.template.SequenceCreatorInterface;
import org.biojava.nbio.core.sequence.io.template.SequenceHeaderParserInterface;
@@ -43,13 +36,20 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
+import java.util.List;
/**
- * Use GenbankReaderHelper as an example of how to use this class where GenbankReaderHelper should be the
+ * See {@link GenbankReaderHelper} for an example of how to use this class; {@link GenbankReaderHelper} should be the
* primary class used to read Genbank files
*
*/
@@ -66,9 +66,9 @@ public boolean isClosed() {
}
/**
- * If you are going to use FileProxyProteinSequenceCreator then do not use this constructor because we need details about
- * local file offsets for quick reads. InputStreams does not give you the name of the stream to access quickly via file seek. A seek in
- * an inputstream is forced to read all the data so you don't gain anything.
+ * If you are going to use {@link FileProxyProteinSequenceCreator} then do not use this constructor because we need details about
+ * local file offsets for quick reads. {@link InputStream} does not give you the name of the stream to access quickly via file seek. A seek in
+ * an {@link InputStream} is forced to read all the data so you don't gain anything.
* @param is
* @param headerParser
* @param sequenceCreator
@@ -107,18 +107,21 @@ public GenbankReader(
/**
* The parsing is done in this method.
- * This method tries to process all the available Genbank records
+ * This method will return all the available Genbank records
* in the File or InputStream, closes the underlying resource,
* and return the results in {@link LinkedHashMap}.
- * You don't need to call {@link #close()} after calling this method.
+ * You don't need to call {@link GenbankReader#close()} after calling this method.
* @see #process(int)
* @return {@link HashMap} containing all the parsed Genbank records
* present, starting current fileIndex onwards.
* @throws IOException
* @throws CompoundNotFoundException
+ * @throws OutOfMemoryError if the input resource is larger than the allocated heap.
*/
public LinkedHashMap process() throws IOException, CompoundNotFoundException {
- return process(-1);
+ LinkedHashMap result = process(-1);
+ close();
+ return result;
}
/**
@@ -137,13 +140,18 @@ public LinkedHashMap process() throws IOException, CompoundNotFoundExc
* @see #process()
* @author Amr AL-Hossary
* @since 3.0.6
- * @param max maximum number of records to return, -1 for infinity.
+ * @param max maximum number of records to return; {@link #process()} passes -1 to read all available records.
* @return {@link HashMap} containing maximum max parsed Genbank records
* present, starting current fileIndex onwards.
* @throws IOException
* @throws CompoundNotFoundException
*/
public LinkedHashMap process(final int max) throws IOException, CompoundNotFoundException {
+
+ if(closed){
+ throw new IOException("Cannot perform action: resource has been closed.");
+ }
+
LinkedHashMap sequences = new LinkedHashMap<>();
@SuppressWarnings("unchecked")
int i=0;
@@ -158,12 +166,9 @@ public LinkedHashMap process(final int max) throws IOException, Compou
genbankParser.getSequenceHeaderParser().parseHeader(genbankParser.getHeader(), sequence);
// add features to new sequence
- for (String k: genbankParser.getFeatures().keySet()){
- for (AbstractFeature f: genbankParser.getFeatures(k)){
- //f.getLocations().setSequence(sequence); // can't set proper sequence source to features. It is actually needed? Don't think so...
- sequence.addFeature(f);
- }
- }
+ genbankParser.getFeatures().values().stream()
+ .flatMap(List::stream)
+ .forEach(sequence::addFeature);
// add taxonomy ID to new sequence
ArrayList dbQualifier = genbankParser.getDatabaseReferences().get("db_xref");
@@ -175,10 +180,6 @@ public LinkedHashMap process(final int max) throws IOException, Compou
sequences.put(sequence.getAccession().getID(), sequence);
}
- if (max < 0) {
- close();
- }
-
return sequences;
}
@@ -187,33 +188,9 @@ public void close() {
bufferedReader.close();
this.closed = true;
} catch (IOException e) {
- logger.error("Couldn't close the reader. {}", e.getMessage());
+ logger.error("Couldn't close the reader.", e);
this.closed = false;
}
}
-
- public static void main(String[] args) throws Exception {
- String proteinFile = "src/test/resources/BondFeature.gb";
- FileInputStream is = new FileInputStream(proteinFile);
-
- GenbankReader proteinReader = new GenbankReader<>(is, new GenericGenbankHeaderParser<>(), new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
- LinkedHashMap proteinSequences = proteinReader.process();
- System.out.println(proteinSequences);
-
- String inputFile = "src/test/resources/NM_000266.gb";
- is = new FileInputStream(inputFile);
- GenbankReader dnaReader = new GenbankReader<>(is, new GenericGenbankHeaderParser<>(), new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()));
- LinkedHashMap dnaSequences = dnaReader.process();
- System.out.println(dnaSequences);
-
- String crazyFile = "src/test/resources/CraftedFeature.gb";
- is = new FileInputStream(crazyFile);
- GenbankReader crazyReader = new GenbankReader<>(is, new GenericGenbankHeaderParser<>(), new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()));
- LinkedHashMap crazyAnnotatedSequences = crazyReader.process();
-
- is.close();
- System.out.println(crazyAnnotatedSequences);
- }
-
}
diff --git a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReader.java b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReader.java
index 045900263a..2b74f9cacd 100644
--- a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReader.java
+++ b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReader.java
@@ -32,7 +32,11 @@
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
-import org.biojava.nbio.core.sequence.features.*;
+import org.biojava.nbio.core.sequence.features.AbstractFeature;
+import org.biojava.nbio.core.sequence.features.DBReferenceInfo;
+import org.biojava.nbio.core.sequence.features.DatabaseReferenceInterface;
+import org.biojava.nbio.core.sequence.features.FeatureRetriever;
+import org.biojava.nbio.core.sequence.features.FeaturesKeyWordInterface;
import org.biojava.nbio.core.sequence.io.GenbankSequenceParser;
import org.biojava.nbio.core.sequence.io.GenericGenbankHeaderParser;
import org.biojava.nbio.core.sequence.template.AbstractSequence;
@@ -41,7 +45,14 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.*;
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
@@ -54,7 +65,7 @@
*/
public class GenbankProxySequenceReader extends StringProxySequenceReader implements FeaturesKeyWordInterface, DatabaseReferenceInterface, FeatureRetriever {
- private final static Logger logger = LoggerFactory.getLogger(GenbankProxySequenceReader.class);
+ private static final Logger logger = LoggerFactory.getLogger(GenbankProxySequenceReader.class);
private static final String eutilBaseURL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"; //
private String genbankDirectoryCache = null;
diff --git a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java
index c9f3a0cd8a..4845da4cb1 100644
--- a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java
+++ b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java
@@ -20,13 +20,6 @@
*/
package org.biojava.nbio.core.sequence.io;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-
import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
import org.biojava.nbio.core.sequence.DNASequence;
import org.biojava.nbio.core.sequence.ProteinSequence;
@@ -46,8 +39,20 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
import static org.hamcrest.CoreMatchers.is;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
/**
*
@@ -161,7 +166,7 @@ public void testProcess() throws Exception {
*/
@Test
public void testPartialProcess() throws IOException, CompoundNotFoundException, NoSuchFieldException {
- InputStream inStream = this.getClass().getResourceAsStream("/two-dnaseqs.gb");
+ CheckableInputStream inStream = new CheckableInputStream(this.getClass().getResourceAsStream("/two-dnaseqs.gb"));
GenbankReader genbankDNA
= new GenbankReader<>(
@@ -173,12 +178,14 @@ public void testPartialProcess() throws IOException, CompoundNotFoundException,
// First call to process(1) returns the first sequence
LinkedHashMap dnaSequences = genbankDNA.process(1);
+ assertFalse(inStream.isclosed());
assertNotNull(dnaSequences);
assertEquals(1, dnaSequences.size());
assertNotNull(dnaSequences.get("vPetite"));
// Second call to process(1) returns the second sequence
dnaSequences = genbankDNA.process(1);
+ assertFalse(inStream.isclosed());
assertNotNull(dnaSequences);
assertEquals(1, dnaSequences.size());
assertNotNull(dnaSequences.get("sbFDR"));
@@ -186,14 +193,14 @@ public void testPartialProcess() throws IOException, CompoundNotFoundException,
assertFalse(genbankDNA.isClosed());
genbankDNA.close();
assertTrue(genbankDNA.isClosed());
-
+ assertTrue(inStream.isclosed());
}
@Test
public void CDStest() throws Exception {
logger.info("CDS Test");
- InputStream inStream = this.getClass().getResourceAsStream("/BondFeature.gb");
+ CheckableInputStream inStream = new CheckableInputStream(this.getClass().getResourceAsStream("/BondFeature.gb"));
assertNotNull(inStream);
GenbankReader GenbankProtein
@@ -203,7 +210,7 @@ public void CDStest() throws Exception {
new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet())
);
LinkedHashMap proteinSequences = GenbankProtein.process();
- inStream.close();
+ assertTrue(inStream.isclosed());
Assert.assertTrue(proteinSequences.size() == 1);
@@ -260,4 +267,27 @@ public void testNcbiExpandedAccessionFormats() throws Exception {
DNASequence header2 = readGenbankResource("/empty_header2.gb");
assertEquals("AZZZAA02123456789 10000000000 bp DNA linear PRI 15-OCT-2018", header2.getOriginalHeader());
}
+
+ /**
+ * Helper class to be able to verify the closed state of the input stream.
+ */
+ private class CheckableInputStream extends BufferedInputStream {
+
+ private boolean closed;
+
+ CheckableInputStream(InputStream in) {
+ super(in);
+ closed = false;
+ }
+
+ @Override
+ public void close() throws IOException {
+ super.close();
+ closed = true;
+ }
+
+ boolean isclosed() {
+ return closed;
+ }
+ }
}
diff --git a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java
index 6d2180a641..2205de4b27 100644
--- a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java
+++ b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java
@@ -24,21 +24,27 @@
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
+import org.biojava.nbio.core.sequence.features.AbstractFeature;
import org.biojava.nbio.core.sequence.features.FeatureInterface;
+import org.biojava.nbio.core.sequence.features.Qualifier;
import org.biojava.nbio.core.sequence.template.AbstractSequence;
import org.junit.Assert;
+import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.File;
+import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.InputStream;
+import java.nio.channels.Channels;
+import java.nio.channels.ReadableByteChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
-import org.biojava.nbio.core.sequence.features.AbstractFeature;
-import org.biojava.nbio.core.sequence.features.Qualifier;
/**
* Testing example for issue #834
@@ -58,7 +64,7 @@ public GenbankProxySequenceReaderTest(String gi) {
@Parameterized.Parameters
public static Collection getExamples() {
- String[][] out = new String[][]{
+ String[][] accessorIds = new String[][]{
{"399235158"},
{"7525057"},
{"379015144"},
@@ -69,9 +75,52 @@ public static Collection getExamples() {
{"254839678"}
};
- return Arrays.asList(out);
+ return Arrays.asList(accessorIds);
+ }
+
+ /**
+ * {@link GenbankProxySequenceReader} checks whether the requested files are already in the temp directory
+ * before attempting to retrieve them from the remote server, so simply copying the test files to the temp
+ * directory avoids calling the server and hitting a 429 status code, which would fail the build.
+ * @throws IOException
+ */
+ @Before
+ public void copyTestFiles() throws IOException {
+ Collection accessorIds = getExamples();
+ for (String[] arr: accessorIds) {
+ copyTestFileToWorkingDirectory(arr[0]+".gb");
+ }
}
+ /**
+ * Convenience method for {@link GenbankProxySequenceReaderTest#copyTestFiles()}
+ * @param filename name of the file to copy from the resource folder
+ * @throws IOException when something goes wrong with copying the files.
+ */
+ private void copyTestFileToWorkingDirectory(String filename) throws IOException {
+ String destRoot = System.getProperty("java.io.tmpdir");
+
+ //if the directory does not end with a slash or backslash then add one
+ if(!(destRoot.endsWith("/") || destRoot.endsWith("\\"))){
+ destRoot += destRoot.contains("/")? "/" : "\\";
+ }
+
+ String dest = destRoot + filename;
+ String src = "org/biojava/nbio/core/sequence/loader/" + filename;
+
+ //Remove any pre-existing files
+ File d = new File(dest);
+ d.delete();
+
+ try(FileOutputStream destination = new FileOutputStream(d);
+ InputStream is = this.getClass().getClassLoader().getResourceAsStream(src);
+ ReadableByteChannel source = Channels.newChannel(is)) {
+
+ destination.getChannel().transferFrom(source, 0, Long.MAX_VALUE);
+ }
+ }
+
+
@Test
public void testFeatures() throws IOException, InterruptedException, CompoundNotFoundException {
logger.info("run test for protein: {}", gi);
@@ -120,9 +169,6 @@ so it should be done here (manualy).
Assert.assertTrue(!codedBy.isEmpty());
logger.info("\t\tcoded_by: {}", codedBy);
}
-
- // genbank has limits on requests per second, we need to give it some time for next test or otherwise we get 429 http error codes - JD 2018-12-14
- Thread.sleep(500);
}
@Test
@@ -161,9 +207,5 @@ public void testProteinSequenceFactoring() throws Exception {
} else {
logger.info("target {} has no CDS", gi);
}
-
- // genbank has limits on requests per second, we need to give it some time for next test or otherwise we get 429 http error codes - JD 2018-12-14
- Thread.sleep(500);
-
}
}
diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/152970917.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/152970917.gb
new file mode 100644
index 0000000000..70d24fa039
--- /dev/null
+++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/152970917.gb
@@ -0,0 +1,70 @@
+LOCUS YP_001336026 324 aa linear CON 16-DEC-2014
+DEFINITION lipid A biosynthesis (KDO)2-(lauroyl)-lipid IVA acyltransferase
+ [Klebsiella pneumoniae subsp. pneumoniae MGH 78578].
+ACCESSION YP_001336026
+VERSION YP_001336026.1
+DBLINK BioProject: PRJNA57619
+DBSOURCE REFSEQ: accession NC_009648.1
+KEYWORDS RefSeq.
+SOURCE Klebsiella pneumoniae subsp. pneumoniae MGH 78578
+ ORGANISM Klebsiella pneumoniae subsp. pneumoniae MGH 78578
+ Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacteriales;
+ Enterobacteriaceae; Klebsiella.
+REFERENCE 1 (residues 1 to 324)
+ CONSRTM NCBI Genome Project
+ TITLE Direct Submission
+ JOURNAL Submitted (09-JUL-2007) National Center for Biotechnology
+ Information, NIH, Bethesda, MD 20894, USA
+REFERENCE 2 (residues 1 to 324)
+ AUTHORS McClelland,M., Sanderson,E.K., Spieth,J., Clifton,W.S.,
+ Latreille,P., Sabo,A., Pepin,K., Bhonagiri,V., Porwollik,S., Ali,J.
+ and Wilson,R.K.
+ CONSRTM The Klebsiella pneumonia Genome Sequencing Project
+ TITLE Direct Submission
+ JOURNAL Submitted (06-SEP-2006) Genetics, Genome Sequencing Center, 4444
+ Forest Park Parkway, St. Louis, MO 63108, USA
+COMMENT VALIDATED REFSEQ: This record has undergone validation or
+ preliminary review. The reference sequence was derived from
+ ABR77796.
+ Method: conceptual translation.
+FEATURES Location/Qualifiers
+ source 1..324
+ /organism="Klebsiella pneumoniae subsp. pneumoniae MGH
+ 78578"
+ /strain="ATCC 700721; MGH 78578"
+ /sub_species="pneumoniae"
+ /db_xref="ATCC:700721"
+ /db_xref="taxon:272620"
+ Protein 1..324
+ /product="lipid A biosynthesis (KDO)2-(lauroyl)-lipid IVA
+ acyltransferase"
+ /calculated_mol_wt=37353
+ Region 1..310
+ /region_name="PRK08943"
+ /note="lipid A biosynthesis (KDO)2-(lauroyl)-lipid IVA
+ acyltransferase; Validated"
+ /db_xref="CDD:236355"
+ Site order(139,142,144,161..164,210..212)
+ /site_type="other"
+ /note="putative acyl-acceptor binding pocket"
+ /db_xref="CDD:153246"
+ CDS 1..324
+ /gene="msbB"
+ /locus_tag="KPN_02370"
+ /coded_by="complement(NC_009648.1:2595658..2596632)"
+ /inference="ab initio prediction:Genemark:2.0"
+ /inference="protein motif:Pfam:IPR004960"
+ /note="Transfers myristate or laurate, activated on ACP,
+ to the lipid IVA moiety of (KDO)2-(lauroyl)-lipid IVA"
+ /transl_table=11
+ /db_xref="GeneID:5340071"
+CONTIG join(WP_002911442.1:1..324)
+ORIGIN
+ 1 metkknnief ipkfeksfll prywgawlgv fafagialtp psfrdpllgk lgrlvgrlak
+ 61 ssrrraqinl lycfpeksey ereaiidamy asapqamvmm aelglrdpqk ilarvdwqgk
+ 121 aiidemqrnn ekviflvpha wgvdipamlm asggqkmaam fhnqgnpvfd yvwntvrrrf
+ 181 ggrmharndg ikpfiqsvrq gywgyylpdq dhgaehsefv dffatykatl paigrlmkvc
+ 241 rarvvplfpv ydskthrltv lvrppmddll daddttiarr mneevevfvk phteqytwil
+ 301 kllktrkpge iepykrkelf pkkk
+//
+
diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/254839678.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/254839678.gb
new file mode 100644
index 0000000000..6a3022cbe7
--- /dev/null
+++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/254839678.gb
@@ -0,0 +1,114 @@
+LOCUS 3IAN_A 321 aa linear BCT 24-NOV-2018
+DEFINITION Chain A, Chitinase.
+ACCESSION 3IAN_A
+VERSION 3IAN_A
+DBSOURCE pdb: molecule 3IAN, chain 65, release Nov 21, 2018;
+ deposition: Jul 14, 2009;
+ class: HYDROLASE;
+ source: Mmdb_id: 999999, Pdb_id 1: 3IAN;
+ Exp. method: X-ray Diffraction.
+KEYWORDS .
+SOURCE Lactococcus lactis subsp. lactis
+ ORGANISM Lactococcus lactis subsp. lactis
+ Bacteria; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae;
+ Lactococcus.
+REFERENCE 1 (residues 1 to 321)
+ AUTHORS Bonanno,J.B., Rutter,M., Bain,K.T., Miller,S., Ozyurt,S.,
+ Sauder,J.M., Burley,S.K. and Almo,S.C.
+ TITLE Crystal structure of a chitinase from Lactococcus lactis subsp.
+ lactis
+ JOURNAL Unpublished
+REFERENCE 2 (residues 1 to 321)
+ AUTHORS Bonanno,J.B., Rutter,M., Bain,K.T., Miller,S., Ozyurt,S.,
+ Sauder,J.M., Burley,S.K., Almo,S.C. and New York SGX Research
+ Center for Structural Genomics (NYSGXRC).
+ TITLE Direct Submission
+ JOURNAL Submitted (14-JUL-2009)
+COMMENT Crystal structure of a chitinase from Lactococcus lactis subsp.
+ lactis.
+FEATURES Location/Qualifiers
+ source 1..321
+ /organism="Lactococcus lactis subsp. lactis"
+ /sub_species="lactis"
+ /db_xref="taxon:1360"
+ Het join(bond(115),bond(117),bond(76))
+ /heterogen="(NA,2572)"
+ Region 4..313
+ /region_name="Chi1"
+ /note="Chitinase [Carbohydrate transport and metabolism];
+ COG3469"
+ /db_xref="CDD:226000"
+ Region 5..288
+ /region_name="Glyco_hydro_18"
+ /note="Glycosyl hydrolases family 18; pfam00704"
+ /db_xref="CDD:279094"
+ SecStr 5..12
+ /sec_str_type="sheet"
+ /note="strand 1"
+ Site order(10,46,122,124,189,191,283)
+ /site_type="active"
+ /note="putative active site [active]"
+ /db_xref="CDD:119350"
+ SecStr 24..28
+ /sec_str_type="sheet"
+ /note="strand 2"
+ SecStr 40..45
+ /sec_str_type="sheet"
+ /note="strand 3"
+ SecStr 65..78
+ /sec_str_type="helix"
+ /note="helix 1"
+ SecStr 80..89
+ /sec_str_type="sheet"
+ /note="strand 4"
+ SecStr 100..114
+ /sec_str_type="helix"
+ /note="helix 2"
+ SecStr 117..124
+ /sec_str_type="sheet"
+ /note="strand 5"
+ SecStr 133..151
+ /sec_str_type="helix"
+ /note="helix 3"
+ SecStr 155..163
+ /sec_str_type="sheet"
+ /note="strand 6"
+ SecStr 172..180
+ /sec_str_type="helix"
+ /note="helix 4"
+ SecStr 184..190
+ /sec_str_type="sheet"
+ /note="strand 7"
+ SecStr 196..201
+ /sec_str_type="sheet"
+ /note="strand 8"
+ SecStr 204..209
+ /sec_str_type="sheet"
+ /note="strand 9"
+ SecStr 215..228
+ /sec_str_type="helix"
+ /note="helix 5"
+ SecStr 240..246
+ /sec_str_type="sheet"
+ /note="strand 10"
+ SecStr 261..273
+ /sec_str_type="helix"
+ /note="helix 6"
+ SecStr 278..283
+ /sec_str_type="sheet"
+ /note="strand 11"
+ SecStr 289..293
+ /sec_str_type="sheet"
+ /note="strand 12"
+ SecStr 300..307
+ /sec_str_type="helix"
+ /note="helix 7"
+ORIGIN
+ 1 msldkvlvgy whnwkstgkd gykggssadf nlsstqegyn vinvsfmktp egqtlptfkp
+ 61 ynktdtefra eisklnaegk svlialggad ahielkksqe sdfvneiirl vdtygfdgld
+ 121 idleqaaiea adnqtvipsa lkkvkdhyrk dgknfmitma pefpyltssg kyapyinnld
+ 181 syydfinpqy ynqggdgfwd sdlnmwisqs ndekkedfly gltqrlvtgt dgfikipask
+ 241 fviglpsnnd aaatgyvkdp navknalnrl kasgneikgl mtwsvnwdag tnsngekynn
+ 301 tfvntyapml fnneghhhhh h
+//
+
diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/379015144.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/379015144.gb
new file mode 100644
index 0000000000..667440c7a7
--- /dev/null
+++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/379015144.gb
@@ -0,0 +1,66 @@
+LOCUS YP_005291380 338 aa linear CON 17-DEC-2014
+DEFINITION leukocidin/hemolysin toxin family protein [Staphylococcus aureus
+ subsp. aureus VC40].
+ACCESSION YP_005291380
+VERSION YP_005291380.1
+DBLINK BioProject: PRJNA88071
+DBSOURCE REFSEQ: accession NC_016912.1
+KEYWORDS RefSeq.
+SOURCE Staphylococcus aureus subsp. aureus VC40
+ ORGANISM Staphylococcus aureus subsp. aureus VC40
+ Bacteria; Firmicutes; Bacilli; Bacillales; Staphylococcus.
+REFERENCE 1 (residues 1 to 338)
+ AUTHORS Sass,P., Berscheid,A., Jansen,A., Oedenkoven,M., Szekat,C.,
+ Strittmatter,A., Gottschalk,G. and Bierbaum,G.
+ TITLE Genome sequence of Staphylococcus aureus VC40, a vancomycin- and
+ daptomycin-resistant strain, to study the genetics of development
+ of resistance to currently applied last-resort antibiotics
+ JOURNAL J. Bacteriol. 194 (8), 2107-2108 (2012)
+ PUBMED 22461548
+REFERENCE 2 (residues 1 to 338)
+ CONSRTM NCBI Genome Project
+ TITLE Direct Submission
+ JOURNAL Submitted (21-FEB-2012) National Center for Biotechnology
+ Information, NIH, Bethesda, MD 20894, USA
+REFERENCE 3 (residues 1 to 338)
+ AUTHORS Sass,P., Berscheid,A., Jansen,A., Oedenkoven,M., Szekat,C.,
+ Strittmatter,A., Gottschalk,G. and Bierbaum,G.
+ TITLE Direct Submission
+ JOURNAL Submitted (25-AUG-2011) Institute of Medical Microbiology,
+ Immunology and Parasitology, University of Bonn, Sigmund-Freud-Str.
+ 25, Bonn 53105, Germany
+COMMENT PROVISIONAL REFSEQ: This record has not yet been subject to final
+ NCBI review. The reference sequence is identical to AEZ37946.
+ Method: conceptual translation.
+FEATURES Location/Qualifiers
+ source 1..338
+ /organism="Staphylococcus aureus subsp. aureus VC40"
+ /strain="VC40"
+ /sub_species="aureus"
+ /db_xref="taxon:1028799"
+ /country="Germany"
+ /collection_date="2002"
+ /note="laboratory mutant selected for 60 microgram per ml
+ vancomycin resistance"
+ Protein 1..338
+ /product="leukocidin/hemolysin toxin family protein"
+ /calculated_mol_wt=38555
+ Region 65..323
+ /region_name="Leukocidin"
+ /note="Leukocidin/Hemolysin toxin family; pfam07968"
+ /db_xref="CDD:311773"
+ CDS 1..338
+ /locus_tag="SAVC_08965"
+ /coded_by="complement(NC_016912.1:1946987..1948003)"
+ /transl_table=11
+ /db_xref="GeneID:11869971"
+CONTIG join(WP_000595324.1:1..338)
+ORIGIN
+ 1 mikqlcknit ictlalsttf tvlpatsfak inseikqvse knldgdtkmy trtattsdsq
+ 61 knitqslqfn fltepnydke tvfikakgti gsglrildpn gywnstlrwp gsysvsiqnv
+ 121 ddnnntnvtd fapknqdesr evkytygykt ggdfsinrgg ltgnitkesn ysetisyqqp
+ 181 syrtlldqst shkgvgwkve ahlinnmghd htrqltndsd nrtkseifsl trngnlwakd
+ 241 nftpkdkmpv tvsegfnpef lavmshdkkd kgksqfvvhy krsmdefkid wnrhgfwgyw
+ 301 sgenhvdkke eklsalyevd wkthnvkfvk vlndnekk
+//
+
diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/381353147.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/381353147.gb
new file mode 100644
index 0000000000..b965e249c5
--- /dev/null
+++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/381353147.gb
@@ -0,0 +1,186 @@
+LOCUS 4AE0_A 535 aa linear BCT 10-OCT-2012
+DEFINITION Chain A, Diphtheria Toxin.
+ACCESSION 4AE0_A
+VERSION 4AE0_A
+DBSOURCE pdb: molecule 4AE0, chain 65, release Apr 18, 2012;
+ deposition: Jan 4, 2012;
+ class: Toxin;
+ source: Mmdb_id: 98377, Pdb_id 1: 4AE0;
+ Exp. method: X-Ray Diffraction.
+KEYWORDS .
+SOURCE Corynebacterium diphtheriae
+ ORGANISM Corynebacterium diphtheriae
+ Bacteria; Actinobacteria; Corynebacteriales; Corynebacteriaceae;
+ Corynebacterium.
+REFERENCE 1 (residues 1 to 535)
+ AUTHORS Malito,E., Bursulaya,B., Chen,C., Surdo,P.L., Picchianti,M.,
+ Balducci,E., Biancucci,M., Brock,A., Berti,F., Bottomley,M.J.,
+ Nissum,M., Costantino,P., Rappuoli,R. and Spraggon,G.
+ TITLE Structural basis for lack of toxicity of the diphtheria toxin
+ mutant CRM197
+ JOURNAL Proc. Natl. Acad. Sci. U.S.A. 109 (14), 5229-5234 (2012)
+ PUBMED 22431623
+REFERENCE 2 (residues 1 to 535)
+ AUTHORS Malito,E. and Spraggon,G.
+ TITLE Direct Submission
+ JOURNAL Submitted (04-JAN-2012)
+COMMENT Crystal Structure Of Diphtheria Toxin Mutant Crm197.
+FEATURES Location/Qualifiers
+ source 1..535
+ /organism="Corynebacterium diphtheriae"
+ /db_xref="taxon:1717"
+ Region 1..187
+ /region_name="Diphtheria_C"
+ /note="Diphtheria toxin, C domain; pfam02763"
+ /db_xref="CDD:280859"
+ Region 1..171
+ /region_name="Domain 1"
+ /note="NCBI Domains"
+ SecStr 10..15
+ /sec_str_type="sheet"
+ /note="strand 1"
+ SecStr 16..22
+ /sec_str_type="sheet"
+ /note="strand 2"
+ Site order(20..24,27,31,34..36,38,53..55,65,148)
+ /site_type="other"
+ /note="nad+ binding pocket [chemical binding]"
+ /db_xref="CDD:238651"
+ SecStr 52..58
+ /sec_str_type="sheet"
+ /note="strand 3"
+ SecStr 65..68
+ /sec_str_type="sheet"
+ /note="strand 4"
+ SecStr 78..86
+ /sec_str_type="sheet"
+ /note="strand 5"
+ SecStr 87..94
+ /sec_str_type="sheet"
+ /note="strand 6"
+ SecStr 99..106
+ /sec_str_type="helix"
+ /note="helix 1"
+ SecStr 120..127
+ /sec_str_type="helix"
+ /note="helix 2"
+ SecStr 132..139
+ /sec_str_type="sheet"
+ /note="strand 7"
+ SecStr 146..152
+ /sec_str_type="sheet"
+ /note="strand 8"
+ SecStr 159..167
+ /sec_str_type="sheet"
+ /note="strand 9"
+ Region 172..382
+ /region_name="Domain 2"
+ /note="NCBI Domains"
+ SecStr 176..183
+ /sec_str_type="helix"
+ /note="helix 3"
+ Bond bond(186,201)
+ /bond_type="disulfide"
+ Region 200..379
+ /region_name="Diphtheria_T"
+ /note="Diphtheria toxin, T domain; pfam02764"
+ /db_xref="CDD:280860"
+ SecStr 206..223
+ /sec_str_type="helix"
+ /note="helix 4"
+ SecStr 224..232
+ /sec_str_type="helix"
+ /note="helix 5"
+ SecStr 240..255
+ /sec_str_type="helix"
+ /note="helix 6"
+ SecStr 260..268
+ /sec_str_type="helix"
+ /note="helix 7"
+ SecStr 275..288
+ /sec_str_type="helix"
+ /note="helix 8"
+ SecStr 297..304
+ /sec_str_type="helix"
+ /note="helix 9"
+ SecStr 315..318
+ /sec_str_type="sheet"
+ /note="strand 10"
+ SecStr 319..322
+ /sec_str_type="sheet"
+ /note="strand 11"
+ SecStr 326..343
+ /sec_str_type="helix"
+ /note="helix 10"
+ SecStr 359..376
+ /sec_str_type="helix"
+ /note="helix 11"
+ Region 381..534
+ /region_name="Diphtheria_R"
+ /note="Diphtheria toxin, R domain; pfam01324"
+ /db_xref="CDD:279642"
+ Region 383..535
+ /region_name="Domain 3"
+ /note="NCBI Domains"
+ SecStr 388..392
+ /sec_str_type="sheet"
+ /note="strand 12"
+ SecStr 393..399
+ /sec_str_type="sheet"
+ /note="strand 13"
+ SecStr 404..408
+ /sec_str_type="sheet"
+ /note="strand 14"
+ SecStr 409..413
+ /sec_str_type="sheet"
+ /note="strand 15"
+ SecStr 414..423
+ /sec_str_type="sheet"
+ /note="strand 16"
+ SecStr 424..427
+ /sec_str_type="sheet"
+ /note="strand 17"
+ SecStr 428..436
+ /sec_str_type="sheet"
+ /note="strand 18"
+ SecStr 440..444
+ /sec_str_type="sheet"
+ /note="strand 19"
+ SecStr 447..453
+ /sec_str_type="sheet"
+ /note="strand 20"
+ SecStr 455..465
+ /sec_str_type="sheet"
+ /note="strand 21"
+ Bond bond(461,471)
+ /bond_type="disulfide"
+ SecStr 467..475
+ /sec_str_type="sheet"
+ /note="strand 22"
+ SecStr 478..481
+ /sec_str_type="sheet"
+ /note="strand 23"
+ SecStr 484..494
+ /sec_str_type="sheet"
+ /note="strand 24"
+ SecStr 495..498
+ /sec_str_type="sheet"
+ /note="strand 25"
+ SecStr 507..514
+ /sec_str_type="sheet"
+ /note="strand 26"
+ SecStr 524..535
+ /sec_str_type="sheet"
+ /note="strand 27"
+ORIGIN
+ 1 gaddvvdssk sfvmenfssy hgtkpgyvds iqkgiqkpks gtqgnydddw kefystdnky
+ 61 daagysvdne nplsgkaggv vkvtypgltk vlalkvdnae tikkelglsl teplmeqvgt
+ 121 eefikrfgdg asrvvlslpf aegsssveyi nnweqakals veleinfetr gkrgqdamye
+ 181 ymaqacagnr vrrsvgssls cinldwdvir dktktkiesl kehgpiknkm sespnktvse
+ 241 ekakqyleef hqtalehpel selktvtgtn pvfaganyaa wavnvaqvid setadnlekt
+ 301 taalsilpgi gsvmgiadga vhhnteeiva qsialsslmv aqaiplvgel vdigfaaynf
+ 361 vesiinlfqv vhnsynrpay spghktqpfl hdgyavswnt vedsiirtgf qgesghdiki
+ 421 taentplpia gvllptipgk ldvnkskthi svngrkirmr craidgdvtf crpkspvyvg
+ 481 ngvhanlhva fhrsssekih sneissdsig vlgyqktvdh tkvnsklslf feiks
+//
+
diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/381353148.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/381353148.gb
new file mode 100644
index 0000000000..f1340266aa
--- /dev/null
+++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/381353148.gb
@@ -0,0 +1,184 @@
+LOCUS 4AE1_A 535 aa linear BCT 10-OCT-2012
+DEFINITION Chain A, Diphtheria Toxin.
+ACCESSION 4AE1_A
+VERSION 4AE1_A
+DBSOURCE pdb: molecule 4AE1, chain 65, release Apr 18, 2012;
+ deposition: Jan 4, 2012;
+ class: Toxin;
+ source: Mmdb_id: 98378, Pdb_id 1: 4AE1;
+ Exp. method: X-Ray Diffraction.
+KEYWORDS .
+SOURCE Corynebacterium diphtheriae
+ ORGANISM Corynebacterium diphtheriae
+ Bacteria; Actinobacteria; Corynebacteriales; Corynebacteriaceae;
+ Corynebacterium.
+REFERENCE 1 (residues 1 to 535)
+ AUTHORS Malito,E., Bursulaya,B., Chen,C., Surdo,P.L., Picchianti,M.,
+ Balducci,E., Biancucci,M., Brock,A., Berti,F., Bottomley,M.J.,
+ Nissum,M., Costantino,P., Rappuoli,R. and Spraggon,G.
+ TITLE Structural basis for lack of toxicity of the diphtheria toxin
+ mutant CRM197
+ JOURNAL Proc. Natl. Acad. Sci. U.S.A. 109 (14), 5229-5234 (2012)
+ PUBMED 22431623
+REFERENCE 2 (residues 1 to 535)
+ AUTHORS Malito,E. and Spraggon,G.
+ TITLE Direct Submission
+ JOURNAL Submitted (04-JAN-2012)
+COMMENT Crystal Structure Of Diphtheria Toxin Mutant Crm197 In Complex With
+ Nicotinamide.
+FEATURES Location/Qualifiers
+ source 1..535
+ /organism="Corynebacterium diphtheriae"
+ /db_xref="taxon:1717"
+ Region 1..187
+ /region_name="Diphtheria_C"
+ /note="Diphtheria toxin, C domain; pfam02763"
+ /db_xref="CDD:280859"
+ Region 1..171
+ /region_name="Domain 1"
+ /note="NCBI Domains"
+ SecStr 10..15
+ /sec_str_type="sheet"
+ /note="strand 1"
+ SecStr 16..25
+ /sec_str_type="sheet"
+ /note="strand 2"
+ Site order(20..24,27,31,34..36,38,53..55,65,148)
+ /site_type="other"
+ /note="nad+ binding pocket [chemical binding]"
+ /db_xref="CDD:238651"
+ SecStr 52..57
+ /sec_str_type="sheet"
+ /note="strand 3"
+ SecStr 65..68
+ /sec_str_type="sheet"
+ /note="strand 4"
+ SecStr 78..86
+ /sec_str_type="sheet"
+ /note="strand 5"
+ SecStr 87..94
+ /sec_str_type="sheet"
+ /note="strand 6"
+ SecStr 99..106
+ /sec_str_type="helix"
+ /note="helix 1"
+ SecStr 120..127
+ /sec_str_type="helix"
+ /note="helix 2"
+ SecStr 132..139
+ /sec_str_type="sheet"
+ /note="strand 7"
+ SecStr 147..152
+ /sec_str_type="sheet"
+ /note="strand 8"
+ SecStr 159..167
+ /sec_str_type="sheet"
+ /note="strand 9"
+ Region 172..382
+ /region_name="Domain 2"
+ /note="NCBI Domains"
+ SecStr 176..183
+ /sec_str_type="helix"
+ /note="helix 3"
+ Bond bond(186,201)
+ /bond_type="disulfide"
+ Region 200..379
+ /region_name="Diphtheria_T"
+ /note="Diphtheria toxin, T domain; pfam02764"
+ /db_xref="CDD:280860"
+ SecStr 206..220
+ /sec_str_type="helix"
+ /note="helix 4"
+ SecStr 240..254
+ /sec_str_type="helix"
+ /note="helix 5"
+ SecStr 260..268
+ /sec_str_type="helix"
+ /note="helix 6"
+ SecStr 275..288
+ /sec_str_type="helix"
+ /note="helix 7"
+ SecStr 297..304
+ /sec_str_type="helix"
+ /note="helix 8"
+ SecStr 315..318
+ /sec_str_type="sheet"
+ /note="strand 10"
+ SecStr 319..322
+ /sec_str_type="sheet"
+ /note="strand 11"
+ SecStr 326..343
+ /sec_str_type="helix"
+ /note="helix 9"
+ SecStr 359..376
+ /sec_str_type="helix"
+ /note="helix 10"
+ Region 381..534
+ /region_name="Diphtheria_R"
+ /note="Diphtheria toxin, R domain; pfam01324"
+ /db_xref="CDD:279642"
+ Region 383..535
+ /region_name="Domain 3"
+ /note="NCBI Domains"
+ SecStr 388..392
+ /sec_str_type="sheet"
+ /note="strand 12"
+ SecStr 393..399
+ /sec_str_type="sheet"
+ /note="strand 13"
+ SecStr 404..408
+ /sec_str_type="sheet"
+ /note="strand 14"
+ SecStr 409..413
+ /sec_str_type="sheet"
+ /note="strand 15"
+ SecStr 414..423
+ /sec_str_type="sheet"
+ /note="strand 16"
+ SecStr 424..427
+ /sec_str_type="sheet"
+ /note="strand 17"
+ SecStr 428..436
+ /sec_str_type="sheet"
+ /note="strand 18"
+ SecStr 440..444
+ /sec_str_type="sheet"
+ /note="strand 19"
+ SecStr 447..453
+ /sec_str_type="sheet"
+ /note="strand 20"
+ SecStr 455..465
+ /sec_str_type="sheet"
+ /note="strand 21"
+ Bond bond(461,471)
+ /bond_type="disulfide"
+ SecStr 467..475
+ /sec_str_type="sheet"
+ /note="strand 22"
+ SecStr 478..481
+ /sec_str_type="sheet"
+ /note="strand 23"
+ SecStr 484..494
+ /sec_str_type="sheet"
+ /note="strand 24"
+ SecStr 495..498
+ /sec_str_type="sheet"
+ /note="strand 25"
+ SecStr 507..514
+ /sec_str_type="sheet"
+ /note="strand 26"
+ SecStr 524..535
+ /sec_str_type="sheet"
+ /note="strand 27"
+ORIGIN
+ 1 gaddvvdssk sfvmenfssy hgtkpgyvds iqkgiqkpks gtqgnydddw kefystdnky
+ 61 daagysvdne nplsgkaggv vkvtypgltk vlalkvdnae tikkelglsl teplmeqvgt
+ 121 eefikrfgdg asrvvlslpf aegsssveyi nnweqakals veleinfetr gkrgqdamye
+ 181 ymaqacagnr vrrsvgssls cinldwdvir dktktkiesl kehgpiknkm sespnktvse
+ 241 ekakqyleef hqtalehpel selktvtgtn pvfaganyaa wavnvaqvid setadnlekt
+ 301 taalsilpgi gsvmgiadga vhhnteeiva qsialsslmv aqaiplvgel vdigfaaynf
+ 361 vesiinlfqv vhnsynrpay spghktqpfl hdgyavswnt vedsiirtgf qgesghdiki
+ 421 taentplpia gvllptipgk ldvnkskthi svngrkirmr craidgdvtf crpkspvyvg
+ 481 ngvhanlhva fhrsssekih sneissdsig vlgyqktvdh tkvnsklslf feiks
+//
+
diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/381353149.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/381353149.gb
new file mode 100644
index 0000000000..4237b720e4
--- /dev/null
+++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/381353149.gb
@@ -0,0 +1,187 @@
+LOCUS 4AE1_B 535 aa linear BCT 10-OCT-2012
+DEFINITION Chain B, Diphtheria Toxin.
+ACCESSION 4AE1_B
+VERSION 4AE1_B
+DBSOURCE pdb: molecule 4AE1, chain 66, release Apr 18, 2012;
+ deposition: Jan 4, 2012;
+ class: Toxin;
+ source: Mmdb_id: 98378, Pdb_id 1: 4AE1;
+ Exp. method: X-Ray Diffraction.
+KEYWORDS .
+SOURCE Corynebacterium diphtheriae
+ ORGANISM Corynebacterium diphtheriae
+ Bacteria; Actinobacteria; Corynebacteriales; Corynebacteriaceae;
+ Corynebacterium.
+REFERENCE 1 (residues 1 to 535)
+ AUTHORS Malito,E., Bursulaya,B., Chen,C., Surdo,P.L., Picchianti,M.,
+ Balducci,E., Biancucci,M., Brock,A., Berti,F., Bottomley,M.J.,
+ Nissum,M., Costantino,P., Rappuoli,R. and Spraggon,G.
+ TITLE Structural basis for lack of toxicity of the diphtheria toxin
+ mutant CRM197
+ JOURNAL Proc. Natl. Acad. Sci. U.S.A. 109 (14), 5229-5234 (2012)
+ PUBMED 22431623
+REFERENCE 2 (residues 1 to 535)
+ AUTHORS Malito,E. and Spraggon,G.
+ TITLE Direct Submission
+ JOURNAL Submitted (04-JAN-2012)
+COMMENT Crystal Structure Of Diphtheria Toxin Mutant Crm197 In Complex With
+ Nicotinamide.
+FEATURES Location/Qualifiers
+ source 1..535
+ /organism="Corynebacterium diphtheriae"
+ /db_xref="taxon:1717"
+ Region 1..187
+ /region_name="Diphtheria_C"
+ /note="Diphtheria toxin, C domain; pfam02763"
+ /db_xref="CDD:280859"
+ Region 1..171
+ /region_name="Domain 4"
+ /note="NCBI Domains"
+ SecStr 10..15
+ /sec_str_type="sheet"
+ /note="strand 28"
+ SecStr 16..22
+ /sec_str_type="sheet"
+ /note="strand 29"
+ Site order(20..24,27,31,34..36,38,53..55,65,148)
+ /site_type="other"
+ /note="nad+ binding pocket [chemical binding]"
+ /db_xref="CDD:238651"
+ SecStr 54..57
+ /sec_str_type="sheet"
+ /note="strand 30"
+ SecStr 65..68
+ /sec_str_type="sheet"
+ /note="strand 31"
+ SecStr 78..86
+ /sec_str_type="sheet"
+ /note="strand 32"
+ SecStr 87..94
+ /sec_str_type="sheet"
+ /note="strand 33"
+ SecStr 99..106
+ /sec_str_type="helix"
+ /note="helix 11"
+ SecStr 120..127
+ /sec_str_type="helix"
+ /note="helix 12"
+ SecStr 132..139
+ /sec_str_type="sheet"
+ /note="strand 34"
+ SecStr 147..152
+ /sec_str_type="sheet"
+ /note="strand 35"
+ SecStr 159..167
+ /sec_str_type="sheet"
+ /note="strand 36"
+ Region 172..382
+ /region_name="Domain 5"
+ /note="NCBI Domains"
+ SecStr 176..183
+ /sec_str_type="helix"
+ /note="helix 13"
+ Bond bond(186,201)
+ /bond_type="disulfide"
+ Region 200..379
+ /region_name="Diphtheria_T"
+ /note="Diphtheria toxin, T domain; pfam02764"
+ /db_xref="CDD:280860"
+ SecStr 206..223
+ /sec_str_type="helix"
+ /note="helix 14"
+ SecStr 224..232
+ /sec_str_type="helix"
+ /note="helix 15"
+ SecStr 240..254
+ /sec_str_type="helix"
+ /note="helix 16"
+ SecStr 260..268
+ /sec_str_type="helix"
+ /note="helix 17"
+ SecStr 275..288
+ /sec_str_type="helix"
+ /note="helix 18"
+ SecStr 297..304
+ /sec_str_type="helix"
+ /note="helix 19"
+ SecStr 315..318
+ /sec_str_type="sheet"
+ /note="strand 37"
+ SecStr 319..322
+ /sec_str_type="sheet"
+ /note="strand 38"
+ SecStr 326..343
+ /sec_str_type="helix"
+ /note="helix 20"
+ SecStr 359..376
+ /sec_str_type="helix"
+ /note="helix 21"
+ Region 381..534
+ /region_name="Diphtheria_R"
+ /note="Diphtheria toxin, R domain; pfam01324"
+ /db_xref="CDD:279642"
+ Region 383..535
+ /region_name="Domain 6"
+ /note="NCBI Domains"
+ SecStr 388..392
+ /sec_str_type="sheet"
+ /note="strand 39"
+ SecStr 393..399
+ /sec_str_type="sheet"
+ /note="strand 40"
+ SecStr 404..408
+ /sec_str_type="sheet"
+ /note="strand 41"
+ SecStr 409..413
+ /sec_str_type="sheet"
+ /note="strand 42"
+ SecStr 414..423
+ /sec_str_type="sheet"
+ /note="strand 43"
+ SecStr 424..427
+ /sec_str_type="sheet"
+ /note="strand 44"
+ SecStr 428..436
+ /sec_str_type="sheet"
+ /note="strand 45"
+ SecStr 440..444
+ /sec_str_type="sheet"
+ /note="strand 46"
+ SecStr 447..453
+ /sec_str_type="sheet"
+ /note="strand 47"
+ SecStr 455..465
+ /sec_str_type="sheet"
+ /note="strand 48"
+ Bond bond(461,471)
+ /bond_type="disulfide"
+ SecStr 467..475
+ /sec_str_type="sheet"
+ /note="strand 49"
+ SecStr 478..481
+ /sec_str_type="sheet"
+ /note="strand 50"
+ SecStr 484..494
+ /sec_str_type="sheet"
+ /note="strand 51"
+ SecStr 495..498
+ /sec_str_type="sheet"
+ /note="strand 52"
+ SecStr 507..514
+ /sec_str_type="sheet"
+ /note="strand 53"
+ SecStr 524..535
+ /sec_str_type="sheet"
+ /note="strand 54"
+ORIGIN
+ 1 gaddvvdssk sfvmenfssy hgtkpgyvds iqkgiqkpks gtqgnydddw kefystdnky
+ 61 daagysvdne nplsgkaggv vkvtypgltk vlalkvdnae tikkelglsl teplmeqvgt
+ 121 eefikrfgdg asrvvlslpf aegsssveyi nnweqakals veleinfetr gkrgqdamye
+ 181 ymaqacagnr vrrsvgssls cinldwdvir dktktkiesl kehgpiknkm sespnktvse
+ 241 ekakqyleef hqtalehpel selktvtgtn pvfaganyaa wavnvaqvid setadnlekt
+ 301 taalsilpgi gsvmgiadga vhhnteeiva qsialsslmv aqaiplvgel vdigfaaynf
+ 361 vesiinlfqv vhnsynrpay spghktqpfl hdgyavswnt vedsiirtgf qgesghdiki
+ 421 taentplpia gvllptipgk ldvnkskthi svngrkirmr craidgdvtf crpkspvyvg
+ 481 ngvhanlhva fhrsssekih sneissdsig vlgyqktvdh tkvnsklslf feiks
+//
+
diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/399235158.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/399235158.gb
new file mode 100644
index 0000000000..ceed26194e
--- /dev/null
+++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/399235158.gb
@@ -0,0 +1,83 @@
+LOCUS AFP42651 630 aa linear BCT 31-JAN-2014
+DEFINITION Fatty-acid-CoA ligase FadD32 [Mycolicibacterium smegmatis MC2 155].
+ACCESSION AFP42651
+VERSION AFP42651.1
+DBLINK BioProject: PRJNA38453
+ BioSample: SAMN02603392
+DBSOURCE accession CP001663.1
+KEYWORDS .
+SOURCE Mycolicibacterium smegmatis MC2 155
+ ORGANISM Mycolicibacterium smegmatis MC2 155
+ Bacteria; Actinobacteria; Corynebacteriales; Mycobacteriaceae;
+ Mycolicibacterium.
+REFERENCE 1 (residues 1 to 630)
+ AUTHORS Perrodou,E., Deshayes,C., Muller,J., Schaeffer,C., Van
+ Dorsselaer,A., Ripp,R., Poch,O., Reyrat,J.M. and Lecompte,O.
+ TITLE ICDS database: interrupted CoDing sequences in prokaryotic genomes
+ JOURNAL Nucleic Acids Res. 34 (DATABASE ISSUE), D338-D343 (2006)
+ PUBMED 16381882
+REFERENCE 2 (residues 1 to 630)
+ AUTHORS Deshayes,C., Perrodou,E., Gallien,S., Euphrasie,D., Schaeffer,C.,
+ Van-Dorsselaer,A., Poch,O., Lecompte,O. and Reyrat,J.M.
+ TITLE Interrupted coding sequences in Mycobacterium smegmatis: authentic
+ mutations or sequencing errors?
+ JOURNAL Genome Biol. 8 (2), R20 (2007)
+ PUBMED 17295914
+ REMARK Publication Status: Online-Only
+REFERENCE 3 (residues 1 to 630)
+ AUTHORS Gallien,S., Perrodou,E., Carapito,C., Deshayes,C., Reyrat,J.M., Van
+ Dorsselaer,A., Poch,O., Schaeffer,C. and Lecompte,O.
+ TITLE Ortho-proteogenomics: multiple proteomes investigation through
+ orthology and a new MS-based protocol
+ JOURNAL Genome Res. 19 (1), 128-135 (2009)
+ PUBMED 18955433
+REFERENCE 4 (residues 1 to 630)
+ AUTHORS Reyrat,J.M., Perrodou,E., Deshayes,C., Euphrasie,D., Gagniere,N.,
+ Gallien,S., Jones,M., Kocincova,D., Poch,O., Quevillon,E., Ripp,R.,
+ Schaeffer,C., Singh,A., Van Dorsselaer,A. and Lecompte,O.
+ TITLE Re-annotation of the genome sequence of Mycobacterium smegmatis
+ JOURNAL Unpublished
+REFERENCE 5 (residues 1 to 630)
+ AUTHORS Perrodou,E., Reyrat,J.M., Deshayes,C., Euphrasie,D., Gagniere,N.,
+ Gallien,S., Jones,M., Kocincova,D., Poch,O., Quevillon,E., Ripp,R.,
+ Schaeffer,C., Singh,A., Van Dorsselaer,A. and Lecompte,O.
+ TITLE Direct Submission
+ JOURNAL Submitted (22-JUN-2009) Laboratory of Integrative Bioinformatics
+ and Genomics, Institute of Genetics and Molecular and Cellular
+ Biology, 1 rue Laurent Fries BP 10142, Illkirch Cedex 67404, France
+COMMENT Method: conceptual translation.
+FEATURES Location/Qualifiers
+ source 1..630
+ /organism="Mycolicibacterium smegmatis MC2 155"
+ /strain="MC2 155"
+ /db_xref="taxon:246196"
+ Protein 1..630
+ /product="Fatty-acid-CoA ligase FadD32"
+ Region 1..630
+ /region_name="PRK07769"
+ /note="long-chain-fatty-acid--CoA ligase; Validated"
+ /db_xref="CDD:181109"
+ CDS 1..630
+ /gene="fadD32"
+ /locus_tag="MSMEI_6225"
+ /coded_by="complement(CP001663.1:6463934..6465826)"
+ /experiment="Nterminal peptide experimentally determined
+ by amino acid sequencing after protein digestion"
+ /note="GO_function: GO:0003824;
+ GO_process: GO:0008152"
+ /transl_table=11
+ /db_xref="PFAM:PF00501"
+ORIGIN
+ 1 mpfhnpfikd gqikfpdgss ivahverwak vrgdklayrf ldfsterdgv prdltwaqfs
+ 61 arnravaarl qqvtqpgdrv ailcpqnldy lvaffgalya griavplfdp sepghvgrlh
+ 121 avldnchpsa ilttteaaeg vrkffrtrpa nqrprviavd avpddvastw vnpdepdett
+ 181 iaylqytsgs triptgvqit hlnlatnvvq viealegeeg drglswlpff hdmglitall
+ 241 apmighyftf mtpaafvrrp erwirelark egdtggtisv apnfafdhaa argvpkpgsp
+ 301 pldlsnvkav lngsepisaa tvrrfneafg pfgfppkaik psyglaeatl fvsttpsaee
+ 361 pkiitvdrdq lnsgrivevd adspkavaqa sagkvgiaew avivdaesat elpdgqvgei
+ 421 wisgqnmgtg ywgkpeesva tfqnilksrt npshaegatd datwvrtgdy gafydgdlyi
+ 481 tgrvkdlvii dgrnhypqdl eysaqeaska irtgyvaafs vpanqlpdev fenahsgikr
+ 541 dpddtseqlv ivaerapgah kldigpitdd iraaiavrhg vtvrdvllta agaiprtssg
+ 601 kigrracraa yldgslragk vandfpdatd
+//
+
diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/7525057.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/7525057.gb
new file mode 100644
index 0000000000..1eccc2480d
--- /dev/null
+++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/7525057.gb
@@ -0,0 +1,87 @@
+LOCUS NP_051038 123 aa linear PLN 26-MAR-2010
+DEFINITION ribosomal protein S12 (chloroplast) [Arabidopsis thaliana].
+ACCESSION NP_051038
+VERSION NP_051038.1
+DBLINK Project: 116
+ BioProject: PRJNA116
+DBSOURCE REFSEQ: accession NC_000932.1
+KEYWORDS RefSeq.
+SOURCE chloroplast Arabidopsis thaliana (thale cress)
+ ORGANISM Arabidopsis thaliana
+ Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
+ Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae;
+ Pentapetalae; rosids; malvids; Brassicales; Brassicaceae;
+ Camelineae; Arabidopsis.
+REFERENCE 1 (residues 1 to 123)
+ AUTHORS Sato,S., Nakamura,Y., Kaneko,T., Asamizu,E. and Tabata,S.
+ TITLE Complete structure of the chloroplast genome of Arabidopsis
+ thaliana
+ JOURNAL DNA Res. 6 (5), 283-290 (1999)
+ PUBMED 10574454
+REFERENCE 2 (residues 1 to 123)
+ CONSRTM NCBI Genome Project
+ TITLE Direct Submission
+ JOURNAL Submitted (07-APR-2000) National Center for Biotechnology
+ Information, NIH, Bethesda, MD 20894, USA
+REFERENCE 3 (residues 1 to 123)
+ AUTHORS Nakamura,Y.
+ TITLE Direct Submission
+ JOURNAL Submitted (09-SEP-1999) Laboratory of Gene Structure 2, Kazusa DNA
+ Research Institute, Yana 1532-3, Kisarazu, Chiba 292-0812, Japan
+COMMENT REVIEWED REFSEQ: This record has been curated by NCBI staff. The
+ reference sequence was derived from BAA84409.
+ Method: conceptual translation.
+FEATURES Location/Qualifiers
+ source 1..123
+ /organism="Arabidopsis thaliana"
+ /organelle="plastid:chloroplast"
+ /db_xref="taxon:3702"
+ /ecotype="Columbia"
+ Protein 1..123
+ /product="ribosomal protein S12"
+ /calculated_mol_wt=13633
+ Region 1..123
+ /region_name="rps12"
+ /note="ribosomal protein S12; CHL00051"
+ /db_xref="CDD:176992"
+ Site order(4..5,7..8,11..12)
+ /site_type="other"
+ /note="S17 interaction site [polypeptide binding]"
+ /db_xref="CDD:239466"
+ Site 4
+ /site_type="other"
+ /note="S8 interaction site"
+ /db_xref="CDD:239466"
+ Site order(12..14,26,28..29,31,46..47,49..51,58,66,69..70,
+ 83..84,88..89,110)
+ /site_type="other"
+ /note="16S rRNA interaction site [nucleotide binding]"
+ /db_xref="CDD:239466"
+ Site order(43..44,88)
+ /site_type="other"
+ /note="streptomycin interaction site [chemical binding]"
+ /db_xref="CDD:239466"
+ Site 44..45
+ /site_type="other"
+ /note="23S rRNA interaction site [nucleotide binding]"
+ /db_xref="CDD:239466"
+ Site order(45..50,70..78)
+ /site_type="other"
+ /note="aminoacyl-tRNA interaction site (A-site)
+ [nucleotide binding]"
+ /db_xref="CDD:239466"
+ CDS 1..123
+ /gene="rps12"
+ /locus_tag="ArthCp047"
+ /coded_by="join(complement(NC_000932.1:69611..69724),
+ NC_000932.1:139856..140087,NC_000932.1:140625..140650)"
+ /trans_splicing
+ /note="trans-spliced"
+ /transl_table=11
+ /db_xref="GeneID:844801"
+ORIGIN
+ 1 mptikqlirn trqpirnvtk spalrgcpqr rgtctrvyti tpkkpnsalr kvarvrltsg
+ 61 feitayipgi ghnlqehsvv lvrggrvkdl pgvryhivrg tldavgvkdr qqgrskygvk
+ 121 kpk
+//
+