From b3edb046fc26e6011b093e81c73c1dae4e23a161 Mon Sep 17 00:00:00 2001 From: Lee Date: Mon, 18 Feb 2019 16:58:23 -0500 Subject: [PATCH 1/9] related to #800 Although I couldn't reproduce the original error, I followed the method documentation and moved the close action to process() from process(int) since process(int) should never close the resource under any circumstances anyhow. Also went out of scope a little and verified that the underlying InputStream was open and closed at appropriate steps. Used streams to reduce complexity of a double nested for loop. (If that's frowned upon I'll change it back) Tweaked documentation to use links when referring to library classes. --- .../nbio/core/sequence/io/GenbankReader.java | 41 +++++++------- .../core/sequence/io/GenbankReaderTest.java | 55 +++++++++++++------ 2 files changed, 59 insertions(+), 37 deletions(-) diff --git a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankReader.java b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankReader.java index fb575f8ab1..0abf10e810 100644 --- a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankReader.java +++ b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankReader.java @@ -34,7 +34,6 @@ import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; import org.biojava.nbio.core.sequence.compound.DNACompoundSet; import org.biojava.nbio.core.sequence.compound.NucleotideCompound; -import org.biojava.nbio.core.sequence.features.AbstractFeature; import org.biojava.nbio.core.sequence.features.DBReferenceInfo; import org.biojava.nbio.core.sequence.io.template.SequenceCreatorInterface; import org.biojava.nbio.core.sequence.io.template.SequenceHeaderParserInterface; @@ -47,9 +46,10 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashMap; +import java.util.List; /** - * Use GenbankReaderHelper as an example of how to use this class where GenbankReaderHelper should be the + * 
Use {@link GenbankReaderHelper} as an example of how to use this class where {@link GenbankReaderHelper} should be the * primary class used to read Genbank files * */ @@ -66,9 +66,9 @@ public boolean isClosed() { } /** - * If you are going to use FileProxyProteinSequenceCreator then do not use this constructor because we need details about - * local file offsets for quick reads. InputStreams does not give you the name of the stream to access quickly via file seek. A seek in - * an inputstream is forced to read all the data so you don't gain anything. + * If you are going to use {@link FileProxyProteinSequenceCreator} then do not use this constructor because we need details about + * local file offsets for quick reads. {@link InputStream} does not give you the name of the stream to access quickly via file seek. A seek in + * an {@link InputStream} is forced to read all the data so you don't gain anything. * @param is * @param headerParser * @param sequenceCreator @@ -107,18 +107,21 @@ public GenbankReader( /** * The parsing is done in this method.
- * This method tries to process all the available Genbank records + * This method will return all the available Genbank records * in the File or InputStream, closes the underlying resource, * and return the results in {@link LinkedHashMap}.
- * You don't need to call {@link #close()} after calling this method. + * You don't need to call {@link GenbankReader#close()} after calling this method. * @see #process(int) * @return {@link HashMap} containing all the parsed Genbank records * present, starting current fileIndex onwards. * @throws IOException * @throws CompoundNotFoundException + * @throws OutOfMemoryError if the input resource is larger than the allocated heap. */ public LinkedHashMap process() throws IOException, CompoundNotFoundException { - return process(-1); + LinkedHashMap result = process(-1); + close(); + return result; } /** @@ -137,13 +140,16 @@ public LinkedHashMap process() throws IOException, CompoundNotFoundExc * @see #process() * @author Amr AL-Hossary * @since 3.0.6 - * @param max maximum number of records to return, -1 for infinity. + * @param max maximum number of records to return. * @return {@link HashMap} containing maximum max parsed Genbank records * present, starting current fileIndex onwards. * @throws IOException * @throws CompoundNotFoundException */ public LinkedHashMap process(final int max) throws IOException, CompoundNotFoundException { + + if(closed) throw new IOException("Cannot perform action: resource has been closed."); + LinkedHashMap sequences = new LinkedHashMap<>(); @SuppressWarnings("unchecked") int i=0; @@ -158,12 +164,9 @@ public LinkedHashMap process(final int max) throws IOException, Compou genbankParser.getSequenceHeaderParser().parseHeader(genbankParser.getHeader(), sequence); // add features to new sequence - for (String k: genbankParser.getFeatures().keySet()){ - for (AbstractFeature f: genbankParser.getFeatures(k)){ - //f.getLocations().setSequence(sequence); // can't set proper sequence source to features. It is actually needed? Don't think so... 
- sequence.addFeature(f); - } - } + genbankParser.getFeatures().values().stream() + .flatMap(List::stream) + .forEach(sequence::addFeature); // add taxonomy ID to new sequence ArrayList dbQualifier = genbankParser.getDatabaseReferences().get("db_xref"); @@ -175,10 +178,6 @@ public LinkedHashMap process(final int max) throws IOException, Compou sequences.put(sequence.getAccession().getID(), sequence); } - if (max < 0) { - close(); - } - return sequences; } @@ -187,11 +186,12 @@ public void close() { bufferedReader.close(); this.closed = true; } catch (IOException e) { - logger.error("Couldn't close the reader. {}", e.getMessage()); + logger.error("Couldn't close the reader.", e); this.closed = false; } } + //TODO turn this into a test case? public static void main(String[] args) throws Exception { String proteinFile = "src/test/resources/BondFeature.gb"; FileInputStream is = new FileInputStream(proteinFile); @@ -206,6 +206,7 @@ public static void main(String[] args) throws Exception { LinkedHashMap dnaSequences = dnaReader.process(); System.out.println(dnaSequences); + //TODO restore CraftedFeature.gb or delete this code String crazyFile = "src/test/resources/CraftedFeature.gb"; is = new FileInputStream(crazyFile); GenbankReader crazyReader = new GenbankReader<>(is, new GenericGenbankHeaderParser<>(), new DNASequenceCreator(DNACompoundSet.getDNACompoundSet())); diff --git a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java index c9f3a0cd8a..727cf37d8f 100644 --- a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java +++ b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java @@ -20,13 +20,6 @@ */ package org.biojava.nbio.core.sequence.io; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import 
java.util.Map; - import org.biojava.nbio.core.exceptions.CompoundNotFoundException; import org.biojava.nbio.core.sequence.DNASequence; import org.biojava.nbio.core.sequence.ProteinSequence; @@ -37,15 +30,18 @@ import org.biojava.nbio.core.sequence.features.FeatureInterface; import org.biojava.nbio.core.sequence.features.Qualifier; import org.biojava.nbio.core.sequence.template.AbstractSequence; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + import static org.hamcrest.CoreMatchers.is; import static org.junit.Assert.*; @@ -161,7 +157,7 @@ public void testProcess() throws Exception { */ @Test public void testPartialProcess() throws IOException, CompoundNotFoundException, NoSuchFieldException { - InputStream inStream = this.getClass().getResourceAsStream("/two-dnaseqs.gb"); + CheckableInputStream inStream = new CheckableInputStream(this.getClass().getResourceAsStream("/two-dnaseqs.gb")); GenbankReader genbankDNA = new GenbankReader<>( @@ -173,12 +169,14 @@ public void testPartialProcess() throws IOException, CompoundNotFoundException, // First call to process(1) returns the first sequence LinkedHashMap dnaSequences = genbankDNA.process(1); + assertFalse(inStream.isclosed()); assertNotNull(dnaSequences); assertEquals(1, dnaSequences.size()); assertNotNull(dnaSequences.get("vPetite")); // Second call to process(1) returns the second sequence dnaSequences = genbankDNA.process(1); + assertFalse(inStream.isclosed()); assertNotNull(dnaSequences); assertEquals(1, dnaSequences.size()); assertNotNull(dnaSequences.get("sbFDR")); @@ -186,14 +184,14 @@ public void testPartialProcess() 
throws IOException, CompoundNotFoundException, assertFalse(genbankDNA.isClosed()); genbankDNA.close(); assertTrue(genbankDNA.isClosed()); - + assertTrue(inStream.isclosed()); } @Test public void CDStest() throws Exception { logger.info("CDS Test"); - InputStream inStream = this.getClass().getResourceAsStream("/BondFeature.gb"); + CheckableInputStream inStream = new CheckableInputStream(this.getClass().getResourceAsStream("/BondFeature.gb")); assertNotNull(inStream); GenbankReader GenbankProtein @@ -203,7 +201,7 @@ public void CDStest() throws Exception { new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()) ); LinkedHashMap proteinSequences = GenbankProtein.process(); - inStream.close(); + assertTrue(inStream.isclosed()); Assert.assertTrue(proteinSequences.size() == 1); @@ -260,4 +258,27 @@ public void testNcbiExpandedAccessionFormats() throws Exception { DNASequence header2 = readGenbankResource("/empty_header2.gb"); assertEquals("AZZZAA02123456789 10000000000 bp DNA linear PRI 15-OCT-2018", header2.getOriginalHeader()); } + + /** + * Helper class to be able to verify the closed state of the input stream. + */ + private class CheckableInputStream extends BufferedInputStream { + + private boolean closed; + + CheckableInputStream(InputStream in) { + super(in); + closed = false; + } + + @Override + public void close() throws IOException { + super.close(); + closed = true; + } + + boolean isclosed(){ + return closed; + } + } } From c77515e2db565f0737950fd496f64015c35c65ce Mon Sep 17 00:00:00 2001 From: Lee Date: Tue, 19 Feb 2019 12:30:33 -0500 Subject: [PATCH 2/9] Made requested changes to imports. 
Added braces to if statement Removed main method --- .../nbio/core/sequence/io/GenbankReader.java | 44 +++++-------------- 1 file changed, 10 insertions(+), 34 deletions(-) diff --git a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankReader.java b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankReader.java index 0abf10e810..951cce40c0 100644 --- a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankReader.java +++ b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/io/GenbankReader.java @@ -26,14 +26,8 @@ package org.biojava.nbio.core.sequence.io; import org.biojava.nbio.core.exceptions.CompoundNotFoundException; -import org.biojava.nbio.core.sequence.DNASequence; import org.biojava.nbio.core.sequence.DataSource; -import org.biojava.nbio.core.sequence.ProteinSequence; import org.biojava.nbio.core.sequence.TaxonomyID; -import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; -import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; -import org.biojava.nbio.core.sequence.compound.DNACompoundSet; -import org.biojava.nbio.core.sequence.compound.NucleotideCompound; import org.biojava.nbio.core.sequence.features.DBReferenceInfo; import org.biojava.nbio.core.sequence.io.template.SequenceCreatorInterface; import org.biojava.nbio.core.sequence.io.template.SequenceHeaderParserInterface; @@ -42,7 +36,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.*; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashMap; @@ -148,7 +148,9 @@ public LinkedHashMap process() throws IOException, CompoundNotFoundExc */ public LinkedHashMap process(final int max) throws IOException, CompoundNotFoundException { - if(closed) throw new 
IOException("Cannot perform action: resource has been closed."); + if(closed){ + throw new IOException("Cannot perform action: resource has been closed."); + } LinkedHashMap sequences = new LinkedHashMap<>(); @SuppressWarnings("unchecked") @@ -190,31 +192,5 @@ public void close() { this.closed = false; } } - - //TODO turn this into a test case? - public static void main(String[] args) throws Exception { - String proteinFile = "src/test/resources/BondFeature.gb"; - FileInputStream is = new FileInputStream(proteinFile); - - GenbankReader proteinReader = new GenbankReader<>(is, new GenericGenbankHeaderParser<>(), new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet())); - LinkedHashMap proteinSequences = proteinReader.process(); - System.out.println(proteinSequences); - - String inputFile = "src/test/resources/NM_000266.gb"; - is = new FileInputStream(inputFile); - GenbankReader dnaReader = new GenbankReader<>(is, new GenericGenbankHeaderParser<>(), new DNASequenceCreator(DNACompoundSet.getDNACompoundSet())); - LinkedHashMap dnaSequences = dnaReader.process(); - System.out.println(dnaSequences); - - //TODO restore CraftedFeature.gb or delete this code - String crazyFile = "src/test/resources/CraftedFeature.gb"; - is = new FileInputStream(crazyFile); - GenbankReader crazyReader = new GenbankReader<>(is, new GenericGenbankHeaderParser<>(), new DNASequenceCreator(DNACompoundSet.getDNACompoundSet())); - LinkedHashMap crazyAnnotatedSequences = crazyReader.process(); - - is.close(); - System.out.println(crazyAnnotatedSequences); - } - } From 7601f17d3499d4e84fbb9119a31f8b37543b69d5 Mon Sep 17 00:00:00 2001 From: Lee Date: Tue, 19 Feb 2019 12:37:16 -0500 Subject: [PATCH 3/9] Made requested changes to imports in test class. 
Made style change to isClosed() method --- .../nbio/core/sequence/io/GenbankReaderTest.java | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java index 727cf37d8f..4845da4cb1 100644 --- a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java +++ b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/io/GenbankReaderTest.java @@ -30,7 +30,12 @@ import org.biojava.nbio.core.sequence.features.FeatureInterface; import org.biojava.nbio.core.sequence.features.Qualifier; import org.biojava.nbio.core.sequence.template.AbstractSequence; -import org.junit.*; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,7 +48,11 @@ import java.util.Map; import static org.hamcrest.CoreMatchers.is; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; /** * @@ -277,7 +286,7 @@ public void close() throws IOException { closed = true; } - boolean isclosed(){ + boolean isclosed() { return closed; } } From 9197e890f8db6dddded1f687bbae64097fd90911 Mon Sep 17 00:00:00 2001 From: Lee Date: Tue, 19 Feb 2019 15:47:47 -0500 Subject: [PATCH 4/9] Changing line end for xml to Unix format to make the TestUncompressInputStream test pass --- .gitattributes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index a90ac4bbb2..9e293e1595 100644 --- a/.gitattributes +++ b/.gitattributes @@ -37,7 +37,7 @@ *.sto text *.tsv text *.txt text -*.xml text +*.xml 
text eol=lf #Causing decompression test to fail when line endings in org/biojava/nbio/core/util/build.xml are crlf *.xsd text *.yml text From 11c9beb17f8a7c8f581ffa2b58fdf3e00250f1bd Mon Sep 17 00:00:00 2001 From: Lee Date: Thu, 21 Feb 2019 22:26:21 -0500 Subject: [PATCH 5/9] #831 Normally I would have opened a new branch, but this issue was causing this pull to fail its build so I'm doing both in one fell swoop. I put the target files in the resource directory and manually copy them over before each test into the temporary working directory which is scanned before calling out in case the file is already there. This should fix the 429 errors in this class specifically. --- biojava-core/pom.xml | 5 + .../loader/GenbankProxySequenceReader.java | 17 +- .../GenbankProxySequenceReaderTest.java | 53 +++-- .../GenbankProxySequenceReader/152970917.gb | 70 +++++++ .../GenbankProxySequenceReader/254839678.gb | 114 +++++++++++ .../GenbankProxySequenceReader/379015144.gb | 66 +++++++ .../GenbankProxySequenceReader/381353147.gb | 186 +++++++++++++++++ .../GenbankProxySequenceReader/381353148.gb | 184 +++++++++++++++++ .../GenbankProxySequenceReader/381353149.gb | 187 ++++++++++++++++++ .../GenbankProxySequenceReader/399235158.gb | 83 ++++++++ .../GenbankProxySequenceReader/7525057.gb | 87 ++++++++ 11 files changed, 1038 insertions(+), 14 deletions(-) create mode 100644 biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/152970917.gb create mode 100644 biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/254839678.gb create mode 100644 biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/379015144.gb create mode 100644 biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353147.gb create mode 100644 biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353148.gb create mode 100644 
biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353149.gb create mode 100644 biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/399235158.gb create mode 100644 biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/7525057.gb diff --git a/biojava-core/pom.xml b/biojava-core/pom.xml index 5b0ed01b0b..739ae360f0 100644 --- a/biojava-core/pom.xml +++ b/biojava-core/pom.xml @@ -44,6 +44,11 @@ junit test + + org.apache.commons + commons-io + 1.3.2 + org.slf4j diff --git a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReader.java b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReader.java index 045900263a..2b74f9cacd 100644 --- a/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReader.java +++ b/biojava-core/src/main/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReader.java @@ -32,7 +32,11 @@ import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; import org.biojava.nbio.core.sequence.compound.DNACompoundSet; import org.biojava.nbio.core.sequence.compound.NucleotideCompound; -import org.biojava.nbio.core.sequence.features.*; +import org.biojava.nbio.core.sequence.features.AbstractFeature; +import org.biojava.nbio.core.sequence.features.DBReferenceInfo; +import org.biojava.nbio.core.sequence.features.DatabaseReferenceInterface; +import org.biojava.nbio.core.sequence.features.FeatureRetriever; +import org.biojava.nbio.core.sequence.features.FeaturesKeyWordInterface; import org.biojava.nbio.core.sequence.io.GenbankSequenceParser; import org.biojava.nbio.core.sequence.io.GenericGenbankHeaderParser; import org.biojava.nbio.core.sequence.template.AbstractSequence; @@ -41,7 +45,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.*; +import java.io.BufferedInputStream; +import 
java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; @@ -54,7 +65,7 @@ */ public class GenbankProxySequenceReader extends StringProxySequenceReader implements FeaturesKeyWordInterface, DatabaseReferenceInterface, FeatureRetriever { - private final static Logger logger = LoggerFactory.getLogger(GenbankProxySequenceReader.class); + private static final Logger logger = LoggerFactory.getLogger(GenbankProxySequenceReader.class); private static final String eutilBaseURL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"; // private String genbankDirectoryCache = null; diff --git a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java index 6d2180a641..ecfdc5c7fd 100644 --- a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java +++ b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java @@ -20,25 +20,30 @@ */ package org.biojava.nbio.core.sequence.loader; +import org.apache.commons.io.IOUtils; import org.biojava.nbio.core.exceptions.CompoundNotFoundException; import org.biojava.nbio.core.sequence.ProteinSequence; import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; +import org.biojava.nbio.core.sequence.features.AbstractFeature; import org.biojava.nbio.core.sequence.features.FeatureInterface; +import org.biojava.nbio.core.sequence.features.Qualifier; import org.biojava.nbio.core.sequence.template.AbstractSequence; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; 
import org.junit.runners.Parameterized; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import org.biojava.nbio.core.sequence.features.AbstractFeature; -import org.biojava.nbio.core.sequence.features.Qualifier; /** * Testing example for issue #834 @@ -58,7 +63,7 @@ public GenbankProxySequenceReaderTest(String gi) { @Parameterized.Parameters public static Collection getExamples() { - String[][] out = new String[][]{ + String[][] accessorIds = new String[][]{ {"399235158"}, {"7525057"}, {"379015144"}, @@ -69,9 +74,42 @@ public static Collection getExamples() { {"254839678"} }; - return Arrays.asList(out); + return Arrays.asList(accessorIds); + } + + /** + * In {@link GenbankProxySequenceReader} there is a check to see if the requested files are already in the temp + * directory before attempting to retrieve them from the remote server, so simply copying the test files to the temp + * directory avoids calling out to the server and hitting a 429 status code from the server which fails the build. + * @throws IOException + */ + @Before + public void copyTestFiles() throws IOException { + Collection accessorIds = getExamples(); + for (String[] arr: accessorIds) { + copyTestFileToWorkingDirectory(arr[0]+".gb"); + } + } + + /** + * Convenience method for {@link GenbankProxySequenceReaderTest#copyTestFiles()} + * @param filename name of the file to copy from the resource folder + * @throws IOException when something goes wrong with copying the files. 
+ */ + private void copyTestFileToWorkingDirectory(String filename) throws IOException { + String dest = System.getProperty("java.io.tmpdir") + filename; + String src = "org/biojava/nbio/core/sequence/GenbankProxySequenceReader/" + filename; + + FileOutputStream destination = new FileOutputStream(new File(dest)); + InputStream source = this.getClass().getClassLoader().getResourceAsStream(src); + + IOUtils.copy(source, destination); + + destination.close(); + source.close(); } + @Test public void testFeatures() throws IOException, InterruptedException, CompoundNotFoundException { logger.info("run test for protein: {}", gi); @@ -120,9 +158,6 @@ so it should be done here (manualy). Assert.assertTrue(!codedBy.isEmpty()); logger.info("\t\tcoded_by: {}", codedBy); } - - // genbank has limits on requests per second, we need to give it some time for next test or otherwise we get 429 http error codes - JD 2018-12-14 - Thread.sleep(500); } @Test @@ -161,9 +196,5 @@ public void testProteinSequenceFactoring() throws Exception { } else { logger.info("target {} has no CDS", gi); } - - // genbank has limits on requests per second, we need to give it some time for next test or otherwise we get 429 http error codes - JD 2018-12-14 - Thread.sleep(500); - } } diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/152970917.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/152970917.gb new file mode 100644 index 0000000000..70d24fa039 --- /dev/null +++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/152970917.gb @@ -0,0 +1,70 @@ +LOCUS YP_001336026 324 aa linear CON 16-DEC-2014 +DEFINITION lipid A biosynthesis (KDO)2-(lauroyl)-lipid IVA acyltransferase + [Klebsiella pneumoniae subsp. pneumoniae MGH 78578]. +ACCESSION YP_001336026 +VERSION YP_001336026.1 +DBLINK BioProject: PRJNA57619 +DBSOURCE REFSEQ: accession NC_009648.1 +KEYWORDS RefSeq. 
+SOURCE Klebsiella pneumoniae subsp. pneumoniae MGH 78578 + ORGANISM Klebsiella pneumoniae subsp. pneumoniae MGH 78578 + Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacteriales; + Enterobacteriaceae; Klebsiella. +REFERENCE 1 (residues 1 to 324) + CONSRTM NCBI Genome Project + TITLE Direct Submission + JOURNAL Submitted (09-JUL-2007) National Center for Biotechnology + Information, NIH, Bethesda, MD 20894, USA +REFERENCE 2 (residues 1 to 324) + AUTHORS McClelland,M., Sanderson,E.K., Spieth,J., Clifton,W.S., + Latreille,P., Sabo,A., Pepin,K., Bhonagiri,V., Porwollik,S., Ali,J. + and Wilson,R.K. + CONSRTM The Klebsiella pneumonia Genome Sequencing Project + TITLE Direct Submission + JOURNAL Submitted (06-SEP-2006) Genetics, Genome Sequencing Center, 4444 + Forest Park Parkway, St. Louis, MO 63108, USA +COMMENT VALIDATED REFSEQ: This record has undergone validation or + preliminary review. The reference sequence was derived from + ABR77796. + Method: conceptual translation. +FEATURES Location/Qualifiers + source 1..324 + /organism="Klebsiella pneumoniae subsp. 
pneumoniae MGH + 78578" + /strain="ATCC 700721; MGH 78578" + /sub_species="pneumoniae" + /db_xref="ATCC:700721" + /db_xref="taxon:272620" + Protein 1..324 + /product="lipid A biosynthesis (KDO)2-(lauroyl)-lipid IVA + acyltransferase" + /calculated_mol_wt=37353 + Region 1..310 + /region_name="PRK08943" + /note="lipid A biosynthesis (KDO)2-(lauroyl)-lipid IVA + acyltransferase; Validated" + /db_xref="CDD:236355" + Site order(139,142,144,161..164,210..212) + /site_type="other" + /note="putative acyl-acceptor binding pocket" + /db_xref="CDD:153246" + CDS 1..324 + /gene="msbB" + /locus_tag="KPN_02370" + /coded_by="complement(NC_009648.1:2595658..2596632)" + /inference="ab initio prediction:Genemark:2.0" + /inference="protein motif:Pfam:IPR004960" + /note="Transfers myristate or laurate, activated on ACP, + to the lipid IVA moiety of (KDO)2-(lauroyl)-lipid IVA" + /transl_table=11 + /db_xref="GeneID:5340071" +CONTIG join(WP_002911442.1:1..324) +ORIGIN + 1 metkknnief ipkfeksfll prywgawlgv fafagialtp psfrdpllgk lgrlvgrlak + 61 ssrrraqinl lycfpeksey ereaiidamy asapqamvmm aelglrdpqk ilarvdwqgk + 121 aiidemqrnn ekviflvpha wgvdipamlm asggqkmaam fhnqgnpvfd yvwntvrrrf + 181 ggrmharndg ikpfiqsvrq gywgyylpdq dhgaehsefv dffatykatl paigrlmkvc + 241 rarvvplfpv ydskthrltv lvrppmddll daddttiarr mneevevfvk phteqytwil + 301 kllktrkpge iepykrkelf pkkk +// + diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/254839678.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/254839678.gb new file mode 100644 index 0000000000..6a3022cbe7 --- /dev/null +++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/254839678.gb @@ -0,0 +1,114 @@ +LOCUS 3IAN_A 321 aa linear BCT 24-NOV-2018 +DEFINITION Chain A, Chitinase. 
+ACCESSION 3IAN_A +VERSION 3IAN_A +DBSOURCE pdb: molecule 3IAN, chain 65, release Nov 21, 2018; + deposition: Jul 14, 2009; + class: HYDROLASE; + source: Mmdb_id: 999999, Pdb_id 1: 3IAN; + Exp. method: X-ray Diffraction. +KEYWORDS . +SOURCE Lactococcus lactis subsp. lactis + ORGANISM Lactococcus lactis subsp. lactis + Bacteria; Firmicutes; Bacilli; Lactobacillales; Streptococcaceae; + Lactococcus. +REFERENCE 1 (residues 1 to 321) + AUTHORS Bonanno,J.B., Rutter,M., Bain,K.T., Miller,S., Ozyurt,S., + Sauder,J.M., Burley,S.K. and Almo,S.C. + TITLE Crystal structure of a chitinase from Lactococcus lactis subsp. + lactis + JOURNAL Unpublished +REFERENCE 2 (residues 1 to 321) + AUTHORS Bonanno,J.B., Rutter,M., Bain,K.T., Miller,S., Ozyurt,S., + Sauder,J.M., Burley,S.K., Almo,S.C. and New York SGX Research + Center for Structural Genomics (NYSGXRC). + TITLE Direct Submission + JOURNAL Submitted (14-JUL-2009) +COMMENT Crystal structure of a chitinase from Lactococcus lactis subsp. + lactis. +FEATURES Location/Qualifiers + source 1..321 + /organism="Lactococcus lactis subsp. 
lactis" + /sub_species="lactis" + /db_xref="taxon:1360" + Het join(bond(115),bond(117),bond(76)) + /heterogen="(NA,2572)" + Region 4..313 + /region_name="Chi1" + /note="Chitinase [Carbohydrate transport and metabolism]; + COG3469" + /db_xref="CDD:226000" + Region 5..288 + /region_name="Glyco_hydro_18" + /note="Glycosyl hydrolases family 18; pfam00704" + /db_xref="CDD:279094" + SecStr 5..12 + /sec_str_type="sheet" + /note="strand 1" + Site order(10,46,122,124,189,191,283) + /site_type="active" + /note="putative active site [active]" + /db_xref="CDD:119350" + SecStr 24..28 + /sec_str_type="sheet" + /note="strand 2" + SecStr 40..45 + /sec_str_type="sheet" + /note="strand 3" + SecStr 65..78 + /sec_str_type="helix" + /note="helix 1" + SecStr 80..89 + /sec_str_type="sheet" + /note="strand 4" + SecStr 100..114 + /sec_str_type="helix" + /note="helix 2" + SecStr 117..124 + /sec_str_type="sheet" + /note="strand 5" + SecStr 133..151 + /sec_str_type="helix" + /note="helix 3" + SecStr 155..163 + /sec_str_type="sheet" + /note="strand 6" + SecStr 172..180 + /sec_str_type="helix" + /note="helix 4" + SecStr 184..190 + /sec_str_type="sheet" + /note="strand 7" + SecStr 196..201 + /sec_str_type="sheet" + /note="strand 8" + SecStr 204..209 + /sec_str_type="sheet" + /note="strand 9" + SecStr 215..228 + /sec_str_type="helix" + /note="helix 5" + SecStr 240..246 + /sec_str_type="sheet" + /note="strand 10" + SecStr 261..273 + /sec_str_type="helix" + /note="helix 6" + SecStr 278..283 + /sec_str_type="sheet" + /note="strand 11" + SecStr 289..293 + /sec_str_type="sheet" + /note="strand 12" + SecStr 300..307 + /sec_str_type="helix" + /note="helix 7" +ORIGIN + 1 msldkvlvgy whnwkstgkd gykggssadf nlsstqegyn vinvsfmktp egqtlptfkp + 61 ynktdtefra eisklnaegk svlialggad ahielkksqe sdfvneiirl vdtygfdgld + 121 idleqaaiea adnqtvipsa lkkvkdhyrk dgknfmitma pefpyltssg kyapyinnld + 181 syydfinpqy ynqggdgfwd sdlnmwisqs ndekkedfly gltqrlvtgt dgfikipask + 241 fviglpsnnd aaatgyvkdp navknalnrl kasgneikgl 
mtwsvnwdag tnsngekynn + 301 tfvntyapml fnneghhhhh h +// + diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/379015144.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/379015144.gb new file mode 100644 index 0000000000..667440c7a7 --- /dev/null +++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/379015144.gb @@ -0,0 +1,66 @@ +LOCUS YP_005291380 338 aa linear CON 17-DEC-2014 +DEFINITION leukocidin/hemolysin toxin family protein [Staphylococcus aureus + subsp. aureus VC40]. +ACCESSION YP_005291380 +VERSION YP_005291380.1 +DBLINK BioProject: PRJNA88071 +DBSOURCE REFSEQ: accession NC_016912.1 +KEYWORDS RefSeq. +SOURCE Staphylococcus aureus subsp. aureus VC40 + ORGANISM Staphylococcus aureus subsp. aureus VC40 + Bacteria; Firmicutes; Bacilli; Bacillales; Staphylococcus. +REFERENCE 1 (residues 1 to 338) + AUTHORS Sass,P., Berscheid,A., Jansen,A., Oedenkoven,M., Szekat,C., + Strittmatter,A., Gottschalk,G. and Bierbaum,G. + TITLE Genome sequence of Staphylococcus aureus VC40, a vancomycin- and + daptomycin-resistant strain, to study the genetics of development + of resistance to currently applied last-resort antibiotics + JOURNAL J. Bacteriol. 194 (8), 2107-2108 (2012) + PUBMED 22461548 +REFERENCE 2 (residues 1 to 338) + CONSRTM NCBI Genome Project + TITLE Direct Submission + JOURNAL Submitted (21-FEB-2012) National Center for Biotechnology + Information, NIH, Bethesda, MD 20894, USA +REFERENCE 3 (residues 1 to 338) + AUTHORS Sass,P., Berscheid,A., Jansen,A., Oedenkoven,M., Szekat,C., + Strittmatter,A., Gottschalk,G. and Bierbaum,G. + TITLE Direct Submission + JOURNAL Submitted (25-AUG-2011) Institute of Medical Microbiology, + Immunology and Parasitology, University of Bonn, Sigmund-Freud-Str. + 25, Bonn 53105, Germany +COMMENT PROVISIONAL REFSEQ: This record has not yet been subject to final + NCBI review. 
The reference sequence is identical to AEZ37946. + Method: conceptual translation. +FEATURES Location/Qualifiers + source 1..338 + /organism="Staphylococcus aureus subsp. aureus VC40" + /strain="VC40" + /sub_species="aureus" + /db_xref="taxon:1028799" + /country="Germany" + /collection_date="2002" + /note="laboratory mutant selected for 60 microgram per ml + vancomycin resistance" + Protein 1..338 + /product="leukocidin/hemolysin toxin family protein" + /calculated_mol_wt=38555 + Region 65..323 + /region_name="Leukocidin" + /note="Leukocidin/Hemolysin toxin family; pfam07968" + /db_xref="CDD:311773" + CDS 1..338 + /locus_tag="SAVC_08965" + /coded_by="complement(NC_016912.1:1946987..1948003)" + /transl_table=11 + /db_xref="GeneID:11869971" +CONTIG join(WP_000595324.1:1..338) +ORIGIN + 1 mikqlcknit ictlalsttf tvlpatsfak inseikqvse knldgdtkmy trtattsdsq + 61 knitqslqfn fltepnydke tvfikakgti gsglrildpn gywnstlrwp gsysvsiqnv + 121 ddnnntnvtd fapknqdesr evkytygykt ggdfsinrgg ltgnitkesn ysetisyqqp + 181 syrtlldqst shkgvgwkve ahlinnmghd htrqltndsd nrtkseifsl trngnlwakd + 241 nftpkdkmpv tvsegfnpef lavmshdkkd kgksqfvvhy krsmdefkid wnrhgfwgyw + 301 sgenhvdkke eklsalyevd wkthnvkfvk vlndnekk +// + diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353147.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353147.gb new file mode 100644 index 0000000000..b965e249c5 --- /dev/null +++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353147.gb @@ -0,0 +1,186 @@ +LOCUS 4AE0_A 535 aa linear BCT 10-OCT-2012 +DEFINITION Chain A, Diphtheria Toxin. +ACCESSION 4AE0_A +VERSION 4AE0_A +DBSOURCE pdb: molecule 4AE0, chain 65, release Apr 18, 2012; + deposition: Jan 4, 2012; + class: Toxin; + source: Mmdb_id: 98377, Pdb_id 1: 4AE0; + Exp. method: X-Ray Diffraction. +KEYWORDS . 
+SOURCE Corynebacterium diphtheriae + ORGANISM Corynebacterium diphtheriae + Bacteria; Actinobacteria; Corynebacteriales; Corynebacteriaceae; + Corynebacterium. +REFERENCE 1 (residues 1 to 535) + AUTHORS Malito,E., Bursulaya,B., Chen,C., Surdo,P.L., Picchianti,M., + Balducci,E., Biancucci,M., Brock,A., Berti,F., Bottomley,M.J., + Nissum,M., Costantino,P., Rappuoli,R. and Spraggon,G. + TITLE Structural basis for lack of toxicity of the diphtheria toxin + mutant CRM197 + JOURNAL Proc. Natl. Acad. Sci. U.S.A. 109 (14), 5229-5234 (2012) + PUBMED 22431623 +REFERENCE 2 (residues 1 to 535) + AUTHORS Malito,E. and Spraggon,G. + TITLE Direct Submission + JOURNAL Submitted (04-JAN-2012) +COMMENT Crystal Structure Of Diphtheria Toxin Mutant Crm197. +FEATURES Location/Qualifiers + source 1..535 + /organism="Corynebacterium diphtheriae" + /db_xref="taxon:1717" + Region 1..187 + /region_name="Diphtheria_C" + /note="Diphtheria toxin, C domain; pfam02763" + /db_xref="CDD:280859" + Region 1..171 + /region_name="Domain 1" + /note="NCBI Domains" + SecStr 10..15 + /sec_str_type="sheet" + /note="strand 1" + SecStr 16..22 + /sec_str_type="sheet" + /note="strand 2" + Site order(20..24,27,31,34..36,38,53..55,65,148) + /site_type="other" + /note="nad+ binding pocket [chemical binding]" + /db_xref="CDD:238651" + SecStr 52..58 + /sec_str_type="sheet" + /note="strand 3" + SecStr 65..68 + /sec_str_type="sheet" + /note="strand 4" + SecStr 78..86 + /sec_str_type="sheet" + /note="strand 5" + SecStr 87..94 + /sec_str_type="sheet" + /note="strand 6" + SecStr 99..106 + /sec_str_type="helix" + /note="helix 1" + SecStr 120..127 + /sec_str_type="helix" + /note="helix 2" + SecStr 132..139 + /sec_str_type="sheet" + /note="strand 7" + SecStr 146..152 + /sec_str_type="sheet" + /note="strand 8" + SecStr 159..167 + /sec_str_type="sheet" + /note="strand 9" + Region 172..382 + /region_name="Domain 2" + /note="NCBI Domains" + SecStr 176..183 + /sec_str_type="helix" + /note="helix 3" + Bond bond(186,201) + 
/bond_type="disulfide" + Region 200..379 + /region_name="Diphtheria_T" + /note="Diphtheria toxin, T domain; pfam02764" + /db_xref="CDD:280860" + SecStr 206..223 + /sec_str_type="helix" + /note="helix 4" + SecStr 224..232 + /sec_str_type="helix" + /note="helix 5" + SecStr 240..255 + /sec_str_type="helix" + /note="helix 6" + SecStr 260..268 + /sec_str_type="helix" + /note="helix 7" + SecStr 275..288 + /sec_str_type="helix" + /note="helix 8" + SecStr 297..304 + /sec_str_type="helix" + /note="helix 9" + SecStr 315..318 + /sec_str_type="sheet" + /note="strand 10" + SecStr 319..322 + /sec_str_type="sheet" + /note="strand 11" + SecStr 326..343 + /sec_str_type="helix" + /note="helix 10" + SecStr 359..376 + /sec_str_type="helix" + /note="helix 11" + Region 381..534 + /region_name="Diphtheria_R" + /note="Diphtheria toxin, R domain; pfam01324" + /db_xref="CDD:279642" + Region 383..535 + /region_name="Domain 3" + /note="NCBI Domains" + SecStr 388..392 + /sec_str_type="sheet" + /note="strand 12" + SecStr 393..399 + /sec_str_type="sheet" + /note="strand 13" + SecStr 404..408 + /sec_str_type="sheet" + /note="strand 14" + SecStr 409..413 + /sec_str_type="sheet" + /note="strand 15" + SecStr 414..423 + /sec_str_type="sheet" + /note="strand 16" + SecStr 424..427 + /sec_str_type="sheet" + /note="strand 17" + SecStr 428..436 + /sec_str_type="sheet" + /note="strand 18" + SecStr 440..444 + /sec_str_type="sheet" + /note="strand 19" + SecStr 447..453 + /sec_str_type="sheet" + /note="strand 20" + SecStr 455..465 + /sec_str_type="sheet" + /note="strand 21" + Bond bond(461,471) + /bond_type="disulfide" + SecStr 467..475 + /sec_str_type="sheet" + /note="strand 22" + SecStr 478..481 + /sec_str_type="sheet" + /note="strand 23" + SecStr 484..494 + /sec_str_type="sheet" + /note="strand 24" + SecStr 495..498 + /sec_str_type="sheet" + /note="strand 25" + SecStr 507..514 + /sec_str_type="sheet" + /note="strand 26" + SecStr 524..535 + /sec_str_type="sheet" + /note="strand 27" +ORIGIN + 1 gaddvvdssk 
sfvmenfssy hgtkpgyvds iqkgiqkpks gtqgnydddw kefystdnky + 61 daagysvdne nplsgkaggv vkvtypgltk vlalkvdnae tikkelglsl teplmeqvgt + 121 eefikrfgdg asrvvlslpf aegsssveyi nnweqakals veleinfetr gkrgqdamye + 181 ymaqacagnr vrrsvgssls cinldwdvir dktktkiesl kehgpiknkm sespnktvse + 241 ekakqyleef hqtalehpel selktvtgtn pvfaganyaa wavnvaqvid setadnlekt + 301 taalsilpgi gsvmgiadga vhhnteeiva qsialsslmv aqaiplvgel vdigfaaynf + 361 vesiinlfqv vhnsynrpay spghktqpfl hdgyavswnt vedsiirtgf qgesghdiki + 421 taentplpia gvllptipgk ldvnkskthi svngrkirmr craidgdvtf crpkspvyvg + 481 ngvhanlhva fhrsssekih sneissdsig vlgyqktvdh tkvnsklslf feiks +// + diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353148.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353148.gb new file mode 100644 index 0000000000..f1340266aa --- /dev/null +++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353148.gb @@ -0,0 +1,184 @@ +LOCUS 4AE1_A 535 aa linear BCT 10-OCT-2012 +DEFINITION Chain A, Diphtheria Toxin. +ACCESSION 4AE1_A +VERSION 4AE1_A +DBSOURCE pdb: molecule 4AE1, chain 65, release Apr 18, 2012; + deposition: Jan 4, 2012; + class: Toxin; + source: Mmdb_id: 98378, Pdb_id 1: 4AE1; + Exp. method: X-Ray Diffraction. +KEYWORDS . +SOURCE Corynebacterium diphtheriae + ORGANISM Corynebacterium diphtheriae + Bacteria; Actinobacteria; Corynebacteriales; Corynebacteriaceae; + Corynebacterium. +REFERENCE 1 (residues 1 to 535) + AUTHORS Malito,E., Bursulaya,B., Chen,C., Surdo,P.L., Picchianti,M., + Balducci,E., Biancucci,M., Brock,A., Berti,F., Bottomley,M.J., + Nissum,M., Costantino,P., Rappuoli,R. and Spraggon,G. + TITLE Structural basis for lack of toxicity of the diphtheria toxin + mutant CRM197 + JOURNAL Proc. Natl. Acad. Sci. U.S.A. 109 (14), 5229-5234 (2012) + PUBMED 22431623 +REFERENCE 2 (residues 1 to 535) + AUTHORS Malito,E. and Spraggon,G. 
+ TITLE Direct Submission + JOURNAL Submitted (04-JAN-2012) +COMMENT Crystal Structure Of Diphtheria Toxin Mutant Crm197 In Complex With + Nicotinamide. +FEATURES Location/Qualifiers + source 1..535 + /organism="Corynebacterium diphtheriae" + /db_xref="taxon:1717" + Region 1..187 + /region_name="Diphtheria_C" + /note="Diphtheria toxin, C domain; pfam02763" + /db_xref="CDD:280859" + Region 1..171 + /region_name="Domain 1" + /note="NCBI Domains" + SecStr 10..15 + /sec_str_type="sheet" + /note="strand 1" + SecStr 16..25 + /sec_str_type="sheet" + /note="strand 2" + Site order(20..24,27,31,34..36,38,53..55,65,148) + /site_type="other" + /note="nad+ binding pocket [chemical binding]" + /db_xref="CDD:238651" + SecStr 52..57 + /sec_str_type="sheet" + /note="strand 3" + SecStr 65..68 + /sec_str_type="sheet" + /note="strand 4" + SecStr 78..86 + /sec_str_type="sheet" + /note="strand 5" + SecStr 87..94 + /sec_str_type="sheet" + /note="strand 6" + SecStr 99..106 + /sec_str_type="helix" + /note="helix 1" + SecStr 120..127 + /sec_str_type="helix" + /note="helix 2" + SecStr 132..139 + /sec_str_type="sheet" + /note="strand 7" + SecStr 147..152 + /sec_str_type="sheet" + /note="strand 8" + SecStr 159..167 + /sec_str_type="sheet" + /note="strand 9" + Region 172..382 + /region_name="Domain 2" + /note="NCBI Domains" + SecStr 176..183 + /sec_str_type="helix" + /note="helix 3" + Bond bond(186,201) + /bond_type="disulfide" + Region 200..379 + /region_name="Diphtheria_T" + /note="Diphtheria toxin, T domain; pfam02764" + /db_xref="CDD:280860" + SecStr 206..220 + /sec_str_type="helix" + /note="helix 4" + SecStr 240..254 + /sec_str_type="helix" + /note="helix 5" + SecStr 260..268 + /sec_str_type="helix" + /note="helix 6" + SecStr 275..288 + /sec_str_type="helix" + /note="helix 7" + SecStr 297..304 + /sec_str_type="helix" + /note="helix 8" + SecStr 315..318 + /sec_str_type="sheet" + /note="strand 10" + SecStr 319..322 + /sec_str_type="sheet" + /note="strand 11" + SecStr 326..343 + 
/sec_str_type="helix" + /note="helix 9" + SecStr 359..376 + /sec_str_type="helix" + /note="helix 10" + Region 381..534 + /region_name="Diphtheria_R" + /note="Diphtheria toxin, R domain; pfam01324" + /db_xref="CDD:279642" + Region 383..535 + /region_name="Domain 3" + /note="NCBI Domains" + SecStr 388..392 + /sec_str_type="sheet" + /note="strand 12" + SecStr 393..399 + /sec_str_type="sheet" + /note="strand 13" + SecStr 404..408 + /sec_str_type="sheet" + /note="strand 14" + SecStr 409..413 + /sec_str_type="sheet" + /note="strand 15" + SecStr 414..423 + /sec_str_type="sheet" + /note="strand 16" + SecStr 424..427 + /sec_str_type="sheet" + /note="strand 17" + SecStr 428..436 + /sec_str_type="sheet" + /note="strand 18" + SecStr 440..444 + /sec_str_type="sheet" + /note="strand 19" + SecStr 447..453 + /sec_str_type="sheet" + /note="strand 20" + SecStr 455..465 + /sec_str_type="sheet" + /note="strand 21" + Bond bond(461,471) + /bond_type="disulfide" + SecStr 467..475 + /sec_str_type="sheet" + /note="strand 22" + SecStr 478..481 + /sec_str_type="sheet" + /note="strand 23" + SecStr 484..494 + /sec_str_type="sheet" + /note="strand 24" + SecStr 495..498 + /sec_str_type="sheet" + /note="strand 25" + SecStr 507..514 + /sec_str_type="sheet" + /note="strand 26" + SecStr 524..535 + /sec_str_type="sheet" + /note="strand 27" +ORIGIN + 1 gaddvvdssk sfvmenfssy hgtkpgyvds iqkgiqkpks gtqgnydddw kefystdnky + 61 daagysvdne nplsgkaggv vkvtypgltk vlalkvdnae tikkelglsl teplmeqvgt + 121 eefikrfgdg asrvvlslpf aegsssveyi nnweqakals veleinfetr gkrgqdamye + 181 ymaqacagnr vrrsvgssls cinldwdvir dktktkiesl kehgpiknkm sespnktvse + 241 ekakqyleef hqtalehpel selktvtgtn pvfaganyaa wavnvaqvid setadnlekt + 301 taalsilpgi gsvmgiadga vhhnteeiva qsialsslmv aqaiplvgel vdigfaaynf + 361 vesiinlfqv vhnsynrpay spghktqpfl hdgyavswnt vedsiirtgf qgesghdiki + 421 taentplpia gvllptipgk ldvnkskthi svngrkirmr craidgdvtf crpkspvyvg + 481 ngvhanlhva fhrsssekih sneissdsig vlgyqktvdh tkvnsklslf feiks +// + diff --git 
a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353149.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353149.gb new file mode 100644 index 0000000000..4237b720e4 --- /dev/null +++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353149.gb @@ -0,0 +1,187 @@ +LOCUS 4AE1_B 535 aa linear BCT 10-OCT-2012 +DEFINITION Chain B, Diphtheria Toxin. +ACCESSION 4AE1_B +VERSION 4AE1_B +DBSOURCE pdb: molecule 4AE1, chain 66, release Apr 18, 2012; + deposition: Jan 4, 2012; + class: Toxin; + source: Mmdb_id: 98378, Pdb_id 1: 4AE1; + Exp. method: X-Ray Diffraction. +KEYWORDS . +SOURCE Corynebacterium diphtheriae + ORGANISM Corynebacterium diphtheriae + Bacteria; Actinobacteria; Corynebacteriales; Corynebacteriaceae; + Corynebacterium. +REFERENCE 1 (residues 1 to 535) + AUTHORS Malito,E., Bursulaya,B., Chen,C., Surdo,P.L., Picchianti,M., + Balducci,E., Biancucci,M., Brock,A., Berti,F., Bottomley,M.J., + Nissum,M., Costantino,P., Rappuoli,R. and Spraggon,G. + TITLE Structural basis for lack of toxicity of the diphtheria toxin + mutant CRM197 + JOURNAL Proc. Natl. Acad. Sci. U.S.A. 109 (14), 5229-5234 (2012) + PUBMED 22431623 +REFERENCE 2 (residues 1 to 535) + AUTHORS Malito,E. and Spraggon,G. + TITLE Direct Submission + JOURNAL Submitted (04-JAN-2012) +COMMENT Crystal Structure Of Diphtheria Toxin Mutant Crm197 In Complex With + Nicotinamide. 
+FEATURES Location/Qualifiers + source 1..535 + /organism="Corynebacterium diphtheriae" + /db_xref="taxon:1717" + Region 1..187 + /region_name="Diphtheria_C" + /note="Diphtheria toxin, C domain; pfam02763" + /db_xref="CDD:280859" + Region 1..171 + /region_name="Domain 4" + /note="NCBI Domains" + SecStr 10..15 + /sec_str_type="sheet" + /note="strand 28" + SecStr 16..22 + /sec_str_type="sheet" + /note="strand 29" + Site order(20..24,27,31,34..36,38,53..55,65,148) + /site_type="other" + /note="nad+ binding pocket [chemical binding]" + /db_xref="CDD:238651" + SecStr 54..57 + /sec_str_type="sheet" + /note="strand 30" + SecStr 65..68 + /sec_str_type="sheet" + /note="strand 31" + SecStr 78..86 + /sec_str_type="sheet" + /note="strand 32" + SecStr 87..94 + /sec_str_type="sheet" + /note="strand 33" + SecStr 99..106 + /sec_str_type="helix" + /note="helix 11" + SecStr 120..127 + /sec_str_type="helix" + /note="helix 12" + SecStr 132..139 + /sec_str_type="sheet" + /note="strand 34" + SecStr 147..152 + /sec_str_type="sheet" + /note="strand 35" + SecStr 159..167 + /sec_str_type="sheet" + /note="strand 36" + Region 172..382 + /region_name="Domain 5" + /note="NCBI Domains" + SecStr 176..183 + /sec_str_type="helix" + /note="helix 13" + Bond bond(186,201) + /bond_type="disulfide" + Region 200..379 + /region_name="Diphtheria_T" + /note="Diphtheria toxin, T domain; pfam02764" + /db_xref="CDD:280860" + SecStr 206..223 + /sec_str_type="helix" + /note="helix 14" + SecStr 224..232 + /sec_str_type="helix" + /note="helix 15" + SecStr 240..254 + /sec_str_type="helix" + /note="helix 16" + SecStr 260..268 + /sec_str_type="helix" + /note="helix 17" + SecStr 275..288 + /sec_str_type="helix" + /note="helix 18" + SecStr 297..304 + /sec_str_type="helix" + /note="helix 19" + SecStr 315..318 + /sec_str_type="sheet" + /note="strand 37" + SecStr 319..322 + /sec_str_type="sheet" + /note="strand 38" + SecStr 326..343 + /sec_str_type="helix" + /note="helix 20" + SecStr 359..376 + /sec_str_type="helix" + 
/note="helix 21" + Region 381..534 + /region_name="Diphtheria_R" + /note="Diphtheria toxin, R domain; pfam01324" + /db_xref="CDD:279642" + Region 383..535 + /region_name="Domain 6" + /note="NCBI Domains" + SecStr 388..392 + /sec_str_type="sheet" + /note="strand 39" + SecStr 393..399 + /sec_str_type="sheet" + /note="strand 40" + SecStr 404..408 + /sec_str_type="sheet" + /note="strand 41" + SecStr 409..413 + /sec_str_type="sheet" + /note="strand 42" + SecStr 414..423 + /sec_str_type="sheet" + /note="strand 43" + SecStr 424..427 + /sec_str_type="sheet" + /note="strand 44" + SecStr 428..436 + /sec_str_type="sheet" + /note="strand 45" + SecStr 440..444 + /sec_str_type="sheet" + /note="strand 46" + SecStr 447..453 + /sec_str_type="sheet" + /note="strand 47" + SecStr 455..465 + /sec_str_type="sheet" + /note="strand 48" + Bond bond(461,471) + /bond_type="disulfide" + SecStr 467..475 + /sec_str_type="sheet" + /note="strand 49" + SecStr 478..481 + /sec_str_type="sheet" + /note="strand 50" + SecStr 484..494 + /sec_str_type="sheet" + /note="strand 51" + SecStr 495..498 + /sec_str_type="sheet" + /note="strand 52" + SecStr 507..514 + /sec_str_type="sheet" + /note="strand 53" + SecStr 524..535 + /sec_str_type="sheet" + /note="strand 54" +ORIGIN + 1 gaddvvdssk sfvmenfssy hgtkpgyvds iqkgiqkpks gtqgnydddw kefystdnky + 61 daagysvdne nplsgkaggv vkvtypgltk vlalkvdnae tikkelglsl teplmeqvgt + 121 eefikrfgdg asrvvlslpf aegsssveyi nnweqakals veleinfetr gkrgqdamye + 181 ymaqacagnr vrrsvgssls cinldwdvir dktktkiesl kehgpiknkm sespnktvse + 241 ekakqyleef hqtalehpel selktvtgtn pvfaganyaa wavnvaqvid setadnlekt + 301 taalsilpgi gsvmgiadga vhhnteeiva qsialsslmv aqaiplvgel vdigfaaynf + 361 vesiinlfqv vhnsynrpay spghktqpfl hdgyavswnt vedsiirtgf qgesghdiki + 421 taentplpia gvllptipgk ldvnkskthi svngrkirmr craidgdvtf crpkspvyvg + 481 ngvhanlhva fhrsssekih sneissdsig vlgyqktvdh tkvnsklslf feiks +// + diff --git 
a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/399235158.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/399235158.gb new file mode 100644 index 0000000000..ceed26194e --- /dev/null +++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/399235158.gb @@ -0,0 +1,83 @@ +LOCUS AFP42651 630 aa linear BCT 31-JAN-2014 +DEFINITION Fatty-acid-CoA ligase FadD32 [Mycolicibacterium smegmatis MC2 155]. +ACCESSION AFP42651 +VERSION AFP42651.1 +DBLINK BioProject: PRJNA38453 + BioSample: SAMN02603392 +DBSOURCE accession CP001663.1 +KEYWORDS . +SOURCE Mycolicibacterium smegmatis MC2 155 + ORGANISM Mycolicibacterium smegmatis MC2 155 + Bacteria; Actinobacteria; Corynebacteriales; Mycobacteriaceae; + Mycolicibacterium. +REFERENCE 1 (residues 1 to 630) + AUTHORS Perrodou,E., Deshayes,C., Muller,J., Schaeffer,C., Van + Dorsselaer,A., Ripp,R., Poch,O., Reyrat,J.M. and Lecompte,O. + TITLE ICDS database: interrupted CoDing sequences in prokaryotic genomes + JOURNAL Nucleic Acids Res. 34 (DATABASE ISSUE), D338-D343 (2006) + PUBMED 16381882 +REFERENCE 2 (residues 1 to 630) + AUTHORS Deshayes,C., Perrodou,E., Gallien,S., Euphrasie,D., Schaeffer,C., + Van-Dorsselaer,A., Poch,O., Lecompte,O. and Reyrat,J.M. + TITLE Interrupted coding sequences in Mycobacterium smegmatis: authentic + mutations or sequencing errors? + JOURNAL Genome Biol. 8 (2), R20 (2007) + PUBMED 17295914 + REMARK Publication Status: Online-Only +REFERENCE 3 (residues 1 to 630) + AUTHORS Gallien,S., Perrodou,E., Carapito,C., Deshayes,C., Reyrat,J.M., Van + Dorsselaer,A., Poch,O., Schaeffer,C. and Lecompte,O. + TITLE Ortho-proteogenomics: multiple proteomes investigation through + orthology and a new MS-based protocol + JOURNAL Genome Res. 
19 (1), 128-135 (2009) + PUBMED 18955433 +REFERENCE 4 (residues 1 to 630) + AUTHORS Reyrat,J.M., Perrodou,E., Deshayes,C., Euphrasie,D., Gagniere,N., + Gallien,S., Jones,M., Kocincova,D., Poch,O., Quevillon,E., Ripp,R., + Schaeffer,C., Singh,A., Van Dorsselaer,A. and Lecompte,O. + TITLE Re-annotation of the genome sequence of Mycobacterium smegmatis + JOURNAL Unpublished +REFERENCE 5 (residues 1 to 630) + AUTHORS Perrodou,E., Reyrat,J.M., Deshayes,C., Euphrasie,D., Gagniere,N., + Gallien,S., Jones,M., Kocincova,D., Poch,O., Quevillon,E., Ripp,R., + Schaeffer,C., Singh,A., Van Dorsselaer,A. and Lecompte,O. + TITLE Direct Submission + JOURNAL Submitted (22-JUN-2009) Laboratory of Integrative Bioinformatics + and Genomics, Institute of Genetics and Molecular and Cellular + Biology, 1 rue Laurent Fries BP 10142, Illkirch Cedex 67404, France +COMMENT Method: conceptual translation. +FEATURES Location/Qualifiers + source 1..630 + /organism="Mycolicibacterium smegmatis MC2 155" + /strain="MC2 155" + /db_xref="taxon:246196" + Protein 1..630 + /product="Fatty-acid-CoA ligase FadD32" + Region 1..630 + /region_name="PRK07769" + /note="long-chain-fatty-acid--CoA ligase; Validated" + /db_xref="CDD:181109" + CDS 1..630 + /gene="fadD32" + /locus_tag="MSMEI_6225" + /coded_by="complement(CP001663.1:6463934..6465826)" + /experiment="Nterminal peptide experimentally determined + by amino acid sequencing after protein digestion" + /note="GO_function: GO:0003824; + GO_process: GO:0008152" + /transl_table=11 + /db_xref="PFAM:PF00501" +ORIGIN + 1 mpfhnpfikd gqikfpdgss ivahverwak vrgdklayrf ldfsterdgv prdltwaqfs + 61 arnravaarl qqvtqpgdrv ailcpqnldy lvaffgalya griavplfdp sepghvgrlh + 121 avldnchpsa ilttteaaeg vrkffrtrpa nqrprviavd avpddvastw vnpdepdett + 181 iaylqytsgs triptgvqit hlnlatnvvq viealegeeg drglswlpff hdmglitall + 241 apmighyftf mtpaafvrrp erwirelark egdtggtisv apnfafdhaa argvpkpgsp + 301 pldlsnvkav lngsepisaa tvrrfneafg pfgfppkaik psyglaeatl fvsttpsaee + 361 pkiitvdrdq 
lnsgrivevd adspkavaqa sagkvgiaew avivdaesat elpdgqvgei + 421 wisgqnmgtg ywgkpeesva tfqnilksrt npshaegatd datwvrtgdy gafydgdlyi + 481 tgrvkdlvii dgrnhypqdl eysaqeaska irtgyvaafs vpanqlpdev fenahsgikr + 541 dpddtseqlv ivaerapgah kldigpitdd iraaiavrhg vtvrdvllta agaiprtssg + 601 kigrracraa yldgslragk vandfpdatd +// + diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/7525057.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/7525057.gb new file mode 100644 index 0000000000..1eccc2480d --- /dev/null +++ b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/7525057.gb @@ -0,0 +1,87 @@ +LOCUS NP_051038 123 aa linear PLN 26-MAR-2010 +DEFINITION ribosomal protein S12 (chloroplast) [Arabidopsis thaliana]. +ACCESSION NP_051038 +VERSION NP_051038.1 +DBLINK Project: 116 + BioProject: PRJNA116 +DBSOURCE REFSEQ: accession NC_000932.1 +KEYWORDS RefSeq. +SOURCE chloroplast Arabidopsis thaliana (thale cress) + ORGANISM Arabidopsis thaliana + Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; + Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; + Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; + Camelineae; Arabidopsis. +REFERENCE 1 (residues 1 to 123) + AUTHORS Sato,S., Nakamura,Y., Kaneko,T., Asamizu,E. and Tabata,S. + TITLE Complete structure of the chloroplast genome of Arabidopsis + thaliana + JOURNAL DNA Res. 6 (5), 283-290 (1999) + PUBMED 10574454 +REFERENCE 2 (residues 1 to 123) + CONSRTM NCBI Genome Project + TITLE Direct Submission + JOURNAL Submitted (07-APR-2000) National Center for Biotechnology + Information, NIH, Bethesda, MD 20894, USA +REFERENCE 3 (residues 1 to 123) + AUTHORS Nakamura,Y. 
+ TITLE Direct Submission + JOURNAL Submitted (09-SEP-1999) Laboratory of Gene Structure 2, Kazusa DNA + Research Institute, Yana 1532-3, Kisarazu, Chiba 292-0812, Japan +COMMENT REVIEWED REFSEQ: This record has been curated by NCBI staff. The + reference sequence was derived from BAA84409. + Method: conceptual translation. +FEATURES Location/Qualifiers + source 1..123 + /organism="Arabidopsis thaliana" + /organelle="plastid:chloroplast" + /db_xref="taxon:3702" + /ecotype="Columbia" + Protein 1..123 + /product="ribosomal protein S12" + /calculated_mol_wt=13633 + Region 1..123 + /region_name="rps12" + /note="ribosomal protein S12; CHL00051" + /db_xref="CDD:176992" + Site order(4..5,7..8,11..12) + /site_type="other" + /note="S17 interaction site [polypeptide binding]" + /db_xref="CDD:239466" + Site 4 + /site_type="other" + /note="S8 interaction site" + /db_xref="CDD:239466" + Site order(12..14,26,28..29,31,46..47,49..51,58,66,69..70, + 83..84,88..89,110) + /site_type="other" + /note="16S rRNA interaction site [nucleotide binding]" + /db_xref="CDD:239466" + Site order(43..44,88) + /site_type="other" + /note="streptomycin interaction site [chemical binding]" + /db_xref="CDD:239466" + Site 44..45 + /site_type="other" + /note="23S rRNA interaction site [nucleotide binding]" + /db_xref="CDD:239466" + Site order(45..50,70..78) + /site_type="other" + /note="aminoacyl-tRNA interaction site (A-site) + [nucleotide binding]" + /db_xref="CDD:239466" + CDS 1..123 + /gene="rps12" + /locus_tag="ArthCp047" + /coded_by="join(complement(NC_000932.1:69611..69724), + NC_000932.1:139856..140087,NC_000932.1:140625..140650)" + /trans_splicing + /note="trans-spliced" + /transl_table=11 + /db_xref="GeneID:844801" +ORIGIN + 1 mptikqlirn trqpirnvtk spalrgcpqr rgtctrvyti tpkkpnsalr kvarvrltsg + 61 feitayipgi ghnlqehsvv lvrggrvkdl pgvryhivrg tldavgvkdr qqgrskygvk + 121 kpk +// + From c6f17ca37a70c37a9c77f0dec92c32d8bcd842ad Mon Sep 17 00:00:00 2001 From: Lee Date: Thu, 21 Feb 2019 22:33:55 -0500 
Subject: [PATCH 6/9] #831 Fixing missing slash on the travis build server (works without fix on windows but added check for backslash anyhow) --- .../sequence/loader/GenbankProxySequenceReaderTest.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java index ecfdc5c7fd..35f141cb91 100644 --- a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java +++ b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java @@ -97,7 +97,14 @@ public void copyTestFiles() throws IOException { * @throws IOException when something goes wrong with copying the files. */ private void copyTestFileToWorkingDirectory(String filename) throws IOException { - String dest = System.getProperty("java.io.tmpdir") + filename; + String destRoot = System.getProperty("java.io.tmpdir"); + + //if the directory does not end with a slash or backslash then add one + if(!(destRoot.endsWith("/") || destRoot.endsWith("\\"))){ + destRoot += destRoot.contains("/")? 
"/" : "\\"; + } + + String dest = destRoot + filename; String src = "org/biojava/nbio/core/sequence/GenbankProxySequenceReader/" + filename; FileOutputStream destination = new FileOutputStream(new File(dest)); From e221bd5f9874b57296c54f94eb0905e10e35e621 Mon Sep 17 00:00:00 2001 From: Lee Date: Fri, 22 Feb 2019 21:04:41 -0500 Subject: [PATCH 7/9] #831 Removed use of apache io library --- biojava-core/pom.xml | 5 ----- .../loader/GenbankProxySequenceReaderTest.java | 15 ++++++++++++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/biojava-core/pom.xml b/biojava-core/pom.xml index 739ae360f0..5b0ed01b0b 100644 --- a/biojava-core/pom.xml +++ b/biojava-core/pom.xml @@ -44,11 +44,6 @@ junit test - - org.apache.commons - commons-io - 1.3.2 - org.slf4j diff --git a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java index 35f141cb91..e75e1f476b 100644 --- a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java +++ b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java @@ -20,7 +20,6 @@ */ package org.biojava.nbio.core.sequence.loader; -import org.apache.commons.io.IOUtils; import org.biojava.nbio.core.exceptions.CompoundNotFoundException; import org.biojava.nbio.core.sequence.ProteinSequence; import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; @@ -79,7 +78,7 @@ public static Collection getExamples() { /** * In {@link GenbankProxySequenceReader} there is a check to see if the requested files are already in the temp - * directory before attemting to retrieve them from the remote server. so simply copying the test files to the temp + * directory before attempting to retrieve them from the remote server. 
so simply copying the test files to the temp * directory avoids calling out to the server and hitting a 429 status code from the server which fails the build. * @throws IOException */ @@ -107,11 +106,21 @@ private void copyTestFileToWorkingDirectory(String filename) throws IOException String dest = destRoot + filename; String src = "org/biojava/nbio/core/sequence/GenbankProxySequenceReader/" + filename; + //Remove any pre-existing files + File d = new File(dest); + d.delete(); + FileOutputStream destination = new FileOutputStream(new File(dest)); InputStream source = this.getClass().getClassLoader().getResourceAsStream(src); - IOUtils.copy(source, destination); +a int read; + byte[] buffer = new byte[1024]; + + while((read = source.read(buffer)) > 0){ + destination.write(buffer, 0, read); + } + destination.flush(); destination.close(); source.close(); } From b652a431ba98c93b702b6c04eca4de790fc396b1 Mon Sep 17 00:00:00 2001 From: Lee Date: Fri, 22 Feb 2019 21:07:04 -0500 Subject: [PATCH 8/9] #831 typo --- .../core/sequence/loader/GenbankProxySequenceReaderTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java index e75e1f476b..30e96a4865 100644 --- a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java +++ b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java @@ -113,7 +113,7 @@ private void copyTestFileToWorkingDirectory(String filename) throws IOException FileOutputStream destination = new FileOutputStream(new File(dest)); InputStream source = this.getClass().getClassLoader().getResourceAsStream(src); -a int read; + int read; byte[] buffer = new byte[1024]; while((read = source.read(buffer)) > 0){ From 4fd4149b4b570341065380de635bab56e36b836c Mon 
Sep 17 00:00:00 2001 From: Lee Date: Wed, 27 Feb 2019 16:36:36 -0500 Subject: [PATCH 9/9] made requested changes --- .../GenbankProxySequenceReaderTest.java | 19 +++++++------------ .../152970917.gb | 0 .../254839678.gb | 0 .../379015144.gb | 0 .../381353147.gb | 0 .../381353148.gb | 0 .../381353149.gb | 0 .../399235158.gb | 0 .../7525057.gb | 0 9 files changed, 7 insertions(+), 12 deletions(-) rename biojava-core/src/test/resources/org/biojava/nbio/core/sequence/{GenbankProxySequenceReader => loader}/152970917.gb (100%) rename biojava-core/src/test/resources/org/biojava/nbio/core/sequence/{GenbankProxySequenceReader => loader}/254839678.gb (100%) rename biojava-core/src/test/resources/org/biojava/nbio/core/sequence/{GenbankProxySequenceReader => loader}/379015144.gb (100%) rename biojava-core/src/test/resources/org/biojava/nbio/core/sequence/{GenbankProxySequenceReader => loader}/381353147.gb (100%) rename biojava-core/src/test/resources/org/biojava/nbio/core/sequence/{GenbankProxySequenceReader => loader}/381353148.gb (100%) rename biojava-core/src/test/resources/org/biojava/nbio/core/sequence/{GenbankProxySequenceReader => loader}/381353149.gb (100%) rename biojava-core/src/test/resources/org/biojava/nbio/core/sequence/{GenbankProxySequenceReader => loader}/399235158.gb (100%) rename biojava-core/src/test/resources/org/biojava/nbio/core/sequence/{GenbankProxySequenceReader => loader}/7525057.gb (100%) diff --git a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java index 30e96a4865..2205de4b27 100644 --- a/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java +++ b/biojava-core/src/test/java/org/biojava/nbio/core/sequence/loader/GenbankProxySequenceReaderTest.java @@ -40,6 +40,8 @@ import java.io.FileOutputStream; import java.io.IOException; import 
java.io.InputStream; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -104,25 +106,18 @@ private void copyTestFileToWorkingDirectory(String filename) throws IOException } String dest = destRoot + filename; - String src = "org/biojava/nbio/core/sequence/GenbankProxySequenceReader/" + filename; + String src = "org/biojava/nbio/core/sequence/loader/" + filename; //Remove any pre-existing files File d = new File(dest); d.delete(); - FileOutputStream destination = new FileOutputStream(new File(dest)); - InputStream source = this.getClass().getClassLoader().getResourceAsStream(src); + try(FileOutputStream destination = new FileOutputStream(d); + InputStream is = this.getClass().getClassLoader().getResourceAsStream(src); + ReadableByteChannel source = Channels.newChannel(is)) { - int read; - byte[] buffer = new byte[1024]; - - while((read = source.read(buffer)) > 0){ - destination.write(buffer, 0, read); + destination.getChannel().transferFrom(source, 0, Long.MAX_VALUE); } - - destination.flush(); - destination.close(); - source.close(); } diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/152970917.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/152970917.gb similarity index 100% rename from biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/152970917.gb rename to biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/152970917.gb diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/254839678.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/254839678.gb similarity index 100% rename from biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/254839678.gb rename to 
biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/254839678.gb diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/379015144.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/379015144.gb similarity index 100% rename from biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/379015144.gb rename to biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/379015144.gb diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353147.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/381353147.gb similarity index 100% rename from biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353147.gb rename to biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/381353147.gb diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353148.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/381353148.gb similarity index 100% rename from biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353148.gb rename to biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/381353148.gb diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353149.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/381353149.gb similarity index 100% rename from biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/381353149.gb rename to biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/381353149.gb diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/399235158.gb 
b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/399235158.gb similarity index 100% rename from biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/399235158.gb rename to biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/399235158.gb diff --git a/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/7525057.gb b/biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/7525057.gb similarity index 100% rename from biojava-core/src/test/resources/org/biojava/nbio/core/sequence/GenbankProxySequenceReader/7525057.gb rename to biojava-core/src/test/resources/org/biojava/nbio/core/sequence/loader/7525057.gb