From 5c1a335d43cf951344bd359e8f2ddec0e88a932e Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Mon, 4 Jun 2018 17:21:38 +0200 Subject: [PATCH 01/11] Improving CathInstallation logging log slow downloads upon start --- .../java/org/biojava/nbio/structure/cath/CathInstallation.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathInstallation.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathInstallation.java index 265248d361..7c21a88e62 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathInstallation.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathInstallation.java @@ -636,6 +636,7 @@ private void parseCathDomall(BufferedReader bufferedReader) throws IOException{ protected void downloadFileFromRemote(URL remoteURL, File localFile) throws IOException{ // System.out.println("downloading " + remoteURL + " to: " + localFile); + LOGGER.info("Downloaded file {} to local file {}", remoteURL, localFile); long timeS = System.currentTimeMillis(); File tempFile = File.createTempFile(FileDownloadUtils.getFilePrefix(localFile), "."+ FileDownloadUtils.getFileExtension(localFile)); @@ -665,7 +666,7 @@ protected void downloadFileFromRemote(URL remoteURL, File localFile) throws IOEx disp = disp / 1024.0; } long timeE = System.currentTimeMillis(); - LOGGER.info("Downloaded file {} ({}) to local file {} in {} sec.", remoteURL, String.format("%.1f",disp) + unit, localFile, (timeE - timeS)/1000); + LOGGER.info("Downloaded {} in {} sec. to {}", String.format("%.1f",disp) + unit, (timeE - timeS)/1000, localFile); } private boolean domainDescriptionFileAvailable(){ From 4785d57be7839ace552328c1097fa4595f41a3da Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Mon, 4 Jun 2018 22:03:14 +0200 Subject: [PATCH 02/11] Add test for #703 --- .../structure/io/util/FileDownloadUtils.java | 47 ++++++- .../structure/align/util/AtomCacheTest.java | 130 ++++++++++++++++-- 2 files changed, 162 insertions(+), 15 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/util/FileDownloadUtils.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/util/FileDownloadUtils.java index ab8833d716..98adc90b2e 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/util/FileDownloadUtils.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/util/FileDownloadUtils.java @@ -21,9 +21,6 @@ */ package org.biojava.nbio.structure.io.util; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -36,6 +33,15 @@ import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.channels.ReadableByteChannel; +import java.nio.file.FileVisitResult; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.attribute.BasicFileAttributes; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class FileDownloadUtils { @@ -240,6 +246,41 @@ public static URLConnection prepareURLConnection(String url, int timeout) throws connection.setConnectTimeout(timeout); return connection; } + + /** + * Recursively delete a folder & contents + * + * @param dir directory to delete + */ + public static void deleteDirectory(Path dir) throws IOException { + if(dir == null || !Files.exists(dir)) + return; + Files.walkFileTree(dir, new SimpleFileVisitor() { + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + Files.delete(file); + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException e) throws IOException { + if (e != null) { + throw e; + } + Files.delete(dir); + return FileVisitResult.CONTINUE; + } + }); + } + /** + * Recursively delete a folder & contents + * + * @param dir directory to delete + */ + public static void deleteDirectory(String dir) throws IOException { + deleteDirectory(Paths.get(dir)); + } + public static void main(String[] args) { String url; diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java index 08858f46b1..cb088ed953 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java @@ -20,26 +20,48 @@ */ package org.biojava.nbio.structure.align.util; -import org.biojava.nbio.structure.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.FileAttribute; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.List; +import java.util.Locale; +import java.util.zip.GZIPOutputStream; + +import org.biojava.nbio.structure.AtomPositionMap; +import org.biojava.nbio.structure.Chain; +import org.biojava.nbio.structure.Group; +import org.biojava.nbio.structure.ResidueRangeAndLength; +import org.biojava.nbio.structure.Structure; +import org.biojava.nbio.structure.StructureException; +import org.biojava.nbio.structure.StructureIO; +import org.biojava.nbio.structure.StructureIdentifier; +import org.biojava.nbio.structure.StructureTools; +import org.biojava.nbio.structure.SubstructureIdentifier; import org.biojava.nbio.structure.io.LocalPDBDirectory; import org.biojava.nbio.structure.io.LocalPDBDirectory.FetchBehavior; import org.biojava.nbio.structure.io.LocalPDBDirectory.ObsoleteBehavior; +import org.biojava.nbio.structure.io.util.FileDownloadUtils; import org.biojava.nbio.structure.io.MMCIFFileReader; import org.biojava.nbio.structure.scop.ScopDatabase; import org.biojava.nbio.structure.scop.ScopFactory; import org.junit.After; import org.junit.Before; import org.junit.Test; - -import java.io.File; -import java.io.IOException; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.List; -import java.util.Locale; - -import static org.junit.Assert.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** @@ -49,22 +71,30 @@ */ public class AtomCacheTest { + private static Logger logger = LoggerFactory.getLogger(AtomCacheTest.class); private AtomCache cache; private String previousPDB_DIR; + private String previousPDB_CACHE_DIR; + private AtomCache cleanCache = new AtomCache(); @Before public void setUp() { previousPDB_DIR = System.getProperty(UserConfiguration.PDB_DIR, null); + previousPDB_CACHE_DIR = System.getProperty(UserConfiguration.PDB_CACHE_DIR, null); cache = new AtomCache(); cache.setObsoleteBehavior(ObsoleteBehavior.FETCH_OBSOLETE); + StructureIO.setAtomCache(cache); // Use a fixed SCOP version for stability ScopFactory.setScopDatabase(ScopFactory.VERSION_1_75B); } @After public void tearDown() { - if (previousPDB_DIR != null) + if (previousPDB_DIR != null) { System.setProperty(UserConfiguration.PDB_DIR, previousPDB_DIR); + System.setProperty(UserConfiguration.PDB_CACHE_DIR, previousPDB_CACHE_DIR); + } + StructureIO.setAtomCache(cleanCache); } /** @@ -312,4 +342,80 @@ public void testSeqRes() throws StructureException, IOException { } + /** + * Test for #703 - Chemical component cache poisoning + * + * Handle empty CIF files + * @throws IOException + * @throws StructureException + */ + @Test + public void testEmptyChemComp() throws IOException, StructureException { + Path tmpCache = Paths.get(System.getProperty("java.io.tmpdir"),"BIOJAVA_TEST_CACHE"); + logger.info("Testing AtomCache at {}", tmpCache.toString()); + System.setProperty(UserConfiguration.PDB_DIR, tmpCache.toString()); + System.setProperty(UserConfiguration.PDB_CACHE_DIR, tmpCache.toString()); + + FileDownloadUtils.deleteDirectory(tmpCache); + Files.createDirectory(tmpCache); + try { + cache.setPath(tmpCache.toString()); + cache.setCachePath(tmpCache.toString()); + cache.setUseMmCif(true); + + // Create an empty chemcomp + Path sub = tmpCache.resolve(Paths.get("chemcomp", "ALA.cif.gz")); + Files.createDirectories(sub.getParent()); + Files.createFile(sub); + assertTrue(Files.exists(sub)); + assertEquals(0, Files.size(sub)); + + Structure s = cache.getStructure("1A4W"); + + assertNotNull(s); + } finally { + FileDownloadUtils.deleteDirectory(tmpCache); + } + } + + /** + * Test for #703 - Chemical component cache poisoning + * + * Handle empty CIF files + * @throws IOException + * @throws StructureException + */ + @Test + public void testEmptyGZChemComp() throws IOException, StructureException { + Path tmpCache = Paths.get(System.getProperty("java.io.tmpdir"),"BIOJAVA_TEST_CACHE"); + logger.info("Testing AtomCache at {}", tmpCache.toString()); + System.setProperty(UserConfiguration.PDB_DIR, tmpCache.toString()); + System.setProperty(UserConfiguration.PDB_CACHE_DIR, tmpCache.toString()); + + FileDownloadUtils.deleteDirectory(tmpCache); + Files.createDirectory(tmpCache); + try { + cache.setPath(tmpCache.toString()); + cache.setCachePath(tmpCache.toString()); + cache.setUseMmCif(true); + + // Create an empty chemcomp + Path sub = tmpCache.resolve(Paths.get("chemcomp", "ALA.cif.gz")); + Files.createDirectories(sub.getParent()); + try(GZIPOutputStream out = new GZIPOutputStream(new FileOutputStream(sub.toFile()))) { + // don't write anything + out.flush(); + } + assertTrue(Files.exists(sub)); + assertTrue(0 < Files.size(sub)); + + Structure s = cache.getStructure("1A4W"); + + assertNotNull(s); + + } finally { + FileDownloadUtils.deleteDirectory(tmpCache); + } + } + } From c3cec9187eae380878a462495cbb98dbfdcdce5e Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Fri, 8 Jun 2018 12:26:22 +0200 Subject: [PATCH 03/11] Support initial comments in MMCif files (e.g. those generated by PyMOL) --- .../org/biojava/nbio/structure/io/mmcif/SimpleMMcifParser.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifParser.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifParser.java index 1b6851c6d8..a02f90139d 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifParser.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifParser.java @@ -212,6 +212,9 @@ public void parse(BufferedReader buf) // the first line is a data_PDBCODE line, test if this looks like a mmcif file line = buf.readLine(); + while( line != null && (line.isEmpty() || line.startsWith(COMMENT_CHAR))) { + line = buf.readLine(); + } if (line == null || !line.startsWith(MMCIF_TOP_HEADER)){ logger.error("This does not look like a valid mmCIF file! The first line should start with 'data_', but is: '" + line+"'"); triggerDocumentEnd(); From b207d34bfe946d4eb8eccfb3483cf97b2b8bb769 Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Fri, 8 Jun 2018 12:27:17 +0200 Subject: [PATCH 04/11] Make tests for #703 fail with an assertion The main change hasn't been implemented, so we want tests to fail. However, the tests exposed some NPE and IO exceptions. These are now fixed, so the tests fail in the expected manner. - Use ATP ligand, which is not covered by the ReducedChemCompProvider - Use the ReducedChemCompProvider as a fallback consistently, preventing null chemComp - Defensive parsing in SimpleMMcifConsumer - Robust test, doesn't require internet! --- .../io/mmcif/DownloadChemCompProvider.java | 13 +++- .../io/mmcif/SimpleMMcifConsumer.java | 8 ++- .../structure/align/util/AtomCacheTest.java | 59 ++++++++++++++---- .../src/test/resources/atp.cif.gz | Bin 0 -> 842 bytes 4 files changed, 64 insertions(+), 16 deletions(-) create mode 100644 biojava-structure/src/test/resources/atp.cif.gz diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java index fac2a86cbb..a1484848db 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java @@ -85,6 +85,8 @@ public class DownloadChemCompProvider implements ChemCompProvider { protectedIDs.add("AUX"); protectedIDs.add("NUL"); } + + private static ChemCompProvider fallback = null; // Fallback provider if the download fails /** by default we will download only some of the files. User has to request that all files should be downloaded... * @@ -272,7 +274,10 @@ public ChemComp getChemComp(String recordName) { ChemComp chemComp = dict.getChemComp(recordName); - return chemComp; + // May be null if the file was corrupt. Fall back on ReducedChemCompProvider in that case + if(chemComp != null) { + return chemComp; + } } catch (IOException e) { @@ -296,9 +301,11 @@ public ChemComp getChemComp(String recordName) { // see https://github.com/biojava/biojava/issues/315 // probably a network error happened. Try to use the ReducedChemCOmpProvider - ReducedChemCompProvider reduced = new ReducedChemCompProvider(); + if( fallback == null) { + fallback = new ReducedChemCompProvider(); + } - return reduced.getChemComp(recordName); + return fallback.getChemComp(recordName); } diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java index 1b34f1238b..6bdedba4b7 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java @@ -328,7 +328,13 @@ public void newAtomSite(AtomSite atom) { String recordName = atom.getGroup_PDB(); String residueNumberS = atom.getAuth_seq_id(); - Integer residueNrInt = Integer.parseInt(residueNumberS); + Integer residueNrInt; + if(residueNumberS != null) { + residueNrInt = Integer.parseInt(residueNumberS); + } else { + String label_seq_id = atom.getLabel_seq_id(); + residueNrInt = Integer.parseInt(label_seq_id); + } // the 3-letter name of the group: String groupCode3 = atom.getLabel_comp_id(); diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java index cb088ed953..d37ecf9124 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java @@ -29,10 +29,10 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.nio.file.attribute.FileAttribute; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; @@ -53,8 +53,9 @@ import org.biojava.nbio.structure.io.LocalPDBDirectory; import org.biojava.nbio.structure.io.LocalPDBDirectory.FetchBehavior; import org.biojava.nbio.structure.io.LocalPDBDirectory.ObsoleteBehavior; -import org.biojava.nbio.structure.io.util.FileDownloadUtils; import org.biojava.nbio.structure.io.MMCIFFileReader; +import org.biojava.nbio.structure.io.mmcif.model.ChemComp; +import org.biojava.nbio.structure.io.util.FileDownloadUtils; import org.biojava.nbio.structure.scop.ScopDatabase; import org.biojava.nbio.structure.scop.ScopFactory; import org.junit.After; @@ -364,17 +365,34 @@ public void testEmptyChemComp() throws IOException, StructureException { cache.setUseMmCif(true); // Create an empty chemcomp - Path sub = tmpCache.resolve(Paths.get("chemcomp", "ALA.cif.gz")); - Files.createDirectories(sub.getParent()); - Files.createFile(sub); - assertTrue(Files.exists(sub)); - assertEquals(0, Files.size(sub)); + Path chemCompCif = tmpCache.resolve(Paths.get("chemcomp", "ATP.cif.gz")); + Files.createDirectories(chemCompCif.getParent()); + Files.createFile(chemCompCif); + assertTrue(Files.exists(chemCompCif)); + assertEquals(0, Files.size(chemCompCif)); + + // Copy stub file into place + Path testCif = tmpCache.resolve(Paths.get("data", "structures", "divided", "mmCIF", "ab","1abc.cif.gz")); + Files.createDirectories(testCif.getParent()); + URL resource = AtomCacheTest.class.getResource("/atp.cif.gz"); + File src = new File(resource.getPath()); + FileDownloadUtils.copy(src, testCif.toFile()); - Structure s = cache.getStructure("1A4W"); + // Load structure + Structure s = cache.getStructure("1ABC"); assertNotNull(s); + + Group g = s.getChainByPDB("A").getAtomGroup(0); + assertTrue(g.getPDBName().equals("ATP")); + + // should be unknown + ChemComp chem = g.getChemComp(); + assertNotNull(chem); + assertTrue(chem.getAtoms().size() > 0); + assertEquals("NON-POLYMER", chem.getType()); } finally { - FileDownloadUtils.deleteDirectory(tmpCache); +// FileDownloadUtils.deleteDirectory(tmpCache); } } @@ -387,6 +405,7 @@ public void testEmptyChemComp() throws IOException, StructureException { */ @Test public void testEmptyGZChemComp() throws IOException, StructureException { + Path tmpCache = Paths.get(System.getProperty("java.io.tmpdir"),"BIOJAVA_TEST_CACHE"); logger.info("Testing AtomCache at {}", tmpCache.toString()); System.setProperty(UserConfiguration.PDB_DIR, tmpCache.toString()); @@ -400,7 +419,7 @@ public void testEmptyGZChemComp() throws IOException, StructureException { cache.setUseMmCif(true); // Create an empty chemcomp - Path sub = tmpCache.resolve(Paths.get("chemcomp", "ALA.cif.gz")); + Path sub = tmpCache.resolve(Paths.get("chemcomp", "ATP.cif.gz")); Files.createDirectories(sub.getParent()); try(GZIPOutputStream out = new GZIPOutputStream(new FileOutputStream(sub.toFile()))) { // don't write anything @@ -409,12 +428,28 @@ public void testEmptyGZChemComp() throws IOException, StructureException { assertTrue(Files.exists(sub)); assertTrue(0 < Files.size(sub)); - Structure s = cache.getStructure("1A4W"); + // Copy stub file into place + Path testCif = tmpCache.resolve(Paths.get("data", "structures", "divided", "mmCIF", "ab","1abc.cif.gz")); + Files.createDirectories(testCif.getParent()); + URL resource = AtomCacheTest.class.getResource("/atp.cif.gz"); + File src = new File(resource.getPath()); + FileDownloadUtils.copy(src, testCif.toFile()); + + // Load structure + Structure s = cache.getStructure("1ABC"); assertNotNull(s); + Group g = s.getChainByPDB("A").getAtomGroup(0); + assertTrue(g.getPDBName().equals("ATP")); + + // should be unknown + ChemComp chem = g.getChemComp(); + assertNotNull(chem); + assertTrue(chem.getAtoms().size() > 0); + assertEquals("NON-POLYMER", chem.getType()); } finally { - FileDownloadUtils.deleteDirectory(tmpCache); +// FileDownloadUtils.deleteDirectory(tmpCache); } } diff --git a/biojava-structure/src/test/resources/atp.cif.gz b/biojava-structure/src/test/resources/atp.cif.gz new file mode 100644 index 0000000000000000000000000000000000000000..b1db8226e6eca6ae8c1f17d996eba0dcd655aa04 GIT binary patch literal 842 zcmV-Q1GW4giwFoR?iX7C17UPrb0`g!kigKJ=?kkGG4P_3hJqIgI`3 zvU{Djx69k%b=>CX`}eOK+qt;@Hti1OKkz4t8(hL>z7)4k1 zVmQy=OV$1-eaEu5{r^MhH;tBp}^2bh8YODp)6!&rl!oA)EW+a(4x$QP>02=tO7#_WCRxZ6cdbE zp7B4EGOCs7y9D!KZ7S1(F^xXshEB@Luv8|cGCOXKAf83z8!!@OSW{+W_RM+`80nTW zj=(qd4Qts%X<82^wFU<8zNO4cnYgTE6Ag+olx%ij{C%JOigz@ZX^%yk1E)#C=~Lo~ zr8Q5$XV{lYo}%MpL=7h9qY!BX{P2`h%Tp7=7whPTmwcq z>SfRPN|D|$pgSj?=~mR)0)dr1<5Y^WH2q1gl*qxV!*T{ptA$wdhM^{QOg0u*eWw8z}H|BaZUh9x$#4u?qNY%qXM%K zLdZj?(*h&cxCJN*tg9*0Jf*}ln@*EhX~Q# Date: Mon, 4 Sep 2017 11:04:33 +0200 Subject: [PATCH 05/11] Re-download empty structure or chemical component structures Files less than 40 bytes are deleted to allow for gzip headers. This addresses #703 --- .../biojava/nbio/structure/io/LocalPDBDirectory.java | 11 +++++++++++ .../structure/io/mmcif/DownloadChemCompProvider.java | 7 +++++++ 2 files changed, 18 insertions(+) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java index a85c3e701a..761b3849e1 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java @@ -127,6 +127,9 @@ public static enum FetchBehavior { protected static final String lineSplit = System.getProperty("file.separator"); + /** Minimum size for a valid file, in bytes */ + public static final long MIN_PDB_FILE_SIZE = 40; + private File path; private List extensions; @@ -687,6 +690,14 @@ public File getLocalFile(String pdbId) { for(String ex : getExtensions() ){ File f = new File(searchdir,prefix + pdbId.toLowerCase() + ex) ; if ( f.exists()) { + // delete files that are too short to have contents + if( f.length() < MIN_PDB_FILE_SIZE ) { + boolean success = f.delete(); + if( ! success) { + return null; + } + assert(!f.exists()); + } return f; } } diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java index a1484848db..26fc62a745 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java @@ -42,6 +42,7 @@ import org.biojava.nbio.core.util.InputStreamProvider; import org.biojava.nbio.structure.align.util.HTTPConnectionTools; import org.biojava.nbio.structure.align.util.UserConfiguration; +import org.biojava.nbio.structure.io.LocalPDBDirectory; import org.biojava.nbio.structure.io.mmcif.model.ChemComp; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -344,6 +345,12 @@ private static boolean fileExists(String recordName){ File f = new File(fileName); + // delete files that are too short to have contents + if( f.length() < LocalPDBDirectory.MIN_PDB_FILE_SIZE ) { + f.delete(); + return false; + } + return f.exists(); } From 62fb2a4739f46c66901d7ec40a5b608970a9cdf4 Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Fri, 8 Jun 2018 15:22:21 +0200 Subject: [PATCH 06/11] Set and restore the ChemCompGroupFactory singleton This is necessary when changing the cache path. --- .../nbio/structure/io/LocalPDBDirectory.java | 4 ++-- .../io/mmcif/ChemCompGroupFactory.java | 12 +++++++--- .../structure/align/util/AtomCacheTest.java | 22 +++++++++++++++++-- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java index 761b3849e1..0f1079de1c 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java @@ -127,8 +127,8 @@ public static enum FetchBehavior { protected static final String lineSplit = System.getProperty("file.separator"); - /** Minimum size for a valid file, in bytes */ - public static final long MIN_PDB_FILE_SIZE = 40; + /** Minimum size for a valid structure file (CIF or PDB), in bytes */ + public static final long MIN_PDB_FILE_SIZE = 40; // Empty gzip files are 20bytes. Add a few more for buffer. private File path; private List extensions; diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/ChemCompGroupFactory.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/ChemCompGroupFactory.java index dd4585fb33..0651a69b4a 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/ChemCompGroupFactory.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/ChemCompGroupFactory.java @@ -68,9 +68,8 @@ public static ChemComp getChemComp(String recordName){ * again. Note that this change can have unexpected behavior of * code executed afterwards. *

- * Changing the provider does not reset the cache, so Chemical - * Component definitions already downloaded from previous providers - * will be used. To reset the cache see {@link #getCache()). + * Changing the provider also resets the cache, so any groups + * previously accessed will be re-downloaded and reread. * * @param provider */ @@ -84,6 +83,13 @@ public static void setChemCompProvider(ChemCompProvider provider) { public static ChemCompProvider getChemCompProvider(){ return chemCompProvider; } + + /** + * Force the cache to be reset. + */ + public static void clearCache() { + cache.clear(); + } public static Group getGroupFromChemCompDictionary(String recordName) { diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java index d37ecf9124..2db5eea69a 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java @@ -54,6 +54,9 @@ import org.biojava.nbio.structure.io.LocalPDBDirectory.FetchBehavior; import org.biojava.nbio.structure.io.LocalPDBDirectory.ObsoleteBehavior; import org.biojava.nbio.structure.io.MMCIFFileReader; +import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory; +import org.biojava.nbio.structure.io.mmcif.ChemCompProvider; +import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider; import org.biojava.nbio.structure.io.mmcif.model.ChemComp; import org.biojava.nbio.structure.io.util.FileDownloadUtils; import org.biojava.nbio.structure.scop.ScopDatabase; @@ -77,6 +80,7 @@ public class AtomCacheTest { private String previousPDB_DIR; private String previousPDB_CACHE_DIR; private AtomCache cleanCache = new AtomCache(); + private ChemCompProvider previousChemCompProvider = ChemCompGroupFactory.getChemCompProvider(); @Before public void setUp() { @@ -85,8 +89,10 @@ public void setUp() { cache = new AtomCache(); cache.setObsoleteBehavior(ObsoleteBehavior.FETCH_OBSOLETE); StructureIO.setAtomCache(cache); + // Use a fixed SCOP version for stability ScopFactory.setScopDatabase(ScopFactory.VERSION_1_75B); + logger.warn("setUp()"); } @After @@ -96,6 +102,7 @@ public void tearDown() { System.setProperty(UserConfiguration.PDB_CACHE_DIR, previousPDB_CACHE_DIR); } StructureIO.setAtomCache(cleanCache); + ChemCompGroupFactory.setChemCompProvider(previousChemCompProvider); } /** @@ -363,7 +370,8 @@ public void testEmptyChemComp() throws IOException, StructureException { cache.setPath(tmpCache.toString()); cache.setCachePath(tmpCache.toString()); cache.setUseMmCif(true); - + ChemCompGroupFactory.setChemCompProvider(new DownloadChemCompProvider(tmpCache.toString())); + // Create an empty chemcomp Path chemCompCif = tmpCache.resolve(Paths.get("chemcomp", "ATP.cif.gz")); Files.createDirectories(chemCompCif.getParent()); @@ -381,6 +389,10 @@ public void testEmptyChemComp() throws IOException, StructureException { // Load structure Structure s = cache.getStructure("1ABC"); + // Should have re-downloaded the file + assertTrue(Files.size(chemCompCif) > LocalPDBDirectory.MIN_PDB_FILE_SIZE); + + // Structure should have valid ChemComp now assertNotNull(s); Group g = s.getChainByPDB("A").getAtomGroup(0); @@ -417,6 +429,8 @@ public void testEmptyGZChemComp() throws IOException, StructureException { cache.setPath(tmpCache.toString()); cache.setCachePath(tmpCache.toString()); cache.setUseMmCif(true); + ChemCompGroupFactory.setChemCompProvider(new DownloadChemCompProvider(tmpCache.toString())); + // Create an empty chemcomp Path sub = tmpCache.resolve(Paths.get("chemcomp", "ATP.cif.gz")); @@ -426,7 +440,7 @@ public void testEmptyGZChemComp() throws IOException, StructureException { out.flush(); } assertTrue(Files.exists(sub)); - assertTrue(0 < Files.size(sub)); + assertTrue(0 < Files.size(sub) && Files.size(sub) < LocalPDBDirectory.MIN_PDB_FILE_SIZE); // Copy stub file into place Path testCif = tmpCache.resolve(Paths.get("data", "structures", "divided", "mmCIF", "ab","1abc.cif.gz")); @@ -438,6 +452,10 @@ public void testEmptyGZChemComp() throws IOException, StructureException { // Load structure Structure s = cache.getStructure("1ABC"); + // Should have re-downloaded the file + assertTrue(Files.size(sub) > LocalPDBDirectory.MIN_PDB_FILE_SIZE); + + // Structure should have valid ChemComp assertNotNull(s); Group g = s.getChainByPDB("A").getAtomGroup(0); From f5b325f76ee93e0714ffc13618da7699cfafe502 Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Tue, 12 Jun 2018 15:54:06 +0200 Subject: [PATCH 07/11] Incorporating suggestions from @josemduarte See comments on #774 --- .../nbio/structure/cath/CathInstallation.java | 2 +- .../nbio/structure/io/LocalPDBDirectory.java | 22 ++++++++++--------- .../io/mmcif/ChemCompGroupFactory.java | 6 +++-- .../io/mmcif/DownloadChemCompProvider.java | 3 +++ .../biojava/nbio/structure/TestAtomCache.java | 2 +- .../structure/align/util/AtomCacheTest.java | 4 ++-- 6 files changed, 23 insertions(+), 16 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathInstallation.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathInstallation.java index 7c21a88e62..70c55d69d6 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathInstallation.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/cath/CathInstallation.java @@ -636,7 +636,7 @@ private void parseCathDomall(BufferedReader bufferedReader) throws IOException{ protected void downloadFileFromRemote(URL remoteURL, File localFile) throws IOException{ // System.out.println("downloading " + remoteURL + " to: " + localFile); - LOGGER.info("Downloaded file {} to local file {}", remoteURL, localFile); + LOGGER.info("Downloading file {} to local file {}", remoteURL, localFile); long timeS = System.currentTimeMillis(); File tempFile = File.createTempFile(FileDownloadUtils.getFilePrefix(localFile), "."+ FileDownloadUtils.getFileExtension(localFile)); diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java index 0f1079de1c..bfa51746f0 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java @@ -35,6 +35,7 @@ import java.io.IOException; import java.io.InputStream; import java.net.URL; +import java.nio.file.Files; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.*; @@ -405,8 +406,9 @@ public void prefetchStructure(String pdbId) throws IOException { * Attempts to delete all versions of a structure from the local directory. * @param pdbId * @return True if one or more files were deleted + * @throws IOException if the file cannot be deleted */ - public boolean deleteStructure(String pdbId){ + public boolean deleteStructure(String pdbId) throws IOException{ boolean deleted = false; // Force getLocalFile to check in obsolete locations ObsoleteBehavior obsolete = getObsoleteBehavior(); @@ -663,8 +665,9 @@ protected File getDir(String pdbId, boolean obsolete) { * Searches for previously downloaded files * @param pdbId * @return A file pointing to the existing file, or null if not found + * @throws IOException If the file exists but is empty and can't be deleted */ - public File getLocalFile(String pdbId) { + public File getLocalFile(String pdbId) throws IOException { // Search for existing files @@ -692,11 +695,8 @@ public File getLocalFile(String pdbId) { if ( f.exists()) { // delete files that are too short to have contents if( f.length() < MIN_PDB_FILE_SIZE ) { - boolean success = f.delete(); - if( ! success) { - return null; - } - assert(!f.exists()); + Files.delete(f.toPath()); + return null; } return f; } @@ -708,9 +708,11 @@ public File getLocalFile(String pdbId) { } protected boolean checkFileExists(String pdbId){ - File path = getLocalFile(pdbId); - if ( path != null) - return true; + try { + File path = getLocalFile(pdbId); + if ( path != null) + return true; + } catch(IOException e) {} return false; } diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/ChemCompGroupFactory.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/ChemCompGroupFactory.java index 0651a69b4a..eee59a0a7c 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/ChemCompGroupFactory.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/ChemCompGroupFactory.java @@ -69,7 +69,7 @@ public static ChemComp getChemComp(String recordName){ * code executed afterwards. *

* Changing the provider also resets the cache, so any groups - * previously accessed will be re-downloaded and reread. + * previously accessed will be reread or re-downloaded. * * @param provider */ @@ -85,7 +85,9 @@ public static ChemCompProvider getChemCompProvider(){ } /** - * Force the cache to be reset. + * Force the in-memory cache to be reset. + * + * Note that the ChemCompProvider may have additional memory or disk caches that need to be cleared too. */ public static void clearCache() { cache.clear(); diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java index 26fc62a745..3558aae18a 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java @@ -306,6 +306,7 @@ public ChemComp getChemComp(String recordName) { fallback = new ReducedChemCompProvider(); } + logger.warn("Falling back to ReducedChemCompProvider for {}. This could indicate a network error.", recordName); return fallback.getChemComp(recordName); } @@ -347,6 +348,8 @@ private static boolean fileExists(String recordName){ // delete files that are too short to have contents if( f.length() < LocalPDBDirectory.MIN_PDB_FILE_SIZE ) { + // Delete defensively. + // Note that if delete is unsuccessful, we re-download the file anyways f.delete(); return false; } diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAtomCache.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAtomCache.java index 2c2e8fc431..199f0295d1 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAtomCache.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/TestAtomCache.java @@ -46,7 +46,7 @@ public class TestAtomCache { private AtomCache cache; @Before - public void setUp() { + public void setUp() throws IOException { cache = new AtomCache(); // Delete files which were cached in previous tests diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java index 2db5eea69a..7ba0074f91 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java @@ -404,7 +404,7 @@ public void testEmptyChemComp() throws IOException, StructureException { assertTrue(chem.getAtoms().size() > 0); assertEquals("NON-POLYMER", chem.getType()); } finally { -// FileDownloadUtils.deleteDirectory(tmpCache); + FileDownloadUtils.deleteDirectory(tmpCache); } } @@ -467,7 +467,7 @@ public void testEmptyGZChemComp() throws IOException, StructureException { assertTrue(chem.getAtoms().size() > 0); assertEquals("NON-POLYMER", chem.getType()); } finally { -// FileDownloadUtils.deleteDirectory(tmpCache); + FileDownloadUtils.deleteDirectory(tmpCache); } } From 13f267c322d05de3aaa04e8daaebc4e5940529ff Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Thu, 14 Jun 2018 12:04:15 +0200 Subject: [PATCH 08/11] Add GlobalsHelper class to manage our global state Use GlobalsHelper.pushState()/restoreState() before and after tests to ensure that state isn't carried between tests. This is applied to the AtomCacheTest to fix test regressions while simplifying the code. --- .../biojava/nbio/structure/StructureIO.java | 5 + .../io/mmcif/DownloadChemCompProvider.java | 30 ++--- .../structure/align/util/AtomCacheTest.java | 18 +-- .../structure/test/util/GlobalsHelper.java | 107 ++++++++++++++++++ 4 files changed, 132 insertions(+), 28 deletions(-) create mode 100644 biojava-structure/src/test/java/org/biojava/nbio/structure/test/util/GlobalsHelper.java diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureIO.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureIO.java index 31fe6adb25..a0757cf8f4 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureIO.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/StructureIO.java @@ -119,6 +119,11 @@ private static void checkInitAtomCache() { public static void setAtomCache(AtomCache c){ cache = c; } + + public static AtomCache getAtomCache() { + checkInitAtomCache(); + return cache; + } /** * Returns the first biologicalAssembly that is available for a protein structure. For more documentation on quaternary structures see: diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java index 3558aae18a..42e7692782 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java @@ -95,25 +95,28 @@ public class DownloadChemCompProvider implements ChemCompProvider { boolean downloadAll = false; public DownloadChemCompProvider(){ - logger.debug("Initialising DownloadChemCompProvider"); - - // note that path is static, so this is just to make sure that all non-static methods will have path initialised - initPath(); + this(null); } public DownloadChemCompProvider(String cacheFilePath){ logger.debug("Initialising DownloadChemCompProvider"); // note that path is static, so this is just to make sure that all non-static methods will have path initialised - path = new File(cacheFilePath); + if(cacheFilePath != null) { + path = new File(cacheFilePath); + } } - private static void initPath(){ - + /** + * Get this provider's cache path + * @return + */ + public static File getPath(){ if (path==null) { UserConfiguration config = new UserConfiguration(); path = new File(config.getCacheFilePath()); } + return path; } /** @@ -130,7 +133,7 @@ public void checkDoFirstInstall(){ // this makes sure there is a file separator between every component, // if path has a trailing file separator or not, it will work for both cases - File dir = new File(path, CHEM_COMP_CACHE_DIRECTORY); + File dir = new File(getPath(), CHEM_COMP_CACHE_DIRECTORY); File f = new File(dir, "components.cif.gz"); if ( ! f.exists()) { @@ -164,7 +167,7 @@ private void split() throws IOException { logger.info("Installing individual chem comp files ..."); - File dir = new File(path, CHEM_COMP_CACHE_DIRECTORY); + File dir = new File(getPath(), CHEM_COMP_CACHE_DIRECTORY); File f = new File(dir, "components.cif.gz"); @@ -215,7 +218,7 @@ private void split() throws IOException { */ private void writeID(String contents, String currentID) throws IOException{ - String localName = DownloadChemCompProvider.getLocalFileName(currentID); + String localName = getLocalFileName(currentID); try ( PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(localName))) ) { @@ -322,16 +325,15 @@ public static String getLocalFileName(String recordName){ recordName = "_" + recordName; } - initPath(); - - File f = new File(path, CHEM_COMP_CACHE_DIRECTORY); + File f = new File(getPath(), CHEM_COMP_CACHE_DIRECTORY); if (! f.exists()){ logger.info("Creating directory " + f); boolean success = f.mkdir(); // we've checked in initPath that path is writable, so there's no need to check if it succeeds // in the unlikely case that in the meantime it isn't writable at least we log an error - if (!success) logger.error("Directory {} could not be created",f); + if (!success) + logger.error("Directory {} could not be created",f); } diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java index 7ba0074f91..0cecbb1574 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/align/util/AtomCacheTest.java @@ -55,12 +55,12 @@ import org.biojava.nbio.structure.io.LocalPDBDirectory.ObsoleteBehavior; import org.biojava.nbio.structure.io.MMCIFFileReader; import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory; -import org.biojava.nbio.structure.io.mmcif.ChemCompProvider; import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider; import org.biojava.nbio.structure.io.mmcif.model.ChemComp; import org.biojava.nbio.structure.io.util.FileDownloadUtils; import org.biojava.nbio.structure.scop.ScopDatabase; import org.biojava.nbio.structure.scop.ScopFactory; +import org.biojava.nbio.structure.test.util.GlobalsHelper; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -77,32 +77,22 @@ public class AtomCacheTest { private static Logger logger = LoggerFactory.getLogger(AtomCacheTest.class); private AtomCache cache; - private String previousPDB_DIR; - private String previousPDB_CACHE_DIR; - private AtomCache cleanCache = new AtomCache(); - private ChemCompProvider previousChemCompProvider = ChemCompGroupFactory.getChemCompProvider(); @Before public void setUp() { - previousPDB_DIR = System.getProperty(UserConfiguration.PDB_DIR, null); - previousPDB_CACHE_DIR = System.getProperty(UserConfiguration.PDB_CACHE_DIR, null); + GlobalsHelper.pushState(); + cache = new AtomCache(); cache.setObsoleteBehavior(ObsoleteBehavior.FETCH_OBSOLETE); StructureIO.setAtomCache(cache); // Use a fixed SCOP version for stability ScopFactory.setScopDatabase(ScopFactory.VERSION_1_75B); - logger.warn("setUp()"); } @After public void tearDown() { - if (previousPDB_DIR != null) { - System.setProperty(UserConfiguration.PDB_DIR, previousPDB_DIR); - System.setProperty(UserConfiguration.PDB_CACHE_DIR, previousPDB_CACHE_DIR); - } - StructureIO.setAtomCache(cleanCache); - ChemCompGroupFactory.setChemCompProvider(previousChemCompProvider); + GlobalsHelper.restoreState(); } /** diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/test/util/GlobalsHelper.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/test/util/GlobalsHelper.java new file mode 100644 index 0000000000..ecd130e688 --- /dev/null +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/test/util/GlobalsHelper.java @@ -0,0 +1,107 @@ +package org.biojava.nbio.structure.test.util; + +import java.util.Deque; +import java.util.LinkedList; +import java.util.List; +import java.util.NoSuchElementException; + +import org.biojava.nbio.structure.StructureIO; +import org.biojava.nbio.structure.align.util.AtomCache; +import org.biojava.nbio.structure.align.util.UserConfiguration; +import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory; +import org.biojava.nbio.structure.io.mmcif.ChemCompProvider; +import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider; +import org.biojava.nbio.structure.scop.ScopDatabase; +import org.biojava.nbio.structure.scop.ScopFactory; + +/** + * Helper class to manage all the global state changes in BioJava. + * For instance, this should be used in tests before modifying PDB_PATH. + * + * Used by tests during setup and teardown to ensure a clean environment + * + * This class is a singleton. + * @author Spencer Bliven + * + */ +public final class GlobalsHelper { + + private static class PathInfo { + public final String pdbPath; + public final String pdbCachePath; + public final AtomCache atomCache; + public final ChemCompProvider chemCompProvider; + public final String downloadChemCompProviderPath; + public final ScopDatabase scop; + + public PathInfo() { + pdbPath = System.getProperty(UserConfiguration.PDB_DIR, null); + pdbCachePath = System.getProperty(UserConfiguration.PDB_CACHE_DIR, null); + atomCache = StructureIO.getAtomCache(); + chemCompProvider = ChemCompGroupFactory.getChemCompProvider(); + downloadChemCompProviderPath = DownloadChemCompProvider.getPath().getPath(); + scop = ScopFactory.getSCOP(); + } + } + + // Saves defaults as stack + private static Deque stack = new LinkedList<>(); + static { + // Save default state + pushState(); + } + + /** + * GlobalsHelper should not be instantiated. + */ + private GlobalsHelper() {} + + /** + * Save current global state to the stack + */ + public static void pushState() { + PathInfo paths = new PathInfo(); + stack.addFirst(paths); + } + + /** + * Sets a new PDB_PATH and PDB_CACHE_PATH consistently. + * + * Previous values can be restored with {@link #restoreState()}. + * @param path + */ + public static void setPdbPath(String path) { + pushState(); + System.setProperty(UserConfiguration.PDB_DIR, path); + System.setProperty(UserConfiguration.PDB_CACHE_DIR, path); + + AtomCache cache = new AtomCache(path); + StructureIO.setAtomCache(cache); + + // Note side effect setting the path for all DownloadChemCompProvider due to static state + ChemCompProvider provider = new DownloadChemCompProvider(path); + ChemCompGroupFactory.setChemCompProvider(provider); + } + + /** + * Restore global state to the previous settings + * @throws NoSuchElementException if there is no prior state to restore + */ + public static void restoreState() { + PathInfo paths = stack.removeFirst(); + + System.setProperty(UserConfiguration.PDB_DIR, paths.pdbPath); + System.setProperty(UserConfiguration.PDB_CACHE_DIR, paths.pdbCachePath); + + StructureIO.setAtomCache(paths.atomCache); + + // Use side effect setting the path for all DownloadChemCompProvider due to static state + new DownloadChemCompProvider(paths.downloadChemCompProviderPath); + + ChemCompGroupFactory.setChemCompProvider(paths.chemCompProvider); + + ScopFactory.setScopDatabase(paths.scop); + } + + +} From c1c3c30516ea89d66fc71a0c7464b1c57eec58c1 Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Thu, 14 Jun 2018 15:42:02 +0200 Subject: [PATCH 09/11] Fix NullPointer when restoring global state Maven runs tests with a clean environment, so we can't restore PDB_DIR --- .../nbio/structure/io/LocalPDBDirectory.java | 4 ++-- .../structure/test/util/GlobalsHelper.java | 23 ++++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java index bfa51746f0..b01252b380 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java @@ -426,7 +426,7 @@ public boolean deleteStructure(String pdbId) throws IOException{ // delete file boolean success = existing.delete(); if(success) { - logger.info("Deleting "+existing.getAbsolutePath()); + logger.debug("Deleting "+existing.getAbsolutePath()); } deleted = deleted || success; @@ -435,7 +435,7 @@ public boolean deleteStructure(String pdbId) throws IOException{ if(parent != null) { success = parent.delete(); if(success) { - logger.info("Deleting "+parent.getAbsolutePath()); + logger.debug("Deleting "+parent.getAbsolutePath()); } } diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/test/util/GlobalsHelper.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/test/util/GlobalsHelper.java index ecd130e688..c756d5fd8a 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/test/util/GlobalsHelper.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/test/util/GlobalsHelper.java @@ -70,8 +70,17 @@ public static void pushState() { * Previous values can be restored with {@link #restoreState()}. * @param path */ - public static void setPdbPath(String path) { + public static void setPdbPath(String path, String cachePath) { pushState(); + if(path == null || cachePath == null) { + UserConfiguration config = new UserConfiguration(); + if(path == null) { + path = config.getPdbFilePath(); + } + if(cachePath == null) { + cachePath = config.getCacheFilePath(); + } + } System.setProperty(UserConfiguration.PDB_DIR, path); System.setProperty(UserConfiguration.PDB_CACHE_DIR, path); @@ -90,8 +99,16 @@ public static void setPdbPath(String path) { public static void restoreState() { PathInfo paths = stack.removeFirst(); - System.setProperty(UserConfiguration.PDB_DIR, paths.pdbPath); - System.setProperty(UserConfiguration.PDB_CACHE_DIR, paths.pdbCachePath); + if(paths.pdbPath == null) { + System.clearProperty(UserConfiguration.PDB_DIR); + } else { + System.setProperty(UserConfiguration.PDB_DIR, paths.pdbPath); + } + if(paths.pdbCachePath == null) { + System.clearProperty(UserConfiguration.PDB_CACHE_DIR); + } else { + System.setProperty(UserConfiguration.PDB_CACHE_DIR, paths.pdbCachePath); + } StructureIO.setAtomCache(paths.atomCache); From 0c10a2ef58553bcf7d703caed340d7634a28ac3d Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Fri, 15 Jun 2018 13:17:42 +0200 Subject: [PATCH 10/11] Fix test failure due to PDB change 4LNC was updated to remove the X-Ray experimental method. This switches the test to 6F2Q, which uses both Neutron & Xray. --- .../biojava/nbio/structure/TestExperimentalTechniques.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/biojava-structure/src/test/java/org/biojava/nbio/structure/TestExperimentalTechniques.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/TestExperimentalTechniques.java index f9915e47db..25436297a6 100644 --- a/biojava-structure/src/test/java/org/biojava/nbio/structure/TestExperimentalTechniques.java +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/TestExperimentalTechniques.java @@ -31,7 +31,7 @@ public class TestExperimentalTechniques { @Test - public void test4LNC() throws IOException, StructureException { + public void test6F2Q() throws IOException, StructureException { // a multiple experimental techniques PDB entry (X-RAY + NEUTRON DIFFRACTION) @@ -40,9 +40,9 @@ public void test4LNC() throws IOException, StructureException { StructureIO.setAtomCache(cache); cache.setUseMmCif(false); - Structure sPdb = StructureIO.getStructure("4LNC"); + Structure sPdb = StructureIO.getStructure("6F2Q"); cache.setUseMmCif(true); - Structure sCif = StructureIO.getStructure("4LNC"); + Structure sCif = StructureIO.getStructure("6F2Q"); comparePdbToCif(sPdb, sCif); From 08ab3e11827b74158fb82fbcf26964cea34d9594 Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Mon, 18 Jun 2018 11:44:58 +0200 Subject: [PATCH 11/11] Revert fix for mmcif files without auth_seq_id column Parsing such a file again throws a NumberFormatException. Further work/discussion of this issue is on #775, but it was blocking the merging of #774. --- .../structure/io/mmcif/SimpleMMcifConsumer.java | 8 +------- biojava-structure/src/test/resources/atp.cif.gz | Bin 842 -> 841 bytes 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java index 6bdedba4b7..1b34f1238b 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifConsumer.java @@ -328,13 +328,7 @@ public void newAtomSite(AtomSite atom) { String recordName = atom.getGroup_PDB(); String residueNumberS = atom.getAuth_seq_id(); - Integer residueNrInt; - if(residueNumberS != null) { - residueNrInt = Integer.parseInt(residueNumberS); - } else { - String label_seq_id = atom.getLabel_seq_id(); - residueNrInt = Integer.parseInt(label_seq_id); - } + Integer residueNrInt = Integer.parseInt(residueNumberS); // the 3-letter name of the group: String groupCode3 = atom.getLabel_comp_id(); diff --git a/biojava-structure/src/test/resources/atp.cif.gz b/biojava-structure/src/test/resources/atp.cif.gz index b1db8226e6eca6ae8c1f17d996eba0dcd655aa04..7167313a22581136566eeebe028bb3fd99a11e54 100644 GIT binary patch literal 841 zcmV-P1GfAhiwFq(eJ5K017UP-lblR<`FBezK%ctqI8@j`3 z^D?e4m)HHvurALZ-#>4xr|SCaxY^bJz@MmYa0#pFRNXoZ@4u|?mM=eZIo)b|;BPI5 z%gu*=KW)38*Gs4CrGGD%#@E;5uHU|Xc#pqv7`t)b4e!U*KUX6D9`~D`>;2L1-#)15 zkJFD@wX)u*Xv5X4yX|o^`}6Db{P~+grPM+#=1PH@KQGi=q2|Ru6vhBHL`7)9CH|!V z9#J7^&HwQbf>Juauke2u71(_R=D}IbFajGr!xB(RO_(XL#|kWhc7|aHE?7kd!6ws$ zS>AY15AM@7EP=6(VTNJI$~+*pVU9PR)Kd)>0m+MJlzf3D+l2X|%wEHi1|ljj%m?7O z?9haTqO1brGMz(#p-&7m5OrHwEXqt(nKijJ9Qde3nTfFpOGQ}?h7idJEb%F37~SxU z|C*IiwM-u}%!9SLObf=e`;2=!E33eAnUu>6+!|3lOUAchB+9U=%%)FoO3tlvybg=e2C2K~aX1%>c%K?6c4K3(OVR{WZ*i(np|nPIYS^=9m`FTJzCF*_T4{}nXAE;*JTvDqZP0C4 zDzJ)Yz63mvAj)NOxzrv(qzkO#8NZ)U4nQ($2q?Cv2BGwssd^TZE7_E(q1ijvhEWN6 z#WTK8q%{oa;KVb1Bk1gcz$%_`CPi7E{UjGk;#f6dB>|?|XTcb`of7w#^cl9}M6QhO z`t4-PJc}G7A_OuwPScm$sd{gE?a#we%K@Zxqs)>3Uxi7)IRPZ)rmHwz!J@QA4Q3(6 zSVmCL0wdSH11JeBRF!ESP~w@*r%9}|?U^ZeK&--2OyzdU2f>pTb=xyj5|FB=iRZEH TofBow{08rb0`g!kigKJ=?kkGG4P_3hJqIgI`3 zvU{Djx69k%b=>CX`}eOK+qt;@Hti1OKkz4t8(hL>z7)4k1 zVmQy=OV$1-eaEu5{r^MhH;tBp}^2bh8YODp)6!&rl!oA)EW+a(4x$QP>02=tO7#_WCRxZ6cdbE zp7B4EGOCs7y9D!KZ7S1(F^xXshEB@Luv8|cGCOXKAf83z8!!@OSW{+W_RM+`80nTW zj=(qd4Qts%X<82^wFU<8zNO4cnYgTE6Ag+olx%ij{C%JOigz@ZX^%yk1E)#C=~Lo~ zr8Q5$XV{lYo}%MpL=7h9qY!BX{P2`h%Tp7=7whPTmwcq z>SfRPN|D|$pgSj?=~mR)0)dr1<5Y^WH2q1gl*qxV!*T{ptA$wdhM^{QOg0u*eWw8z}H|BaZUh9x$#4u?qNY%qXM%K zLdZj?(*h&cxCJN*tg9*0Jf*}ln@*EhX~Q#