Skip to content

Commit 3123589

Browse files
committed
Solving biojava#704 and adding some tests for biojava#703
1 parent 67ca7f2 commit 3123589

File tree

3 files changed

+118
-22
lines changed

3 files changed

+118
-22
lines changed

biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/DownloadChemCompProvider.java

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import java.io.InputStreamReader;
3030
import java.io.PrintWriter;
3131
import java.io.StringWriter;
32-
import java.net.HttpURLConnection;
3332
import java.net.URL;
3433
import java.net.URLConnection;
3534
import java.nio.file.Files;
@@ -49,14 +48,15 @@
4948

5049

5150

52-
/** This provider of chemical components can download and cache chemical component definition files from the RCSB PDB web site.
53-
* It is the default way to access these definitions.
54-
* If this provider is called he first time, it will download and install all chemical
55-
* component definitions in a local directory.
56-
* Once the definition files have been installed, it has quick startup time and low memory requirements.
51+
/**
52+
* This provider of chemical components can download and cache chemical component definition files from the RCSB PDB web site.
53+
* It is the default way to access these definitions.
54+
* If this provider is called he first time, it will download and install all chemical
55+
* component definitions in a local directory.
56+
* Once the definition files have been installed, it has quick startup time and low memory requirements.
5757
*
58-
* An alternative provider, that keeps all definitions in memory is the {@link AllChemCompProvider}. Another provider, that
59-
* does not require any network access, but only can support a limited set of chemical component definitions, is the {@link ReducedChemCompProvider}.
58+
* An alternative provider, that keeps all definitions in memory is the {@link AllChemCompProvider}. Another provider, that
59+
* does not require any network access, but only can support a limited set of chemical component definitions, is the {@link ReducedChemCompProvider}.
6060
*
6161
*
6262
* @author Andreas Prlic
@@ -310,7 +310,8 @@ public ChemComp getChemComp(String recordName) {
310310

311311
}
312312

313-
/** Returns the file name that contains the definition for this {@link ChemComp}
313+
/**
314+
* Returns the file name that contains the definition for this {@link ChemComp}
314315
*
315316
* @param recordName the ID of the {@link ChemComp}
316317
* @return full path to the file
@@ -359,6 +360,7 @@ private static boolean downloadChemCompRecord(String recordName) {
359360
File newFile;
360361
try{
361362
newFile = File.createTempFile("chemcomp"+recordName, "cif");
363+
logger.debug("Will write chem comp file to temp file {}", newFile.toString());
362364
}
363365
catch(IOException e){
364366
logger.error("Could not write to temp directory {} to create the chemical component download temp file", System.getProperty("java.io.tmpdir"));
@@ -379,7 +381,6 @@ private static boolean downloadChemCompRecord(String recordName) {
379381

380382
try {
381383
url = new URL(u);
382-
383384
URLConnection uconn = URLConnectionTools.openURLConnection(url);
384385

385386
try( PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(newFile)));

biojava-structure/src/main/java/org/biojava/nbio/structure/io/mmcif/SimpleMMcifParser.java

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -211,24 +211,28 @@ public void parse(BufferedReader buf)
211211
Set<String> loopWarnings = new HashSet<String>(); // used only to reduce logging statements
212212

213213
String category = null;
214-
215-
216-
// the first line is a data_PDBCODE line, test if this looks like a mmcif file
217-
line = buf.readLine();
218-
if (line == null || !line.startsWith(MMCIF_TOP_HEADER)){
219-
logger.error("This does not look like a valid mmCIF file! The first line should start with 'data_', but is: '" + line+"'");
220-
triggerDocumentEnd();
221-
return;
222-
}
214+
215+
boolean foundHeader = false;
223216

224217
while ( (line = buf.readLine ()) != null ){
225218

226219
if (line.isEmpty() || line.startsWith(COMMENT_CHAR)) continue;
227220

221+
if (!foundHeader) {
222+
// the first non-comment line is a data_PDBCODE line, test if this looks like a mmcif file
223+
if (line.startsWith(MMCIF_TOP_HEADER)){
224+
foundHeader = true;
225+
continue;
226+
} else {
227+
triggerDocumentEnd();
228+
throw new IOException("This does not look like a valid mmCIF file! The first line should start with 'data_', but is: '" + line+"'");
229+
}
230+
}
231+
228232
logger.debug(inLoop + " " + line);
229233

230234
if (line.startsWith(MMCIF_TOP_HEADER)){
231-
// either first line in file, or beginning of new section
235+
// either first line in file, or beginning of new section (data block in CIF parlance)
232236
if ( inLoop) {
233237
//System.out.println("new data and in loop: " + line);
234238
inLoop = false;

biojava-structure/src/test/java/org/biojava/nbio/structure/TestDownloadChemCompProvider.java

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,20 @@
2020
*/
2121
package org.biojava.nbio.structure;
2222

23-
import junit.framework.TestCase;
2423
import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider;
2524
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
25+
import org.junit.Test;
26+
import static org.junit.Assert.*;
2627

27-
public class TestDownloadChemCompProvider extends TestCase{
28+
import java.io.File;
29+
import java.io.FileOutputStream;
30+
import java.io.IOException;
31+
import java.io.PrintWriter;
32+
import java.util.zip.GZIPOutputStream;
2833

34+
public class TestDownloadChemCompProvider {
35+
36+
@Test
2937
public void testProtectedIDs(){
3038

3139
String id = "CON";
@@ -36,5 +44,88 @@ public void testProtectedIDs(){
3644

3745
assertEquals(cc.getId(), id);
3846
}
47+
48+
@Test
49+
public void testRedirectWorks() {
50+
// since August 2017, RCSB is redirecting:
51+
// http://rcsb.org/pdb/files/ligand/HEM.cif ----> http://files.org/ligands/HEM.cif
52+
// see #703
53+
54+
File file = new File(DownloadChemCompProvider.getLocalFileName("HEM"));
55+
file.delete();
56+
57+
DownloadChemCompProvider prov = new DownloadChemCompProvider();
58+
59+
DownloadChemCompProvider.serverBaseUrl = "http://www.rcsb.org/pdb/files/ligand/";
60+
61+
ChemComp cc = prov.getChemComp("HEM");
62+
63+
//System.out.println(file.toString());
64+
65+
assertTrue(file.exists());
66+
67+
// just in case the we did get garbage, let's clean up
68+
file.delete();
69+
70+
assertNotNull(cc);
71+
72+
assertNotNull(cc.getName());
73+
74+
// reset to default URL or otherwise we could affect other tests
75+
DownloadChemCompProvider.serverBaseUrl = DownloadChemCompProvider.DEFAULT_SERVER_URL;
76+
}
77+
78+
@Test
79+
public void testWeDontCacheGarbage() {
80+
// see #703
81+
82+
File file = new File(DownloadChemCompProvider.getLocalFileName("HEM"));
83+
84+
file.delete();
85+
86+
DownloadChemCompProvider prov = new DownloadChemCompProvider();
87+
88+
// a fake URL that should give a 404
89+
DownloadChemCompProvider.serverBaseUrl = "http://www.rcsb.org/non-existent-ligand-url/";
90+
91+
ChemComp cc = prov.getChemComp("HEM");
92+
93+
// we got a 404 back from server so we shouldn't have cached a file
94+
assertTrue(!file.exists());
95+
96+
file.delete();
97+
98+
// we couldn't parse, thus there should be no content
99+
assertNull(cc.getName());
100+
101+
// reset to default URL or otherwise we could affect other tests
102+
DownloadChemCompProvider.serverBaseUrl = DownloadChemCompProvider.DEFAULT_SERVER_URL;
103+
104+
105+
}
106+
107+
@Test
108+
public void testIfWeCachedGarbageWeCanDetectIt() throws IOException {
109+
// see #703
110+
111+
File file = new File(DownloadChemCompProvider.getLocalFileName("HEM"));
112+
113+
PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(file)));
114+
pw.println("A lot of garbage");
115+
pw.close();
116+
117+
DownloadChemCompProvider prov = new DownloadChemCompProvider();
118+
119+
ChemComp cc = prov.getChemComp("HEM");
120+
121+
assertTrue(file.exists());
122+
123+
file.delete();
124+
125+
assertNull(cc.getName());
126+
127+
128+
129+
}
39130

40131
}

0 commit comments

Comments
 (0)