Skip to content

Commit b8e69a4

Browse files
committed
Two utility methods to validate the downloaded file
Currently, we validate the file size only. We could validate the content using any hashing function later.
1 parent 8267ca5 commit b8e69a4

File tree

6 files changed

+88
-5
lines changed

6 files changed

+88
-5
lines changed

biojava-core/src/main/java/org/biojava/nbio/core/util/FileDownloadUtils.java

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@
2323

2424
import java.io.File;
2525
import java.io.FileInputStream;
26+
import java.io.FileNotFoundException;
2627
import java.io.FileOutputStream;
2728
import java.io.IOException;
2829
import java.io.InputStream;
30+
import java.io.PrintStream;
2931
import java.net.HttpURLConnection;
3032
import java.net.SocketTimeoutException;
3133
import java.net.URL;
@@ -39,6 +41,7 @@
3941
import java.nio.file.Paths;
4042
import java.nio.file.SimpleFileVisitor;
4143
import java.nio.file.attribute.BasicFileAttributes;
44+
import java.util.Scanner;
4245

4346
import org.slf4j.Logger;
4447
import org.slf4j.LoggerFactory;
@@ -161,6 +164,64 @@ public static void downloadFile(URL url, File destination) throws IOException {
161164
tempFile.delete();
162165

163166
}
167+
168+
public static void createValidationFiles(URL url, File localDestination, URL hashURL){
169+
try {
170+
URLConnection resourceConnection = url.openConnection();
171+
createValidationFiles(resourceConnection, localDestination, hashURL);
172+
} catch (IOException e) {
173+
logger.warn("could not open connection to resource file due to exception", e);
174+
}
175+
}
176+
public static void createValidationFiles(URLConnection resourceUrlConnection, File localDestination, URL hashURL){
177+
long size = resourceUrlConnection.getContentLengthLong();
178+
if(size != -1) {
179+
System.out.println("Content-Length: " + size);
180+
File sizeFile = new File(localDestination.getParentFile(), localDestination.getName()+".size");
181+
try (PrintStream sizePrintStream = new PrintStream(sizeFile)) {
182+
sizePrintStream.print(size);
183+
sizePrintStream.close();
184+
} catch (FileNotFoundException e) {
185+
logger.warn("could not write validation size file due to exception", e);
186+
}
187+
}
188+
189+
if(hashURL == null)
190+
return;
191+
192+
try {
193+
File hashFile = new File(localDestination.getParentFile(), localDestination.getName()+".hash");
194+
downloadFile(hashURL, hashFile);
195+
} catch (IOException e) {
196+
logger.warn("could not write validation hash file due to exception", e);
197+
}
198+
}
199+
200+
public static boolean validateFile(File localFile) {
201+
File sizeFile = new File(localFile.getParentFile(), localFile.getName()+".size");
202+
if(sizeFile.exists()) {
203+
Scanner scanner = null;
204+
try {
205+
scanner = new Scanner(sizeFile);
206+
long expectedSize = scanner.nextLong();
207+
if (expectedSize != localFile.length()) {
208+
logger.warn("File size does not match expected");
209+
return false;
210+
}
211+
} catch (FileNotFoundException e) {
212+
logger.warn("could not validate size of file ["+ localFile+ "] due to exception", e);
213+
} finally {
214+
scanner.close();
215+
}
216+
}
217+
218+
File hashFile = new File(localFile.getParentFile(), localFile.getName()+".hash");
219+
if(hashFile.exists()) {
220+
throw new UnsupportedOperationException("Not yet implemented");
221+
}
222+
223+
return true;
224+
}
164225

165226
/**
166227
* Converts path to Unix convention and adds a terminating slash if it was

biojava-structure/src/main/java/org/biojava/nbio/structure/ecod/EcodFactory.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ public static EcodDatabase getEcodDatabase(String version) {
8989
}
9090
} catch (IOException e) {
9191
// For parsing errors, just use the requested version
92+
// TODO (Amr): what about errors caused by a corrupted download?
93+
logger.warn("Cound not get Ecod version, or file is corrupted", e);
94+
return null;
9295
}
9396
}
9497
logger.trace("Releasing EcodFactory lock after getting version "+version);

biojava-structure/src/main/java/org/biojava/nbio/structure/ecod/EcodInstallation.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ private boolean domainsAvailable() {
369369
try {
370370
File f = getDomainFile();
371371

372-
if (!f.exists() || f.length() <= 0 )
372+
if (! (f.exists() && FileDownloadUtils.validateFile(f)))
373373
return false;
374374

375375
// Re-download old copies of "latest"
@@ -406,7 +406,10 @@ private void downloadDomains() throws IOException {
406406
File localFile = getDomainFile();
407407

408408
logger.info("Downloading {} to: {}",domainsURL, localFile);
409+
FileDownloadUtils.createValidationFiles(domainsURL, localFile, null);
409410
FileDownloadUtils.downloadFile(domainsURL, localFile);
411+
if(! FileDownloadUtils.validateFile(localFile))
412+
throw new IOException("Downloaded file invalid: "+ localFile);
410413
} catch (MalformedURLException e) {
411414
logger.error("Malformed url: "+ url + DOMAINS_PATH + getDomainFilename(),e);
412415
} finally {

biojava-structure/src/main/java/org/biojava/nbio/structure/io/LocalPDBDirectory.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,9 @@ protected InputStream getInputStream(PdbId pdbId) throws IOException{
373373
throw new IOException("Structure "+pdbId+" not found and unable to download.");
374374
}
375375

376+
if(! FileDownloadUtils.validateFile(file))
377+
throw new IOException("Downloaded file invalid: "+file);
378+
376379
InputStreamProvider isp = new InputStreamProvider();
377380

378381
InputStream inputStream = isp.getInputStream(file);
@@ -395,6 +398,8 @@ public void prefetchStructure(String pdbId) throws IOException {
395398
if(!file.exists()) {
396399
throw new IOException("Structure "+pdbId+" not found and unable to download.");
397400
}
401+
if(! FileDownloadUtils.validateFile(file))
402+
throw new IOException("Downloaded file invalid: "+file);
398403
}
399404

400405
/**
@@ -576,7 +581,10 @@ private File downloadStructure(PdbId pdbId, String pathOnServer, boolean obsolet
576581
logger.info("Fetching " + ftp);
577582
logger.info("Writing to "+ realFile);
578583

584+
FileDownloadUtils.createValidationFiles(url, realFile, null);
579585
FileDownloadUtils.downloadFile(url, realFile);
586+
if(! FileDownloadUtils.validateFile(realFile))
587+
throw new IOException("Downloaded file invalid: "+realFile);
580588

581589
return realFile;
582590
}

biojava-structure/src/main/java/org/biojava/nbio/structure/io/sifts/SiftsMappingProvider.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,15 @@ public static List<SiftsEntity> getSiftsMapping(String pdbId) throws IOException
8787
if ( ! dest.exists()){
8888
String u = String.format(fileLoc,pdbId);
8989
URL url = new URL(u);
90+
logger.debug("Downloading SIFTS file {} validation metadata.",url);
91+
FileDownloadUtils.createValidationFiles(url, dest, null);
9092
logger.debug("Downloading SIFTS file {} to {}",url,dest);
9193
FileDownloadUtils.downloadFile(url, dest);
9294
}
9395

96+
if(! FileDownloadUtils.validateFile(dest))
97+
throw new IOException("Downloaded file invalid: "+dest);
98+
9499
InputStreamProvider prov = new InputStreamProvider();
95100
InputStream is = prov.getInputStream(dest);
96101
SiftsXMLParser parser = new SiftsXMLParser();

biojava-structure/src/main/java/org/biojava/nbio/structure/scop/ScopInstallation.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -741,38 +741,41 @@ protected void downloadComFile() throws FileNotFoundException, IOException{
741741

742742
protected void downloadFileFromRemote(URL remoteURL, File localFile) throws IOException{
743743
logger.info("Downloading " + remoteURL + " to: " + localFile);
744+
FileDownloadUtils.createValidationFiles(remoteURL, localFile, null);
744745
FileDownloadUtils.downloadFile(remoteURL, localFile);
746+
if(! FileDownloadUtils.validateFile(localFile))
747+
throw new IOException("Downloaded file invalid: "+localFile);
745748
}
746749

747750
private boolean claFileAvailable(){
748751
String fileName = getClaFilename();
749752

750753
File f = new File(fileName);
751754

752-
return f.exists() && f.length()>0;
755+
return f.exists() && FileDownloadUtils.validateFile(f);
753756
}
754757

755758
private boolean desFileAvailable(){
756759
String fileName = getDesFilename();
757760

758761
File f = new File(fileName);
759-
return f.exists() && f.length()>0;
762+
return f.exists() && FileDownloadUtils.validateFile(f);
760763
}
761764

762765
private boolean hieFileAvailable(){
763766
String fileName = getHieFilename();
764767

765768
File f = new File(fileName);
766769

767-
return f.exists() && f.length()>0;
770+
return f.exists() && FileDownloadUtils.validateFile(f);
768771
}
769772

770773
private boolean comFileAvailable(){
771774
String fileName = getComFilename();
772775

773776
File f = new File(fileName);
774777

775-
return f.exists() && f.length()>0;
778+
return f.exists() && FileDownloadUtils.validateFile(f);
776779
}
777780

778781
protected String getClaFilename(){

0 commit comments

Comments
 (0)