Skip to content

Commit 2e3b35f

Browse files
authored
Merge pull request biojava#4 from andreasprlic/mmtf105
new version of test now only compares parsing in RAM
2 parents d277194 + 80a4e0c commit 2e3b35f

File tree

2 files changed

+94
-2744
lines changed

2 files changed

+94
-2744
lines changed

biojava-structure/src/test/java/org/biojava/nbio/structure/io/mmtf/TestMmtfPerformance.java

Lines changed: 94 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,75 +2,134 @@
22

33
import org.biojava.nbio.structure.Structure;
44
import org.biojava.nbio.structure.StructureIO;
5+
import org.biojava.nbio.structure.TestStructureCrossReferences;
56
import org.biojava.nbio.structure.io.PDBFileParser;
67
import org.biojava.nbio.structure.io.mmcif.AllChemCompProvider;
78
import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory;
89
import org.biojava.nbio.structure.io.mmcif.ChemCompProvider;
910
import org.junit.Test;
1011
import org.rcsb.mmtf.dataholders.MmtfStructure;
1112
import org.rcsb.mmtf.decoder.ReaderUtils;
13+
import org.slf4j.Logger;
14+
import org.slf4j.LoggerFactory;
1215

13-
import java.io.FileInputStream;
14-
import java.io.IOException;
15-
import java.io.InputStream;
16+
import java.io.*;
17+
import java.net.URL;
1618
import java.nio.file.Files;
1719
import java.nio.file.Path;
1820
import java.nio.file.Paths;
21+
import java.util.zip.GZIPInputStream;
1922

2023
import static org.junit.Assert.assertEquals;
24+
import static org.junit.Assert.assertNotNull;
2125
import static org.junit.Assert.assertTrue;
2226

2327
/**
2428
* Created by andreas on 1/9/17.
2529
*/
2630
public class TestMmtfPerformance {
2731

28-
// @Test
29-
// public void test3J3Q() throws IOException{
30-
//
31-
//// AllChemCompProvider cc = new AllChemCompProvider();
32-
//// ChemCompGroupFactory.setChemCompProvider(cc);
33-
//
34-
// long timeS = System.currentTimeMillis();
35-
// ClassLoader classLoader = getClass().getClassLoader();
36-
// Structure structure = MmtfActions.readFromFile((Paths.get(classLoader.getResource("org/biojava/nbio/structure/io/mmtf/3J3Q.mmtf").getPath())));
37-
// assertEquals(structure.getPDBCode(),"3J3Q");
38-
// //assertEquals(structure.getChains().size(),6);
39-
// long timeE = System.currentTimeMillis();
40-
//
41-
// System.out.println("time to load from local file: " + (timeE - timeS) + " ms.");
42-
//
43-
// }
32+
private static final Logger logger = LoggerFactory.getLogger(TestMmtfPerformance.class);
33+
34+
private static final int NUMBER_OF_REPEATS = 10;
35+
36+
// Returns the contents of the file in a byte array.
37+
public static byte[] getBytesFromFile(File file) throws IOException {
38+
// Get the size of the file
39+
long length = file.length();
40+
41+
// You cannot create an array using a long type.
42+
// It needs to be an int type.
43+
// Before converting to an int type, check
44+
// to ensure that file is not larger than Integer.MAX_VALUE.
45+
if (length > Integer.MAX_VALUE) {
46+
// File is too large
47+
throw new IOException("File is too large!");
48+
}
49+
50+
// Create the byte array to hold the data
51+
byte[] bytes = new byte[(int)length];
52+
53+
// Read in the bytes
54+
int offset = 0;
55+
int numRead = 0;
56+
57+
InputStream is = new FileInputStream(file);
58+
try {
59+
while (offset < bytes.length
60+
&& (numRead=is.read(bytes, offset, bytes.length-offset)) >= 0) {
61+
offset += numRead;
62+
}
63+
} finally {
64+
is.close();
65+
}
66+
67+
// Ensure all the bytes have been read in
68+
if (offset < bytes.length) {
69+
throw new IOException("Could not completely read file "+file.getName());
70+
}
71+
return bytes;
72+
}
4473

45-
@Test
46-
public void test4CUP() throws IOException{
74+
static String convertStreamToString(java.io.InputStream is) {
75+
java.util.Scanner s = new java.util.Scanner(is).useDelimiter("\\A");
76+
return s.hasNext() ? s.next() : "";
77+
}
4778

48-
long timeS = System.currentTimeMillis();
4979

50-
ClassLoader classLoader = getClass().getClassLoader();
51-
Structure structure = MmtfActions.readFromFile((Paths.get(classLoader.getResource("org/biojava/nbio/structure/io/mmtf/4CUP.mmtf").getPath())));
80+
public byte[] getByteArrayFromInputStream(InputStream is) throws IOException {
81+
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
5282

53-
assertEquals(structure.getPDBCode(),"4CUP");
54-
assertEquals(structure.getChains().size(),6);
83+
int nRead;
84+
byte[] data = new byte[16384];
5585

56-
long timeE = System.currentTimeMillis();
86+
while ((nRead = is.read(data, 0, data.length)) != -1) {
87+
buffer.write(data, 0, nRead);
88+
}
5789

58-
Path path = Paths.get(classLoader.getResource("org/biojava/nbio/structure/io/4cup.pdb").getPath());
90+
buffer.flush();
5991

60-
InputStream is = Files.newInputStream(path);
92+
return buffer.toByteArray();
93+
94+
}
95+
96+
@Test
97+
public void test3HBX() throws Exception{
98+
String pdbId = "3HBX";
99+
100+
URL url = new URL("https://files.rcsb.org/download/"+pdbId+".pdb.gz");
101+
102+
String pdbFile = convertStreamToString(new GZIPInputStream(url.openStream()));
103+
104+
long pdbStart = System.currentTimeMillis();
61105

62106
PDBFileParser parser = new PDBFileParser();
63107

64-
Structure s = parser.parsePDBFile(is);
108+
for ( int i =0 ; i< NUMBER_OF_REPEATS ; i++) {
109+
110+
Structure pdbStructure = parser.parsePDBFile(new ByteArrayInputStream(pdbFile.getBytes()));
111+
}
112+
long pdbEnd = System.currentTimeMillis();
65113

66-
long timeF = System.currentTimeMillis();
67114

68-
//todo: add mmcif for comparison
115+
URL mmtfURL = new URL("https://mmtf.rcsb.org/v1.0/full/" + pdbId + ".mmtf.gz");
69116

70-
// System.out.println("time to parse mmtf:" + (timeE-timeS));
71-
// System.out.println("time to parse PDB: " + (timeF-timeE));
72117

73-
assertTrue( "It should not be the case, but it is faster to parse a PDB file ("+(timeF -timeE)+" ms.) than MMTF ("+( timeE-timeS)+" ms.)!",( timeF -timeE) > ( timeE-timeS));
118+
byte[] mmtfdata = getByteArrayFromInputStream(new GZIPInputStream((mmtfURL.openStream())));
74119

120+
long mmtfStart = System.currentTimeMillis();
121+
122+
for ( int i =0 ; i< NUMBER_OF_REPEATS ; i++) {
123+
Structure mmtfStructure = MmtfActions.readFromInputStream(new ByteArrayInputStream(mmtfdata));
124+
}
125+
long mmtfEnd = System.currentTimeMillis();
126+
127+
long timeMMTF = (mmtfEnd-mmtfStart);
128+
long timePDB = (pdbEnd-pdbStart);
129+
logger.warn("average time to parse mmtf: " + (timeMMTF/NUMBER_OF_REPEATS));
130+
logger.warn("average time to parse PDB : " + (timePDB/NUMBER_OF_REPEATS));
131+
//
132+
assertTrue( "It should not be the case, but it is faster to parse a PDB file ("+timePDB+" ms.) than MMTF ("+( timeMMTF)+" ms.)!",( timePDB) > ( timeMMTF));
133+
//
75134
}
76135
}

0 commit comments

Comments
 (0)