Skip to content

Commit e964822

Browse files
committed
JAVA-332: Have GridFSDBInputFile.MyInputStream.skip be smart about not fetching grid fs chunks that are being entirely skipped over
1 parent 68f5d7f commit e964822

3 files changed

Lines changed: 100 additions & 6 deletions

File tree

src/main/com/mongodb/gridfs/GridFSDBFile.java

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -133,13 +133,11 @@ public int read(byte[] b){
133133
public int read(byte[] b, int off, int len){
134134

135135
if ( _data == null || _offset >= _data.length ){
136-
137-
if ( _nextChunk >= _numChunks )
136+
if ( _currentChunkIdx + 1 >= _numChunks )
138137
return -1;
139138

140-
_data = getChunk( _nextChunk );
139+
_data = getChunk( ++_currentChunkIdx );
141140
_offset = 0;
142-
_nextChunk++;
143141
}
144142

145143
int r = Math.min( len , _data.length - _offset );
@@ -148,9 +146,39 @@ public int read(byte[] b, int off, int len){
148146
return r;
149147
}
150148

149+
/**
150+
* Smartly skips over chunks without fetching them where possible.
151+
*/
152+
public long skip(long numBytesToSkip) throws IOException {
153+
if (numBytesToSkip <= 0)
154+
return 0;
155+
156+
if (_currentChunkIdx == _numChunks)
157+
//We're already past the end of the file, so short-circuit here
158+
//Don't count those extra bytes to skip in with the return value
159+
return 0;
160+
161+
if (_offset + numBytesToSkip <= _chunkSize) {
162+
//We're skipping over bytes in the current chunk, adjust the offset accordingly
163+
_offset += numBytesToSkip;
164+
if (_data == null && _currentChunkIdx < _numChunks)
165+
_data = getChunk(_currentChunkIdx);
166+
167+
return numBytesToSkip;
168+
}
169+
170+
//We're skipping over the remainder of this chunk; could do this less recursively...
171+
long skippedBytes = _chunkSize - _offset;
172+
_offset = 0;
173+
++_currentChunkIdx;
174+
_data = null;
175+
176+
return skippedBytes + skip(numBytesToSkip - skippedBytes);
177+
}
178+
151179
final int _numChunks;
152180

153-
int _nextChunk = 0;
181+
int _currentChunkIdx = -1;
154182
int _offset;
155183
byte[] _data = null;
156184
}

src/main/com/mongodb/util/MyAsserts.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,11 @@ public static void assertEquals( short a , short b ){
7373
if ( a != b )
7474
throw new MyAssert( "" + a + " != " + b );
7575
}
76+
77+
public static void assertEquals( byte expected , byte result ) {
78+
if ( expected != result )
79+
throw new MyAssert( "" + expected + " != " + result );
80+
}
7681

7782
public static void assertEquals( double a , double b , double diff ){
7883
if ( Math.abs( a - b ) > diff )

src/test/com/mongodb/gridfs/GridFSTest.java

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,10 +167,11 @@ public void testBadChunkSize() throws Exception {
167167
fileSize = 10 * 1024 * 1024;
168168

169169
byte[] randomBytes = new byte[fileSize];
170-
for (int idx = 0; idx < 2 * GridFS.MAX_CHUNKSIZE; ++idx)
170+
for (int idx = 0; idx < fileSize; ++idx)
171171
randomBytes[idx] = (byte)(256 * Math.random());
172172

173173
GridFSInputFile inputFile = _fs.createFile(randomBytes);
174+
inputFile.setFilename("bad_chunk_size.bin");
174175
try{
175176
inputFile.save(0);
176177
fail("should have received an exception about a chunk size being zero");
@@ -197,6 +198,66 @@ public void testBadChunkSize() throws Exception {
197198
assertArrayEquals(randomBytes, savedFileBytes);
198199
}
199200

201+
@Test(groups = {"basic"})
202+
public void testInputStreamSkipping() throws Exception {
203+
//int chunkSize = 5;
204+
int chunkSize = GridFS.DEFAULT_CHUNKSIZE;
205+
int fileSize = 7 * chunkSize;
206+
207+
208+
byte[] fileBytes = new byte[fileSize];
209+
for (int idx = 0; idx < fileSize; ++idx)
210+
fileBytes[idx] = (byte)(idx % 251);
211+
//Don't want chunks to be aligned at byte position 0
212+
213+
GridFSInputFile inputFile = _fs.createFile(fileBytes);
214+
inputFile.setFilename("input_stream_skipping.bin");
215+
inputFile.save(chunkSize);
216+
217+
GridFSDBFile savedFile = _fs.findOne(new BasicDBObject("_id", inputFile.getId()));
218+
GridFSDBFile.MyInputStream inputStream = (GridFSDBFile.MyInputStream)savedFile.getInputStream();
219+
220+
//Quick run-through, make sure the file is as expected
221+
for (int idx = 0; idx < fileSize; ++idx)
222+
assertEquals((byte)(idx % 251), (byte)inputStream.read());
223+
224+
inputStream = (GridFSDBFile.MyInputStream)savedFile.getInputStream();
225+
226+
int position = 0;
227+
assertEquals((byte)(position++ % 251), (byte)inputStream.read());
228+
229+
long skipped = inputStream.skip(1);
230+
assertEquals(1, skipped);
231+
position += 1;
232+
assertEquals((byte)(position++ % 251), (byte)inputStream.read());
233+
234+
skipped = inputStream.skip(chunkSize);
235+
assertEquals(chunkSize, skipped);
236+
position += chunkSize;
237+
assertEquals((byte)(position++ % 251), (byte)inputStream.read());
238+
239+
skipped = inputStream.skip(-1);
240+
assertEquals(0, skipped);
241+
skipped = inputStream.skip(0);
242+
assertEquals(0, skipped);
243+
244+
skipped = inputStream.skip(3 * chunkSize);
245+
assertEquals(3 * chunkSize, skipped);
246+
position += 3 * chunkSize;
247+
assertEquals((byte)(position++ % 251), (byte)inputStream.read());
248+
249+
//Make sure skipping works when we skip to an exact chunk boundary
250+
long toSkip = inputStream.available();
251+
skipped = inputStream.skip(toSkip);
252+
assertEquals(toSkip, skipped);
253+
position += toSkip;
254+
assertEquals((byte)(position++ % 251), (byte)inputStream.read());
255+
256+
skipped = inputStream.skip(2 * fileSize);
257+
assertEquals(fileSize - position, skipped);
258+
assertEquals(-1, inputStream.read());
259+
}
260+
200261
final DB _db;
201262
final GridFS _fs;
202263

0 commit comments

Comments
 (0)