diff --git a/src/main/org/bson/BSONDecoder.java b/src/main/org/bson/BSONDecoder.java
index 78ef6c350e9..4b21d4c369b 100644
--- a/src/main/org/bson/BSONDecoder.java
+++ b/src/main/org/bson/BSONDecoder.java
@@ -5,8 +5,8 @@
import static org.bson.BSON.*;
import java.io.*;
+import java.lang.ref.*;
-import org.bson.io.*;
import org.bson.types.*;
public class BSONDecoder {
@@ -42,7 +42,7 @@ public int decode( InputStream in , BSONCallback callback )
return decode( new Input( in ) , callback );
}
- public int decode( Input in , BSONCallback callback )
+ int decode( Input in , BSONCallback callback )
throws IOException {
if ( _in != null || _callback != null )
@@ -62,22 +62,21 @@ public int decode( Input in , BSONCallback callback )
int decode()
throws IOException {
-
- final int start = _in._read;
+ //
+ // The Input constructor has already consumed the four length bytes, so count them as part of this object
+ final int start = _in.getBytesRead() - 4;
- final int len = _in.readInt();
-
_callback.objectStart();
while ( decodeElement() );
_callback.objectDone();
- final int read = _in._read - start;
+ final int read = _in.getBytesRead() - start;
- if ( read != len ){
+ if ( read != _in._length ) {
//throw new IllegalArgumentException( "bad data. lengths don't match " + read + " != " + len );
}
- return len;
+ return _in._length;
}
boolean decodeElement()
@@ -251,32 +250,154 @@ Object _readBasicObject()
}
class Input {
- Input( InputStream in ){
+ /**
+ * Maximum number of bytes read ahead from the stream. This ensures that at most
+ * that many bytes are copied in memory if the buffer does not contain enough contiguous bytes.
+ * Must be less than or equal to the size of _charBuffer to prevent a buffer overflow.
+ */
+ final private static int MAX_READAHEADSIZE = 512;
+
+ Input( final InputStream in )
+ throws IOException {
_in = in;
_read = 0;
+ // Pretend the object is only 4 bytes long so that the readahead stops after the length prefix;
+ // the value decoded from those 4 bytes then replaces the placeholder as the real length
+ _length = 4;
+ _length = readInt();
}
-
- int readInt()
- throws IOException {
- _read += 4;
- return Bits.readInt( _in );
+ /**
+ * Ensures that a contiguous block of blockSize bytes is available in the buffer starting at the current
+ * offset, or throws a RuntimeException if the stream ends first. The offset is not advanced here; the caller consumes the block.
+ *
+ * @param blockSize number of bytes that must be available in the buffer
+ * @throws IOException if reading from the underlying stream fails
+ */
+ void ensureContinuousBlock(int blockSize)
+ throws IOException {
+ //
+ // Enough bytes already loaded?
+ if(_o + blockSize <= _l)
+ return;
+
+ final int remaining = _l - _o;
+ //
+ // Is buffer large enough for block?
+ if(blockSize < _random.length) {
+ //
+ // copy the unread rest of the buffer to the front
+ System.arraycopy(_random, _o, _random, 0, remaining);
+ }
+ else {
+ //
+ // Allocate a larger buffer
+ final byte largerBuffer[] = new byte[blockSize + MAX_READAHEADSIZE];
+ //
+ // copy the rest of the old buffer to the front of the new
+ System.arraycopy(_random, _o, largerBuffer, 0, remaining);
+ //
+ // swap the buffers
+ _random = largerBuffer;
+ }
+ //
+ // Account for the bytes already consumed from the buffer (the current offset).
+ // The unread bytes now start at the front of the buffer
+ _read += _o;
+
+ _o = 0;
+ _l = remaining;
+ //
+ // Calculate possible readahead. The readahead is capped at the end of the current object (_length)
+ final int bytesTillEnd = _length - _read - _l;
+ final int readahead = Math.min(Math.min(MAX_READAHEADSIZE, _random.length - remaining), bytesTillEnd);
+
+ int wanted = Math.max(readahead, blockSize - remaining);
+
+ while(wanted > 0 && _l < blockSize) {
+ //
+ // Read as much as we wanted at the end of the buffer
+ int rd = _in.read(_random, _l, wanted);
+ //
+ // EOS reached?
+ if(rd < 0)
+ break;
+ //
+ // Increase end and reduce wanted by bytes read from InputStream
+ _l = _l + rd;
+ wanted -=rd;
+ }
+ //
+ // Oops, we were not able to read enough bytes from stream
+ if(_l < blockSize) {
+ throw new RuntimeException("end of stream reached");
+ }
}
- long readLong()
+ /**
+ * Reads a 4-byte integer from the buffer, least significant byte first.
+ *
+ * @return the decoded int
+ * @throws IOException if reading from the underlying stream fails
+ */
+ final int readInt()
throws IOException {
- _read += 8;
- return Bits.readLong( _in );
+ //
+ // All integers are 4 bytes
+ ensureContinuousBlock(4);
+ //
+ // Least significant byte first; code originally copied from java.io.Bits
+ return
+ ((_random[_o++] & 0xFF) << 0) +
+ ((_random[_o++] & 0xFF) << 8) +
+ ((_random[_o++] & 0xFF) << 16) +
+ ((_random[_o++]) << 24);
}
-
+ /**
+ * Reads an 8-byte long from the buffer, least significant byte first.
+ *
+ * @return the decoded long
+ * @throws IOException if reading from the underlying stream fails
+ */
+ long readLong()
+ throws IOException {
+ //
+ // All longs are 8 bytes
+ ensureContinuousBlock(8);
+ //
+ // Least significant byte first; code originally copied from java.io.Bits
+ return ((_random[_o++] & 0xFFL) << 0) +
+ ((_random[_o++] & 0xFFL) << 8) +
+ ((_random[_o++] & 0xFFL) << 16) +
+ ((_random[_o++] & 0xFFL) << 24) +
+ ((_random[_o++] & 0xFFL) << 32) +
+ ((_random[_o++] & 0xFFL) << 40) +
+ ((_random[_o++] & 0xFFL) << 48) +
+ (((long) _random[_o++]) << 56);
+ }
+ /**
+ * Reads a double by reading a long and reinterpreting its bits.
+ *
+ * @return the value of Double.longBitsToDouble applied to the next long
+ * @throws IOException if reading from the underlying stream fails
+ */
double readDouble()
throws IOException {
return Double.longBitsToDouble( readLong() );
}
-
+ /**
+ * Reads the next byte, refilling the buffer from the stream when it is empty.
+ *
+ * @return the next byte as a signed value
+ * @throws IOException if reading from the underlying stream fails
+ */
byte read()
- throws IOException {
- _read++;
- return (byte)(_in.read() & 0xFF);
+ throws IOException {
+ //
+ // Ensure that one byte can be read
+ ensureContinuousBlock(1);
+ //
+ // Simply return the byte
+ return _random[_o++];
}
void fill( byte b[] )
@@ -285,111 +406,300 @@ void fill( byte b[] )
}
+ /**
+ * Fills the first len bytes of b, taking the buffered bytes first and reading the rest
+ * directly from the stream.
+ *
+ * @param b destination array; must hold at least len bytes
+ * @param len number of bytes to copy into b
+ * @throws IOException if reading from the underlying stream fails
+ * @throws RuntimeException if the stream ends before len bytes were read
+ */
void fill( byte b[] , int len )
- throws IOException {
- int off = 0;
- while ( len > 0 ){
- int x = _in.read( b , off , len );
- _read += x;
- off += x;
- len -= x;
+ throws IOException {
+ //
+ // Take the remaining bytes from the buffer
+ int remaining = _l - _o;
+ //
+ // Did we already read enough bytes?
+ if(remaining >= len) {
+ System.arraycopy(_random, _o, b, 0, len);
+ _o += len;
+
+ return;
+ }
+ //
+ // Take all remaining bytes from the buffer
+ if(remaining > 0) {
+ System.arraycopy(_random, _o, b, 0, remaining);
+ //
+ // Reduce needed bytes
+ len -= remaining;
+ //
+ // The buffer is now empty; the next ensureContinuousBlock call refills it
+ _o = _l;
+ }
+ //
+ // Read the rest directly from the InputStream; remaining doubles as the write offset in b
+ while ( len > 0 ) {
+ final int bytesRead = _in.read( b , remaining , len );
+ //
+ // EOS reached? A -1 must not be added to the counters below, it would corrupt len, _read and the offset
+ if(bytesRead < 0)
+ throw new RuntimeException("end of stream reached");
+ //
+ // Reduce needed bytes
+ len -= bytesRead;
+ //
+ // Increase the number of read bytes because we are reading directly from _in
+ _read += bytesRead;
+
+ remaining += bytesRead;
+ }
+ }
+ /**
+ * Decodes a multibyte UTF-8 sequence whose first byte c1 was already consumed and stores the result in _charBuffer.
+ *
+ * @param c1 first byte of the sequence; its upper four bits select the sequence length
+ * @param charBufferPosition index in _charBuffer at which the decoded char(s) are stored
+ * @return the position in _charBuffer after the stored char(s)
+ * @throws IOException if reading from the underlying stream fails
+ */
+ int readMultiByte(int c1, int charBufferPosition)
+ throws IOException {
+ switch (c1 >> 4) {
+ case 12:
+ case 13: {
+ //
+ // We need at least one byte for the character and one for the null to terminate
+ assert charBufferPosition < _charBuffer.length;
+ ensureContinuousBlock(2);
+ //
+ // Read next byte and check for correctness
+ final int c2 = _random[_o++];
+
+ if ((c2 & 0xC0) != 0x80)
+ _charBuffer[charBufferPosition++] = '\uFFFD';
+ else
+ _charBuffer[charBufferPosition++] = (char)(((c1 & 0x1F) << 6) | (c2 & 0x3F));
+
+ break;
+ }
+ case 14: {
+ //
+ // We need at least two bytes for the character and one for the null to terminate
+ assert charBufferPosition < _charBuffer.length;
+ ensureContinuousBlock(3);
+ //
+ // Read next bytes and check for correctness
+ final int c2 = _random[_o++];
+ final int c3 = _random[_o++];
+
+ if (((c2 & 0xC0) != 0x80) || ((c3 & 0xC0) != 0x80))
+ _charBuffer[charBufferPosition++] = '\uFFFD';
+ else
+ _charBuffer[charBufferPosition++] = (char)(((c1 & 0x0F) << 12) | ((c2 & 0x3F) << 6) | ((c3 & 0x3F) << 0));
+
+ break;
+ }
+ case 15: {
+ //
+ // We need at least three bytes for the character and one for the null to terminate
+ ensureContinuousBlock(4);
+ //
+ // Read the three continuation bytes (they are not validated here)
+ final int c2 = _random[_o++];
+ final int c3 = _random[_o++];
+ final int c4 = _random[_o++];
+ // Use a surrogate pair to represent it.
+ // ch is 0..fffff (20 bits)
+ final int ch = ((c1&0x7)<<18) + ((c2&0x3f)<<12) + ((c3&0x3f)<<6) + (c4&0x3f) - 0x10000;
+
+ _charBuffer[charBufferPosition++] = (char) (0xd800 + (ch >> 10)); // top 10 bits
+ _charBuffer[charBufferPosition++] = (char) (0xdc00 + (ch & 0x3ff)); // bottom 10 bits
+
+ break;
+ }
+ default:
+ _charBuffer[charBufferPosition++] = '\uFFFD';
}
+
+ return charBufferPosition;
}
-
- boolean _isAscii( byte b ){
- return b >=0 && b <= 127;
- }
+ /**
+ * Reads a null-terminated UTF-8 string from the buffer, refilling it from the {@link InputStream} as needed.
+ * We assume that null-terminated strings have small lengths and are mostly ASCII.
+ *
+ * @return the decoded string without the terminating null; "" for an empty string
+ * @throws IOException if reading from the underlying stream fails
+ */
String readCStr()
- throws IOException {
-
- boolean isAcii = true;
-
- // short circuit 1 byte strings
- {
- _random[0] = read();
- if ( _random[0] == 0 )
- return "";
-
- _random[1] = read();
- if ( _random[1] == 0 ){
- String out = ONE_BYTE_STRINGS[_random[0]];
- if ( out != null )
- return out;
- return new String( _random , 0 , 1 , "UTF-8" );
- }
-
- _stringBuffer.reset();
- _stringBuffer.write( _random[0] );
- _stringBuffer.write( _random[1] );
+ throws IOException {
+ //
+ // Position within _charBuffer
+ int charBufferPosition = 0;
+ //
+ // Claim a StringBuilder for building strings longer than _charBuffer.
+ // The cast keeps this line compiling even when the reference is declared as a raw type
+ StringBuilder stringBuilder = (StringBuilder) _stringBuilder.get();
+
+ if(stringBuilder == null) {
+ stringBuilder = new StringBuilder(_charBuffer.length * 2);
+ _stringBuilder = new SoftReference<StringBuilder>(stringBuilder);
+ }
+ else
+ stringBuilder.setLength(0);
+ //
+ // Fill the buffer with the first byte
+ ensureContinuousBlock(1);
+
+ outer:
+ while ( true ) {
+ //
+ // This is the fast inner loop where every character is completely located in the buffer.
+ // Invariant: the number of unread bytes never exceeds the free space in _charBuffer, and every
+ // byte produces at most one char, so the ASCII path needs no overflow check.
+ assert(_l - _o <= _charBuffer.length - charBufferPosition);
+
+ while(_o < _l) {
+ //
+ // Read next byte from buffer
+ final int b = _random[_o++];
+ //
+ // Normal ASCII character? It is the most common case
+ if( b > 0) {
+ //
+ // Append it to the end of our buffer
+ assert charBufferPosition < _charBuffer.length;
+ _charBuffer[charBufferPosition++] = (char)b;
+ }
+ else if( b == 0) {
+ break outer;
+ }
+ else {
+ //
+ // A four byte sequence is stored as a surrogate pair, so make room for two chars
+ if(charBufferPosition + 2 > _charBuffer.length) {
+ stringBuilder.append(_charBuffer, 0, charBufferPosition);
+ charBufferPosition = 0;
+ }
+ //
+ // Read a multibyte. It is currently not optimized because this case is infrequent
+ charBufferPosition = readMultiByte(b & 0xff, charBufferPosition);
+ //
+ // readMultiByte may have refilled the byte buffer, so restore the invariant
+ // before the fast path continues
+ if(_l - _o > _charBuffer.length - charBufferPosition) {
+ stringBuilder.append(_charBuffer, 0, charBufferPosition);
+ charBufferPosition = 0;
+ }
+ }
+ }
+ //
+ // We need more bytes in the buffer, at least one byte
+ ensureContinuousBlock(1);
+ //
+ // If there are too many bytes in the buffer, then append _charBuffer to StringBuilder
+ // and reset the _charBuffer. This ensures that the byte buffer cannot overflow the char buffer
+ if(_l - _o > _charBuffer.length - charBufferPosition) {
+ stringBuilder.append(_charBuffer, 0, charBufferPosition);
+ charBufferPosition = 0;
+ }
+ }
+ //
+ // Some characters in _charBuffer
+ if(charBufferPosition > 0) {
+ //
+ // if nothing was flushed to the StringBuilder then create the string directly from _charBuffer
+ if(stringBuilder.length() == 0) {
+ return allocateString( _charBuffer, charBufferPosition);
+ }
+ //
+ // Append _charBuffer to final string
+ stringBuilder.append(_charBuffer, 0, charBufferPosition);
+ }
+ //
+ // Ok, we got an empty string
+ if(stringBuilder.length() == 0)
+ return "";
+
+ return stringBuilder.toString();
+ }
+ /**
+ * Allocate a string from a char[]. This method uses a string cache to reduce memory consumption.
+ *
+ * @param charBuffer buffer holding the characters of the string, starting at index 0
+ * @param length number of characters to take from charBuffer
+ * @return a string with those characters; for lengths below 16 this may be a cached instance
+ */
+ private String allocateString(final char charBuffer[], final int length) {
+ //
+ // We try to cache short strings
+ if(length < 16) {
+ //
+ // building a simple hash with the characters from charBuffer
+ int h = charBuffer[0] + 31;
+ for(int i = 1 ; i < length; i++)
+ h = h * 31 + charBuffer[i];
+ //
+ // calculate index within hashtable (the mask relies on the cache size being a power of two)
+ final int hashIndex = h & (_stringCache.length - 1);
+ //
+ // try to read cached string
+ String cachedString = _stringCache[hashIndex];
+ //
+ // Found a cached string with correct length?
+ if(cachedString != null && cachedString.length() == length) {
+ int i = length - 1;
+ //
+ // Compare starting from the end
+ while(i >= 0) {
+ if(charBuffer[i] != cachedString.charAt(i))
+ break;
+
+ --i;
+ }
+ //
+ // if both are equal we can return the cached instance of this string
+ if(i < 0)
+ return cachedString;
+ }
+ //
+ // Write a new string to the cache, overwriting any previous value in this slot
+ cachedString = new String(charBuffer, 0, length);
+ _stringCache[hashIndex] = cachedString;
- isAcii = _isAscii( _random[0] ) && _isAscii( _random[1] );
- }
-
-
- while ( true ){
- byte b = read();
- if ( b == 0 )
- break;
- _stringBuffer.write( b );
- isAcii = isAcii && _isAscii( b );
- }
+ return cachedString;
+ }
- String out = null;
- if ( isAcii ){
- out = _stringBuffer.asString();
- }
- else {
- try {
- out = _stringBuffer.asString( "UTF-8" );
- }
- catch ( UnsupportedOperationException e ){
- throw new RuntimeException( "impossible" , e );
- }
- }
- _stringBuffer.reset();
- return out;
+ return new String(charBuffer, 0, length);
}
-
+ /**
+ * Reads a length-prefixed UTF-8 string. The last of the size bytes is skipped and not decoded.
+ *
+ * @return the decoded string
+ * @throws IOException if reading from the underlying stream fails
+ */
String readUTF8String()
throws IOException {
- int size = readInt();
+ //
+ // Read size and ensure that the complete string is in the buffer
+ final int size = readInt();
- if ( size < 0 || size > ( 3 * 1024 * 1024 ) )
+ //
+ // The size counts the trailing byte as well, so zero is malformed: it would ask for a string of length -1 below
+ if ( size <= 0 || size > ( 3 * 1024 * 1024 ) )
throw new RuntimeException( "bad string size: " + size );
- byte[] b = size < _random.length ? _random : new byte[size];
-
- fill( b , size );
+ ensureContinuousBlock(size);
+ //
+ // Start of the string is the current pointer in buffer
+ final int startOfString = _o;
+ //
+ // Increase offset by size of string
+ _o += size;
+
try {
- return new String( b , 0 , size - 1 , "UTF-8" );
+ return new String( _random, startOfString , size - 1 , "UTF-8" );
}
catch ( java.io.UnsupportedEncodingException uee ){
throw new RuntimeException( "impossible" , uee );
}
}
+ /**
+ * Returns the number of bytes consumed so far.
+ *
+ * @return _read plus the current offset _o within the buffer
+ */
+ int getBytesRead() {
+ return _read + _o;
+ }
+ int _o; // index of the next unread byte in _random
+ int _l; // end (exclusive) of the valid bytes in _random
int _read;
+
final InputStream _in;
+ int _length; // object length decoded from the first four bytes of the stream
}
-
private Input _in;
private BSONCallback _callback;
- private byte[] _random = new byte[1024]; // has to be used within a single function
-
- private PoolOutputBuffer _stringBuffer = new PoolOutputBuffer();
-
- static final String[] ONE_BYTE_STRINGS = new String[128];
- static void _fillRange( byte min, byte max ){
- while ( min < max ){
- String s = "";
- s += (char)min;
- ONE_BYTE_STRINGS[(int)min] = s;
- min++;
- }
- }
- static {
- _fillRange( (byte)'0' , (byte)'9' );
- _fillRange( (byte)'a' , (byte)'z' );
- _fillRange( (byte)'A' , (byte)'Z' );
- }
+ private byte[] _random = new byte[1024]; // byte buffer filled from the stream; replaced by a larger array when a block does not fit
+ private char _charBuffer[] = new char[1024]; // scratch buffer for decoded characters
+
+ private static String _stringCache[] = new String[1024]; // static, so shared by all decoder instances
+ /**
+ * {@link SoftReference} to {@link StringBuilder} to allow reclaiming of memory by GC.
+ * Typed so that get() yields a StringBuilder without a cast.
+ */
+ private SoftReference<StringBuilder> _stringBuilder = new SoftReference<StringBuilder>(null);
}