diff --git a/.travis.yml b/.travis.yml index 994e275..20fd86b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,3 @@ language: node_js node_js: - - 0.9 - 0.10 diff --git a/README.md b/README.md index 0fb6acd..1e49db0 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,17 @@ ### Tools for parsing, filtering and creating Redis RDB files -[![Build Status](https://travis-ci.org/codeaholics/node-rdb-tools.png?branch=master)](https://travis-ci.org/codeaholics/node-rdb-tools) +[![Build Status](https://travis-ci.org/codeaholics/node-rdb-tools.png?branch=master)](https://travis-ci.org/codeaholics/node-rdb-tools) [![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/codeaholics/node-rdb-tools/trend.png)](https://bitdeli.com/free "Bitdeli Badge") -This module currently provides a parser which understands Redis RDB files. In future it will also provide tools for modifying those files and re-creating them. +This module currently provides: -This parser is perfect for situations where you want to do analysis on your Redis data, but don't want to do it online on the server. Typically, if you have a Redis instance with many millions of keys, then doing a `keys *` or similar will block your server for a long time. In cases like these, taking a recent dump (or forcing a current one with `BGSAVE`) and then analysing that file offline is a useful technique. 
+* an [RDB parser](#parser) - a "streams2" [transformer](http://nodejs.org/api/stream.html#stream_class_stream_transform) which understands Redis RDB files and produces objects representing the keys and values +* an [RDB writer](#writer) - a transformer which consumes the objects produced by the [parser](#parser) and produces a Redis RDB file +* a ["protocol emitter"](#protocol-emitter) - a transformer which takes arrays of Redis commands and produces raw Redis network protocol suitable for piping into `redis-cli --pipe` -The parser works as a Node "streams2" [transformer](http://nodejs.org/api/stream.html#stream_class_stream_transform). You feed it a stream of bytes (typically from `process.stdin` or a [file read stream](http://nodejs.org/api/fs.html#fs_fs_createreadstream_path_options)), and it produces a stream of objects representing your keys and values (and other miscellaneous structural information about the file). +In future it will also provide tools for modifying RDB files - for example deleting keys, moving keys to different spaces, merging/splitting RDB files, etc. + +These tools are perfect for situations where you want to do analysis on your Redis data, but don't want to do it online on the server. Typically, if you have a Redis instance with many millions of keys, then doing a `keys *` or similar will block your server for a long time. In cases like these, taking a recent dump (or forcing a current one with `BGSAVE`) and then analysing that file offline is a useful technique. ## Installation @@ -53,7 +57,11 @@ In this example, you can see we take `stdin`, pipe it through the parser and pip On my laptop (a Lenovo X1 Carbon running Ubuntu 12.10 with a `Intel(R) Core(TM) i7-3667U CPU @ 2.00GHz` CPU), I can chew through around 20,000 - 25,000 keys per second. This performance is dependent on the types of data in your file. 
For example, keys with simple string values are much faster to parse than keys with large composite data structures (hashes, lists, sets, sorted sets). My laptop also has an SSD, so I'm not disk-bound, but I doubt disk speed is going to be an issue. -## Constructor options +## Parser + +The parser works as a Node "streams2" transformer. You feed it a stream of bytes (typically from `process.stdin` or a [file read stream](http://nodejs.org/api/fs.html#fs_fs_createreadstream_path_options)), and it produces a stream of objects representing your keys and values (and other miscellaneous structural information about the file). + +### Constructor options ```javascript var parser = new Parser(options); @@ -63,39 +71,45 @@ var parser = new Parser(options); * `encoding`: the character encoding to use when converting to and from `String` (see below). Defaults to `utf8`. -## File formats +### File formats Redis RDB files come in a number of formats. [Sripathi Krishnan (@sripathikrishnan)](https://github.com/sripathikrishnan) does an excellent job of documenting the [internal structure](https://github.com/sripathikrishnan/redis-rdb-tools/blob/master/docs/RDB_File_Format.textile) and what the differences are between [different versions](https://github.com/sripathikrishnan/redis-rdb-tools/blob/master/docs/RDB_Version_History.textile). The parser currently doesn't pay any attention to the version of the file format. It understands (almost) all of the structures that can be found in the file and will handle them appropriately. -## Output +### Output As mentioned above, the parser produces objects as its output. The following objects are produced: -### Header +#### Events + +The parser emits an `error` event when it detects a problem with the RDB file. + +#### Header This object is produced when the "magic header" at the beginning of the file is parsed. 
It is of little use to downstream components, but is provided for completeness and in anticipation of creating an RDB writer component. ```javascript { type: 'header', - version: + version: , + offset: } ``` -### Database +#### Database -This object is produced when a "database" record is found. This indicates that any subsequent keys belong to the given database. This object can be produced multiple times in the following sequence: `database: 0` `key-value` `key-value` `key-value` `database:1` `key-value` `key-value`, etc. Downstream components have little use for this object because the subsequent key objects also carry the database information. +This object is produced when a "database" record is found. This indicates that any subsequent keys belong to the given database. This object can be produced multiple times in the following sequence: `database: 0`, `key-value`, `key-value`, `key-value`, `database:1`, `key-value`, `key-value`, etc. Downstream components have little use for this object because the subsequent key objects also carry the database information. ```javascript { type: 'database', - database: + number: , + offset: } ``` -### Key +#### Key This is the primary output of the parser. One key record is produced for each key-value pair found in the store. @@ -106,7 +120,8 @@ This is the primary output of the parser. One key record is produced for each ke database: , key: , expiry: , - value: + value: , + offset: } ``` @@ -119,7 +134,7 @@ This is the primary output of the parser. One key record is produced for each ke * Hashes are `Objects` whose keys and values map to the keys and values of the Redis hash * Sorted sets (zsets) are `Objects` whose keys are the sorted set keys and whose values are the scores -#### String interpretation +##### String interpretation Redis keys and values are "binary safe". 
This means that Redis treats them as just arrays of bytes and places no further interpretation on them - in particular it doesn't attempt to interpret them as strings with particular character encodings. (This isn't quite true, as Redis does understand keys and values which consist wholly of the ASCII characters '0'-'9' as in encodes them specially in RDB files and provides commands such as `INCR` and `HINCRBY` which understand the semantics of numeric values. But let's move on...) @@ -134,7 +149,7 @@ The parser uses the character encoding specified on construction (default `utf8` In this way, the parser presents a consistent view of the Redis store - all primitives are `Strings`. -#### Expiry magic +##### Expiry magic RDB files have two different encodings of key expiry - either seconds or milliseconds since ["Unix epoch"](http://en.wikipedia.org/wiki/Unix_epoch). @@ -154,7 +169,8 @@ This object represents the end of the file (almost... a CRC may follow). It is o ```javascript { - type: 'end' + type: 'end', + offset: } ``` @@ -164,12 +180,88 @@ Some versions of the RDB file format can contain a CRC checksum at the end of th ```javascript { - type: 'crc' + type: 'crc', + offset: } ``` Redis has a configuration option to disable the CRC (`rdbchecksum no`). If CRC is disabled, this object will still be produced. +## Writer + +The writer is also a transformer. If you pass it objects in the form produced by the [parser](#parser), it will produce a byte stream consisting of an RDB file. Probably the best thing to do with this is write it to disk by piping the writer to a [file writer stream](http://nodejs.org/api/fs.html#fs_fs_createwritestream_path_options). + +### Constructor options + +```javascript +var writer = new Writer(options); +``` + +`options` is an object with the following: + +* `encoding`: the character encoding to use when converting to and from `String` (see the [parser documentation](#string-interpretation)). Defaults to `utf8`. 
+* `compressionThreshold`: how large a given string is before the writer attempts to compress it. Like Redis, this defaults to `4`. When a string is larger than this threshold, the writer will compress it, but only write out the compressed version if it is actually smaller. This is consistent with Redis' behaviour. However, it should be noted that this can consume a large amount of CPU by compressing keys and values and then discarding the compressed versions if your keys and values are small or otherwise not very compressible. You may wish to increase this threshold to improve throughput at the expense of the output RDB size. + +### Output + +#### File format + +The writer currently only produces [version 6](https://github.com/sripathikrishnan/redis-rdb-tools/blob/master/docs/RDB_Version_History.textile#version-6) files. It doesn't, however, use all of the features of this file version. If you have a requirement for older file versions, please raise an issue. + +#### Events + +The writer emits an `error` event when it detects a problem with its input - for example, objects in the wrong order. + +### Input + +The writer takes as input the same objects that the parser produces as output. The writer ignores the `offset` field on any input objects as this isn't part of the RDB file format, but is provided by the parser for information/debugging purposes. + +#### Header + +When it receives a `header` object, the writer writes an RDB header to the output with the same version number as the incoming header object. *Note:* even though the writer emits a header with the same version as the input object, it doesn't adjust any other aspect of its output and still uses structures only found in later versions of the file format. This may change in future. If it causes you problems, please raise an issue. + +#### Database + +The writer ignores `database` objects. 
It gets the database information from the key objects and switches between databases as necessary based on that information. + +#### Key + +`Key` objects are written to the output RDB file stream using only the most simple encoding for each type. This will generally mean that your RDB files are not as compact as they may otherwise be. If this is a problem for you and you need the newer 'zip' encodings, please raise an issue. + +#### End + +The writer will write an EOF marker into the RDB stream when it receives this object. But remember... that's not the end... + +#### CRC + +After sending an `end` object, you will need to send a `crc` object. (*Note:* the parser already produces these objects in this order.) When it receives this object it will write out the CRC of the bytes already written. + +At this point, the writer will not accept any more objects and will produce an `error` event if any attempt is made to send more objects. The RDB stream is complete at this point and the writer should be finalised in the normal ways - e.g. by calling [`end()`](http://nodejs.org/api/stream.html#stream_writable_end_chunk_encoding_callback) if you're using the writer directly or by closing down the pipeline if you're piping into it. + +## Protocol Emitter + +The protocol emitter is also a transformer. It takes arrays representing Redis commands as input and produces raw Redis network protocol as output. The output is suitable for piping into `redis-cli --pipe`. + +### Constructor options + +```javascript +var protocolEmitter = new ProtocolEmitter(options); +``` + +`options` is an object with the following: + +* `encoding`: the character encoding to use when converting the Redis commands from `String` to network protocol bytes. Defaults to `utf8`.
+ +### Input + +Feed the emitter arrays which look like this: + +```javascript +['HINCRBY', 'user:1234', 'failedLogins', '1'] +['SET', 'status', 'running'] +['ZINCRBY', 'popular', '1', 'https://github.com/codeaholics/node-rdb-tools'] +``` + ## Known Issues * Doesn't support binary keys/values and likely never will. Get in touch if you REALLY need this... @@ -179,9 +271,10 @@ Redis has a configuration option to disable the CRC (`rdbchecksum no`). If CRC i ## To do -- [ ] I don't believe any of the test RDB files have expiries in seconds (verify and create new test if necessary). -- [ ] All of the test RDBs claim to be version 3, even though many of them use features from later versions. Explicitly test later formats if possible. +- [x] I don't believe any of the test RDB files have expiries in seconds (verify and create new test if necessary). - [x] Sorted Set encoding is [not documented](https://github.com/sripathikrishnan/redis-rdb-tools/wiki/Redis-RDB-Dump-File-Format#sorted-set-encoding) and none of the test RDBs appear to use it. Is it obsoleted by more recent encodings for sorted sets? +- [ ] Writer only produces version 6 RDBs. This is probably good enough! +- [ ] Writer doesn't use any of the more compact 'zip' encodings. 
## Acknowledgements diff --git a/bin/rdbdump b/bin/rdbdump old mode 100644 new mode 100755 diff --git a/binding.gyp b/binding.gyp index 33d8703..5e8245c 100644 --- a/binding.gyp +++ b/binding.gyp @@ -3,7 +3,7 @@ { "target_name": "Crc64", "sources": [ - "src/crc64.c", + "src/crc-64-jones.c", "src/Crc64.cc" ] } diff --git a/lib/parser.js b/lib/parser.js index 62a2af3..0d1a8cf 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -31,7 +31,9 @@ function Parser(options) { return new Parser(options); } - Transform.call(this, {objectMode: true}); + Transform.call(this); + this._writableState.objectMode = false; + this._readableState.objectMode = true; options = options || {}; @@ -117,7 +119,7 @@ function Parser(options) { function onExpirySecs() { bytes(5, function(buffer, output) { - onKey(buffer.readInt32LE(0), buffer[4]); + onKey(buffer.readInt32LE(0) * 1000, buffer[4]); }); } @@ -140,7 +142,6 @@ function Parser(options) { expiry: expiry, offset: startOfRecord }; - switch(valueType) { case 0: object.rtype = 'string'; @@ -214,6 +215,7 @@ function Parser(options) { nextRecord(); }) } + function onHashEncodedValue(object) { getLengthEncoding(function(n, special, output) { if (special) throw new Error('Unexpected special length encoding in hash'); @@ -493,7 +495,7 @@ function Parser(options) { break; case 2: bytes(4, function(buffer) { - cb(buffer.readInt32BE(0), false, output); + cb(buffer.readUInt32BE(0), false, output); }); break; case 3: @@ -506,10 +508,14 @@ function Parser(options) { function getBytes(cb) { getLengthEncoding(function(n, special, output) { if (!special) { - bytes(n, function(buffer, output) { - if (buffer.length != n) throw new Error('Incorrect read length'); - cb(buffer, output); - }); + if (n != 0) { + bytes(n, function(buffer, output) { + if (buffer.length != n) throw new Error('Incorrect read length'); + cb(buffer, output); + }); + } else { + cb(new Buffer(0), output); + } } else { switch (n) { case 0: diff --git a/lib/protocol-emitter.js 
b/lib/protocol-emitter.js new file mode 100644 index 0000000..e83b5e3 --- /dev/null +++ b/lib/protocol-emitter.js @@ -0,0 +1,61 @@ +// Copyright 2013 Danny Yates + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var Transform = require('stream').Transform, + util = require('util'), + crlf = new Buffer('\r\n', 'ascii'); + +exports = module.exports = ProtocolEmitter; + +util.inherits(ProtocolEmitter, Transform); + +function ProtocolEmitter(options) { + if (!(this instanceof ProtocolEmitter)) { + return new ProtocolEmitter(options); + } + + Transform.call(this); + this._writableState.objectMode = true; + this._readableState.objectMode = false; + + options = options || {}; + + var self = this; + var encoding = options.encoding || 'utf8'; + + self._transform = function(obj, encoding, cb) { + if (util.isArray(obj)) { + handleArray(obj); + cb(); + } else { + cb(new Error('Unexpected chunk received')); + } + } + + function handleArray(obj) { + var bufs = []; + + bufs.push(new Buffer('*' + obj.length, 'ascii')); + bufs.push(crlf); + for (var i = 0; i < obj.length; i++) { + var value = new Buffer(obj[i], 'utf8'); + bufs.push(new Buffer('$' + value.length, 'ascii')); + bufs.push(crlf); + bufs.push(value); + bufs.push(crlf); + } + + self.push(Buffer.concat(bufs)); + } +} diff --git a/lib/writer.js b/lib/writer.js new file mode 100644 index 0000000..4db310d --- /dev/null +++ b/lib/writer.js @@ -0,0 +1,246 @@ +// Copyright 2013 Danny Yates + +// Licensed under the Apache 
License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var Transform = require('stream').Transform, + lzf = require('lzf'), + util = require('util'), + Int64 = require('int64-native'), + Crc64 = require('../build/Release/Crc64.node').Crc64; + +exports = module.exports = Writer; + +util.inherits(Writer, Transform); + +function Writer(options) { + if (!(this instanceof Writer)) { + return new Writer(options); + } + + Transform.call(this); + this._writableState.objectMode = true; + this._readableState.objectMode = false; + + options = options || {}; + + var self = this; + var currentDatabase = undefined; + var encoding = options.encoding || 'utf8'; + var compressionThreshold = options.compressionThreshold || 4; // 4 is what Redis uses. 
+ var crc = new Crc64(); + var outputBuffers = []; + var expectedNext = ['header']; + var handlers = {}; + + self._transform = function(obj, encoding, cb) { + try { + handleObject(obj, cb); + } catch(e) { + self.emit('error', e); + } + } + + function handleObject(obj, cb) { + if (!obj || !obj.type) throw new Error('Unexpected object received'); + if (expectedNext.indexOf(obj.type) == -1) throw new Error('Unexpected object received: ' + obj.type + '; was expecting one of: ' + expectedNext); + + handlers[obj.type](obj, function() { + if (outputBuffers.length) { + var output = Buffer.concat(outputBuffers); + crc.push(output); + self.push(output); + outputBuffers = []; + } + expectedNext = Array.prototype.slice.call(arguments, 0); + cb(); + }); + } + + handlers.header = function(obj, next) { + var header = 'REDIS' + ('000' + obj.version).slice(-4); + output(new Buffer(header, 'ascii')); + next('database', 'end'); + } + + handlers.database = function(obj, next) { + // don't do anything explicit with database objects; switch databases based on the key objects + next('database', 'key', 'end'); + } + + handlers.key = function(obj, next) { + if (obj.database != currentDatabase) { + switchDatabase(obj.database); + } + + if (typeof(obj.expiry) != 'undefined') { + outputExpiry(obj.expiry); + } + + handlers[obj.rtype + 'Key'](obj); + + next('database', 'key', 'end'); + } + + handlers.end = function(obj, next) { + output(new Buffer([0xFF])); + next('crc'); + } + + handlers.crc = function(obj, next) { + // ignore the CRC object itself, and use this as a signal to write a CRC + output(crc.value()); + next(); + } + + handlers.stringKey = function(obj) { + output(new Buffer([0])); + outputString(obj.key); + outputString(obj.value); + } + + handlers.listKey = function(obj) { + output(new Buffer([1])); + outputString(obj.key); + outputLengthEncoding(obj.value.length, false); + for (var i = 0, n = obj.value.length; i < n; i++) { + outputString(obj.value[i]); + } + } + + handlers.setKey 
= function(obj) { + output(new Buffer([2])); + outputString(obj.key); + outputLengthEncoding(obj.value.length, false); + for (var i = 0, n = obj.value.length; i < n; i++) { + outputString(obj.value[i]); + } + } + + handlers.zsetKey = function(obj) { + output(new Buffer([3])); + outputString(obj.key); + outputLengthEncoding(Object.keys(obj.value).length, false); + for (key in obj.value) { + outputString(key); + outputString(obj.value[key]); + } + } + + handlers.hashKey = function(obj) { + output(new Buffer([4])); + outputString(obj.key); + outputLengthEncoding(Object.keys(obj.value).length, false); + for (key in obj.value) { + outputString(key); + outputString(obj.value[key]); + } + } + + function outputExpiry(expiry) { + if (expiry % 1000 == 0) { + var buffer = new Buffer(5); + + buffer.writeUInt8(0xFD, 0); + buffer.writeInt32LE(expiry / 1000, 1); + } else { + var buffer = new Buffer(9); + var int64 = new Int64(expiry); + + buffer.writeUInt8(0xFC, 0); + buffer.writeUInt32LE(int64.low32(), 1); + buffer.writeUInt32LE(int64.high32(), 5); + } + + output(buffer); + } + + function switchDatabase(n) { + output(new Buffer([0xFE])); + outputLengthEncoding(n, false); + currentDatabase = n; + } + + function outputLengthEncoding(n, special) { + if (n < 0) throw new Error('Cannot write negative length encoding: ' + n); + + if (!special) { + if (n <= 0x3F) { + return output(new Buffer([n])); + } else if (n <= 0x3FFF) { + return output(new Buffer([0x40 | (n >> 8), n & 0xFF])); + } else if (n <= 0xFFFFFFFF) { + var buffer = new Buffer(5); + buffer.writeUInt8(0x80, 0); + buffer.writeUInt32BE(n, 1); + return output(buffer); + } + + throw new Error('Failed to write length encoding: ' + n); + } else { + if (n > 0x3F) { + throw new Error('Cannot encode ' + n + ' using special length encoding'); + } + return output(new Buffer([0xC0 | n])); + } + } + + function outputString(s) { + var buffer; + + // Does it look like a number? 
+ if (s.match(/^-?\d+$/)) { + var n = parseInt(s); + if (n >= -128 && n <= 127) { + buffer = new Buffer(1); + buffer.writeInt8(n, 0); + outputLengthEncoding(0, true); + output(buffer); + return; + } else if (n >= -32768 && n <= 32767) { + buffer = new Buffer(2); + buffer.writeInt16LE(n, 0); + outputLengthEncoding(1, true); + output(buffer); + return; + } else if (n >= -2147483648 && n <= 2147483647) { + buffer = new Buffer(4); + buffer.writeInt32LE(n, 0); + outputLengthEncoding(2, true); + output(buffer); + return; + } + } + + // It doesn't look like a number, or it's too big + buffer = new Buffer(s, encoding); + if (buffer.length > compressionThreshold) { + var compressed = lzf.compress(buffer); + if (compressed.length < buffer.length) { + // It saved some space + outputLengthEncoding(3, true); + outputLengthEncoding(compressed.length, false); + outputLengthEncoding(buffer.length, false); + output(compressed); + return; + } + } + + outputLengthEncoding(buffer.length, false); + output(buffer); + } + + function output(data) { + if (data instanceof Buffer) return outputBuffers.push(data); + throw new Error('Unknown output data type'); + } +} diff --git a/package.json b/package.json index 9276626..5c8756e 100644 --- a/package.json +++ b/package.json @@ -1,12 +1,14 @@ { "name": "rdb-tools", - "version": "0.0.4", + "version": "0.1.1", "description": "Redis RDB parsing, filtering and creating tools", "author": "Danny Yates ", - "licenses": [{ - "type": "Apache-2.0", - "url": "http://www.apache.org/licenses/LICENSE-2.0" - }], + "licenses": [ + { + "type": "Apache-2.0", + "url": "http://www.apache.org/licenses/LICENSE-2.0" + } + ], "main": "rdb-tools.js", "scripts": { "install": "node-gyp configure build", @@ -29,7 +31,8 @@ "mocha": "~1.11.0", "chai": "~1.7.2", "underscore": "~1.4.4", - "istanbul": "~0.1.40" + "istanbul": "~0.1.40", + "bl": "~0.1.1" }, "bin": { "rdbdump": "./bin/rdbdump" diff --git a/rdb-tools.js b/rdb-tools.js index c7d0a73..d0984d9 100644 --- 
a/rdb-tools.js +++ b/rdb-tools.js @@ -13,5 +13,7 @@ // limitations under the License. exports = module.exports = { - Parser: require('./lib/parser') + Parser: require('./lib/parser'), + Writer: require('./lib/writer'), + ProtocolEmitter: require('./lib/protocol-emitter') } diff --git a/src/crc64.c b/src/crc-64-jones.c similarity index 100% rename from src/crc64.c rename to src/crc-64-jones.c diff --git a/test/dumps/empty_string.rdb b/test/dumps/empty_string.rdb new file mode 100644 index 0000000..c31364c Binary files /dev/null and b/test/dumps/empty_string.rdb differ diff --git a/test/dumps/error_reporting.rdb b/test/dumps/error_reporting.rdb index 3a2b030..8c2824b 100644 Binary files a/test/dumps/error_reporting.rdb and b/test/dumps/error_reporting.rdb differ diff --git a/test/dumps/keys_with_expiry_secs.rdb b/test/dumps/keys_with_expiry_secs.rdb new file mode 100644 index 0000000..a9fee6b Binary files /dev/null and b/test/dumps/keys_with_expiry_secs.rdb differ diff --git a/test/dumps/utf8.rdb b/test/dumps/utf8.rdb new file mode 100644 index 0000000..4cef203 Binary files /dev/null and b/test/dumps/utf8.rdb differ diff --git a/test/rdb.js b/test/parser.js similarity index 95% rename from test/rdb.js rename to test/parser.js index 5fd4e88..c81c8e9 100644 --- a/test/rdb.js +++ b/test/parser.js @@ -248,7 +248,7 @@ describe('Parser', function() { assert.equal(data.allKeys[0]['abc'].rtype, 'zset'); done(); }) - }) + }); it('should report errors', function(done) { var complete = function() { @@ -261,7 +261,29 @@ describe('Parser', function() { } load('error_reporting.rdb', complete, err); - }) + }); + + it('should handle empty strings', function(done) { + load('empty_string.rdb', function(data) { + assert.equal(data.allKeys[0][''].value, 'abc'); + assert.equal(data.allKeys[0][''].rtype, 'string'); + done(); + }) + }); + + it('should handle keys with second expiries', function(done) { + load('keys_with_expiry_secs.rdb', function(data) { + 
assert.equal(data.allKeys[0]['foo'].expiry, 1374939348000); + done(); + }) + }); + + it('should handle UTF-8', function(done) { + load('utf8.rdb', function(data) { + assert.equal(data.allKeys[0]['\u00A3'].value, '\u00A9'); + done(); + }) + }); }) function load(database, cb, errback) { diff --git a/test/protocol-emitter.js b/test/protocol-emitter.js new file mode 100644 index 0000000..268554e --- /dev/null +++ b/test/protocol-emitter.js @@ -0,0 +1,60 @@ +// Copyright 2013 Danny Yates + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +var ProtocolEmitter = require('../rdb-tools').ProtocolEmitter, + assert = require('chai').assert, + BufferList = require('bl'), + bufferEqual = require('buffer-equal'); + +describe('Protocol Emitter', function() { + it('should convert arrays', function(done) { + var expected = makeExpected('*4', '$7', 'HINCRBY', '$9', 'user:1234', '$12', 'failedLogins', '$1', '1'); + + pass(['HINCRBY', 'user:1234', 'failedLogins', '1'], function(err, data) { + assert.equal(data.toString(), expected.toString()); + done(); + }); + }); + + it('should handle UTF-8', function(done) { + pass(['\u00a3'], function(err, data) { + assert.isTrue(bufferEqual(data.slice(8, 10), new Buffer([0xC2, 0xA3]))); + done(); + }); + }); + + it('should reject objects', function(done) { + assert.throw(pass.bind(this, {}), /Unexpected chunk received/); + done(); + }); +}) + +function makeExpected() { + var bl = new BufferList(); + + for (var i = 0; i < arguments.length; i++) { + bl.append(new Buffer(arguments[i], 'utf8')); + bl.append(new Buffer('\r\n', 'ascii')); + } + + return bl; +} + +function pass(obj, cb) { + var protocolEmitter = new ProtocolEmitter(); + var bl = new BufferList(cb); + + protocolEmitter.pipe(bl); + protocolEmitter.end(obj); +} diff --git a/test/writer.js b/test/writer.js new file mode 100644 index 0000000..9d6fa0b --- /dev/null +++ b/test/writer.js @@ -0,0 +1,121 @@ +// Copyright 2013 Danny Yates + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +var Parser = require('../rdb-tools').Parser, + Writer = require('../rdb-tools').Writer, + assert = require('chai').assert, + fs = require('fs'), + Writable = require('stream').Writable, + Transform = require('stream').Transform, + BufferList = require('bl'), + _ = require('underscore'); + +describe('Writer', function() { + describe('should round-trip all parser test files', function() { + _.each(fs.readdirSync('test/dumps'), function(f) { + if (!f.match(/error/)) { + it(f, function(done) { + this.test.slow(125); + roundTripTest(f, done); + }); + } + }); + }); + + describe('should fail on unexpected objects', function() { + var tests = [['buffer', new Buffer(0)], + ['string', 'hello world'], + ['null', null], + ['undefined', undefined], + ['object without type', {}], + ['wrong type of object', {type: 'database'}]]; + + _.each(tests, function(test) { + it(test[0], simpleErrorTest.bind(null, test[1])); + }); + }); + + it('should handle UTF-8', function(done) { + var writer = new Writer(), + bl = new BufferList(function(err, data) { + assert.equal(data.get(13), 0xC2); + assert.equal(data.get(14), 0xA3); + assert.equal(data.get(16), 0xC2); + assert.equal(data.get(17), 0xA9); + done(); + }); + + writer.pipe(bl); + + writer.write({ + type: 'header', + version: 6 + }); + + writer.write({ + type: 'database', + number: 0 + }); + + writer.end({ + type: 'key', + rtype: 'string', + database: 0, + key: '\u00A3', + value: '\u00A9' + }); + }); +}); + +function simpleErrorTest(obj, done) { + var writer = new Writer(); + + writer.on('error', function(e) { + assert.match(e.message, /Unexpected object/); + done(); + }); + + writer._transform(obj); +} + +function roundTripTest(f, done) { + var inputStream = fs.createReadStream('test/dumps/' + f), + parser = new Parser(), + inputCaptor = new Transform({objectMode: true}), + writer = new Writer(), + reparser = new Parser(), + outputCaptor = new Writable({objectMode: true}), + inputCaptives = [], + outputCaptives = []; + + 
inputCaptor._transform = function(obj, encoding, cb) { + delete obj.offset; + inputCaptives.push(obj); + this.push(obj); + cb(); + } + + outputCaptor._write = function(obj, encoding, cb) { + delete obj.offset; + outputCaptives.push(obj); + cb(); + } + + outputCaptor.on('finish', function() { + assert.deepEqual(outputCaptives, inputCaptives); + done(); + }); + + inputStream.pipe(parser).pipe(inputCaptor).pipe(writer).pipe(reparser).pipe(outputCaptor); +}