Skip to content

Commit 5131c5f

Browse files
committed
Improved U.keywords() for Chinese / Japanese chars.
1 parent 54233b8 commit 5131c5f

3 files changed

Lines changed: 59 additions & 32 deletions

File tree

changes.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@
101101
- updated: JavaScript compressor, now optimizes multiple `var` declarations
102102
- updated: `CORS()` without arguments for all routes, methods and origins
103103
- updated: `CORS()` tries to join multiple same preferences to one
104+
- updated: `U.keywords()` for Chinese/Japanese characters
104105

105106
- fixed: mail attachments
106107
- fixed: mail `message.manually()`

nosql.js

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -52,17 +52,18 @@ const EXTENSION_META = '.meta';
5252
const EXTENSION_COUNTER = '-counter2';
5353
const EXTENSION_INDEXES = '-indexes';
5454
const BINARY_HEADER_LENGTH = 2000;
55-
const NEWLINE = '\n';
55+
const COUNTER_MMA = [0, 0];
56+
const DIRECTORYLENGTH = 9;
5657
const EMPTYARRAY = [];
57-
const REG_CLEAN = /^[\s]+|[\s]+$/g;
58-
const INMEMORY = {};
5958
const FLAGS_READ = ['get'];
60-
const COUNTER_MMA = [0, 0];
61-
const REGNUMBER = /^\d+$/;
62-
const REGINDEXCHAR = /[a-z]{1,2}/;
63-
const REGBOOL = /":true/g; // for updates of boolean types
59+
const INMEMORY = {};
6460
const JSONBOOL = '":true ';
65-
const DIRECTORYLENGTH = 9;
61+
const NEWLINE = '\n';
62+
const REGBOOL = /":true/g; // for updates of boolean types
63+
const REGCHINA = /[\u3400-\u9FBF]/;
64+
const REGCLEAN = /^[\s]+|[\s]+$/g;
65+
const REGINDEXCHAR = /[a-z]{1,2}/;
66+
const REGNUMBER = /^\d+$/;
6667
const IMAGES = { gif: 1, jpg: 1, jpeg: 1, png: 1, svg: 1 };
6768
const BINARYREADDATA = { start: BINARY_HEADER_LENGTH };
6869
const BINARYREADDATABASE64 = { start: BINARY_HEADER_LENGTH, encoding: 'base64' };
@@ -3250,8 +3251,12 @@ DatabaseBuilder.prototype.fulltext = function(name, value, weight) {
32503251
if (value instanceof Array) {
32513252
for (var i = 0; i < value.length; i++)
32523253
value[i] = value[i].toLowerCase();
3253-
} else
3254-
value = value.toLowerCase().split(' ');
3254+
} else {
3255+
if (REGCHINA.test(value))
3256+
value = value.split('');
3257+
else
3258+
value = value.toLowerCase().split(' ');
3259+
}
32553260

32563261
var count = 1;
32573262

@@ -5028,7 +5033,7 @@ Binary.prototype.read = function(id, callback, count) {
50285033
var stream = Fs.createReadStream(filename, BINARYREADMETA);
50295034
stream.on('error', err => callback(err));
50305035
stream.on('data', function(buffer) {
5031-
var json = buffer.toString('utf8').replace(REG_CLEAN, '');
5036+
var json = buffer.toString('utf8').replace(REGCLEAN, '');
50325037
if (json) {
50335038
var meta = JSON.parse(json, jsonparser);
50345039
stream = Fs.createReadStream(filename, BINARYREADDATA);
@@ -5070,7 +5075,7 @@ Binary.prototype.readbase64 = function(id, callback, count) {
50705075
var stream = Fs.createReadStream(filename, BINARYREADMETA);
50715076
stream.on('error', err => callback(err));
50725077
stream.on('data', function(buffer) {
5073-
var json = buffer.toString('utf8').replace(REG_CLEAN, '');
5078+
var json = buffer.toString('utf8').replace(REGCLEAN, '');
50745079
if (json) {
50755080
var meta = JSON.parse(json, jsonparser);
50765081
stream = Fs.createReadStream(filename, BINARYREADDATABASE64);
@@ -5211,7 +5216,7 @@ Binary.prototype.browse = function(directory, callback) {
52115216
var stream = Fs.createReadStream(target + '/' + item, BINARYREADMETA);
52125217

52135218
stream.on('data', function(buffer) {
5214-
var json = framework_utils.createBuffer(buffer, 'binary').toString('utf8').replace(REG_CLEAN, '').parseJSON(true);
5219+
var json = framework_utils.createBuffer(buffer, 'binary').toString('utf8').replace(REGCLEAN, '').parseJSON(true);
52155220
if (json) {
52165221
var id = item.substring(0, item.length - le);
52175222
json.id = 'B' + json.date + 'T' + id;
@@ -5264,7 +5269,7 @@ Binary.prototype.all = function(callback) {
52645269
var stream = Fs.createReadStream(target + '/' + item, BINARYREADMETA);
52655270

52665271
stream.on('data', function(buffer) {
5267-
var json = framework_utils.createBuffer(buffer, 'binary').toString('utf8').replace(REG_CLEAN, '').parseJSON(true);
5272+
var json = framework_utils.createBuffer(buffer, 'binary').toString('utf8').replace(REGCLEAN, '').parseJSON(true);
52685273
if (json) {
52695274
json.id = item.substring(l, item.length - le);
52705275
json.ctime = stat.ctime;

utils.js

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ const regexpDECRYPT = /-|_/g;
6666
const regexpENCRYPT = /\/|\+/g;
6767
const regexpUNICODE = /\\u([\d\w]{4})/gi;
6868
const regexpTERMINAL = /[\w\S]+/g;
69+
const regexpY = /y/g;
70+
const regexpN = /\n/g;
71+
const regexpCHARS = /\W|_/g;
72+
const regexpCHINA = /[\u3400-\u9FBF]/;
6973
const SOUNDEX = { a: '', e: '', i: '', o: '', u: '', b: 1, f: 1, p: 1, v: 1, c: 2, g: 2, j: 2, k: 2, q: 2, s: 2, x: 2, z: 2, d: 3, t: 3, l: 4, m: 5, n: 5, r: 6 };
7074
const ENCODING = 'utf8';
7175
const NEWLINE = '\r\n';
@@ -353,14 +357,14 @@ exports.keywords = function(content, forSearch, alternative, max_count, max_leng
353357
for (var i = 0, length = content.length; i < length; i++) {
354358
if (!content[i])
355359
continue;
356-
var tmp = (forSearch ? content[i].removeDiacritics().toLowerCase().replace(/y/g, 'i') : content[i].toLowerCase()).replace(/\n/g, ' ').split(' ');
360+
var tmp = (forSearch ? content[i].removeDiacritics().toLowerCase().replace(regexpY, 'i') : content[i].toLowerCase()).replace(regexpN, ' ').split(' ');
357361
if (!tmp || !tmp.length)
358362
continue;
359363
for (var j = 0, jl = tmp.length; j < jl; j++)
360364
words.push(tmp[j]);
361365
}
362366
} else
363-
words = (forSearch ? content.removeDiacritics().toLowerCase().replace(/y/g, 'i') : content.toLowerCase()).replace(/\n/g, ' ').split(' ');
367+
words = (forSearch ? content.removeDiacritics().toLowerCase().replace(regexpY, 'i') : content.toLowerCase()).replace(regexpN, ' ').split(' ');
364368

365369
if (!words)
366370
words = [];
@@ -369,17 +373,34 @@ exports.keywords = function(content, forSearch, alternative, max_count, max_leng
369373
var counter = 0;
370374

371375
for (var i = 0, length = words.length; i < length; i++) {
372-
var word = words[i].trim();
376+
377+
var word = words[i].trim().replace(regexpCHARS, keywordscleaner);
378+
379+
if (regexpCHINA.test(word)) {
380+
381+
var tmpw = word.split('', max_count);
382+
383+
for (var j = 0; j < tmpw.length; j++) {
384+
word = tmpw[j];
385+
if (dic[word])
386+
dic[word]++;
387+
else
388+
dic[word] = 1;
389+
counter++;
390+
}
391+
392+
if (counter >= max_count)
393+
break;
394+
395+
continue;
396+
}
373397

374398
if (word.length < min_length)
375399
continue;
376400

377401
if (counter >= max_count)
378402
break;
379403

380-
if (forSearch)
381-
word = word.replace(/\W|_/g, '');
382-
383404
// Gets 80% length of word
384405
if (alternative) {
385406
if (isSoundex)
@@ -413,6 +434,10 @@ exports.keywords = function(content, forSearch, alternative, max_count, max_leng
413434
return keys;
414435
};
415436

437+
function keywordscleaner(c) {
438+
return c.charCodeAt(0) < 200 ? '' : c;
439+
}
440+
416441
function parseProxy(p) {
417442
var key = 'proxy_' + p;
418443
if (F.temporary.other[key])
@@ -638,11 +663,11 @@ function ProxyAgent(options) {
638663
}
639664

640665
ProxyAgent.prototype.createConnection = function(pending) {
641-
var self = this
666+
var self = this;
642667
self.createSocket(pending, function(socket) {
643668
pending.request.onSocket(socket);
644669
});
645-
}
670+
};
646671

647672
ProxyAgent.prototype.createSocket = function(options, callback) {
648673

@@ -661,16 +686,12 @@ ProxyAgent.prototype.createSocket = function(options, callback) {
661686

662687
req.setTimeout(3000);
663688
req.on('response', proxyagent_response);
664-
req.on('upgrade', function(res, socket, head) {
665-
setImmediate(onConnect, res, socket, head)
666-
});
667-
668-
req.on('connect', function(res, socket, head) {
689+
req.on('connect', function(res, socket) {
669690
if (res.statusCode === 200) {
670691
callback(socket);
671692
} else {
672-
var err = new Error('Proxy could not be established, code: ' + res.statusCode);
673-
err.code = 'ECONNRESET';
693+
var err = new Error('Proxy could not be established, code: ' + res.statusCode);
694+
err.code = 'ECONNRESET';
674695
options.request.emit('error', err);
675696
}
676697
});
@@ -680,15 +701,15 @@ ProxyAgent.prototype.createSocket = function(options, callback) {
680701
});
681702

682703
req.end();
683-
}
704+
};
684705

685-
function proxyagent_response() {
706+
function proxyagent_response(res) {
686707
res.upgrade = true;
687708
}
688709

689710
ProxyAgent.prototype.addRequest = function(req, options) {
690711
this.createConnection({ host: options.host, port: options.port, request: req });
691-
}
712+
};
692713

693714
function createSecureSocket(options, callback) {
694715
var self = this;

0 commit comments

Comments
 (0)