Skip to content

Commit 129a3af

Browse files
committed
Editor: word count: exclude more characters
Also, only exclude these characters for the `words` type; they should still be counted for the other types. Add the `ASCIIOnly` option to the uglify config to preserve escaped Unicode characters. See #30966. Fixes #27391. git-svn-id: https://develop.svn.wordpress.org/trunk@33292 602fd350-edb4-49c9-b593-d223f7449a82
1 parent 4b975d8 commit 129a3af

3 files changed

Lines changed: 53 additions & 4 deletions

File tree

Gruntfile.js

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -399,6 +399,9 @@ module.exports = function(grunt) {
399399
}
400400
},
401401
uglify: {
402+
options: {
403+
ASCIIOnly: true
404+
},
402405
core: {
403406
expand: true,
404407
cwd: SOURCE_DIR,

src/wp-admin/js/word-count.js

Lines changed: 41 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,42 @@
1414
WordCounter.prototype.settings = {
1515
HTMLRegExp: /<\/?[a-z][^>]*?>/gi,
1616
spaceRegExp: /&nbsp;|&#160;/gi,
17-
removeRegExp: /[0-9.(),;:!?%#$¿'"_+=\\\/-]+/g,
17+
connectorRegExp: /--|\u2014/gi,
18+
removeRegExp: new RegExp( [
19+
'[',
20+
// Basic Latin (extract)
21+
'\u0021-\u0040\u005B-\u0060\u007B-\u007E',
22+
// Latin-1 Supplement (extract)
23+
'\u0080-\u00BF\u00D7\u00F7',
24+
// General Punctuation
25+
// Superscripts and Subscripts
26+
// Currency Symbols
27+
// Combining Diacritical Marks for Symbols
28+
// Letterlike Symbols
29+
// Number Forms
30+
// Arrows
31+
// Mathematical Operators
32+
// Miscellaneous Technical
33+
// Control Pictures
34+
// Optical Character Recognition
35+
// Enclosed Alphanumerics
36+
// Box Drawing
37+
// Block Elements
38+
// Geometric Shapes
39+
// Miscellaneous Symbols
40+
// Dingbats
41+
// Miscellaneous Mathematical Symbols-A
42+
// Supplemental Arrows-A
43+
// Braille Patterns
44+
// Supplemental Arrows-B
45+
// Miscellaneous Mathematical Symbols-B
46+
// Supplemental Mathematical Operators
47+
// Miscellaneous Symbols and Arrows
48+
'\u2000-\u2BFF',
49+
// Supplemental Punctuation
50+
'\u2E00-\u2E7F',
51+
']'
52+
].join( '' ), 'g' ),
1853
wordsRegExp: /\S\s+/g,
1954
charactersRegExp: /\S/g,
2055
allRegExp: /[^\f\n\r\t\v\u00ad\u2028\u2029]/g,
@@ -31,7 +66,11 @@
3166

3267
text = text.replace( this.settings.HTMLRegExp, '\n' );
3368
text = text.replace( this.settings.spaceRegExp, ' ' );
34-
text = text.replace( this.settings.removeRegExp, '' );
69+
70+
if ( type === 'words' ) {
71+
text = text.replace( this.settings.connectorRegExp, ' ' );
72+
text = text.replace( this.settings.removeRegExp, '' );
73+
}
3574

3675
text = text.match( this.settings[ type + 'RegExp' ] );
3776

tests/qunit/wp-admin/js/word-count.js

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,9 +33,16 @@
3333
},
3434
{
3535
message: 'Punctuation.',
36-
string: 'It\'s two three... 4?',
36+
string: 'It\'s two three \u2026 4?',
3737
words: 3,
38-
characters: 11,
38+
characters: 15,
39+
all: 19
40+
},
41+
{
42+
message: 'Em dash.',
43+
string: 'one\u2014two--three',
44+
words: 3,
45+
characters: 14,
3946
all: 14
4047
}
4148
], function( test ) {

0 commit comments

Comments
 (0)