forked from nodejs/node
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstring.js
More file actions
184 lines (184 loc) · 6.96 KB
/
Copy pathstring.js
File metadata and controls
184 lines (184 loc) · 6.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
/**
 * Finds the longest string that both inputs start with.
 *
 * Characters are compared position by position from index 0 until either
 * string runs out or the first mismatch is hit.
 *
 * @param {string} str1 - First string; the result is sliced from this one.
 * @param {string} str2 - Second string.
 * @returns {string} The shared prefix ('' when there is none).
 */
export function longestCommonPrefix(str1, str2) {
  const limit = Math.min(str1.length, str2.length);
  let shared = 0;
  while (shared < limit && str1[shared] === str2[shared]) {
    shared++;
  }
  return str1.slice(0, shared);
}
/**
 * Finds the longest string that both inputs end with.
 *
 * @param {string} str1 - First string; the result is sliced from this one.
 * @param {string} str2 - Second string.
 * @returns {string} The shared suffix ('' when there is none).
 */
export function longestCommonSuffix(str1, str2) {
  if (!str1 || !str2) {
    return '';
  }
  const limit = Math.min(str1.length, str2.length);
  let shared = 0;
  // Walk backwards from the last character of each string in lockstep.
  while (
    shared < limit &&
    str1[str1.length - 1 - shared] === str2[str2.length - 1 - shared]
  ) {
    shared++;
  }
  // The zero case cannot go through slice: str1.slice(-0) is str1.slice(0),
  // which would hand back the whole string instead of the empty string.
  if (shared === 0) {
    return '';
  }
  return str1.slice(-shared);
}
/**
 * Swaps the leading `oldPrefix` of `string` for `newPrefix`.
 *
 * @param {string} string - The string to rewrite.
 * @param {string} oldPrefix - Prefix that `string` is expected to start with.
 * @param {string} newPrefix - Text to put in place of `oldPrefix`.
 * @returns {string} `newPrefix` followed by the remainder of `string`.
 * @throws {Error} If `string` does not start with `oldPrefix`.
 */
export function replacePrefix(string, oldPrefix, newPrefix) {
  if (string.startsWith(oldPrefix)) {
    const remainder = string.slice(oldPrefix.length);
    return newPrefix + remainder;
  }
  throw new Error(`string ${JSON.stringify(string)} doesn't start with prefix ${JSON.stringify(oldPrefix)}; this is a bug`);
}
/**
 * Swaps the trailing `oldSuffix` of `string` for `newSuffix`.
 *
 * @param {string} string - The string to rewrite.
 * @param {string} oldSuffix - Suffix that `string` is expected to end with.
 * @param {string} newSuffix - Text to put in place of `oldSuffix`.
 * @returns {string} `string` minus `oldSuffix`, followed by `newSuffix`.
 * @throws {Error} If `oldSuffix` is non-empty and `string` does not end with it.
 */
export function replaceSuffix(string, oldSuffix, newSuffix) {
  // An empty suffix is handled separately: negative-zero slicing
  // (slice(-0) / slice(0, -0)) does not select "nothing from the end".
  if (!oldSuffix) {
    return string + newSuffix;
  }
  if (string.endsWith(oldSuffix)) {
    const stem = string.slice(0, -oldSuffix.length);
    return stem + newSuffix;
  }
  throw new Error(`string ${JSON.stringify(string)} doesn't end with suffix ${JSON.stringify(oldSuffix)}; this is a bug`);
}
/**
 * Strips `oldPrefix` from the front of `string` by delegating to
 * replacePrefix with an empty replacement.
 *
 * @param {string} string - The string to strip.
 * @param {string} oldPrefix - Prefix to remove.
 * @returns {string} Whatever replacePrefix returns for an empty new prefix.
 */
export function removePrefix(string, oldPrefix) {
  const stripped = replacePrefix(string, oldPrefix, '');
  return stripped;
}
/**
 * Strips `oldSuffix` from the end of `string` by delegating to
 * replaceSuffix with an empty replacement.
 *
 * @param {string} string - The string to strip.
 * @param {string} oldSuffix - Suffix to remove.
 * @returns {string} Whatever replaceSuffix returns for an empty new suffix.
 */
export function removeSuffix(string, oldSuffix) {
  const stripped = replaceSuffix(string, oldSuffix, '');
  return stripped;
}
/**
 * Returns the leading portion of `string2` whose length is the value
 * reported by overlapCount(string1, string2).
 *
 * @param {string} string1 - String whose end is compared.
 * @param {string} string2 - String whose start is compared and sliced.
 * @returns {string} The first overlapCount(string1, string2) characters of `string2`.
 */
export function maximumOverlap(string1, string2) {
  const overlapLength = overlapCount(string1, string2);
  return string2.slice(0, overlapLength);
}
// Nicked from https://stackoverflow.com/a/60422853/1709587
/**
 * Computes how many characters at the end of `a` line up with the
 * characters at the start of `b`.
 *
 * @param {string} a - String whose tail is examined.
 * @param {string} b - String whose head is examined.
 * @returns {number} Number of leading characters of `b` matched against the tail of `a`.
 */
function overlapCount(a, b) {
  // An overlap can never be longer than the shorter string, so only the last
  // `span` characters of `a` and the first `span` characters of `b` matter.
  const span = Math.min(a.length, b.length);
  const firstA = Math.max(0, a.length - b.length);

  // Phase 1: for every index of `b`, record the index to fall back to when a
  // mismatch happens there. Only `b` is needed to build this table.
  const fallback = Array(span);
  fallback[0] = 0;
  let matched = 0; // lags behind `pos` while the table is being built
  for (let pos = 1; pos < span; pos++) {
    // When the characters agree, reuse the earlier fallback so that a later
    // mismatch skips straight past the identical character (an optimisation).
    fallback[pos] = b[pos] === b[matched] ? fallback[matched] : matched;
    while (matched > 0 && b[pos] !== b[matched]) {
      matched = fallback[matched];
    }
    if (b[pos] === b[matched]) {
      matched++;
    }
  }

  // Phase 2: scan the relevant tail of `a`, following the table on mismatch.
  matched = 0;
  for (let pos = firstA; pos < a.length; pos++) {
    while (matched > 0 && a[pos] !== b[matched]) {
      matched = fallback[matched];
    }
    if (a[pos] === b[matched]) {
      matched++;
    }
  }
  return matched;
}
/**
 * Reports whether every line break in the string is a Windows one (\r\n).
 *
 * @param {string} string - Text to inspect.
 * @returns {boolean} True when the string contains at least one \r\n and no
 *   \n that lacks a preceding \r.
 */
export function hasOnlyWinLineEndings(string) {
  if (!string.includes('\r\n')) {
    return false;
  }
  // A \n at index 0 has no preceding character at all, so the regex below
  // cannot catch it; it is ruled out separately.
  if (string.startsWith('\n')) {
    return false;
  }
  const hasBareLineFeed = /[^\r]\n/.test(string);
  return !hasBareLineFeed;
}
/**
 * Reports whether every line break in the string is a Unix one (\n).
 *
 * @param {string} string - Text to inspect.
 * @returns {boolean} True when the string contains at least one \n and no \r\n.
 */
export function hasOnlyUnixLineEndings(string) {
  if (string.includes('\r\n')) {
    return false;
  }
  return string.includes('\n');
}
/**
 * Runs `segmenter.segment()` over `string` and returns the segment texts,
 * gluing adjacent whitespace segments together into a single part.
 *
 * Whitespace segments can end up next to one another for two reasons:
 * - newlines always get their own segment
 * - where a diacritic is attached to a whitespace character in the text, the
 *   segment ends after the diacritic, so e.g. " \u0300 " becomes two segments.
 *
 * @param {string} string - Text to split.
 * @param {{segment: Function}} segmenter - Object whose `segment(string)`
 *   yields items carrying a `segment` string property (a word segmenter).
 * @returns {string[]} The parts, in order.
 */
export function segment(string, segmenter) {
  const containsWs = (text) => /\s/.test(text);
  const merged = [];
  Array.from(segmenter.segment(string)).forEach(({ segment: text }) => {
    const lastIndex = merged.length - 1;
    const extendsWsRun =
      lastIndex >= 0 && containsWs(merged[lastIndex]) && containsWs(text);
    if (extendsWsRun) {
      merged[lastIndex] += text;
    } else {
      merged.push(text);
    }
  });
  return merged;
}
// The functions below take a `segmenter` argument so that, when called from
// diffWords when it is using a segmenter, they can use a notion of what
// constitutes "whitespace" that is consistent with the segmenter.
//
// USUALLY this will be identical to the result of the non-segmenter-based
// logic, but it differs in at least one case: when whitespace characters are
// modified by diacritics. A word segmenter considers these diacritics to be
// part of the whitespace, whereas our non-segmenter-based logic does not.
//
// Because the segmenter-based approach necessarily requires segmenting the
// entire string, we offer a leadingAndTrailingWs function to allow getting the
// whitespace prefix AND whitespace suffix with a single call to the segmenter,
// for efficiency's sake.
/**
 * Returns the run of whitespace at the end of `string`.
 *
 * @param {string} string - Text to inspect.
 * @param {object} [segmenter] - When given, the answer is taken from
 *   leadingAndTrailingWs(string, segmenter) instead of the loop below.
 * @returns {string} The trailing whitespace ('' when there is none).
 */
export function trailingWs(string, segmenter) {
  if (segmenter) {
    const [, trailing] = leadingAndTrailingWs(string, segmenter);
    return trailing;
  }
  // A hand-written scan is used here on purpose, rather than
  //     return string.match(/\s*$/)[0]
  // for two reasons:
  // 1. the trap described at https://markamery.com/blog/quadratic-time-regexes/ means the regex
  //    takes O(n²) time in the worst case (specifically when there is a massive run of
  //    NON-TRAILING whitespace in `string`), and
  // 2. the fix proposed in the same blog post, a negative lookbehind, is incompatible with old
  //    Safari versions that we'd like to not break if possible (see
  //    https://github.com/kpdecker/jsdiff/pull/550)
  let end = string.length;
  while (end > 0 && /\s/.test(string[end - 1])) {
    end--;
  }
  return string.substring(end);
}
/**
 * Returns the run of whitespace at the start of `string`.
 *
 * @param {string} string - Text to inspect.
 * @param {object} [segmenter] - When given, the answer is taken from
 *   leadingAndTrailingWs(string, segmenter) instead of the regex below.
 * @returns {string} The leading whitespace ('' when there is none).
 */
export function leadingWs(string, segmenter) {
  if (segmenter) {
    const [leading] = leadingAndTrailingWs(string, segmenter);
    return leading;
  }
  // A start-anchored regex is fine here; the concerns that force trailingWs
  // to use an explicit loop don't apply.
  const found = string.match(/^\s*/);
  if (found === null) {
    return '';
  }
  return found[0];
}
/**
 * Returns the whitespace at the start and at the end of `string` as a pair.
 *
 * Without a segmenter this simply combines leadingWs(string) and
 * trailingWs(string). With one, the string is split once via segment() and
 * the first and last parts are inspected: each is reported if it contains
 * whitespace, otherwise '' is reported in its place.
 *
 * @param {string} string - Text to inspect.
 * @param {object} [segmenter] - Optional segmenter; its
 *   resolvedOptions().granularity must be 'word'.
 * @returns {string[]} A two-element array: [leading whitespace, trailing whitespace].
 * @throws {Error} If a segmenter is passed whose granularity is not 'word'.
 */
export function leadingAndTrailingWs(string, segmenter) {
  if (!segmenter) {
    return [leadingWs(string), trailingWs(string)];
  }
  const { granularity } = segmenter.resolvedOptions();
  if (granularity !== 'word') {
    throw new Error('The segmenter passed must have a granularity of "word"');
  }
  const parts = segment(string, segmenter);
  // When `parts` is empty the lookups below are undefined, which the regex
  // test treats as containing no whitespace, so '' is reported.
  const wsOrEmpty = (part) => (/\s/.test(part) ? part : '');
  const head = wsOrEmpty(parts[0]);
  const tail = wsOrEmpty(parts[parts.length - 1]);
  return [head, tail];
}