forked from sanbuphy/learn-coding-agent
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathbashPipeCommand.ts
More file actions
294 lines (261 loc) · 10.5 KB
/
bashPipeCommand.ts
File metadata and controls
294 lines (261 loc) · 10.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
import {
hasMalformedTokens,
hasShellQuoteSingleQuoteBug,
type ParseEntry,
quote,
tryParseShellCommand,
} from './shellQuote.js'
/**
 * Rearranges a command with pipes to place stdin redirect after the first command.
 * This fixes an issue where eval treats the entire piped command as a single unit,
 * causing the stdin redirect to apply to eval itself rather than the first command.
 *
 * Whenever the command cannot be safely tokenized and rebuilt, the whole
 * original command is single-quoted instead and `< /dev/null` is attached to
 * eval itself (see quoteWithEvalStdinRedirect).
 *
 * @param command - The raw shell command line.
 * @returns Shell text to be used as the argument(s) of `eval`: either the
 *   rebuilt pipeline (`first < /dev/null | rest`) as one single-quoted
 *   argument, or the untouched command single-quoted followed by
 *   ` < /dev/null`.
 */
export function rearrangePipeCommand(command: string): string {
  // Skip if command has backticks - shell-quote doesn't handle them well
  if (command.includes('`')) {
    return quoteWithEvalStdinRedirect(command)
  }

  // Skip if command has command substitution - shell-quote parses $() incorrectly,
  // treating ( and ) as separate operators instead of recognizing command substitution
  if (command.includes('$(')) {
    return quoteWithEvalStdinRedirect(command)
  }

  // Skip if command references shell variables or parameters: $VAR, ${VAR},
  // positional parameters ($0-$9) and special parameters ($@ $* $# $? $$ $! $-).
  // shell-quote's parse() expands these to empty string when no env is passed,
  // silently dropping the reference. Even if we preserved the token via an env
  // function, quote() would then escape the $ during rebuild, preventing runtime
  // expansion. See #9732.
  // The check is deliberately context-free (it also fires inside single
  // quotes): a false positive only costs us the rearrangement, never correctness.
  if (/\$[A-Za-z0-9_{@*#?$!-]/.test(command)) {
    return quoteWithEvalStdinRedirect(command)
  }

  // Skip if command contains bash control structures (for/while/until/if/case/select)
  // shell-quote cannot parse these correctly and will incorrectly find pipes inside
  // the control structure body, breaking the command when rearranged
  if (containsControlStructure(command)) {
    return quoteWithEvalStdinRedirect(command)
  }

  // Join continuation lines before parsing: shell-quote doesn't handle \<newline>
  // and produces empty string tokens for each occurrence, causing spurious empty
  // arguments in the reconstructed command
  const joined = joinContinuationLines(command)

  // shell-quote treats bare newlines as whitespace, not command separators.
  // Parsing+rebuilding 'cmd1 | head\ncmd2 | grep' yields 'cmd1 | head cmd2 | grep',
  // silently merging pipelines. Line-continuation (\<newline>) is already stripped
  // above; any remaining newline is a real separator. Bail to the eval fallback,
  // which preserves the newline inside a single-quoted arg. See #32515.
  if (joined.includes('\n')) {
    return quoteWithEvalStdinRedirect(command)
  }

  // SECURITY: shell-quote treats \' inside single quotes as an escape, but
  // bash treats it as literal \ followed by a closing quote. The pattern
  // '\' <payload> '\' makes shell-quote merge <payload> into the quoted
  // string, hiding operators like ; from the token stream. Rebuilding from
  // that merged token can expose the operators when bash re-parses.
  if (hasShellQuoteSingleQuoteBug(joined)) {
    return quoteWithEvalStdinRedirect(command)
  }

  const parseResult = tryParseShellCommand(joined)

  // If parsing fails (malformed syntax), fall back to quoting the whole command
  if (!parseResult.success) {
    return quoteWithEvalStdinRedirect(command)
  }

  const parsed = parseResult.tokens

  // SECURITY: shell-quote tokenizes differently from bash. Input like
  // `echo {"hi":\"hi;calc.exe"}` is a bash syntax error (unbalanced quote),
  // but shell-quote parses it into tokens with `;` as an operator and
  // `calc.exe` as a separate word. Rebuilding from those tokens produces
  // valid bash that executes `calc.exe` — turning a syntax error into an
  // injection. Unbalanced delimiters in a string token signal this
  // misparsing; fall back to whole-command quoting, which preserves the
  // original (bash then rejects it with the same syntax error it would have
  // raised without us).
  if (hasMalformedTokens(joined, parsed)) {
    return quoteWithEvalStdinRedirect(command)
  }

  // No pipe at all (-1), or a pipe as the very first token (0): there is no
  // "first command" to attach the redirect to, so use the fallback.
  const firstPipeIndex = findFirstPipeOperator(parsed)
  if (firstPipeIndex <= 0) {
    return quoteWithEvalStdinRedirect(command)
  }

  // Rebuild: first_command < /dev/null | rest_of_pipeline
  const parts = [
    ...buildCommandParts(parsed, 0, firstPipeIndex),
    '< /dev/null',
    ...buildCommandParts(parsed, firstPipeIndex, parsed.length),
  ]

  return singleQuoteForEval(parts.join(' '))
}
/**
 * Locates the earliest `|` operator token in a parsed shell command.
 *
 * @param parsed - Token list to scan.
 * @returns The index of the first `|` operator, or -1 when there is none.
 */
function findFirstPipeOperator(parsed: ParseEntry[]): number {
  return parsed.findIndex(token => isOperator(token, '|'))
}
/**
 * Recognizes a file descriptor redirection that the parser split into three
 * tokens (fd, operator, target) starting at `index`, and renders it back as a
 * single shell word.
 *
 * Supported forms: `N>&M`, `N>/dev/null` and `N> &M`, where N and M are 0, 1 or 2.
 *
 * @returns The joined redirection text, or undefined when the tokens at
 *   `index` do not form one of the supported redirections inside [index, end).
 */
function fdRedirectionAt(
  parsed: ParseEntry[],
  index: number,
  end: number,
): string | undefined {
  const fd = parsed[index]
  if (typeof fd !== 'string' || !/^[012]$/.test(fd)) {
    return undefined
  }
  // Both the operator and its target must lie inside the requested range
  if (index + 2 >= end) {
    return undefined
  }
  const operator = parsed[index + 1]
  if (!isOperator(operator)) {
    return undefined
  }
  const target = parsed[index + 2]
  if (typeof target !== 'string') {
    return undefined
  }

  // 2>&1 style
  if (operator.op === '>&') {
    return /^[012]$/.test(target) ? `${fd}>&${target}` : undefined
  }
  if (operator.op !== '>') {
    return undefined
  }
  // 2>/dev/null style
  if (target === '/dev/null') {
    return `${fd}>/dev/null`
  }
  // 2> &1 style (space between > and &1)
  if (target.startsWith('&')) {
    const duplicated = target.slice(1)
    if (/^[012]$/.test(duplicated)) {
      return `${fd}>&${duplicated}`
    }
  }
  return undefined
}

/**
 * Converts the parsed entries in [start, end) back into shell text fragments.
 *
 * - File descriptor redirections are emitted as one unit (see fdRedirectionAt).
 * - Leading `VAR=value` tokens of a command keep their `NAME=` prefix unquoted;
 *   only the value goes through quote().
 * - Every other string token is passed through quote().
 * - Glob operators contribute their raw pattern; other operators contribute
 *   their `op` text.
 * - Entries that are neither strings nor operators are skipped.
 *
 * @param parsed - Full token list.
 * @param start - Index of the first entry to render (inclusive).
 * @param end - Index one past the last entry to render (exclusive).
 * @returns The rendered fragments, in order; the caller joins them.
 */
function buildCommandParts(
  parsed: ParseEntry[],
  start: number,
  end: number,
): string[] {
  const out: string[] = []
  // True until the current command has produced its first non-assignment
  // word; assignments are only recognized while this holds.
  let atCommandStart = true
  let idx = start

  while (idx < end) {
    const redirection = fdRedirectionAt(parsed, idx, end)
    if (redirection !== undefined) {
      out.push(redirection)
      // Consumed fd + operator + target
      idx += 3
      continue
    }

    const token = parsed[idx]
    if (typeof token === 'string') {
      if (atCommandStart && isEnvironmentVariableAssignment(token)) {
        // Keep NAME= literal and quote only the value so spaces and special
        // characters survive while the word still reads as an assignment
        const eq = token.indexOf('=')
        out.push(`${token.slice(0, eq)}=${quote([token.slice(eq + 1)])}`)
      } else {
        // First ordinary word: everything after it is an argument
        atCommandStart = false
        out.push(quote([token]))
      }
    } else if (isOperator(token)) {
      if (token.op === 'glob' && 'pattern' in token) {
        // Glob patterns stay unquoted so the shell can still expand them
        out.push(token.pattern as string)
      } else {
        out.push(token.op)
        // A separator starts a new command, which may have its own assignments
        if (isCommandSeparator(token.op)) {
          atCommandStart = true
        }
      }
    }
    idx += 1
  }

  return out
}
/**
 * Matches the start of a `NAME=...` word: a letter or underscore, then any
 * number of letters, digits or underscores, then `=`.
 */
const ENV_ASSIGNMENT_PREFIX = /^[A-Za-z_][A-Za-z0-9_]*=/

/**
 * Tells whether a token has the shape of an environment variable assignment
 * (VAR=value). Only the `NAME=` prefix is validated; the value may be anything,
 * including empty.
 *
 * @param str - Token to inspect.
 * @returns true when `str` begins with a valid variable name followed by `=`.
 */
function isEnvironmentVariableAssignment(str: string): boolean {
  return ENV_ASSIGNMENT_PREFIX.test(str)
}
/**
 * Operators after which a new command begins: list operators (`&&`, `||`,
 * `;`), the background operator (`&`) and pipeline operators (`|`, `|&`).
 */
const COMMAND_SEPARATOR_OPS: ReadonlySet<string> = new Set([
  '&&',
  '||',
  ';',
  '&',
  '|',
  '|&',
])

/**
 * Checks if an operator is a command separator that starts a new command context.
 * After these operators, environment variable assignments are valid again.
 *
 * Pipes and `&` are included because every pipeline stage is a command of its
 * own: in `a | b | FOO=x c`, `FOO=x` is an assignment prefix for `c`, not an
 * argument of `b`.
 *
 * @param op - Operator text as reported by the parser.
 * @returns true when the token following `op` begins a new command.
 */
function isCommandSeparator(op: string): boolean {
  return COMMAND_SEPARATOR_OPS.has(op)
}
/**
 * Type guard for operator entries, i.e. non-null objects carrying an `op` key.
 *
 * @param entry - Any value, typically one parsed token.
 * @param op - Optional operator text to match. When omitted (or empty), any
 *   operator entry is accepted.
 * @returns true when `entry` is an operator and, if `op` was given, its `op`
 *   equals it.
 */
function isOperator(entry: unknown, op?: string): entry is { op: string } {
  if (typeof entry !== 'object' || entry === null || !('op' in entry)) {
    return false
  }
  // No specific operator requested: any operator entry qualifies
  return !op || entry.op === op
}
/**
 * A control keyword (for/while/until/if/case/select) that begins at a word
 * boundary and is immediately followed by whitespace.
 */
const CONTROL_KEYWORD_PATTERN = /\b(for|while|until|if|case|select)\s/

/**
 * Detects bash control structures (loops and conditionals) that shell-quote
 * cannot parse. Requiring whitespace right after the keyword keeps words that
 * merely start with a keyword, such as `format`, from matching. The check is
 * purely textual, so a keyword used as a plain argument still matches.
 *
 * @param command - Raw shell command text.
 * @returns true when a control keyword followed by whitespace is present.
 */
function containsControlStructure(command: string): boolean {
  return CONTROL_KEYWORD_PATTERN.test(command)
}
/**
 * Fallback quoting for commands whose pipe boundary we can't parse (e.g.,
 * commands with $(), backticks, or control structures): single-quotes the whole
 * command and appends `< /dev/null` as a shell redirect on eval itself, not as
 * an eval argument.
 *
 * Result shape: eval 'cmd' < /dev/null
 *   → eval's stdin is /dev/null, eval evaluates 'cmd', pipes inside work correctly
 *
 * The previous approach `quote([cmd, '<', '/dev/null'])` produced: eval 'cmd' \< /dev/null
 *   → eval concatenates args to 'cmd < /dev/null', redirect applies to LAST pipe command
 *
 * @param command - Raw shell command, left unmodified inside the quotes.
 * @returns The single-quoted command followed by ` < /dev/null`.
 */
function quoteWithEvalStdinRedirect(command: string): string {
  const quotedCommand = singleQuoteForEval(command)
  return `${quotedCommand} < /dev/null`
}
/**
 * Wraps a string in single quotes so it can be passed as one eval argument.
 * Each embedded single quote becomes '"'"' (close-sq, literal-sq-in-dq,
 * reopen-sq).
 *
 * Used instead of shell-quote's quote(), which switches to double-quote mode
 * when the input contains single quotes and then escapes ! -> \!, corrupting
 * jq/awk filters like `select(.x != .y)` into `select(.x \!= .y)`.
 *
 * @param s - Text to quote.
 * @returns `s` enclosed in single quotes with inner single quotes escaped.
 */
function singleQuoteForEval(s: string): string {
  // Splitting on the quote and re-joining with the escape sequence replaces
  // every occurrence.
  const escaped = s.split("'").join(`'"'"'`)
  return `'${escaped}'`
}
/**
 * Removes shell line continuations (backslash-newline) so the command reads
 * as a single line.
 *
 * For each run of backslashes directly before a newline:
 * - odd length: the final backslash escapes the newline, so that backslash
 *   and the newline are dropped and the remaining (paired) backslashes stay;
 * - even length: the backslashes pair up as escape sequences, the newline is
 *   a real separator, and the text is left untouched.
 *
 * @param command - Raw shell command, possibly spanning several lines.
 * @returns The command with continuation sequences removed.
 */
function joinContinuationLines(command: string): string {
  return command.replace(/\\+\n/g, run => {
    // The run is N backslashes plus the trailing newline
    const slashes = run.length - 1
    const isContinuation = slashes % 2 !== 0
    // Keep all but the last backslash; slicing also discards the newline
    return isContinuation ? run.slice(0, slashes - 1) : run
  })
}