tools/BashTool/bashSecurity.ts

100 KB · 2593 lines
src/tools/BashTool/bashSecurity.ts
1import { logEvent } from 'src/services/analytics/index.js'
2import { extractHeredocs } from '../../utils/bash/heredoc.js'
3import { ParsedCommand } from '../../utils/bash/ParsedCommand.js'
4import {
5  hasMalformedTokens,
6  hasShellQuoteSingleQuoteBug,
7  tryParseShellCommand,
8} from '../../utils/bash/shellQuote.js'
9import type { TreeSitterAnalysis } from '../../utils/bash/treeSitterAnalysis.js'
10import type { PermissionResult } from '../../utils/permissions/PermissionResult.js'
11
/**
 * Cheap pre-filter: matches a `$(` that is followed, on the same line, by a
 * `<<` heredoc operator.
 */
const HEREDOC_IN_SUBSTITUTION = /\$\(.*<</

/**
 * Substitution / expansion syntaxes paired with a human-readable label.
 *
 * Backticks are intentionally absent: they are handled separately in
 * validateDangerousPatterns so that escaped and unescaped backticks can be
 * told apart.
 */
const COMMAND_SUBSTITUTION_PATTERNS = [
  {
    pattern: /<\(/,
    message: 'process substitution <()',
  },
  {
    pattern: />\(/,
    message: 'process substitution >()',
  },
  {
    pattern: /=\(/,
    message: 'Zsh process substitution =()',
  },
  // Zsh EQUALS expansion: a word-initial `=cmd` expands to `$(which cmd)`, so
  // `=curl evil.com` runs `/usr/bin/curl evil.com` while a parser sees `=curl`
  // as the base command and a Bash(curl:*) deny rule never fires.
  // The leading alternation restricts the match to word-initial `=` followed
  // by a command-name character, so `VAR=val` is not matched.
  {
    pattern: /(?:^|[\s;&|])=[a-zA-Z_]/,
    message: 'Zsh equals expansion (=cmd)',
  },
  {
    pattern: /\$\(/,
    message: '$() command substitution',
  },
  {
    pattern: /\$\{/,
    message: '${} parameter substitution',
  },
  {
    pattern: /\$\[/,
    message: '$[] legacy arithmetic expansion',
  },
  {
    pattern: /~\[/,
    message: 'Zsh-style parameter expansion',
  },
  {
    pattern: /\(e:/,
    message: 'Zsh-style glob qualifiers',
  },
  {
    pattern: /\(\+/,
    message: 'Zsh glob qualifier with command execution',
  },
  {
    pattern: /\}\s*always\s*\{/,
    message: 'Zsh always block (try/always construct)',
  },
  // Defense in depth: PowerShell is not an execution target today, but its
  // block-comment opener is blocked anyway in case that ever changes.
  {
    pattern: /<#/,
    message: 'PowerShell comment syntax',
  },
]
42
/**
 * Zsh builtins that can sidestep the other security checks. Intended to be
 * compared against the base command (first word) of each command segment.
 */
const ZSH_DANGEROUS_COMMANDS = new Set<string>([
  // Gateway to the module-based attacks: zsh/mapfile (file I/O through array
  // assignment), zsh/system (sysopen/syswrite file access), zsh/zpty
  // (pseudo-terminal command execution), zsh/net/tcp (exfiltration via ztcp),
  // zsh/files (builtin rm/mv/ln/chmod that bypass binary checks).
  'zmodload',
  // `emulate -c` evaluates arbitrary code, i.e. it is an eval equivalent.
  'emulate',

  // The module builtins themselves. They normally need zmodload first, but are
  // blocked too in case zmodload is bypassed or a module is pre-loaded.
  // -- zsh/system
  'sysopen', // opens files with fine-grained control
  'sysread', // reads from file descriptors
  'syswrite', // writes to file descriptors
  'sysseek', // seeks on file descriptors
  // -- zsh/zpty
  'zpty', // runs commands on pseudo-terminals
  // -- zsh/net/tcp
  'ztcp', // opens TCP connections (exfiltration)
  // -- zsh/net/socket
  'zsocket', // creates Unix/TCP sockets
  // -- zsh/mapfile
  'mapfile', // not a command as such; the associative array appears via zmodload
  // -- zsh/files
  'zf_rm', // builtin rm
  'zf_mv', // builtin mv
  'zf_ln', // builtin ln
  'zf_chmod', // builtin chmod
  'zf_chown', // builtin chown
  'zf_mkdir', // builtin mkdir
  'zf_rmdir', // builtin rmdir
  'zf_chgrp', // builtin chgrp
])
75
/**
 * Stable numeric ids for the individual bash security checks, so analytics
 * events can carry a number instead of a descriptive string.
 */
const BASH_SECURITY_CHECK_IDS = {
  INCOMPLETE_COMMANDS: 1,
  JQ_SYSTEM_FUNCTION: 2,
  JQ_FILE_ARGUMENTS: 3,
  OBFUSCATED_FLAGS: 4,
  SHELL_METACHARACTERS: 5,
  DANGEROUS_VARIABLES: 6,
  NEWLINES: 7,

  DANGEROUS_PATTERNS_COMMAND_SUBSTITUTION: 8,
  DANGEROUS_PATTERNS_INPUT_REDIRECTION: 9,
  DANGEROUS_PATTERNS_OUTPUT_REDIRECTION: 10,

  IFS_INJECTION: 11,
  GIT_COMMIT_SUBSTITUTION: 12,
  PROC_ENVIRON_ACCESS: 13,
  MALFORMED_TOKEN_INJECTION: 14,
  BACKSLASH_ESCAPED_WHITESPACE: 15,
  BRACE_EXPANSION: 16,
  CONTROL_CHARACTERS: 17,
  UNICODE_WHITESPACE: 18,
  MID_WORD_HASH: 19,
  ZSH_DANGEROUS_COMMANDS: 20,
  BACKSLASH_ESCAPED_OPERATORS: 21,
  COMMENT_QUOTE_DESYNC: 22,
  QUOTED_NEWLINE: 23,
} as const
102
/**
 * Input handed to each validator: the raw command plus several pre-computed
 * views of it with quoted content removed to different degrees.
 * NOTE(review): the views are presumably built from extractQuotedContent and
 * stripSafeRedirections by the caller — the construction site is not in this
 * part of the file.
 */
type ValidationContext = {
  /** The command exactly as received. */
  originalCommand: string
  /** The command's base command, e.g. `git` or `jq`. */
  baseCommand: string
  unquotedContent: string
  fullyUnquotedContent: string
  /**
   * The fully-unquoted view taken BEFORE stripSafeRedirections runs.
   * validateBraceExpansion uses it because redirection stripping can create
   * backslash adjacencies that would otherwise cause false negatives.
   */
  fullyUnquotedPreStrip: string
  /**
   * Same as fullyUnquotedPreStrip except the quote characters (' and ") are
   * kept while their contents are dropped: `echo 'x'#` becomes `echo ''#`,
   * which keeps the adjacency between the quote and `#` visible.
   */
  unquotedKeepQuoteChars: string
  /**
   * Optional tree-sitter analysis. When present, validators may use it for a
   * more accurate answer; otherwise they fall back to regex checks.
   */
  treeSitter?: TreeSitterAnalysis | null
}
118
/** The three quote-stripped views produced by extractQuotedContent. */
type QuoteExtraction = {
  /** Everything except single-quoted content (double-quoted content stays). */
  withDoubleQuotes: string
  /** Only the content that sits outside both single and double quotes. */
  fullyUnquoted: string
  /**
   * fullyUnquoted with the quote delimiters (' and ") left in place and only
   * their contents removed. validateMidWordHash relies on it to notice a `#`
   * that directly follows a quote (e.g. `'x'#`), an adjacency that plain quote
   * stripping would hide.
   */
  unquotedKeepQuoteChars: string
}
127
/**
 * Walks `command` once, tracking single-quote, double-quote and backslash
 * state, and builds three views of it (see QuoteExtraction).
 *
 * - A backslash outside single quotes escapes the next character; both the
 *   backslash and the escaped character are emitted like ordinary characters.
 * - Quote delimiters are never emitted to withDoubleQuotes / fullyUnquoted,
 *   but are always appended to unquotedKeepQuoteChars.
 * - With `isJq`, a double-quote delimiter is ALSO emitted as an ordinary
 *   character after toggling the state. In practice that means the opening
 *   `"` lands in withDoubleQuotes only, while the closing `"` lands in all
 *   three views (and therefore twice in unquotedKeepQuoteChars).
 *
 * @param command - Raw command text.
 * @param isJq - Keep double-quote characters in the extraction (jq mode).
 * @returns The three views.
 */
function extractQuotedContent(command: string, isJq = false): QuoteExtraction {
  let withDoubleQuotes = ''
  let fullyUnquoted = ''
  let unquotedKeepQuoteChars = ''
  let inSingleQuote = false
  let inDoubleQuote = false
  let escaped = false

  // Appends an ordinary character to whichever views the current quote state
  // makes it visible in.
  const emit = (ch: string): void => {
    if (inSingleQuote) return
    withDoubleQuotes += ch
    if (inDoubleQuote) return
    fullyUnquoted += ch
    unquotedKeepQuoteChars += ch
  }

  for (const ch of command) {
    if (escaped) {
      // Character following a backslash: plain content, never a delimiter.
      escaped = false
      emit(ch)
      continue
    }

    if (ch === '\\' && !inSingleQuote) {
      escaped = true
      emit(ch)
      continue
    }

    if (ch === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      unquotedKeepQuoteChars += ch
      continue
    }

    if (ch === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      unquotedKeepQuoteChars += ch
      // jq mode falls through so the quote itself is also emitted.
      if (!isJq) continue
    }

    emit(ch)
  }

  return { withDoubleQuotes, fullyUnquoted, unquotedKeepQuoteChars }
}
175
/**
 * Removes the redirections considered harmless (`2>&1`, `> /dev/null` with an
 * optional 0/1/2 descriptor, `< /dev/null`) from `content`.
 *
 * SECURITY: each pattern ends in the boundary `(?=\s|$)`. Without it,
 * `> /dev/nullo` would match `/dev/null` as a PREFIX and be stripped down to
 * `o`, turning `echo hi > /dev/nullo` into `echo hi o`. validateRedirections
 * would then see no `>` and the write to /dev/nullo would be auto-allowed on
 * the read-only path (checkReadOnlyConstraints). The main bashPermissions flow
 * is covered by checkPathConstraints on the original command, but
 * speculation.ts relies on checkReadOnlyConstraints alone.
 *
 * @param content - Command text, normally already quote-stripped.
 * @returns `content` with the safe redirections removed.
 */
function stripSafeRedirections(content: string): string {
  const stderrToStdout = /\s+2\s*>&\s*1(?=\s|$)/g
  const outputToDevNull = /[012]?\s*>\s*\/dev\/null(?=\s|$)/g
  const inputFromDevNull = /\s*<\s*\/dev\/null(?=\s|$)/g

  let stripped = content
  // Order matters: applied exactly in this sequence.
  for (const redirection of [
    stderrToStdout,
    outputToDevNull,
    inputFromDevNull,
  ]) {
    stripped = stripped.replace(redirection, '')
  }
  return stripped
}
189
/**
 * Reports whether `content` contains `char` at a position that is not escaped
 * by a preceding backslash. A backslash followed by any character is treated
 * as one escape sequence and skipped as a pair.
 *
 * IMPORTANT: deliberately limited to a single character. Extending this to
 * multi-character strings needs EXTREME care because of shell ANSI-C quoting
 * ($'\n', $'\x41', $'\u0041'), which can encode arbitrary characters and is
 * very hard to parse correctly; a mistake there would let attackers slip past
 * security checks.
 *
 * @param content - Text to scan (typically output of extractQuotedContent).
 * @param char - The single character to look for, e.g. '`'.
 * @returns true when an unescaped occurrence exists, false otherwise.
 * @throws Error when `char` is not exactly one character long.
 *
 * Examples:
 *   hasUnescapedChar("test \`safe\`", '`') → false (both backticks escaped)
 *   hasUnescapedChar("test `dangerous`", '`') → true
 *   hasUnescapedChar("test\\`date`", '`') → true (escaped backslash, then a
 *                                                 bare backtick)
 */
function hasUnescapedChar(content: string, char: string): boolean {
  if (char.length !== 1) {
    throw new Error('hasUnescapedChar only works with single characters')
  }

  for (let pos = 0; pos < content.length; pos++) {
    const current = content[pos]
    const startsEscape = current === '\\' && pos + 1 < content.length
    if (startsEscape) {
      // Step over the escaped character too; the loop increment then moves
      // past the pair.
      pos++
      continue
    }
    if (current === char) return true
  }

  return false
}
232
/**
 * Allows a command that is empty or whitespace-only; anything else is passed
 * on to the next validator.
 */
function validateEmpty(context: ValidationContext): PermissionResult {
  const isBlank = context.originalCommand.trim().length === 0
  return isBlank
    ? {
        behavior: 'allow',
        updatedInput: { command: context.originalCommand },
        decisionReason: { type: 'other', reason: 'Empty command is safe' },
      }
    : { behavior: 'passthrough', message: 'Command is not empty' }
}
243
/**
 * Flags commands that look like a fragment of a larger command rather than a
 * complete one. Checked in order, first hit wins:
 *   1. leading whitespace that includes a tab,
 *   2. the trimmed command starts with `-` (flags only),
 *   3. the command starts with an operator (`&&`, `||`, `;`, `>`, `>>`, `<`).
 * A hit logs an analytics event carrying the matching sub-id and returns
 * `ask`; no hit returns `passthrough`.
 */
function validateIncompleteCommands(
  context: ValidationContext,
): PermissionResult {
  const raw = context.originalCommand

  const fragmentRules: Array<{
    subId: number
    matches: () => boolean
    message: string
  }> = [
    {
      subId: 1,
      matches: () => /^\s*\t/.test(raw),
      message: 'Command appears to be an incomplete fragment (starts with tab)',
    },
    {
      subId: 2,
      matches: () => raw.trim().startsWith('-'),
      message:
        'Command appears to be an incomplete fragment (starts with flags)',
    },
    {
      subId: 3,
      matches: () => /^\s*(&&|\|\||;|>>?|<)/.test(raw),
      message:
        'Command appears to be a continuation line (starts with operator)',
    },
  ]

  for (const rule of fragmentRules) {
    if (!rule.matches()) continue
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.INCOMPLETE_COMMANDS,
      subId: rule.subId,
    })
    return { behavior: 'ask', message: rule.message }
  }

  return { behavior: 'passthrough', message: 'Command appears complete' }
}
287
/**
 * Checks if a command is a "safe" heredoc-in-substitution pattern that can
 * bypass the generic $() validator.
 *
 * This is an EARLY-ALLOW path: returning `true` makes
 * validateSafeCommandSubstitution return `allow`, and an early allow skips the
 * remaining validators. Given this authority, the check must be PROVABLY
 * safe, not "probably safe".
 *
 * The only pattern we allow is:
 *   cmd [args] $(cat <<'DELIM'\n
 *   [body lines]\n
 *   DELIM\n
 *   ) [suffix]
 *
 * Where:
 * - The delimiter must be single-quoted ('DELIM') or escaped (\DELIM) so the
 *   body is literal text with no expansion
 * - The closing delimiter must be on a line BY ITSELF (or with only trailing
 *   whitespace + `)` for the $(cat <<'EOF'\n...\nEOF)` inline form)
 * - The closing delimiter must be the FIRST such line — matching bash's
 *   behavior exactly (no skipping past early delimiters to find EOF))
 * - The command must START with a plain command word that ends (whitespace)
 *   before the first $(. A substitution that is, or is glued onto, the first
 *   word supplies the command name itself: `$(...) 777 f`, `X=1 $(...)`,
 *   `r$(...)` and `"$(...)"` all execute (part of) the heredoc body.
 * - The remaining text (with the heredoc stripped) must pass all validators
 *
 * This implementation uses LINE-BASED matching, not regex [\s\S]*?, to
 * precisely replicate bash's heredoc-closing behavior.
 *
 * @param command - The full command text.
 * @returns true only when every `$(cat <<'DELIM'` in the command is verified
 *   and the text around the heredocs is harmless.
 */
function isSafeHeredoc(command: string): boolean {
  if (!HEREDOC_IN_SUBSTITUTION.test(command)) return false

  // SECURITY: Use [ \t] (not \s) between << and the delimiter. \s matches
  // newlines, but bash requires the delimiter word on the same line as <<.
  // Matching across newlines could accept malformed syntax that bash rejects.
  // Handle quote variations: 'EOF', ''EOF'' (splitCommand may mangle quotes).
  const heredocPattern =
    /\$\(cat[ \t]*<<(-?)[ \t]*(?:'+([A-Za-z_]\w*)'+|\\([A-Za-z_]\w*))/g
  let match: RegExpExecArray | null
  type HeredocMatch = {
    start: number
    operatorEnd: number
    delimiter: string
    isDash: boolean
  }
  const safeHeredocs: HeredocMatch[] = []

  while ((match = heredocPattern.exec(command)) !== null) {
    const delimiter = match[2] || match[3]
    if (delimiter) {
      safeHeredocs.push({
        start: match.index,
        operatorEnd: match.index + match[0].length,
        delimiter,
        isDash: match[1] === '-',
      })
    }
  }

  // If no safe heredoc patterns found, it's not safe
  if (safeHeredocs.length === 0) return false

  // SECURITY: For each heredoc, find the closing delimiter using LINE-BASED
  // matching that exactly replicates bash's behavior. Bash closes a heredoc
  // at the FIRST line that exactly matches the delimiter. Any subsequent
  // occurrence of the delimiter is just content (or a new command). Regex
  // [\s\S]*? can skip past the first delimiter to find a later `DELIM)`
  // pattern, hiding injected commands between the two delimiters.
  type VerifiedHeredoc = { start: number; end: number }
  const verified: VerifiedHeredoc[] = []

  for (const { start, operatorEnd, delimiter, isDash } of safeHeredocs) {
    // The opening line must end immediately after the delimiter (only
    // horizontal whitespace allowed before the newline). If there's other
    // content (like `; rm -rf /`), this is not a simple safe heredoc.
    const afterOperator = command.slice(operatorEnd)
    const openLineEnd = afterOperator.indexOf('\n')
    if (openLineEnd === -1) return false // No content at all
    const openLineTail = afterOperator.slice(0, openLineEnd)
    if (!/^[ \t]*$/.test(openLineTail)) return false // Extra content on open line

    // Body starts after the newline
    const bodyStart = operatorEnd + openLineEnd + 1
    const body = command.slice(bodyStart)
    const bodyLines = body.split('\n')

    // Find the FIRST line that closes the heredoc. There are two valid forms:
    //   1. `DELIM` alone on a line (bash-standard), followed by `)` on the
    //      next line (with only whitespace before it)
    //   2. `DELIM)` on a line (the inline $(cat <<'EOF'\n...\nEOF) form,
    //      where bash's PST_EOFTOKEN closes both heredoc and substitution)
    // For <<-, leading tabs are stripped before matching.
    let closingLineIdx = -1
    let closeParenLineIdx = -1 // Line index where `)` appears
    let closeParenColIdx = -1 // Column index of `)` on that line

    for (let i = 0; i < bodyLines.length; i++) {
      const rawLine = bodyLines[i]!
      const line = isDash ? rawLine.replace(/^\t*/, '') : rawLine

      // Form 1: delimiter alone on a line
      if (line === delimiter) {
        closingLineIdx = i
        // The `)` must be on the NEXT line with only whitespace before it
        const nextLine = bodyLines[i + 1]
        if (nextLine === undefined) return false // No closing `)`
        const parenMatch = nextLine.match(/^([ \t]*)\)/)
        if (!parenMatch) return false // `)` not at start of next line
        closeParenLineIdx = i + 1
        closeParenColIdx = parenMatch[1]!.length // Position of `)`
        break
      }

      // Form 2: delimiter immediately followed by `)` (PST_EOFTOKEN form)
      // Only whitespace allowed between delimiter and `)`.
      if (line.startsWith(delimiter)) {
        const afterDelim = line.slice(delimiter.length)
        const parenMatch = afterDelim.match(/^([ \t]*)\)/)
        if (parenMatch) {
          closingLineIdx = i
          closeParenLineIdx = i
          // Column is in rawLine (pre-tab-strip), so recompute
          const tabPrefix = isDash ? (rawLine.match(/^\t*/)?.[0] ?? '') : ''
          closeParenColIdx =
            tabPrefix.length + delimiter.length + parenMatch[1]!.length
          break
        }
        // Line starts with delimiter but has other trailing content —
        // this is NOT the closing line (bash requires exact match or EOF`)`).
        // But it's also a red flag: if this were inside $(), bash might
        // close early via PST_EOFTOKEN with other shell metacharacters.
        // We already handle that case in extractHeredocs — here we just
        // reject it as not matching our safe pattern.
        if (/^[)}`|&;(<>]/.test(afterDelim)) {
          return false // Ambiguous early-closure pattern
        }
      }
    }

    if (closingLineIdx === -1) return false // No closing delimiter found

    // Compute the absolute end position (one past the `)` character)
    let endPos = bodyStart
    for (let i = 0; i < closeParenLineIdx; i++) {
      endPos += bodyLines[i]!.length + 1 // +1 for newline
    }
    endPos += closeParenColIdx + 1 // +1 to include the `)` itself

    verified.push({ start, end: endPos })
  }

  // SECURITY: Reject nested matches. The regex finds $(cat <<'X' patterns
  // in RAW TEXT without understanding quoted-heredoc semantics. When the
  // outer heredoc has a quoted delimiter (<<'A'), its body is LITERAL text
  // in bash — any inner $(cat <<'B' is just characters, not a real heredoc.
  // But our regex matches both, producing NESTED ranges. Stripping nested
  // ranges corrupts indices: after stripping the inner range, the outer
  // range's `end` is stale (points past the shrunken string), causing
  // `remaining.slice(end)` to return '' and silently drop any suffix
  // (e.g., `; rm -rf /`). Since all our matched heredocs have quoted/escaped
  // delimiters, a nested match inside the body is ALWAYS literal text —
  // no legitimate user writes this pattern. Bail to safe fallback.
  for (const outer of verified) {
    for (const inner of verified) {
      if (inner === outer) continue
      if (inner.start > outer.start && inner.start < outer.end) {
        return false
      }
    }
  }

  // Strip all verified heredocs from the command, building `remaining`.
  // Process in reverse order so earlier indices stay valid.
  const sortedVerified = [...verified].sort((a, b) => b.start - a.start)
  let remaining = command
  for (const { start, end } of sortedVerified) {
    remaining = remaining.slice(0, start) + remaining.slice(end)
  }

  // SECURITY: The substitution must be in ARGUMENT position. Bash runs the
  // first word left after expansion, and unquoted $() output is word-split,
  // so whenever the heredoc supplies or completes the first word, its body
  // is executed:
  //   $(cat <<'EOF'\nchmod\nEOF\n) 777 /etc/shadow   → chmod 777 /etc/shadow
  //   $(cat <<'EOF'\nrm -rf x\nEOF\n)                → rm -rf x
  //   X=1 $(cat <<'EOF'\nid\nEOF\n)                  → id (X=1 is only env)
  //   r$(cat <<'EOF'\nm -rf x\nEOF\n)                → rm -rf x
  //   "$(cat <<'EOF'\nid\nEOF\n)"                    → id
  // "Some non-whitespace before $(" is therefore not enough. Require the
  // command to begin with one plain command word (no quotes, no `=`) that is
  // terminated by a space/tab before the FIRST heredoc. Commands with
  // env-assignment prefixes or quoted command names are not early-allowed;
  // they fall through to the regular validators.
  const firstHeredocStart = Math.min(...verified.map(v => v.start))
  const prefix = command.slice(0, firstHeredocStart)
  if (!/^[ \t]*[a-zA-Z0-9.\-\/_@,:+~]+[ \t]/.test(prefix)) return false

  // Check that remaining text contains only safe characters.
  // After stripping safe heredocs, the remaining text should only be command
  // names, arguments, quotes, and whitespace. Reject ANY shell metacharacter
  // to prevent operators (|, &, &&, ||, ;) or expansions ($, `, {, <, >) from
  // being used to chain dangerous commands after a safe heredoc.
  // SECURITY: Use explicit ASCII space/tab only — \s matches unicode whitespace
  // like \u00A0 which can be used to hide content. Newlines are also blocked
  // (they would indicate multi-line commands outside the heredoc body).
  if (!/^[a-zA-Z0-9 \t"'.\-/_@=,:+~]*$/.test(remaining)) return false

  // SECURITY: The remaining text (command with heredocs stripped) must also
  // pass all security validators. Without this, appending a safe heredoc to a
  // dangerous command (e.g., `zmodload zsh/system $(cat <<'EOF'\nx\nEOF\n)`)
  // would be early-allowed, bypassing validateZshDangerousCommands,
  // validateProcEnvironAccess, and any other main validator that checks
  // allowlist-safe character patterns.
  // No recursion risk: `remaining` passed the character allowlist above, so
  // it cannot contain `$(`, and the recursive call's
  // validateSafeCommandSubstitution returns passthrough immediately.
  if (bashCommandIsSafe_DEPRECATED(remaining).behavior !== 'passthrough')
    return false

  return true
}
515
/**
 * Detects well-formed $(cat <<'DELIM'...DELIM) heredoc substitution patterns.
 * Returns the command with matched heredocs stripped, or null if none found.
 * Used by the pre-split gate to strip safe heredocs and re-check the remainder.
 *
 * A match is stripped only when:
 * - the `$(` is not preceded by a backslash,
 * - the delimiter is single-quoted or backslash-escaped,
 * - nothing but spaces/tabs follows the delimiter on the opening line, and
 * - the first body line that starts with the delimiter closes the heredoc,
 *   either as `DELIM)` or as `DELIM` followed by a line starting with `)`.
 *
 * SECURITY: also returns null when a match begins inside an already accepted
 * range. The pattern is matched on raw text, so a `$(cat <<'B'` that sits in
 * the literal body of an outer `<<'A'` heredoc is matched too. Stripping both
 * ranges would use a stale outer `end` index after the inner strip and drop
 * real text after the outer `)`: for
 *   echo $(cat <<'A'\n$(cat <<'B'\nx\nB\n)\nA\n) ; rm -rf /
 * the suffix `; rm -rf /` would vanish from the text that gets re-checked.
 *
 * @param command - The full command text.
 * @returns The command without the matched substitutions, or null when there
 *   is nothing to strip or the matches are nested/overlapping.
 */
export function stripSafeHeredocSubstitutions(command: string): string | null {
  if (!HEREDOC_IN_SUBSTITUTION.test(command)) return null

  const heredocPattern =
    /\$\(cat[ \t]*<<(-?)[ \t]*(?:'+([A-Za-z_]\w*)'+|\\([A-Za-z_]\w*))/g
  // Accepted ranges, in increasing start order (exec scans left to right) and
  // pairwise disjoint (overlap makes the function bail below).
  const ranges: Array<{ start: number; end: number }> = []
  let match: RegExpExecArray | null
  while ((match = heredocPattern.exec(command)) !== null) {
    // `\$(` is an escaped dollar sign, not a substitution.
    if (match.index > 0 && command[match.index - 1] === '\\') continue
    const delimiter = match[2] || match[3]
    if (!delimiter) continue
    const isDash = match[1] === '-'
    const operatorEnd = match.index + match[0].length

    // Only spaces/tabs may follow the delimiter on the opening line.
    const afterOperator = command.slice(operatorEnd)
    const openLineEnd = afterOperator.indexOf('\n')
    if (openLineEnd === -1) continue
    if (!/^[ \t]*$/.test(afterOperator.slice(0, openLineEnd))) continue

    const bodyStart = operatorEnd + openLineEnd + 1
    const bodyLines = command.slice(bodyStart).split('\n')
    for (let i = 0; i < bodyLines.length; i++) {
      const rawLine = bodyLines[i]!
      // For <<-, leading tabs are ignored when matching the delimiter.
      const line = isDash ? rawLine.replace(/^\t*/, '') : rawLine
      if (line.startsWith(delimiter)) {
        const after = line.slice(delimiter.length)
        let closePos = -1
        if (/^[ \t]*\)/.test(after)) {
          // Inline form: `DELIM)` on one line.
          const lineStart =
            bodyStart +
            bodyLines.slice(0, i).join('\n').length +
            (i > 0 ? 1 : 0)
          closePos = command.indexOf(')', lineStart)
        } else if (after === '') {
          // Standard form: `DELIM` alone, `)` opens the next line.
          const nextLine = bodyLines[i + 1]
          if (nextLine !== undefined && /^[ \t]*\)/.test(nextLine)) {
            const nextLineStart =
              bodyStart + bodyLines.slice(0, i + 1).join('\n').length + 1
            closePos = command.indexOf(')', nextLineStart)
          }
        }
        if (closePos !== -1) {
          // Nested/overlapping match: refuse instead of stripping with
          // corrupted indices.
          const previous = ranges[ranges.length - 1]
          if (previous !== undefined && match.index < previous.end) {
            return null
          }
          ranges.push({ start: match.index, end: closePos + 1 })
        }
        // The first line starting with the delimiter ends the search,
        // whether or not it produced a range.
        break
      }
    }
  }
  if (ranges.length === 0) return null

  // Strip from the back so earlier indices stay valid.
  let result = command
  for (let i = ranges.length - 1; i >= 0; i--) {
    const r = ranges[i]!
    result = result.slice(0, r.start) + result.slice(r.end)
  }
  return result
}
579
/**
 * Detection-only variant of stripSafeHeredocSubstitutions: true when that
 * function finds something to strip (i.e. returns a non-null string, even an
 * empty one).
 */
export function hasSafeHeredocSubstitution(command: string): boolean {
  const stripped = stripSafeHeredocSubstitutions(command)
  return stripped !== null
}
584
/**
 * Early validator for `$(cat <<'DELIM' ...)` substitutions.
 * - No `$(` ... `<<` on one line: passthrough.
 * - isSafeHeredoc accepts the command: allow.
 * - Otherwise: passthrough, so later validators inspect the substitution.
 */
function validateSafeCommandSubstitution(
  context: ValidationContext,
): PermissionResult {
  const command = context.originalCommand

  if (!HEREDOC_IN_SUBSTITUTION.test(command)) {
    return { behavior: 'passthrough', message: 'No heredoc in substitution' }
  }

  if (!isSafeHeredoc(command)) {
    return {
      behavior: 'passthrough',
      message: 'Command substitution needs validation',
    }
  }

  return {
    behavior: 'allow',
    updatedInput: { command },
    decisionReason: {
      type: 'other',
      reason:
        'Safe command substitution: cat with quoted/escaped heredoc delimiter',
    },
  }
}
611
/**
 * Early validator for `git commit ... -m <quoted message> ...`.
 *
 * Returning `allow` here is a POSITIVE ASSERTION that the whole command is
 * safe and, per the notes below, skips the remaining validators. Everything
 * that is not a simple, single-line `git commit` with one plainly quoted
 * message therefore returns `passthrough` (full validation) or `ask`.
 *
 * @param context - Uses `originalCommand` and `baseCommand`.
 * @returns `allow` for a simple quoted-message commit, `ask` when the message
 *   contains substitution syntax or starts with `-`, `passthrough` otherwise.
 */
function validateGitCommit(context: ValidationContext): PermissionResult {
  const { originalCommand, baseCommand } = context

  if (baseCommand !== 'git' || !/^git\s+commit\s+/.test(originalCommand)) {
    return { behavior: 'passthrough', message: 'Not a git commit' }
  }

  // SECURITY: Backslashes can cause our regex to mis-identify quote boundaries
  // (e.g., `git commit -m "test\"msg" && evil`). Legitimate commit messages
  // virtually never contain backslashes, so bail to the full validator chain.
  if (originalCommand.includes('\\')) {
    return {
      behavior: 'passthrough',
      message: 'Git commit contains backslash, needs full validation',
    }
  }

  // SECURITY notes on the pattern:
  //
  // 1. The segment before `-m` must NOT match shell operators. With `.*?`,
  //    `git commit ; curl evil.com -m 'x'` had `; curl evil.com ` swallowed
  //    and was allowed as a compound command. The class excludes
  //    ; & | ` $ < > ( ) and line breaks. (`<`/`>` are fine in the REMAINDER
  //    for `--author="Name <email>"`, but not before `-m`.)
  //
  // 2. The segment before `-m` must not contain quote characters either. A
  //    quote there shifts bash's quoting relative to what this regex assumes:
  //    in `git commit --author="-m 'x" > file 'y'` bash sees a real `> file`
  //    redirect that the regex would place "inside" the message.
  //
  // 3. `[ \t]+` (not `\s+`) between words: `\n`/`\r` separate commands in
  //    bash.
  //
  // 4. The message may not contain its own quote character. Backslashes were
  //    rejected above, so a quote cannot be escaped and the FIRST matching
  //    quote is where bash ends the string. A lazy `[\s\S]*?` could backtrack
  //    past it whenever the tail failed to match: for
  //    `git commit -m 'a'\nevil 'x'` it matched the message `a'\nevil ` and
  //    the second command line was allowed.
  //
  // 5. `(.*)$` has no `s`/`m` flag, so a remainder containing a line break
  //    does not match at all and the command falls through.
  const messageMatch = originalCommand.match(
    /^git[ \t]+commit[ \t]+[^;&|`$<>()\n\r"']*?-m[ \t]+(["'])((?:(?!\1)[\s\S])*)\1(.*)$/,
  )

  if (messageMatch) {
    const [, quote, messageContent, remainder] = messageMatch

    // Double-quoted messages are still expanded by the shell.
    if (quote === '"' && messageContent && /\$\(|`|\$\{/.test(messageContent)) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.GIT_COMMIT_SUBSTITUTION,
        subId: 1,
      })
      return {
        behavior: 'ask',
        message: 'Git commit message contains command substitution patterns',
      }
    }

    // SECURITY: Check the remainder for operators that could chain commands.
    // Flags before `-m` (e.g. `--amend`) are consumed by the regex, so
    // `&& evil` or `> ~/.bashrc` after the message ends up here.
    if (remainder && /[;|&()`]|\$\(|\$\{/.test(remainder)) {
      return {
        behavior: 'passthrough',
        message: 'Git commit remainder contains shell metacharacters',
      }
    }
    if (remainder) {
      // `<` and `>` are legitimate INSIDE quotes (`--author="Name <email>"`)
      // but an UNQUOTED one is a redirect. Since an early `allow` skips
      // validateRedirections, bail to passthrough on those.
      //
      // Attack: `git commit --allow-empty -m 'payload' > ~/.bashrc`
      //   → ~/.bashrc overwritten with git stdout containing `payload`
      //   → RCE on next shell login.
      //
      // NOTE: This quote tracker has NO backslash handling. That is correct
      // only because the backslash check near the top of this function
      // already returned for any command containing `\`.
      let unquoted = ''
      let inSQ = false
      let inDQ = false
      for (let i = 0; i < remainder.length; i++) {
        const c = remainder[i]
        if (c === "'" && !inDQ) {
          inSQ = !inSQ
          continue
        }
        if (c === '"' && !inSQ) {
          inDQ = !inDQ
          continue
        }
        if (!inSQ && !inDQ) unquoted += c
      }
      // An unterminated quote means the command is not well-formed, so the
      // quoted/unquoted split above cannot be trusted. Do not vouch for it.
      if (inSQ || inDQ) {
        return {
          behavior: 'passthrough',
          message: 'Git commit remainder contains unbalanced quotes',
        }
      }
      if (/[<>]/.test(unquoted)) {
        return {
          behavior: 'passthrough',
          message: 'Git commit remainder contains unquoted redirect operator',
        }
      }
    }

    // Security hardening: block messages starting with dash
    // This catches potential obfuscation patterns like git commit -m "---"
    if (messageContent && messageContent.startsWith('-')) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
        subId: 5,
      })
      return {
        behavior: 'ask',
        message: 'Command contains quoted characters in flag names',
      }
    }

    return {
      behavior: 'allow',
      updatedInput: { command: originalCommand },
      decisionReason: {
        type: 'other',
        reason: 'Git commit with simple quoted message is allowed',
      },
    }
  }

  return { behavior: 'passthrough', message: 'Git commit needs validation' }
}
741
/**
 * Flags `jq` invocations that can execute commands or load code/data from
 * files chosen on the command line.
 *
 * @param context - Validation context; only `originalCommand` and
 *   `baseCommand` are read.
 * @returns `passthrough` when the base command is not `jq` or nothing
 *   suspicious is found; `ask` (after logging a security event) when the
 *   command mentions `system(` or uses a file-loading option.
 */
function validateJqCommand(context: ValidationContext): PermissionResult {
  const { originalCommand, baseCommand } = context

  if (baseCommand !== 'jq') {
    return { behavior: 'passthrough', message: 'Not jq' }
  }

  if (/\bsystem\s*\(/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.JQ_SYSTEM_FUNCTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'jq command contains system() function which executes arbitrary commands',
    }
  }

  // File arguments are now allowed - they will be validated by path validation in readOnlyValidation.ts
  // Only block dangerous flags that could read files into jq variables
  const afterJq = originalCommand.substring(3).trim()

  // SECURITY: jq accepts clustered short options, so `-nf prog.jq` and
  // `-fn prog.jq` both mean `-n -f prog.jq`. Matching only `-f\b` / `-L\b`
  // lets those clusters slip through. Instead flag ANY single-dash option
  // cluster that contains `f` or `L`. This is a strict superset of the old
  // `-f\b` / `-L\b` matches and errs on the side of asking the user.
  const dangerousFlagPattern =
    /(?:^|\s)(?:-[a-zA-Z0-9]*[fL]|--from-file|--rawfile|--slurpfile|--library-path)/
  if (dangerousFlagPattern.test(afterJq)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.JQ_FILE_ARGUMENTS,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'jq command contains dangerous flags that could execute code or read arbitrary files',
    }
  }

  return { behavior: 'passthrough', message: 'jq command is safe' }
}
782
/**
 * Scans `context.unquotedContent` for quoted arguments that embed shell
 * metacharacters. Three detection stages run in order; the first one that
 * matches logs a security event with its own `subId` and yields `ask`.
 * If no stage matches the result is `passthrough`.
 */
function validateShellMetacharacters(
  context: ValidationContext,
): PermissionResult {
  const content = context.unquotedContent
  const message =
    'Command contains shell metacharacters (;, |, or &) in arguments'

  const stages: Array<{ subId: number; patterns: RegExp[] }> = [
    // Stage 1: a standalone quoted word containing `;` or `&`.
    {
      subId: 1,
      patterns: [/(?:^|\s)["'][^"']*[;&][^"']*["'](?:\s|$)/],
    },
    // Stage 2: quoted glob arguments of -name / -path / -iname containing
    // `;`, `|` or `&`.
    {
      subId: 2,
      patterns: [
        /-name\s+["'][^"']*[;|&][^"']*["']/,
        /-path\s+["'][^"']*[;|&][^"']*["']/,
        /-iname\s+["'][^"']*[;|&][^"']*["']/,
      ],
    },
    // Stage 3: a quoted -regex argument containing `;` or `&` (a `|` is
    // not matched by this pattern).
    {
      subId: 3,
      patterns: [/-regex\s+["'][^"']*[;&][^"']*["']/],
    },
  ]

  for (const { subId, patterns } of stages) {
    const matched = patterns.some(re => re.test(content))
    if (!matched) {
      continue
    }
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
      subId,
    })
    return { behavior: 'ask', message }
  }

  return { behavior: 'passthrough', message: 'No metacharacters' }
}
822
/**
 * Detects `$VAR` references sitting directly next to a pipe or redirection
 * operator in `context.fullyUnquotedContent`, in either order
 * (`> $VAR` or `$VAR |`). Such commands yield `ask`; everything else is
 * `passthrough`.
 */
function validateDangerousVariables(
  context: ValidationContext,
): PermissionResult {
  const text = context.fullyUnquotedContent

  // Operator first, then (optionally spaced) variable: `< $F`, `| $CMD`.
  const operatorThenVariable = /[<>|]\s*\$[A-Za-z_]/
  // Variable first, then (optionally spaced) operator: `$CMD |`, `$F >`.
  const variableThenOperator = /\$[A-Za-z_][A-Za-z0-9_]*\s*[|<>]/

  const isSuspicious =
    operatorThenVariable.test(text) || variableThenOperator.test(text)
  if (!isSuspicious) {
    return { behavior: 'passthrough', message: 'No dangerous variables' }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_VARIABLES,
    subId: 1,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains variables in dangerous contexts (redirections or pipes)',
  }
}
845
/**
 * Checks `context.unquotedContent` for command-substitution style
 * constructs: first unescaped backticks, then every entry of
 * COMMAND_SUBSTITUTION_PATTERNS in declaration order. The first match
 * yields `ask`; no match yields `passthrough`.
 */
function validateDangerousPatterns(
  context: ValidationContext,
): PermissionResult {
  const content = context.unquotedContent

  // Backticks get their own check so that only UNESCAPED ones count;
  // escaped backticks (\`) are safe and commonly used in SQL commands.
  // Note: this branch returns without emitting an analytics event.
  if (hasUnescapedChar(content, '`')) {
    return {
      behavior: 'ask',
      message: 'Command contains backticks (`) for command substitution',
    }
  }

  // Remaining substitution forms (content here includes double-quoted text).
  const hit = COMMAND_SUBSTITUTION_PATTERNS.find(({ pattern }) =>
    pattern.test(content),
  )
  if (hit === undefined) {
    return { behavior: 'passthrough', message: 'No dangerous patterns' }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_COMMAND_SUBSTITUTION,
    subId: 1,
  })
  return { behavior: 'ask', message: `Command contains ${hit.message}` }
}
874
/**
 * Flags any `<` or `>` left in `context.fullyUnquotedContent`.
 * Input redirection is tested first, so a command containing both
 * operators is reported as input redirection.
 */
function validateRedirections(context: ValidationContext): PermissionResult {
  const text = context.fullyUnquotedContent

  let direction: 'input' | 'output' | null = null
  if (/</.test(text)) {
    direction = 'input'
  } else if (/>/.test(text)) {
    direction = 'output'
  }

  switch (direction) {
    case 'input':
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_INPUT_REDIRECTION,
        subId: 1,
      })
      return {
        behavior: 'ask',
        message:
          'Command contains input redirection (<) which could read sensitive files',
      }
    case 'output':
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_OUTPUT_REDIRECTION,
        subId: 1,
      })
      return {
        behavior: 'ask',
        message:
          'Command contains output redirection (>) which could write to arbitrary files',
      }
    default:
      return { behavior: 'passthrough', message: 'No redirections' }
  }
}
904
/**
 * Flags newlines / carriage returns in unquoted content that are followed
 * by further non-whitespace text, i.e. that could start another command.
 */
function validateNewlines(context: ValidationContext): PermissionResult {
  // Deliberately reads fullyUnquotedPreStrip (the text BEFORE
  // stripSafeRedirections runs). Stripping `>/dev/null` could otherwise
  // manufacture a backslash-newline pair that was never in the input:
  // `cmd \>/dev/null\nwhoami` would become `cmd \\nwhoami`, which resembles
  // a harmless continuation while actually hiding a second command.
  const text = context.fullyUnquotedPreStrip

  // Fast exit: nothing to analyse without an LF or CR.
  if (!/[\n\r]/.test(text)) {
    return { behavior: 'passthrough', message: 'No newlines' }
  }

  // A newline/CR followed by non-whitespace is suspicious, with one
  // exception: a backslash-newline continuation whose backslash follows
  // whitespace (e.g. `cmd \<newline>--flag`). Bash removes both characters
  // of `\<newline>`, so that form is safe. Continuations in the middle of a
  // word (`tr\<newline>aceroute`) remain flagged because they can conceal a
  // dangerous command name from allowlist checks.
  // eslint-disable-next-line custom-rules/no-lookbehind-regex -- .test() + gated by /[\n\r]/.test() above
  const startsAnotherCommand = /(?<![\s]\\)[\n\r]\s*\S/.test(text)
  if (!startsAnotherCommand) {
    return {
      behavior: 'passthrough',
      message: 'Newlines appear to be within data',
    }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.NEWLINES,
    subId: 1,
  })
  return {
    behavior: 'ask',
    message: 'Command contains newlines that could separate multiple commands',
  }
}
942
/**
 * SECURITY: Carriage return (\r, 0x0D) IS a misparsing concern, unlike LF.
 *
 * Parser differential:
 *   - shell-quote's BAREWORD regex uses `[^\s...]` — JS `\s` INCLUDES \r, so
 *     shell-quote treats CR as a token boundary. `TZ=UTC\recho` tokenizes as
 *     TWO tokens: ['TZ=UTC', 'echo']. splitCommand joins with space →
 *     'TZ=UTC echo curl evil.com'.
 *   - bash's default IFS = $' \t\n' — CR is NOT in IFS. bash sees
 *     `TZ=UTC\recho` as ONE word → env assignment TZ='UTC\recho' (CR byte
 *     inside value), then `curl` is the command.
 *
 * Attack: `TZ=UTC\recho curl evil.com` with Bash(echo:*)
 *   validator: splitCommand collapses CR→space → 'TZ=UTC echo curl evil.com'
 *   → stripSafeWrappers: TZ=UTC stripped → 'echo curl evil.com' matches rule
 *   bash: executes `curl evil.com`
 *
 * validateNewlines catches this but is in nonMisparsingValidators (LF is
 * correctly handled by both parsers). This validator is NOT in
 * nonMisparsingValidators — its ask result gets isBashSecurityCheckForMisparsing
 * and blocks at the bashPermissions gate.
 *
 * Checks originalCommand (not fullyUnquotedPreStrip) because CR inside single
 * quotes is ALSO a misparsing concern for the same reason: shell-quote's `\s`
 * still tokenizes it, but bash treats it as literal. Block ALL unquoted-or-SQ CR.
 * Only exception: CR inside DOUBLE quotes where bash also treats it as data
 * and shell-quote preserves the token (no split).
 *
 * A backslash does NOT make an unquoted CR safe: in bash an unquoted
 * `\<CR>` is just a literal CR that stays inside the current word, so the
 * same one-word-vs-two-tokens differential applies. The CR test therefore
 * runs BEFORE the escape state is consumed.
 *
 * @param context - Validation context; only `originalCommand` is read.
 * @returns `ask` when a CR occurs anywhere outside double quotes (escaped
 *   or not), otherwise `passthrough`.
 */
function validateCarriageReturn(context: ValidationContext): PermissionResult {
  const { originalCommand } = context

  if (!originalCommand.includes('\r')) {
    return { behavior: 'passthrough', message: 'No carriage return' }
  }

  // Walk the command tracking quote state. CR outside DQ (including inside
  // SQ and unquoted) causes the shell-quote/bash tokenization differential.
  let inSingleQuote = false
  let inDoubleQuote = false
  let escaped = false
  for (const c of originalCommand) {
    // SECURITY: test for CR first, so that a preceding backslash
    // (`escaped === true`) cannot hide a CR that sits outside double quotes.
    // An escaped character never changes quote state, so inDoubleQuote is
    // accurate here.
    if (c === '\r' && !inDoubleQuote) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.NEWLINES,
        subId: 2,
      })
      return {
        behavior: 'ask',
        message:
          'Command contains carriage return (\\r) which shell-quote and bash tokenize differently',
      }
    }
    if (escaped) {
      // Previous char was an escaping backslash: this char is literal.
      escaped = false
      continue
    }
    // Backslash is an escape everywhere except inside single quotes.
    if (c === '\\' && !inSingleQuote) {
      escaped = true
      continue
    }
    if (c === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      continue
    }
    if (c === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      continue
    }
  }

  return { behavior: 'passthrough', message: 'CR only inside double quotes' }
}
1016
/**
 * Flags any reference to the IFS variable in `context.originalCommand`,
 * since IFS can be used to bypass regex-based validation.
 */
function validateIFSInjection(context: ValidationContext): PermissionResult {
  const command = context.originalCommand

  // Two shapes are recognised:
  //   - a bare `$IFS`
  //   - `${ ... IFS` — any parameter expansion mentioning IFS before the
  //     closing brace, e.g. ${IFS:0:1} or ${#IFS}
  const referencesIFS = /\$IFS|\$\{[^}]*IFS/.test(command)
  if (!referencesIFS) {
    return { behavior: 'passthrough', message: 'No IFS injection detected' }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.IFS_INJECTION,
    subId: 1,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains IFS variable usage which could bypass security validation',
  }
}
1037
// Defense-in-depth against reading environment variables through the /proc
// filesystem. Path validation typically blocks /proc access already; this
// check is an extra layer because /proc environ files can expose sensitive
// data such as API keys and secrets.
function validateProcEnvironAccess(
  context: ValidationContext,
): PermissionResult {
  const command = context.originalCommand

  // Matches `/proc/<anything>/environ` on a single line, e.g.
  //   /proc/self/environ
  //   /proc/1/environ
  //   /proc/*/environ (any PID)
  const touchesProcEnviron = /\/proc\/.*\/environ/.test(command)
  if (!touchesProcEnviron) {
    return {
      behavior: 'passthrough',
      message: 'No /proc/environ access detected',
    }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.PROC_ENVIRON_ACCESS,
    subId: 1,
  })
  return {
    behavior: 'ask',
    message:
      'Command accesses /proc/*/environ which could expose sensitive environment variables',
  }
}
1068
/**
 * Flags commands that combine malformed tokens (unbalanced delimiters) with
 * a command separator — a shape where ambiguous shell syntax can be abused
 * for injection.
 *
 * Security: this covers the eval bypass found in HackerOne review. For
 * ambiguous input such as `echo {"hi":"hi;evil"}`, shell-quote may emit
 * unbalanced tokens (e.g. `{hi:"hi`). Together with a command separator,
 * eval re-parsing can then execute something other than what was validated.
 *
 * Requiring user approval means the user sees exactly what will run before
 * it runs.
 */
function validateMalformedTokenInjection(
  context: ValidationContext,
): PermissionResult {
  const { originalCommand } = context

  const parseResult = tryParseShellCommand(originalCommand)
  if (!parseResult.success) {
    // Parse failures are dealt with elsewhere (bashToolHasPermission checks this).
    return {
      behavior: 'passthrough',
      message: 'Parse failed, handled elsewhere',
    }
  }

  const tokens = parseResult.tokens

  // Only operator entries `;`, `&&` and `||` count as command separators.
  const hasCommandSeparator = tokens.some(entry => {
    if (typeof entry !== 'object' || entry === null || !('op' in entry)) {
      return false
    }
    return entry.op === ';' || entry.op === '&&' || entry.op === '||'
  })
  if (!hasCommandSeparator) {
    return { behavior: 'passthrough', message: 'No command separators' }
  }

  // A separator is present; the command is only a problem if the tokens are
  // also malformed (unbalanced delimiters).
  if (!hasMalformedTokens(originalCommand, tokens)) {
    return {
      behavior: 'passthrough',
      message: 'No malformed token injection detected',
    }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.MALFORMED_TOKEN_INJECTION,
    subId: 1,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains ambiguous syntax with command separators that could be misinterpreted',
  }
}
1129
1130function validateObfuscatedFlags(context: ValidationContext): PermissionResult {
1131  // Block shell quoting bypass patterns used to circumvent negative lookaheads we use in our regexes to block known dangerous flags
1132
1133  const { originalCommand, baseCommand } = context
1134
1135  // Echo is safe for obfuscated flags, BUT only for simple echo commands.
1136  // For compound commands (with |, &, ;), we need to check the whole command
1137  // because the dangerous ANSI-C quoting might be after the operator.
1138  const hasShellOperators = /[|&;]/.test(originalCommand)
1139  if (baseCommand === 'echo' && !hasShellOperators) {
1140    return {
1141      behavior: 'passthrough',
1142      message: 'echo command is safe and has no dangerous flags',
1143    }
1144  }
1145
1146  // COMPREHENSIVE OBFUSCATION DETECTION
1147  // These checks catch various ways to hide flags using shell quoting
1148
1149  // 1. Block ANSI-C quoting ($'...') - can encode any character via escape sequences
1150  // Simple pattern that matches $'...' anywhere. This correctly handles:
1151  // - grep '$' file => no match ($ is regex anchor inside quotes, no $'...' structure)
1152  // - 'test'$'-exec' => match (quote concatenation with ANSI-C)
1153  // - Zero-width space and other invisible chars => match
1154  // The pattern requires $' followed by content (can be empty) followed by closing '
1155  if (/\$'[^']*'/.test(originalCommand)) {
1156    logEvent('tengu_bash_security_check_triggered', {
1157      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1158      subId: 5,
1159    })
1160    return {
1161      behavior: 'ask',
1162      message: 'Command contains ANSI-C quoting which can hide characters',
1163    }
1164  }
1165
1166  // 2. Block locale quoting ($"...")  - can also use escape sequences
1167  // Same simple pattern as ANSI-C quoting above
1168  if (/\$"[^"]*"/.test(originalCommand)) {
1169    logEvent('tengu_bash_security_check_triggered', {
1170      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1171      subId: 6,
1172    })
1173    return {
1174      behavior: 'ask',
1175      message: 'Command contains locale quoting which can hide characters',
1176    }
1177  }
1178
1179  // 3. Block empty ANSI-C or locale quotes followed by dash
1180  // $''-exec or $""-exec
1181  if (/\$['"]{2}\s*-/.test(originalCommand)) {
1182    logEvent('tengu_bash_security_check_triggered', {
1183      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1184      subId: 9,
1185    })
1186    return {
1187      behavior: 'ask',
1188      message:
1189        'Command contains empty special quotes before dash (potential bypass)',
1190    }
1191  }
1192
1193  // 4. Block ANY sequence of empty quotes followed by dash
1194  // This catches: ''-  ""-  ''""-  ""''-  ''""''-  etc.
1195  // The pattern looks for one or more empty quote pairs followed by optional whitespace and dash
1196  if (/(?:^|\s)(?:''|"")+\s*-/.test(originalCommand)) {
1197    logEvent('tengu_bash_security_check_triggered', {
1198      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1199      subId: 7,
1200    })
1201    return {
1202      behavior: 'ask',
1203      message: 'Command contains empty quotes before dash (potential bypass)',
1204    }
1205  }
1206
1207  // 4b. SECURITY: Block homogeneous empty quote pair(s) immediately adjacent
1208  // to a quoted dash. Patterns like `"""-f"` (empty `""` + quoted `"-f"`)
1209  // concatenate in bash to `-f` but slip past all the above checks:
1210  //   - Regex (4) above: `(?:''|"")+\s*-` matches `""` pair, then expects
1211  //     optional space and dash — but finds a third `"` instead. No match.
1212  //   - Quote-content scanner (below): Sees the first `""` pair with empty
1213  //     content (doesn't start with dash). The third `"` opens a new quoted
1214  //     region handled by the main quote-state tracker.
1215  //   - Quote-state tracker: `""` toggles inDoubleQuote on/off; third `"`
1216  //     opens it again. The `-` inside `"-f"` is INSIDE quotes → skipped.
1217  //   - Flag scanner: Looks for `\s` before `-`. The `-` is preceded by `"`.
1218  //   - fullyUnquotedContent: Both `""` and `"-f"` get stripped.
1219  //
1220  // In bash, `"""-f"` = empty string + string "-f" = `-f`. This bypass works
1221  // for ANY dangerous-flag check (jq -f, find -exec, fc -e) with a matching
1222  // prefix permission (Bash(jq:*), Bash(find:*)).
1223  //
1224  // The regex `(?:""|'')+['"]-` matches:
1225  //   - One or more HOMOGENEOUS empty pairs (`""` or `''`) — the concatenation
1226  //     point where bash joins the empty string to the flag.
1227  //   - Immediately followed by ANY quote char — opens the flag-quoted region.
1228  //   - Immediately followed by `-` — the obfuscated flag.
1229  //
1230  // POSITION-AGNOSTIC: We do NOT require word-start (`(?:^|\s)`) because
1231  // prefixes like `$x"""-f"` (unset/empty variable) concatenate the same way.
1232  // The homogeneous-empty-pair requirement filters out the `'"'"'` idiom
1233  // (no homogeneous empty pair — it's close, double-quoted-content, open).
1234  //
1235  // FALSE POSITIVE: Matches `echo '"""-f" text'` (pattern inside single-quoted
1236  // string). Extremely rare (requires echoing the literal attack). Acceptable.
1237  if (/(?:""|'')+['"]-/.test(originalCommand)) {
1238    logEvent('tengu_bash_security_check_triggered', {
1239      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1240      subId: 10,
1241    })
1242    return {
1243      behavior: 'ask',
1244      message:
1245        'Command contains empty quote pair adjacent to quoted dash (potential flag obfuscation)',
1246    }
1247  }
1248
1249  // 4c. SECURITY: Also block 3+ consecutive quotes at word start even without
1250  // an immediate dash. Broader safety net for multi-quote obfuscation patterns
1251  // not enumerated above (e.g., `"""x"-f` where content between quotes shifts
1252  // the dash position). Legitimate commands never need `"""x"` when `"x"` works.
1253  if (/(?:^|\s)['"]{3,}/.test(originalCommand)) {
1254    logEvent('tengu_bash_security_check_triggered', {
1255      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1256      subId: 11,
1257    })
1258    return {
1259      behavior: 'ask',
1260      message:
1261        'Command contains consecutive quote characters at word start (potential obfuscation)',
1262    }
1263  }
1264
1265  // Track quote state to avoid false positives for flags inside quoted strings
1266  let inSingleQuote = false
1267  let inDoubleQuote = false
1268  let escaped = false
1269
1270  for (let i = 0; i < originalCommand.length - 1; i++) {
1271    const currentChar = originalCommand[i]
1272    const nextChar = originalCommand[i + 1]
1273
1274    // Update quote state
1275    if (escaped) {
1276      escaped = false
1277      continue
1278    }
1279
1280    // SECURITY: Only treat backslash as escape OUTSIDE single quotes. In bash,
1281    // `\` inside `'...'` is LITERAL. Without this guard, `'\'` desyncs the
1282    // quote tracker: `\` sets escaped=true, closing `'` is consumed by the
1283    // escaped-skip above instead of toggling inSingleQuote. Parser stays in
1284    // single-quote mode, and the `if (inSingleQuote || inDoubleQuote) continue`
1285    // at line ~1121 skips ALL subsequent flag detection for the rest of the
1286    // command. Example: `jq '\' "-f" evil` — bash gets `-f` arg, but desynced
1287    // parser thinks ` "-f" evil` is inside quotes → flag detection bypassed.
1288    // Defense-in-depth: hasShellQuoteSingleQuoteBug catches `'\'` patterns at
1289    // line ~1856 before this runs. But we fix the tracker for consistency with
1290    // the CORRECT implementations elsewhere in this file (hasBackslashEscaped*,
1291    // extractQuotedContent) which all guard with `!inSingleQuote`.
1292    if (currentChar === '\\' && !inSingleQuote) {
1293      escaped = true
1294      continue
1295    }
1296
1297    if (currentChar === "'" && !inDoubleQuote) {
1298      inSingleQuote = !inSingleQuote
1299      continue
1300    }
1301
1302    if (currentChar === '"' && !inSingleQuote) {
1303      inDoubleQuote = !inDoubleQuote
1304      continue
1305    }
1306
1307    // Only look for flags when not inside quoted strings
1308    // This prevents false positives like: make test TEST="file.py -v"
1309    if (inSingleQuote || inDoubleQuote) {
1310      continue
1311    }
1312
1313    // Look for whitespace followed by quote that contains a dash (potential flag obfuscation)
1314    // SECURITY: Block ANY quoted content starting with dash - err on side of safety
1315    // Catches: "-"exec, "-file", "--flag", '-'output, etc.
1316    // Users can approve manually if legitimate (e.g., find . -name "-file")
1317    if (
1318      currentChar &&
1319      nextChar &&
1320      /\s/.test(currentChar) &&
1321      /['"`]/.test(nextChar)
1322    ) {
1323      const quoteChar = nextChar
1324      let j = i + 2 // Start after the opening quote
1325      let insideQuote = ''
1326
1327      // Collect content inside the quote
1328      while (j < originalCommand.length && originalCommand[j] !== quoteChar) {
1329        insideQuote += originalCommand[j]!
1330        j++
1331      }
1332
1333      // If we found a closing quote and the content looks like an obfuscated flag, block it.
1334      // Three attack patterns to catch:
1335      //   1. Flag name inside quotes: "--flag", "-exec", "-X" (dashes + letters inside)
1336      //   2. Split-quote flag: "-"exec, "--"output (dashes inside, letters continue after quote)
1337      //   3. Chained quotes: "-""exec" (dashes in first quote, second quote contains letters)
1338      // Pure-dash strings like "---" or "--" followed by whitespace/separator are separators,
1339      // not flags, and should not trigger this check.
1340      const charAfterQuote = originalCommand[j + 1]
1341      // Inside double quotes, $VAR and `cmd` expand at runtime, so "-$VAR" can
1342      // become -exec. Blocking $ and ` here over-blocks single-quoted literals
1343      // like grep '-$' (where $ is literal), but main's startsWith('-') already
1344      // blocked those — this restores status quo, not a new false positive.
1345      // Brace expansion ({) does NOT happen inside quotes, so { is not needed here.
1346      const hasFlagCharsInside = /^-+[a-zA-Z0-9$`]/.test(insideQuote)
1347      // Characters that can continue a flag after a closing quote. This catches:
1348      //   a-zA-Z0-9: "-"exec → -exec (direct concatenation)
1349      //   \\:        "-"\exec → -exec (backslash escape is stripped)
1350      //   -:         "-"-output → --output (extra dashes)
1351      //   {:         "-"{exec,delete} → -exec -delete (brace expansion)
1352      //   $:         "-"$VAR → -exec when VAR=exec (variable expansion)
1353      //   `:         "-"`echo exec` → -exec (command substitution)
1354      // Note: glob chars (*?[) are omitted — they require attacker-controlled
1355      // filenames in CWD to exploit, and blocking them would break patterns
1356      // like `ls -- "-"*` for listing files that start with dash.
1357      const FLAG_CONTINUATION_CHARS = /[a-zA-Z0-9\\${`-]/
1358      const hasFlagCharsContinuing =
1359        /^-+$/.test(insideQuote) &&
1360        charAfterQuote !== undefined &&
1361        FLAG_CONTINUATION_CHARS.test(charAfterQuote)
1362      // Handle adjacent quote chaining: "-""exec" or "-""-"exec or """-"exec concatenates
1363      // to -exec in shell. Follow the chain of adjacent quoted segments until
1364      // we find one containing an alphanumeric char or hit a non-quote boundary.
1365      // Also handles empty prefix quotes: """-"exec where "" is followed by "-"exec
1366      // The combined segments form a flag if they contain dash(es) followed by alphanumerics.
1367      const hasFlagCharsInNextQuote =
1368        // Trigger when: first segment is only dashes OR empty (could be prefix for flag)
1369        (insideQuote === '' || /^-+$/.test(insideQuote)) &&
1370        charAfterQuote !== undefined &&
1371        /['"`]/.test(charAfterQuote) &&
1372        (() => {
1373          let pos = j + 1 // Start at charAfterQuote (an opening quote)
1374          let combinedContent = insideQuote // Track what the shell will see
1375          while (
1376            pos < originalCommand.length &&
1377            /['"`]/.test(originalCommand[pos]!)
1378          ) {
1379            const segQuote = originalCommand[pos]!
1380            let end = pos + 1
1381            while (
1382              end < originalCommand.length &&
1383              originalCommand[end] !== segQuote
1384            ) {
1385              end++
1386            }
1387            const segment = originalCommand.slice(pos + 1, end)
1388            combinedContent += segment
1389
1390            // Check if combined content so far forms a flag pattern.
1391            // Include $ and ` for in-quote expansion: "-""$VAR" → -exec
1392            if (/^-+[a-zA-Z0-9$`]/.test(combinedContent)) return true
1393
1394            // If this segment has alphanumeric/expansion and we already have dashes,
1395            // it's a flag. Catches "-""$*" where segment='$*' has no alnum but
1396            // expands to positional params at runtime.
1397            // Guard against segment.length === 0: slice(0, -0) → slice(0, 0) → ''.
1398            const priorContent =
1399              segment.length > 0
1400                ? combinedContent.slice(0, -segment.length)
1401                : combinedContent
1402            if (/^-+$/.test(priorContent)) {
1403              if (/[a-zA-Z0-9$`]/.test(segment)) return true
1404            }
1405
1406            if (end >= originalCommand.length) break // Unclosed quote
1407            pos = end + 1 // Move past closing quote to check next segment
1408          }
1409          // Also check the unquoted char at the end of the chain
1410          if (
1411            pos < originalCommand.length &&
1412            FLAG_CONTINUATION_CHARS.test(originalCommand[pos]!)
1413          ) {
1414            // If we have dashes in combined content, the trailing char completes a flag
1415            if (/^-+$/.test(combinedContent) || combinedContent === '') {
1416              // Check if we're about to form a flag with the following content
1417              const nextChar = originalCommand[pos]!
1418              if (nextChar === '-') {
1419                // More dashes, could still form a flag
1420                return true
1421              }
1422              if (/[a-zA-Z0-9\\${`]/.test(nextChar) && combinedContent !== '') {
1423                // We have dashes and now alphanumeric/expansion follows
1424                return true
1425              }
1426            }
1427            // Original check for dashes followed by alphanumeric
1428            if (/^-/.test(combinedContent)) {
1429              return true
1430            }
1431          }
1432          return false
1433        })()
1434      if (
1435        j < originalCommand.length &&
1436        originalCommand[j] === quoteChar &&
1437        (hasFlagCharsInside ||
1438          hasFlagCharsContinuing ||
1439          hasFlagCharsInNextQuote)
1440      ) {
1441        logEvent('tengu_bash_security_check_triggered', {
1442          checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1443          subId: 4,
1444        })
1445        return {
1446          behavior: 'ask',
1447          message: 'Command contains quoted characters in flag names',
1448        }
1449      }
1450    }
1451
1452    // Look for whitespace followed by dash - this starts a flag
1453    if (currentChar && nextChar && /\s/.test(currentChar) && nextChar === '-') {
1454      let j = i + 1 // Start at the dash
1455      let flagContent = ''
1456
1457      // Collect flag content
1458      while (j < originalCommand.length) {
1459        const flagChar = originalCommand[j]
1460        if (!flagChar) break
1461
1462        // End flag content once we hit whitespace or an equals sign
1463        if (/[\s=]/.test(flagChar)) {
1464          break
1465        }
1466        // End flag collection if we hit quote followed by non-flag character. This is needed to handle cases like -d"," which should be parsed as just -d
1467        if (/['"`]/.test(flagChar)) {
1468          // Special case for cut -d flag: the delimiter value can be quoted
1469          // Example: cut -d'"' should parse as flag name: -d, value: '"'
1470          // Note: We only apply this exception to cut -d specifically to avoid bypasses.
1471          // Without this restriction, a command like `find -e"xec"` could be parsed as
1472          // flag name: -e, bypassing our blocklist for -exec. By restricting to cut -d,
1473          // we allow the legitimate use case while preventing obfuscation attacks on other
1474          // commands where quoted flag values could hide dangerous flag names.
1475          if (
1476            baseCommand === 'cut' &&
1477            flagContent === '-d' &&
1478            /['"`]/.test(flagChar)
1479          ) {
1480            // This is cut -d followed by a quoted delimiter - flagContent is already '-d'
1481            break
1482          }
1483
1484          // Look ahead to see what follows the quote
1485          if (j + 1 < originalCommand.length) {
1486            const nextFlagChar = originalCommand[j + 1]
1487            if (nextFlagChar && !/[a-zA-Z0-9_'"-]/.test(nextFlagChar)) {
1488              // Quote followed by something that is clearly not part of a flag, end the parsing
1489              break
1490            }
1491          }
1492        }
1493        flagContent += flagChar
1494        j++
1495      }
1496
1497      if (flagContent.includes('"') || flagContent.includes("'")) {
1498        logEvent('tengu_bash_security_check_triggered', {
1499          checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1500          subId: 1,
1501        })
1502        return {
1503          behavior: 'ask',
1504          message: 'Command contains quoted characters in flag names',
1505        }
1506      }
1507    }
1508  }
1509
1510  // Also handle flags that start with quotes: "--"output, '-'-output, etc.
1511  // Use fullyUnquotedContent to avoid false positives from legitimate quoted content like echo "---"
1512  if (/\s['"`]-/.test(context.fullyUnquotedContent)) {
1513    logEvent('tengu_bash_security_check_triggered', {
1514      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1515      subId: 2,
1516    })
1517    return {
1518      behavior: 'ask',
1519      message: 'Command contains quoted characters in flag names',
1520    }
1521  }
1522
1523  // Also handles cases like ""--output
1524  // Use fullyUnquotedContent to avoid false positives from legitimate quoted content
1525  if (/['"`]{2}-/.test(context.fullyUnquotedContent)) {
1526    logEvent('tengu_bash_security_check_triggered', {
1527      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1528      subId: 3,
1529    })
1530    return {
1531      behavior: 'ask',
1532      message: 'Command contains quoted characters in flag names',
1533    }
1534  }
1535
1536  return { behavior: 'passthrough', message: 'No obfuscated flags detected' }
1537}
1538
1539/**
1540 * Detects backslash-escaped whitespace characters (space, tab) outside of quotes.
1541 *
1542 * In bash, `echo\ test` is a single token (command named "echo test"), but
1543 * shell-quote decodes the escape and produces `echo test` (two separate tokens).
1544 * This discrepancy allows path traversal attacks like:
1545 *   echo\ test/../../../usr/bin/touch /tmp/file
1546 * which the parser sees as `echo test/.../touch /tmp/file` (an echo command)
1547 * but bash resolves as `/usr/bin/touch /tmp/file` (via directory "echo test").
1548 */
function hasBackslashEscapedWhitespace(command: string): boolean {
  // Single and double quotes can never be open at the same time, so one
  // three-way state tracks both.
  let quote: 'none' | 'single' | 'double' = 'none'
  let pos = 0

  while (pos < command.length) {
    const ch = command[pos]

    // Backslash is examined before the quote toggles. It is literal inside
    // '...'; everywhere else it consumes the next character, so an escaped
    // quote such as `\"` can never flip the quote state.
    if (ch === '\\' && quote !== 'single') {
      if (quote === 'none') {
        const escapedChar = command[pos + 1]
        if (escapedChar === ' ' || escapedChar === '\t') {
          return true
        }
      }
      // Step over the backslash together with the character it escapes. This
      // applies outside quotes and inside "...", where \\, \", \$ and \` are
      // valid escape sequences.
      pos += 2
      continue
    }

    if (ch === '"' && quote !== 'single') {
      quote = quote === 'double' ? 'none' : 'double'
    } else if (ch === "'" && quote !== 'double') {
      quote = quote === 'single' ? 'none' : 'single'
    }
    pos++
  }

  return false
}
1582
/**
 * Validator wrapper around hasBackslashEscapedWhitespace.
 *
 * @param context - validation context; only `originalCommand` is read here
 * @returns an `ask` result (after logging a security-check event) when the
 *   original command has a backslash-escaped space or tab outside quotes,
 *   otherwise a `passthrough` result
 */
function validateBackslashEscapedWhitespace(
  context: ValidationContext,
): PermissionResult {
  // Common case first: nothing suspicious, no telemetry.
  if (!hasBackslashEscapedWhitespace(context.originalCommand)) {
    return {
      behavior: 'passthrough',
      message: 'No backslash-escaped whitespace',
    }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.BACKSLASH_ESCAPED_WHITESPACE,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains backslash-escaped whitespace that could alter command parsing',
  }
}
1602
1603/**
1604 * Detects a backslash immediately preceding a shell operator outside of quotes.
1605 *
1606 * SECURITY: splitCommand normalizes `\;` to a bare `;` in its output string.
1607 * When downstream code (checkReadOnlyConstraints, checkPathConstraints, etc.)
1608 * re-parses that normalized string, the bare `;` is seen as an operator and
1609 * causes a false split. This enables arbitrary file read bypassing path checks:
1610 *
1611 *   cat safe.txt \; echo ~/.ssh/id_rsa
1612 *
1613 * In bash: ONE cat command reading safe.txt, ;, echo, ~/.ssh/id_rsa as files.
1614 * After splitCommand normalizes: "cat safe.txt ; echo ~/.ssh/id_rsa"
1615 * Nested re-parse: ["cat safe.txt", "echo ~/.ssh/id_rsa"] — both segments
1616 * pass isCommandReadOnly, sensitive path hidden in echo segment is never
1617 * validated by path constraints. Auto-allowed. Private key leaked.
1618 *
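 * This check flags a backslash that actually escapes an operator, i.e. an ODD
 * run of backslashes in front of it (\;, \\\;). That form is safe in bash but
 * triggers the double-parse bug above. EVEN runs (\\;) are NOT flagged: the
 * scanner consumes backslashes in pairs, and bash reads \\ as a literal
 * backslash followed by a real `;` separator, which splitCommand splits the
 * same way (no double-parse differential).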
1622 *
1623 * Known false positive: `find . -exec cmd {} \;` — users will be prompted once.
1624 *
1625 * Note: `(` and `)` are NOT in this set — splitCommand preserves `\(` and `\)`
1626 * in its output (round-trip safe), so they don't trigger the double-parse bug.
1627 * This allows `find . \( -name x -o -name y \)` to pass without false positives.
1628 */
const SHELL_OPERATORS = new Set([';', '|', '&', '<', '>'])

function hasBackslashEscapedOperator(command: string): boolean {
  let singleQuoted = false
  let doubleQuoted = false

  for (let cursor = 0; cursor < command.length; cursor++) {
    switch (command[cursor]) {
      // SECURITY: the backslash case must win over the quote cases. Inside
      // "...", `\"` yields a literal `"` and does NOT close the string. If
      // that `"` were allowed to toggle the tracker, the real closing quote
      // would toggle it back on and every later `\;` would look quoted.
      // Exploit: `tac "x\"y" \; echo ~/.ssh/id_rsa` — bash runs a single tac
      // over all the arguments, while a desynced tracker misses the `\;` and
      // the re-parse downstream sees two harmless commands.
      // Same shape as hasBackslashEscapedWhitespace.
      case '\\': {
        // Inside '...' a backslash is an ordinary character.
        if (singleQuoted) break

        // Operators are not special inside "...", so `\;` is only a concern
        // when no quote is open.
        const escapedChar = command[cursor + 1]
        if (
          !doubleQuoted &&
          escapedChar !== undefined &&
          SHELL_OPERATORS.has(escapedChar)
        ) {
          return true
        }

        // Always step over the escaped character, inside "..." as well. That
        // consumes backslashes in pairs: in `"x\\"` the first `\` swallows
        // the second, so the closing `"` is seen and toggles normally.
        // Without the unconditional skip the second `\` would swallow the
        // closing quote and the tracker would stay desynced for good.
        // Exploit: `cat "x\\" \; echo /etc/passwd`.
        //
        // Parity consequence: an odd run (`\;`, `\\\;`) leaves an unpaired
        // backslash in front of the operator and is flagged; an even run
        // (`\\;`) is not, because bash reads `\\` as a literal backslash and
        // `;` as a genuine separator.
        cursor++
        break
      }

      // Only unescaped quote characters reach these cases.
      case "'":
        if (!doubleQuoted) singleQuoted = !singleQuoted
        break

      case '"':
        if (!singleQuoted) doubleQuoted = !doubleQuoted
        break
    }
  }

  return false
}
1695
/**
 * Validator wrapper around hasBackslashEscapedOperator.
 *
 * @param context - validation context; reads `treeSitter` (optional) and
 *   `originalCommand`
 * @returns `passthrough` when tree-sitter reports no operator nodes or when no
 *   backslash-escaped operator is found; otherwise logs a security-check
 *   event and returns `ask`
 */
function validateBackslashEscapedOperators(
  context: ValidationContext,
): PermissionResult {
  // Tree-sitter path: with no actual operator nodes in the AST, any `\;` is
  // just an escaped character inside a word argument (for example
  // `find . -exec cmd {} \;`), so the character scan below is unnecessary.
  const ast = context.treeSitter
  if (ast && !ast.hasActualOperatorNodes) {
    return { behavior: 'passthrough', message: 'No operator nodes in AST' }
  }

  if (!hasBackslashEscapedOperator(context.originalCommand)) {
    return {
      behavior: 'passthrough',
      message: 'No backslash-escaped operators',
    }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.BACKSLASH_ESCAPED_OPERATORS,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains a backslash before a shell operator (;, |, &, <, >) which can hide command structure',
  }
}
1722
1723/**
1724 * Checks if a character at position `pos` in `content` is escaped by counting
1725 * consecutive backslashes before it. An odd number means it's escaped.
1726 */
function isEscapedAtPosition(content: string, pos: number): boolean {
  // Walk left from `pos` across the unbroken run of backslashes that ends
  // directly before it; the distance walked is the length of that run.
  let runStart = pos
  while (runStart > 0 && content[runStart - 1] === '\\') {
    runStart--
  }
  // Backslashes pair up from the left, so only an odd-length run leaves one
  // over to escape the character at `pos`.
  return (pos - runStart) % 2 === 1
}
1736
1737/**
1738 * Detects unquoted brace expansion syntax that Bash expands but shell-quote/tree-sitter
1739 * treat as literal strings. This parsing discrepancy allows permission bypass:
1740 *   git ls-remote {--upload-pack="touch /tmp/test",test}
1741 * Parser sees one literal arg, but Bash expands to: --upload-pack="touch /tmp/test" test
1742 *
1743 * Brace expansion has two forms:
1744 *   1. Comma-separated: {a,b,c} → a b c
1745 *   2. Sequence: {1..5} → 1 2 3 4 5
1746 *
1747 * Both single and double quotes suppress brace expansion in Bash, so we use
1748 * fullyUnquotedContent which has both quote types stripped.
1749 * Backslash-escaped braces (\{, \}) also suppress expansion.
1750 */
function validateBraceExpansion(context: ValidationContext): PermissionResult {
  // Work on the pre-strip text. stripSafeRedirections can leave a backslash
  // directly in front of a brace (`\>/dev/null{a,b}` becomes `\{a,b}`), which
  // would make isEscapedAtPosition report a live brace as escaped.
  const text = context.fullyUnquotedPreStrip

  // True when text[index] is the given brace and is not backslash-escaped
  // (escaped braces are literal in bash).
  const isLiveBrace = (index: number, brace: '{' | '}'): boolean =>
    text[index] === brace && !isEscapedAtPosition(text, index)

  // Log the triggered sub-check and build the matching `ask` result.
  const ask = (subId: number, message: string): PermissionResult => {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.BRACE_EXPANSION,
      subId,
    })
    return { behavior: 'ask', message }
  }

  // SECURITY (subId 2): compare the number of live `{` and `}`. A mismatch
  // means quote stripping removed a quoted brace (`'{'` or `"{"`), so the
  // depth matcher further down, which assumes balanced braces, would close at
  // the wrong `}` and miss a comma that bash does see.
  //
  // Exploit: `git diff {@'{'0},--output=/tmp/pwned}`
  //   - original text: two `{`, two `}` (the quoted `{` is plain content)
  //   - after stripping: `git diff {@0},--output=/tmp/pwned}` — one `{`, two `}`
  //   - depth matcher: closes at the first `}`, inner text `@0`, no comma
  //   - bash: the first unquoted `}` has no comma before it, so bash keeps
  //     scanning, finds the comma, closes at the final `}` and expands to
  //     `@{0} --output=/tmp/pwned` — git writes the diff to /tmp/pwned.
  let openCount = 0
  let closeCount = 0
  for (let index = 0; index < text.length; index++) {
    if (isLiveBrace(index, '{')) {
      openCount++
    } else if (isLiveBrace(index, '}')) {
      closeCount++
    }
  }
  const hasLiveOpenBrace = openCount > 0

  // Only an EXCESS of closing braces is the attack signature (a quoted `{`
  // was stripped). More `{` than `}` is usually a harmless unclosed or
  // escaped brace such as `{foo` or `{a,b\}`, which bash does not expand.
  if (hasLiveOpenBrace && closeCount > openCount) {
    return ask(
      2,
      'Command has excess closing braces after quote stripping, indicating possible brace expansion obfuscation',
    )
  }

  // SECURITY (subId 3), defense in depth against payloads whose stripped
  // braces happen to balance: a single brace wrapped in quotes ('{', '}',
  // "{", "}") anywhere in the ORIGINAL command, combined with a live `{`, is
  // treated as suspicious. Legitimate commands quote the whole pattern
  // (`awk '{print $1}'`), so the outer brace is quoted too.
  if (hasLiveOpenBrace && /['"][{}]['"]/.test(context.originalCommand)) {
    return ask(
      3,
      'Command contains quoted brace character inside brace context (potential brace expansion obfuscation)',
    )
  }

  // subId 1: for every live `{`, find its partner and look for an expansion
  // trigger between them. This is a manual scan because a regex lookbehind
  // cannot tell `\{` (escaped) apart from `\\{` (live brace after a literal
  // backslash).
  for (let openAt = 0; openAt < text.length; openAt++) {
    if (!isLiveBrace(openAt, '{')) continue

    // Locate the matching live `}` by nesting depth, so that a nested `{`
    // does not cut the search short (e.g. `{--upload-pack="evil",{test}}`).
    let nesting = 1
    let closeAt = -1
    for (let probe = openAt + 1; probe < text.length; probe++) {
      if (isLiveBrace(probe, '{')) {
        nesting++
      } else if (isLiveBrace(probe, '}')) {
        nesting--
        if (nesting === 0) {
          closeAt = probe
          break
        }
      }
    }
    if (closeAt === -1) continue

    // Bash splits a brace expansion only at the outermost level of the pair,
    // so only a `,` or `..` seen at level 0 counts.
    let level = 0
    for (let inner = openAt + 1; inner < closeAt; inner++) {
      if (isLiveBrace(inner, '{')) {
        level++
        continue
      }
      if (isLiveBrace(inner, '}')) {
        level--
        continue
      }
      if (level !== 0) continue

      const ch = text[inner]
      const isListSeparator = ch === ','
      const isSequenceDots =
        ch === '.' && inner + 1 < closeAt && text[inner + 1] === '.'
      if (isListSeparator || isSequenceDots) {
        return ask(
          1,
          'Command contains brace expansion that could alter command parsing',
        )
      }
    }
    // Nothing at this level. Nested pairs are examined when the outer loop
    // reaches their own `{`.
  }

  return {
    behavior: 'passthrough',
    message: 'No brace expansion detected',
  }
}
1893
1894// Matches Unicode whitespace characters that shell-quote treats as word
1895// separators but bash treats as literal word content. While this differential
1896// is defense-favorable (shell-quote over-splits), blocking these proactively
1897// prevents future edge cases.
1898// eslint-disable-next-line no-misleading-character-class
const UNICODE_WS_RE =
  /[\u00A0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF]/

/**
 * Asks for confirmation when the original command contains any character
 * matched by UNICODE_WS_RE.
 *
 * @param context - validation context; only `originalCommand` is read
 * @returns `ask` (after logging a security-check event) on a match, otherwise
 *   `passthrough`
 */
function validateUnicodeWhitespace(
  context: ValidationContext,
): PermissionResult {
  if (!UNICODE_WS_RE.test(context.originalCommand)) {
    return { behavior: 'passthrough', message: 'No Unicode whitespace' }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.UNICODE_WHITESPACE,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains Unicode whitespace characters that could cause parsing inconsistencies',
  }
}
1918
/**
 * Asks for confirmation when a `#` directly follows a non-whitespace
 * character (a "mid-word" hash).
 *
 * @param context - validation context; only `unquotedKeepQuoteChars` is read
 * @returns `ask` (after logging a security-check event) when a mid-word `#`
 *   is found before or after line-continuation joining, otherwise
 *   `passthrough`
 */
function validateMidWordHash(context: ValidationContext): PermissionResult {
  // shell-quote treats a mid-word `#` as the start of a comment while bash
  // keeps it as a literal character — a parser differential.
  //
  // `${#` (bash string length, e.g. `${#var}`) is excluded. The lookbehind
  // sits right in front of `#`, after `\S`, so it inspects the two
  // characters immediately preceding the hash.
  // eslint-disable-next-line custom-rules/no-lookbehind-regex -- .test() with atom search: fast when # absent
  const midWordHash = /\S(?<!\$\{)#/

  // unquotedKeepQuoteChars drops quoted content but keeps the quote
  // delimiters, so a quote-adjacent hash such as `'x'#` still has a
  // non-whitespace neighbour; fullyUnquotedPreStrip would reduce it to a bare
  // word-initial `#`.
  const preJoin = context.unquotedKeepQuoteChars

  // SECURITY: also test the continuation-joined text. The context comes from
  // the command before line continuations are joined, so in `foo\<NL>#bar`
  // the hash follows a newline and does not match; once joined it follows
  // `o` and does. shell-quote runs on the joined text (splitCommand joins
  // continuations), which is where the differential appears. Not directly
  // exploitable — the `#...` fragment still prompts as its own subcommand —
  // but shell-quote would drop everything after `#` from path extraction.
  //
  // A newline is a continuation only after an ODD run of backslashes: the
  // last backslash and the newline disappear, the paired ones stay.
  const postJoin = preJoin.replace(/\\+\n/g, run => {
    const backslashes = run.length - 1
    if (backslashes % 2 === 0) return run
    return '\\'.repeat(backslashes - 1)
  })

  if (!midWordHash.test(preJoin) && !midWordHash.test(postJoin)) {
    return { behavior: 'passthrough', message: 'No mid-word hash' }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.MID_WORD_HASH,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains mid-word # which is parsed differently by shell-quote vs bash',
  }
}
1963
1964/**
1965 * Detects when a `#` comment contains quote characters that would desync
1966 * downstream quote trackers (like extractQuotedContent).
1967 *
1968 * In bash, everything after an unquoted `#` on a line is a comment — quote
1969 * characters inside the comment are literal text, not quote toggles. But our
1970 * quote-tracking functions don't handle comments, so a `'` or `"` after `#`
1971 * toggles their quote state. Attackers can craft `# ' "` sequences that
1972 * precisely desync the tracker, causing subsequent content (on following
1973 * lines) to appear "inside quotes" when it's actually unquoted in bash.
1974 *
1975 * Example attack:
1976 *   echo "it's" # ' " <<'MARKER'\n
1977 *   rm -rf /\n
1978 *   MARKER
1979 * In bash: `#` starts a comment, `rm -rf /` executes on line 2.
1980 * In extractQuotedContent: the `'` at position 14 (after #) opens a single
1981 * quote, and the `'` before MARKER closes it. But the `'` after MARKER opens
1982 * ANOTHER single quote, swallowing the newline and `rm -rf /`, so
1983 * validateNewlines sees no unquoted newlines.
1984 *
1985 * Defense: If we see an unquoted `#` followed by any quote character on the
1986 * same line, treat it as a misparsing concern. Legitimate commands rarely
1987 * have quote characters in their comments (and if they do, the user can
1988 * approve manually).
1989 */
function validateCommentQuoteDesync(
  context: ValidationContext,
): PermissionResult {
  // Tree-sitter path: the AST identifies comments and quoted content itself,
  // so the regex-tracker desync this check guards against cannot occur,
  // whether or not the command contains a comment.
  if (context.treeSitter) {
    return {
      behavior: 'passthrough',
      message: 'Tree-sitter quote context is authoritative',
    }
  }

  const source = context.originalCommand

  // Same quoting rules as extractQuotedContent: nothing is special inside
  // '...', and a `'` inside "..." is literal. The two quote kinds never
  // overlap, so one state variable covers both.
  let quote: 'none' | 'single' | 'double' = 'none'
  let pos = 0

  while (pos < source.length) {
    const ch = source[pos]

    if (quote === 'single') {
      if (ch === "'") quote = 'none'
      pos++
      continue
    }

    // Outside '...', a backslash takes the next character with it.
    if (ch === '\\') {
      pos += 2
      continue
    }

    if (quote === 'double') {
      if (ch === '"') quote = 'none'
      pos++
      continue
    }

    if (ch === "'") {
      quote = 'single'
      pos++
      continue
    }
    if (ch === '"') {
      quote = 'double'
      pos++
      continue
    }
    if (ch !== '#') {
      pos++
      continue
    }

    // Unquoted `#`: bash treats the rest of the line as a comment. A quote
    // character in there would toggle trackers that do not know about
    // comments.
    const newlineAt = source.indexOf('\n', pos)
    const commentEnd = newlineAt === -1 ? source.length : newlineAt
    if (/['"]/.test(source.slice(pos + 1, commentEnd))) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.COMMENT_QUOTE_DESYNC,
      })
      return {
        behavior: 'ask',
        message:
          'Command contains quote characters inside a # comment which can desync quote tracking',
      }
    }

    // Clean comment: resume right after its newline, or stop when the
    // comment runs to the end of the command.
    if (newlineAt === -1) break
    pos = newlineAt + 1
  }

  return { behavior: 'passthrough', message: 'No comment quote desync' }
}
2075
2076/**
2077 * Detects a newline inside a quoted string where the NEXT line would be
2078 * stripped by stripCommentLines (trimmed line starts with `#`).
2079 *
2080 * In bash, `\n` inside quotes is a literal character and part of the argument.
2081 * But stripCommentLines (called by stripSafeWrappers in bashPermissions before
2082 * path validation and rule matching) processes commands LINE-BY-LINE via
2083 * `command.split('\n')` without tracking quote state. A quoted newline lets an
2084 * attacker position the next line to start with `#` (after trim), causing
2085 * stripCommentLines to drop that line entirely — hiding sensitive paths or
2086 * arguments from path validation and permission rule matching.
2087 *
2088 * Example attack (auto-allowed in acceptEdits mode without any Bash rules):
2089 *   mv ./decoy '<\n>#' ~/.ssh/id_rsa ./exfil_dir
2090 * Bash: moves ./decoy AND ~/.ssh/id_rsa into ./exfil_dir/ (errors on `\n#`).
2091 * stripSafeWrappers: line 2 starts with `#` → stripped → "mv ./decoy '".
2092 * shell-quote: drops unbalanced trailing quote → ["mv", "./decoy"].
2093 * checkPathConstraints: only sees ./decoy (in cwd) → passthrough.
2094 * acceptEdits mode: mv with all-cwd paths → ALLOW. Zero clicks, no warning.
2095 *
2096 * Also works with cp (exfil), rm/rm -rf (delete arbitrary files/dirs).
2097 *
2098 * Defense: block ONLY the specific stripCommentLines trigger — a newline inside
2099 * quotes where the next line starts with `#` after trim. This is the minimal
2100 * check that catches the parser differential while preserving legitimate
2101 * multi-line quoted arguments (echo 'line1\nline2', grep patterns, etc.).
2102 * Safe heredocs ($(cat <<'EOF'...)) and git commit -m "..." are handled by
2103 * early validators and never reach this check.
2104 *
2105 * This validator is NOT in nonMisparsingValidators — its ask result gets
2106 * isBashSecurityCheckForMisparsing: true, causing an early block in the
2107 * permission flow at bashPermissions.ts before any line-based processing runs.
2108 */
function validateQuotedNewline(context: ValidationContext): PermissionResult {
  const { originalCommand } = context

  // Fast path: the trigger needs both a newline byte AND a `#` somewhere.
  // stripCommentLines only strips lines where trim().startsWith('#'), so no
  // `#` means no possible trigger.
  if (!originalCommand.includes('\n') || !originalCommand.includes('#')) {
    return { behavior: 'passthrough', message: 'No newline or no hash' }
  }

  // Would the line that starts right after `newlineIndex` be removed by a
  // line-based comment stripper? Mirrors
  // lines.filter(l => !l.trim().startsWith('#')): lines are split on '\n'
  // only; a stray \r is removed by trim() and does not affect the result.
  const nextLineWouldBeStripped = (newlineIndex: number): boolean => {
    const lineStart = newlineIndex + 1
    const nextNewline = originalCommand.indexOf('\n', lineStart)
    const lineEnd = nextNewline === -1 ? originalCommand.length : nextNewline
    return originalCommand.slice(lineStart, lineEnd).trim().startsWith('#')
  }

  // Log the hit and build the `ask` result (shared by both detection sites).
  const flag = (): PermissionResult => {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.QUOTED_NEWLINE,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains a quoted newline followed by a #-prefixed line, which can hide arguments from line-based permission checks',
    }
  }

  // Quote tracking mirrors extractQuotedContent / validateCommentQuoteDesync:
  // - single quotes do not toggle inside double quotes
  // - backslash escapes the next char (but not inside single quotes)
  let inSingleQuote = false
  let inDoubleQuote = false
  let escaped = false

  for (let i = 0; i < originalCommand.length; i++) {
    const char = originalCommand[i]

    if (escaped) {
      escaped = false
      // SECURITY: a backslash-newline inside "..." is still a raw '\n' in
      // the command text. Bash removes the pair as a line continuation and
      // keeps reading the same quoted word, but a splitter that cuts on
      // '\n' without quote tracking sees a new line here. Example:
      //   mv ./decoy "\<NL>#" ~/.ssh/id_rsa ./exfil_dir
      // The second raw line starts with `#`, so it must be checked exactly
      // like an unescaped quoted newline. `escaped` is never set inside
      // '...', so an open quote at this point is always a double quote.
      // Outside quotes the escaped newline is left alone, as before.
      if (char === '\n' && inDoubleQuote && nextLineWouldBeStripped(i)) {
        return flag()
      }
      continue
    }

    if (char === '\\' && !inSingleQuote) {
      escaped = true
      continue
    }

    if (char === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      continue
    }

    if (char === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      continue
    }

    // A newline inside quotes: from bash's point of view the next line starts
    // inside a quoted string. Flag it when that line would be stripped.
    if (
      char === '\n' &&
      (inSingleQuote || inDoubleQuote) &&
      nextLineWouldBeStripped(i)
    ) {
      return flag()
    }
  }

  return { behavior: 'passthrough', message: 'No quoted newline-hash pattern' }
}
2176
2177/**
2178 * Validates that the command doesn't use Zsh-specific dangerous commands that
2179 * can bypass security checks. These commands provide capabilities like loading
2180 * kernel modules, raw file I/O, network access, and pseudo-terminal execution
2181 * that circumvent normal permission checks.
2182 *
2183 * Also catches `fc -e` which can execute arbitrary editors on command history,
2184 * and `emulate` which with `-c` is an eval-equivalent.
2185 */
function validateZshDangerousCommands(
  context: ValidationContext,
): PermissionResult {
  // Zsh precommand modifiers: they prefix a command without changing which
  // command ends up running, so they are skipped when locating the base
  // command.
  const precommandModifiers = new Set([
    'command',
    'builtin',
    'noglob',
    'nocorrect',
  ])

  // Base command = first whitespace-separated word that is neither an env
  // var assignment (VAR=value) nor a precommand modifier, e.g.
  // "FOO=bar command builtin zmodload" -> "zmodload". Stays '' when every
  // word is skipped.
  const command = context.originalCommand.trim()
  const baseCmd =
    command
      .split(/\s+/)
      .find(
        word => !/^[A-Za-z_]\w*=/.test(word) && !precommandModifiers.has(word),
      ) ?? ''

  if (ZSH_DANGEROUS_COMMANDS.has(baseCmd)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.ZSH_DANGEROUS_COMMANDS,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message: `Command uses Zsh-specific '${baseCmd}' which can bypass security checks`,
    }
  }

  // `fc` on its own only lists history, but `-e` names an editor to run on
  // the command, which is effectively an eval. The pattern matches any
  // dash-word that contains an `e`, so combined flags like `-le` count too.
  const isFcWithEditor = baseCmd === 'fc' && /\s-\S*e/.test(command)
  if (isFcWithEditor) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.ZSH_DANGEROUS_COMMANDS,
      subId: 2,
    })
    return {
      behavior: 'ask',
      message:
        "Command uses 'fc -e' which can execute arbitrary commands via editor",
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No Zsh dangerous commands',
  }
}
2243
// Matches a single non-printable control character that has no legitimate use
// in a shell command. Covered code points:
//   0x00-0x08, 0x0B-0x0C, 0x0E-0x1F, and 0x7F.
// Tab (0x09), newline (0x0A), and carriage return (0x0D) are deliberately
// excluded from the class because they are handled by other validators.
//
// Why this matters: Bash silently drops null bytes and ignores most control
// chars, so an attacker can use them to slip metacharacters past our checks
// while bash still executes them (e.g., "echo safe\x00; rm -rf /").
//
// The pattern carries no `g`/`y` flag, so `.test()` keeps no state between
// calls and the constant can be shared safely.
// eslint-disable-next-line no-control-regex
const CONTROL_CHAR_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/
2252
/**
 * Checks that must run on the RAW command string, before heredoc stripping,
 * quote extraction, or any validator. Shared by the sync and async entry
 * points so the two cannot drift apart.
 *
 * @param command The raw command string.
 * @returns An 'ask' result flagged with isBashSecurityCheckForMisparsing when
 *   a hazard is found, or null when the raw string is clean.
 */
function detectRawCommandHazards(command: string): PermissionResult | null {
  // SECURITY: Block control characters before any other processing. Null bytes
  // and other non-printable chars are silently dropped by bash but confuse our
  // validators, allowing metacharacters adjacent to them to slip through.
  if (CONTROL_CHAR_RE.test(command)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.CONTROL_CHARACTERS,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains non-printable control characters that could be used to bypass security checks',
      isBashSecurityCheckForMisparsing: true,
    }
  }

  // SECURITY: Detect '\' patterns that exploit shell-quote's incorrect handling
  // of backslashes inside single quotes. Must run before shell-quote parsing.
  if (hasShellQuoteSingleQuoteBug(command)) {
    return {
      behavior: 'ask',
      message:
        'Command contains single-quoted backslash pattern that could bypass security checks',
      isBashSecurityCheckForMisparsing: true,
    }
  }

  return null
}

/**
 * Runs the ordered validator pipeline over a prepared ValidationContext.
 * Shared by the sync and async entry points; the only difference between
 * them is how the context was built.
 *
 * Stage 1 (early validators): the first validator that returns anything other
 * than 'passthrough' ends the pipeline. An 'allow' is reported to the caller
 * as 'passthrough' (carrying the validator's reason when it has one); an 'ask'
 * is returned flagged as a misparsing concern; any other result is returned
 * unchanged.
 *
 * Stage 2 (main validators): run in the listed order. An 'ask' from a
 * misparsing validator returns immediately with the flag set. An 'ask' from a
 * non-misparsing validator is deferred and only returned if no misparsing
 * validator fires afterwards.
 *
 * @param context Fully populated validation context.
 * @returns The first decisive result, or 'passthrough' when every validator
 *   passed.
 */
function runLegacyValidatorPipeline(
  context: ValidationContext,
): PermissionResult {
  const earlyValidators = [
    validateEmpty,
    validateIncompleteCommands,
    validateSafeCommandSubstitution,
    validateGitCommit,
  ]

  for (const validator of earlyValidators) {
    const result = validator(context)
    if (result.behavior === 'allow') {
      return {
        behavior: 'passthrough',
        message:
          result.decisionReason?.type === 'other' ||
          result.decisionReason?.type === 'safetyCheck'
            ? result.decisionReason.reason
            : 'Command allowed',
      }
    }
    if (result.behavior !== 'passthrough') {
      return result.behavior === 'ask'
        ? { ...result, isBashSecurityCheckForMisparsing: true as const }
        : result
    }
  }

  // Validators that don't set isBashSecurityCheckForMisparsing — their ask
  // results go through the standard permission flow rather than being blocked
  // early. LF newlines and redirections are normal patterns that splitCommand
  // handles correctly, not misparsing concerns.
  //
  // NOTE: validateCarriageReturn is NOT here — CR IS a misparsing concern.
  // shell-quote's `[^\s]` treats CR as a word separator (JS `\s` ⊃ \r), but
  // bash IFS does NOT include CR. splitCommand collapses CR→space, which IS
  // misparsing. See validateCarriageReturn for the full attack trace.
  const nonMisparsingValidators = new Set([
    validateNewlines,
    validateRedirections,
  ])

  // ORDER MATTERS — do not reorder without re-reading the notes below.
  const validators = [
    validateJqCommand,
    validateObfuscatedFlags,
    validateShellMetacharacters,
    validateDangerousVariables,
    // Run comment-quote-desync BEFORE validateNewlines: it detects cases where
    // the quote tracker would miss newlines due to # comment desync.
    validateCommentQuoteDesync,
    // Run quoted-newline BEFORE validateNewlines: it detects the INVERSE case
    // (newlines INSIDE quotes, which validateNewlines ignores by design). Quoted
    // newlines let attackers split commands across lines so that line-based
    // processing (stripCommentLines) drops sensitive content.
    validateQuotedNewline,
    // CR check runs BEFORE validateNewlines — CR is a MISPARSING concern
    // (shell-quote/bash tokenization differential), LF is not.
    validateCarriageReturn,
    validateNewlines,
    validateIFSInjection,
    validateProcEnvironAccess,
    validateDangerousPatterns,
    validateRedirections,
    validateBackslashEscapedWhitespace,
    validateBackslashEscapedOperators,
    validateUnicodeWhitespace,
    validateMidWordHash,
    validateBraceExpansion,
    validateZshDangerousCommands,
    // Run malformed token check last - other validators should catch specific patterns first
    // (e.g., $() substitution, backticks, etc.) since they have more precise error messages
    validateMalformedTokenInjection,
  ]

  // SECURITY: We must NOT short-circuit when a non-misparsing validator
  // returns 'ask' if there are still misparsing validators later in the list.
  // Non-misparsing ask results are not blocked by the gate in
  // bashPermissions.ts (it only blocks when isBashSecurityCheckForMisparsing
  // is set). If validateRedirections (non-misparsing) fires first on `>`, it
  // returns ask-without-flag — but validateBackslashEscapedOperators, which
  // sits LATER in the list and is a misparsing validator, would have caught
  // `\;` WITH the flag. Short-circuiting lets a payload like
  // `cat safe.txt \; echo /etc/passwd > ./out` slip through.
  //
  // Fix: defer non-misparsing ask results. Continue running validators; if any
  // misparsing validator fires, return THAT (with the flag). Only if we reach
  // the end without a misparsing ask, return the deferred non-misparsing ask.
  let deferredNonMisparsingResult: PermissionResult | null = null
  for (const validator of validators) {
    const result = validator(context)
    if (result.behavior === 'ask') {
      if (nonMisparsingValidators.has(validator)) {
        // Keep only the first deferred result; later ones are dropped.
        if (deferredNonMisparsingResult === null) {
          deferredNonMisparsingResult = result
        }
        continue
      }
      return { ...result, isBashSecurityCheckForMisparsing: true as const }
    }
  }
  if (deferredNonMisparsingResult !== null) {
    return deferredNonMisparsingResult
  }

  return {
    behavior: 'passthrough',
    message: 'Command passed all security checks',
  }
}

/**
 * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is
 * unavailable. The primary gate is parseForSecurity (ast.ts).
 *
 * Synchronous security check. Runs the raw-string hazard checks, builds a
 * ValidationContext from regex-based quote extraction, then runs the shared
 * validator pipeline.
 *
 * @param command The raw command string to check.
 * @returns 'passthrough' when no validator objected, otherwise the first
 *   decisive validator result ('ask' results from misparsing checks carry
 *   isBashSecurityCheckForMisparsing).
 */
export function bashCommandIsSafe_DEPRECATED(
  command: string,
): PermissionResult {
  const rawHazard = detectRawCommandHazards(command)
  if (rawHazard !== null) {
    return rawHazard
  }

  // SECURITY: Strip heredoc bodies before running security validators.
  // Only strip bodies for quoted/escaped delimiters (<<'EOF', <<\EOF) where
  // the body is literal text — $(), backticks, and ${} are NOT expanded.
  // Unquoted heredocs (<<EOF) undergo full shell expansion, so their bodies
  // may contain executable command substitutions that validators must see.
  // When extractHeredocs bails out (can't parse safely), the raw command
  // goes through all validators — which is the safe direction.
  const { processedCommand } = extractHeredocs(command, { quotedOnly: true })

  // First space-delimited word of the raw (untrimmed) command.
  const baseCommand = command.split(' ')[0] || ''
  const { withDoubleQuotes, fullyUnquoted, unquotedKeepQuoteChars } =
    extractQuotedContent(processedCommand, baseCommand === 'jq')

  const context: ValidationContext = {
    originalCommand: command,
    baseCommand,
    unquotedContent: withDoubleQuotes,
    fullyUnquotedContent: stripSafeRedirections(fullyUnquoted),
    fullyUnquotedPreStrip: fullyUnquoted,
    unquotedKeepQuoteChars,
  }

  return runLegacyValidatorPipeline(context)
}

/**
 * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is
 * unavailable. The primary gate is parseForSecurity (ast.ts).
 *
 * Async variant of bashCommandIsSafe_DEPRECATED that uses tree-sitter quote
 * context when available for more accurate parsing. Falls back to the sync
 * regex version when tree-sitter analysis is not available.
 *
 * This should be used by async callers (bashPermissions.ts,
 * bashCommandHelpers.ts). Sync callers (readOnlyValidation.ts) should continue
 * using bashCommandIsSafe_DEPRECATED().
 *
 * @param command The raw command string to check.
 * @param onDivergence Optional callback invoked (instead of emitting a
 *   logEvent) when tree-sitter and regex quote extraction disagree; lets
 *   fan-out callers batch divergence reporting.
 * @returns Same contract as bashCommandIsSafe_DEPRECATED.
 */
export async function bashCommandIsSafeAsync_DEPRECATED(
  command: string,
  onDivergence?: () => void,
): Promise<PermissionResult> {
  // Try to get tree-sitter analysis
  const parsed = await ParsedCommand.parse(command)
  const tsAnalysis = parsed?.getTreeSitterAnalysis() ?? null

  // If no tree-sitter, fall back to sync version
  if (!tsAnalysis) {
    return bashCommandIsSafe_DEPRECATED(command)
  }

  // The raw-string checks (control chars, shell-quote bug) don't benefit from
  // tree-sitter, so they are the exact same helper the sync path uses.
  const rawHazard = detectRawCommandHazards(command)
  if (rawHazard !== null) {
    return rawHazard
  }

  const { processedCommand } = extractHeredocs(command, { quotedOnly: true })

  // First space-delimited word of the raw (untrimmed) command.
  const baseCommand = command.split(' ')[0] || ''

  // Tree-sitter quote context is the primary source; the regex extraction is
  // computed only as a reference for divergence logging below.
  const tsQuote = tsAnalysis.quoteContext
  const regexQuote = extractQuotedContent(
    processedCommand,
    baseCommand === 'jq',
  )

  const context: ValidationContext = {
    originalCommand: command,
    baseCommand,
    unquotedContent: tsQuote.withDoubleQuotes,
    fullyUnquotedContent: stripSafeRedirections(tsQuote.fullyUnquoted),
    fullyUnquotedPreStrip: tsQuote.fullyUnquoted,
    unquotedKeepQuoteChars: tsQuote.unquotedKeepQuoteChars,
    treeSitter: tsAnalysis,
  }

  // Log divergence between tree-sitter and regex quote extraction.
  // Skip for heredoc commands: tree-sitter strips (quoted) heredoc bodies
  // to nothing while the regex path replaces them with placeholder strings
  // (via extractHeredocs), so the two outputs can never match. Logging
  // divergence for every heredoc command would poison the signal.
  //
  // onDivergence callback: when called in a fanout loop (bashPermissions.ts
  // Promise.all over subcommands), the caller batches divergences into a
  // single logEvent instead of N separate calls. Each logEvent triggers
  // getEventMetadata() → buildProcessMetrics() → process.memoryUsage() →
  // /proc/self/stat read; with memoized metadata these resolve as microtasks
  // and starve the event loop (CC-643). Single-command callers omit the
  // callback and get the original per-call logEvent behavior.
  if (!tsAnalysis.dangerousPatterns.hasHeredoc) {
    const hasDivergence =
      tsQuote.fullyUnquoted !== regexQuote.fullyUnquoted ||
      tsQuote.withDoubleQuotes !== regexQuote.withDoubleQuotes
    if (hasDivergence) {
      if (onDivergence) {
        onDivergence()
      } else {
        logEvent('tengu_tree_sitter_security_divergence', {
          quoteContextDivergence: true,
        })
      }
    }
  }

  return runLegacyValidatorPipeline(context)
}
2593