// QueryEngine.ts
// 46 KB, 1296 lines
// src/QueryEngine.ts
import { feature } from 'bun:bundle'
import type { ContentBlockParam } from '@anthropic-ai/sdk/resources/messages.mjs'
import { randomUUID } from 'crypto'
import last from 'lodash-es/last.js'
import {
  getSessionId,
  isSessionPersistenceDisabled,
} from 'src/bootstrap/state.js'
import type {
  PermissionMode,
  SDKCompactBoundaryMessage,
  SDKMessage,
  SDKPermissionDenial,
  SDKStatus,
  SDKUserMessageReplay,
} from 'src/entrypoints/agentSdkTypes.js'
import { accumulateUsage, updateUsage } from 'src/services/api/claude.js'
import type { NonNullableUsage } from 'src/services/api/logging.js'
import { EMPTY_USAGE } from 'src/services/api/logging.js'
import stripAnsi from 'strip-ansi'
import type { Command } from './commands.js'
import { getSlashCommandToolSkills } from './commands.js'
import {
  LOCAL_COMMAND_STDERR_TAG,
  LOCAL_COMMAND_STDOUT_TAG,
} from './constants/xml.js'
import {
  getModelUsage,
  getTotalAPIDuration,
  getTotalCost,
} from './cost-tracker.js'
import type { CanUseToolFn } from './hooks/useCanUseTool.js'
import { loadMemoryPrompt } from './memdir/memdir.js'
import { hasAutoMemPathOverride } from './memdir/paths.js'
import { query } from './query.js'
import { categorizeRetryableAPIError } from './services/api/errors.js'
import type { MCPServerConnection } from './services/mcp/types.js'
import type { AppState } from './state/AppState.js'
import { type Tools, type ToolUseContext, toolMatchesName } from './Tool.js'
import type { AgentDefinition } from './tools/AgentTool/loadAgentsDir.js'
import { SYNTHETIC_OUTPUT_TOOL_NAME } from './tools/SyntheticOutputTool/SyntheticOutputTool.js'
import type { Message } from './types/message.js'
import type { OrphanedPermission } from './types/textInputTypes.js'
import { createAbortController } from './utils/abortController.js'
import type { AttributionState } from './utils/commitAttribution.js'
import { getGlobalConfig } from './utils/config.js'
import { getCwd } from './utils/cwd.js'
import { isBareMode, isEnvTruthy } from './utils/envUtils.js'
import { getFastModeState } from './utils/fastMode.js'
import {
  type FileHistoryState,
  fileHistoryEnabled,
  fileHistoryMakeSnapshot,
} from './utils/fileHistory.js'
import {
  cloneFileStateCache,
  type FileStateCache,
} from './utils/fileStateCache.js'
import { headlessProfilerCheckpoint } from './utils/headlessProfiler.js'
import { registerStructuredOutputEnforcement } from './utils/hooks/hookHelpers.js'
import { getInMemoryErrors } from './utils/log.js'
import { countToolCalls, SYNTHETIC_MESSAGES } from './utils/messages.js'
import {
  getMainLoopModel,
  parseUserSpecifiedModel,
} from './utils/model/model.js'
import { loadAllPluginsCacheOnly } from './utils/plugins/pluginLoader.js'
import {
  type ProcessUserInputContext,
  processUserInput,
} from './utils/processUserInput/processUserInput.js'
import { fetchSystemPromptParts } from './utils/queryContext.js'
import { setCwd } from './utils/Shell.js'
import {
  flushSessionStorage,
  recordTranscript,
} from './utils/sessionStorage.js'
import { asSystemPrompt } from './utils/systemPromptType.js'
import { resolveThemeSetting } from './utils/systemTheme.js'
import {
  shouldEnableThinkingByDefault,
  type ThinkingConfig,
} from './utils/thinking.js'

// Lazy: MessageSelector.tsx pulls React/ink; only needed for message filtering at query time
// The module is require()d inside the arrow function, i.e. only when
// messageSelector() is actually called, not when this file is evaluated.
/* eslint-disable @typescript-eslint/no-require-imports */
const messageSelector =
  (): typeof import('src/components/MessageSelector.js') =>
    require('src/components/MessageSelector.js')

import {
  localCommandOutputToSDKAssistantMessage,
  toSDKCompactMetadata,
} from './utils/messages/mappers.js'
import {
  buildSystemInitMessage,
  sdkCompatToolName,
} from './utils/messages/systemInit.js'
import {
  getScratchpadDir,
  isScratchpadEnabled,
} from './utils/permissions/filesystem.js'
/* eslint-enable @typescript-eslint/no-require-imports */
import {
  handleOrphanedPermission,
  isResultSuccessful,
  normalizeMessage,
} from './utils/queryHelpers.js'

// Dead code elimination: conditional import for coordinator mode
// When feature('COORDINATOR_MODE') is false the coordinator module is never
// required and the fallback returns an empty object, so spreading the result
// into the user context adds no keys.
/* eslint-disable @typescript-eslint/no-require-imports */
const getCoordinatorUserContext: (
  mcpClients: ReadonlyArray<{ name: string }>,
  scratchpadDir?: string,
) => { [k: string]: string } = feature('COORDINATOR_MODE')
  ? require('./coordinator/coordinatorMode.js').getCoordinatorUserContext
  : () => ({})
/* eslint-enable @typescript-eslint/no-require-imports */

// Dead code elimination: conditional import for snip compaction
// Both bindings are null when feature('HISTORY_SNIP') is false; callers must
// null-check before use.
/* eslint-disable @typescript-eslint/no-require-imports */
const snipModule = feature('HISTORY_SNIP')
  ? (require('./services/compact/snipCompact.js') as typeof import('./services/compact/snipCompact.js'))
  : null
const snipProjection = feature('HISTORY_SNIP')
  ? (require('./services/compact/snipProjection.js') as typeof import('./services/compact/snipProjection.js'))
  : null
/* eslint-enable @typescript-eslint/no-require-imports */

/**
 * Construction-time configuration for a QueryEngine. The engine keeps the
 * object as-is and re-reads it on every submitMessage() call.
 */
export type QueryEngineConfig = {
  /** Working directory; applied via setCwd() at the start of each submitMessage(). */
  cwd: string
  /** Tools offered to the model; also reported in the system init message. */
  tools: Tools
  /** Slash commands available while processing user input. */
  commands: Command[]
  /** MCP server connections passed to prompt building and the tool-use context. */
  mcpClients: MCPServerConnection[]
  /**
   * Agent definitions, exposed to the query as `agentDefinitions.activeAgents`.
   * submitMessage() destructures this with a default of `[]`.
   */
  agents: AgentDefinition[]
  /**
   * Permission callback. The engine wraps it and records every result whose
   * `behavior` is not `'allow'` as a permission denial in the final result.
   */
  canUseTool: CanUseToolFn
  /** Reads the current application state. */
  getAppState: () => AppState
  /** Applies a functional update to the application state. */
  setAppState: (f: (prev: AppState) => AppState) => void
  /**
   * Conversation history to start from (defaults to an empty array). The
   * array is adopted by reference, not copied, and the engine mutates it in
   * place (push / splice / truncation), so callers should not keep using it.
   */
  initialMessages?: Message[]
  /** File state cache used as the engine's `readFileState`. */
  readFileCache: FileStateCache
  /** When a string, replaces the default system prompt. */
  customSystemPrompt?: string
  /** Appended after the (custom or default) system prompt when non-empty. */
  appendSystemPrompt?: string
  /** Model override, resolved with parseUserSpecifiedModel(); otherwise getMainLoopModel() is used. */
  userSpecifiedModel?: string
  /** Forwarded unchanged to query(). */
  fallbackModel?: string
  /**
   * Thinking configuration. When omitted, `{ type: 'adaptive' }` is used
   * unless shouldEnableThinkingByDefault() returns false, in which case
   * thinking is disabled.
   */
  thinkingConfig?: ThinkingConfig
  /** Forwarded to query(); reaching it ends the turn with an `error_max_turns` result. */
  maxTurns?: number
  /** Once getTotalCost() reaches this value the turn ends with an `error_max_budget_usd` result. */
  maxBudgetUsd?: number
  /** Forwarded unchanged to query(). */
  taskBudget?: { total: number }
  /**
   * Structured-output schema. When set and `tools` contains the synthetic
   * output tool, structured-output enforcement is registered; the number of
   * structured-output tool calls per query is also capped
   * (MAX_STRUCTURED_OUTPUT_RETRIES, default 5).
   */
  jsonSchema?: Record<string, unknown>
  /** Verbose flag passed into the tool-use context options (default false). */
  verbose?: boolean
  /**
   * When true, accepted user messages and queued_command attachments are
   * yielded back to the caller flagged `isReplay: true` (default false).
   */
  replayUserMessages?: boolean
  /** Handler for URL elicitations triggered by MCP tool -32042 errors. */
  handleElicitation?: ToolUseContext['handleElicitation']
  /** When true, raw `stream_event` messages are yielded to the caller (default false). */
  includePartialMessages?: boolean
  /** Optional status callback handed through to the tool-use context. */
  setSDKStatus?: (status: SDKStatus) => void
  /** Abort controller for the engine; one is created via createAbortController() when omitted. */
  abortController?: AbortController
  /** Orphaned permission to resolve; handled at most once per engine instance. */
  orphanedPermission?: OrphanedPermission
  /**
   * Snip-boundary handler: receives each yielded system message plus the
   * current mutableMessages store. Returns undefined if the message is not a
   * snip boundary; otherwise returns the replayed snip result. Injected by
   * ask() when HISTORY_SNIP is enabled so feature-gated strings stay inside
   * the gated module (keeps QueryEngine free of excluded strings and testable
   * despite feature() returning false under bun test). SDK-only: the REPL
   * keeps full history for UI scrollback and projects on demand via
   * projectSnippedView; QueryEngine truncates here to bound memory in long
   * headless sessions (no UI to preserve).
   */
  snipReplay?: (
    yieldedSystemMsg: Message,
    store: Message[],
  ) => { messages: Message[]; executed: boolean } | undefined
}

/**
 * QueryEngine owns the query lifecycle and session state for a conversation.
 * It extracts the core logic from ask() into a standalone class that can be
 * used by both the headless/SDK path and (in a future phase) the REPL.
 *
 * One QueryEngine per conversation. Each submitMessage() call starts a new
 * turn within the same conversation. State (messages, file cache, usage, etc.)
 * persists across turns.
 */
export class QueryEngine {
  private config: QueryEngineConfig
  private mutableMessages: Message[]
  private abortController: AbortController
  private permissionDenials: SDKPermissionDenial[]
  private totalUsage: NonNullableUsage
  private hasHandledOrphanedPermission = false
  private readFileState: FileStateCache
  // Turn-scoped skill discovery tracking (feeds was_discovered on
  // tengu_skill_tool_invocation). 
Must persist across the two194 // processUserInputContext rebuilds inside submitMessage, but is cleared195 // at the start of each submitMessage to avoid unbounded growth across196 // many turns in SDK mode.197 private discoveredSkillNames = new Set<string>()198 private loadedNestedMemoryPaths = new Set<string>()199200 constructor(config: QueryEngineConfig) {201 this.config = config202 this.mutableMessages = config.initialMessages ?? []203 this.abortController = config.abortController ?? createAbortController()204 this.permissionDenials = []205 this.readFileState = config.readFileCache206 this.totalUsage = EMPTY_USAGE207 }208209 async *submitMessage(210 prompt: string | ContentBlockParam[],211 options?: { uuid?: string; isMeta?: boolean },212 ): AsyncGenerator<SDKMessage, void, unknown> {213 const {214 cwd,215 commands,216 tools,217 mcpClients,218 verbose = false,219 thinkingConfig,220 maxTurns,221 maxBudgetUsd,222 taskBudget,223 canUseTool,224 customSystemPrompt,225 appendSystemPrompt,226 userSpecifiedModel,227 fallbackModel,228 jsonSchema,229 getAppState,230 setAppState,231 replayUserMessages = false,232 includePartialMessages = false,233 agents = [],234 setSDKStatus,235 orphanedPermission,236 } = this.config237238 this.discoveredSkillNames.clear()239 setCwd(cwd)240 const persistSession = !isSessionPersistenceDisabled()241 const startTime = Date.now()242243 // Wrap canUseTool to track permission denials244 const wrappedCanUseTool: CanUseToolFn = async (245 tool,246 input,247 toolUseContext,248 assistantMessage,249 toolUseID,250 forceDecision,251 ) => {252 const result = await canUseTool(253 tool,254 input,255 toolUseContext,256 assistantMessage,257 toolUseID,258 forceDecision,259 )260261 // Track denials for SDK reporting262 if (result.behavior !== 'allow') {263 this.permissionDenials.push({264 tool_name: sdkCompatToolName(tool.name),265 tool_use_id: toolUseID,266 tool_input: input,267 })268 }269270 return result271 }272273 const initialAppState = 
getAppState()274 const initialMainLoopModel = userSpecifiedModel275 ? parseUserSpecifiedModel(userSpecifiedModel)276 : getMainLoopModel()277278 const initialThinkingConfig: ThinkingConfig = thinkingConfig279 ? thinkingConfig280 : shouldEnableThinkingByDefault() !== false281 ? { type: 'adaptive' }282 : { type: 'disabled' }283284 headlessProfilerCheckpoint('before_getSystemPrompt')285 // Narrow once so TS tracks the type through the conditionals below.286 const customPrompt =287 typeof customSystemPrompt === 'string' ? customSystemPrompt : undefined288 const {289 defaultSystemPrompt,290 userContext: baseUserContext,291 systemContext,292 } = await fetchSystemPromptParts({293 tools,294 mainLoopModel: initialMainLoopModel,295 additionalWorkingDirectories: Array.from(296 initialAppState.toolPermissionContext.additionalWorkingDirectories.keys(),297 ),298 mcpClients,299 customSystemPrompt: customPrompt,300 })301 headlessProfilerCheckpoint('after_getSystemPrompt')302 const userContext = {303 ...baseUserContext,304 ...getCoordinatorUserContext(305 mcpClients,306 isScratchpadEnabled() ? getScratchpadDir() : undefined,307 ),308 }309310 // When an SDK caller provides a custom system prompt AND has set311 // CLAUDE_COWORK_MEMORY_PATH_OVERRIDE, inject the memory-mechanics prompt.312 // The env var is an explicit opt-in signal — the caller has wired up313 // a memory directory and needs Claude to know how to use it (which314 // Write/Edit tools to call, MEMORY.md filename, loading semantics).315 // The caller can layer their own policy text via appendSystemPrompt.316 const memoryMechanicsPrompt =317 customPrompt !== undefined && hasAutoMemPathOverride()318 ? await loadMemoryPrompt()319 : null320321 const systemPrompt = asSystemPrompt([322 ...(customPrompt !== undefined ? [customPrompt] : defaultSystemPrompt),323 ...(memoryMechanicsPrompt ? [memoryMechanicsPrompt] : []),324 ...(appendSystemPrompt ? 
[appendSystemPrompt] : []),325 ])326327 // Register function hook for structured output enforcement328 const hasStructuredOutputTool = tools.some(t =>329 toolMatchesName(t, SYNTHETIC_OUTPUT_TOOL_NAME),330 )331 if (jsonSchema && hasStructuredOutputTool) {332 registerStructuredOutputEnforcement(setAppState, getSessionId())333 }334335 let processUserInputContext: ProcessUserInputContext = {336 messages: this.mutableMessages,337 // Slash commands that mutate the message array (e.g. /force-snip)338 // call setMessages(fn). In interactive mode this writes back to339 // AppState; in print mode we write back to mutableMessages so the340 // rest of the query loop (push at :389, snapshot at :392) sees341 // the result. The second processUserInputContext below (after342 // slash-command processing) keeps the no-op — nothing else calls343 // setMessages past that point.344 setMessages: fn => {345 this.mutableMessages = fn(this.mutableMessages)346 },347 onChangeAPIKey: () => {},348 handleElicitation: this.config.handleElicitation,349 options: {350 commands,351 debug: false, // we use stdout, so don't want to clobber it352 tools,353 verbose,354 mainLoopModel: initialMainLoopModel,355 thinkingConfig: initialThinkingConfig,356 mcpClients,357 mcpResources: {},358 ideInstallationStatus: null,359 isNonInteractiveSession: true,360 customSystemPrompt,361 appendSystemPrompt,362 agentDefinitions: { activeAgents: agents, allAgents: [] },363 theme: resolveThemeSetting(getGlobalConfig().theme),364 maxBudgetUsd,365 },366 getAppState,367 setAppState,368 abortController: this.abortController,369 readFileState: this.readFileState,370 nestedMemoryAttachmentTriggers: new Set<string>(),371 loadedNestedMemoryPaths: this.loadedNestedMemoryPaths,372 dynamicSkillDirTriggers: new Set<string>(),373 discoveredSkillNames: this.discoveredSkillNames,374 setInProgressToolUseIDs: () => {},375 setResponseLength: () => {},376 updateFileHistoryState: (377 updater: (prev: FileHistoryState) => FileHistoryState,378 
) => {379 setAppState(prev => {380 const updated = updater(prev.fileHistory)381 if (updated === prev.fileHistory) return prev382 return { ...prev, fileHistory: updated }383 })384 },385 updateAttributionState: (386 updater: (prev: AttributionState) => AttributionState,387 ) => {388 setAppState(prev => {389 const updated = updater(prev.attribution)390 if (updated === prev.attribution) return prev391 return { ...prev, attribution: updated }392 })393 },394 setSDKStatus,395 }396397 // Handle orphaned permission (only once per engine lifetime)398 if (orphanedPermission && !this.hasHandledOrphanedPermission) {399 this.hasHandledOrphanedPermission = true400 for await (const message of handleOrphanedPermission(401 orphanedPermission,402 tools,403 this.mutableMessages,404 processUserInputContext,405 )) {406 yield message407 }408 }409410 const {411 messages: messagesFromUserInput,412 shouldQuery,413 allowedTools,414 model: modelFromUserInput,415 resultText,416 } = await processUserInput({417 input: prompt,418 mode: 'prompt',419 setToolJSX: () => {},420 context: {421 ...processUserInputContext,422 messages: this.mutableMessages,423 },424 messages: this.mutableMessages,425 uuid: options?.uuid,426 isMeta: options?.isMeta,427 querySource: 'sdk',428 })429430 // Push new messages, including user input and any attachments431 this.mutableMessages.push(...messagesFromUserInput)432433 // Update params to reflect updates from processing /slash commands434 const messages = [...this.mutableMessages]435436 // Persist the user's message(s) to transcript BEFORE entering the query437 // loop. The for-await below only calls recordTranscript when ask() yields438 // an assistant/user/compact_boundary message — which doesn't happen until439 // the API responds. If the process is killed before that (e.g. 
user clicks440 // Stop in cowork seconds after send), the transcript is left with only441 // queue-operation entries; getLastSessionLog filters those out, returns442 // null, and --resume fails with "No conversation found". Writing now makes443 // the transcript resumable from the point the user message was accepted,444 // even if no API response ever arrives.445 //446 // --bare / SIMPLE: fire-and-forget. Scripted calls don't --resume after447 // kill-mid-request. The await is ~4ms on SSD, ~30ms under disk contention448 // — the single largest controllable critical-path cost after module eval.449 // Transcript is still written (for post-hoc debugging); just not blocking.450 if (persistSession && messagesFromUserInput.length > 0) {451 const transcriptPromise = recordTranscript(messages)452 if (isBareMode()) {453 void transcriptPromise454 } else {455 await transcriptPromise456 if (457 isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||458 isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)459 ) {460 await flushSessionStorage()461 }462 }463 }464465 // Filter messages that should be acknowledged after transcript466 const replayableMessages = messagesFromUserInput.filter(467 msg =>468 (msg.type === 'user' &&469 !msg.isMeta && // Skip synthetic caveat messages470 !msg.toolUseResult && // Skip tool results (they'll be acked from query)471 messageSelector().selectableUserMessagesFilter(msg)) || // Skip non-user-authored messages (task notifications, etc.)472 (msg.type === 'system' && msg.subtype === 'compact_boundary'), // Always ack compact boundaries473 )474 const messagesToAck = replayUserMessages ? replayableMessages : []475476 // Update the ToolPermissionContext based on user input processing (as necessary)477 setAppState(prev => ({478 ...prev,479 toolPermissionContext: {480 ...prev.toolPermissionContext,481 alwaysAllowRules: {482 ...prev.toolPermissionContext.alwaysAllowRules,483 command: allowedTools,484 },485 },486 }))487488 const mainLoopModel = modelFromUserInput ?? 
initialMainLoopModel489490 // Recreate after processing the prompt to pick up updated messages and491 // model (from slash commands).492 processUserInputContext = {493 messages,494 setMessages: () => {},495 onChangeAPIKey: () => {},496 handleElicitation: this.config.handleElicitation,497 options: {498 commands,499 debug: false,500 tools,501 verbose,502 mainLoopModel,503 thinkingConfig: initialThinkingConfig,504 mcpClients,505 mcpResources: {},506 ideInstallationStatus: null,507 isNonInteractiveSession: true,508 customSystemPrompt,509 appendSystemPrompt,510 theme: resolveThemeSetting(getGlobalConfig().theme),511 agentDefinitions: { activeAgents: agents, allAgents: [] },512 maxBudgetUsd,513 },514 getAppState,515 setAppState,516 abortController: this.abortController,517 readFileState: this.readFileState,518 nestedMemoryAttachmentTriggers: new Set<string>(),519 loadedNestedMemoryPaths: this.loadedNestedMemoryPaths,520 dynamicSkillDirTriggers: new Set<string>(),521 discoveredSkillNames: this.discoveredSkillNames,522 setInProgressToolUseIDs: () => {},523 setResponseLength: () => {},524 updateFileHistoryState: processUserInputContext.updateFileHistoryState,525 updateAttributionState: processUserInputContext.updateAttributionState,526 setSDKStatus,527 }528529 headlessProfilerCheckpoint('before_skills_plugins')530 // Cache-only: headless/SDK/CCR startup must not block on network for531 // ref-tracked plugins. 
CCR populates the cache via CLAUDE_CODE_SYNC_PLUGIN_INSTALL532 // (headlessPluginInstall) or CLAUDE_CODE_PLUGIN_SEED_DIR before this runs;533 // SDK callers that need fresh source can call /reload-plugins.534 const [skills, { enabled: enabledPlugins }] = await Promise.all([535 getSlashCommandToolSkills(getCwd()),536 loadAllPluginsCacheOnly(),537 ])538 headlessProfilerCheckpoint('after_skills_plugins')539540 yield buildSystemInitMessage({541 tools,542 mcpClients,543 model: mainLoopModel,544 permissionMode: initialAppState.toolPermissionContext545 .mode as PermissionMode, // TODO: avoid the cast546 commands,547 agents,548 skills,549 plugins: enabledPlugins,550 fastMode: initialAppState.fastMode,551 })552553 // Record when system message is yielded for headless latency tracking554 headlessProfilerCheckpoint('system_message_yielded')555556 if (!shouldQuery) {557 // Return the results of local slash commands.558 // Use messagesFromUserInput (not replayableMessages) for command output559 // because selectableUserMessagesFilter excludes local-command-stdout tags.560 for (const msg of messagesFromUserInput) {561 if (562 msg.type === 'user' &&563 typeof msg.message.content === 'string' &&564 (msg.message.content.includes(`<${LOCAL_COMMAND_STDOUT_TAG}>`) ||565 msg.message.content.includes(`<${LOCAL_COMMAND_STDERR_TAG}>`) ||566 msg.isCompactSummary)567 ) {568 yield {569 type: 'user',570 message: {571 ...msg.message,572 content: stripAnsi(msg.message.content),573 },574 session_id: getSessionId(),575 parent_tool_use_id: null,576 uuid: msg.uuid,577 timestamp: msg.timestamp,578 isReplay: !msg.isCompactSummary,579 isSynthetic: msg.isMeta || msg.isVisibleInTranscriptOnly,580 } as SDKUserMessageReplay581 }582583 // Local command output — yield as a synthetic assistant message so584 // RC renders it as assistant-style text rather than a user bubble.585 // Emitted as assistant (not the dedicated SDKLocalCommandOutputMessage586 // system subtype) so mobile clients + session-ingress can 
parse it.587 if (588 msg.type === 'system' &&589 msg.subtype === 'local_command' &&590 typeof msg.content === 'string' &&591 (msg.content.includes(`<${LOCAL_COMMAND_STDOUT_TAG}>`) ||592 msg.content.includes(`<${LOCAL_COMMAND_STDERR_TAG}>`))593 ) {594 yield localCommandOutputToSDKAssistantMessage(msg.content, msg.uuid)595 }596597 if (msg.type === 'system' && msg.subtype === 'compact_boundary') {598 yield {599 type: 'system',600 subtype: 'compact_boundary' as const,601 session_id: getSessionId(),602 uuid: msg.uuid,603 compact_metadata: toSDKCompactMetadata(msg.compactMetadata),604 } as SDKCompactBoundaryMessage605 }606 }607608 if (persistSession) {609 await recordTranscript(messages)610 if (611 isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||612 isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)613 ) {614 await flushSessionStorage()615 }616 }617618 yield {619 type: 'result',620 subtype: 'success',621 is_error: false,622 duration_ms: Date.now() - startTime,623 duration_api_ms: getTotalAPIDuration(),624 num_turns: messages.length - 1,625 result: resultText ?? 
'',626 stop_reason: null,627 session_id: getSessionId(),628 total_cost_usd: getTotalCost(),629 usage: this.totalUsage,630 modelUsage: getModelUsage(),631 permission_denials: this.permissionDenials,632 fast_mode_state: getFastModeState(633 mainLoopModel,634 initialAppState.fastMode,635 ),636 uuid: randomUUID(),637 }638 return639 }640641 if (fileHistoryEnabled() && persistSession) {642 messagesFromUserInput643 .filter(messageSelector().selectableUserMessagesFilter)644 .forEach(message => {645 void fileHistoryMakeSnapshot(646 (updater: (prev: FileHistoryState) => FileHistoryState) => {647 setAppState(prev => ({648 ...prev,649 fileHistory: updater(prev.fileHistory),650 }))651 },652 message.uuid,653 )654 })655 }656657 // Track current message usage (reset on each message_start)658 let currentMessageUsage: NonNullableUsage = EMPTY_USAGE659 let turnCount = 1660 let hasAcknowledgedInitialMessages = false661 // Track structured output from StructuredOutput tool calls662 let structuredOutputFromTool: unknown663 // Track the last stop_reason from assistant messages664 let lastStopReason: string | null = null665 // Reference-based watermark so error_during_execution's errors[] is666 // turn-scoped. A length-based index breaks when the 100-entry ring buffer667 // shift()s during the turn — the index slides. If this entry is rotated668 // out, lastIndexOf returns -1 and we include everything (safe fallback).669 const errorLogWatermark = getInMemoryErrors().at(-1)670 // Snapshot count before this query for delta-based retry limiting671 const initialStructuredOutputCalls = jsonSchema672 ? 
countToolCalls(this.mutableMessages, SYNTHETIC_OUTPUT_TOOL_NAME)673 : 0674675 for await (const message of query({676 messages,677 systemPrompt,678 userContext,679 systemContext,680 canUseTool: wrappedCanUseTool,681 toolUseContext: processUserInputContext,682 fallbackModel,683 querySource: 'sdk',684 maxTurns,685 taskBudget,686 })) {687 // Record assistant, user, and compact boundary messages688 if (689 message.type === 'assistant' ||690 message.type === 'user' ||691 (message.type === 'system' && message.subtype === 'compact_boundary')692 ) {693 // Before writing a compact boundary, flush any in-memory-only694 // messages up through the preservedSegment tail. Attachments and695 // progress are now recorded inline (their switch cases below), but696 // this flush still matters for the preservedSegment tail walk.697 // If the SDK subprocess restarts before then (claude-desktop kills698 // between turns), tailUuid points to a never-written message →699 // applyPreservedSegmentRelinks fails its tail→head walk → returns700 // without pruning → resume loads full pre-compact history.701 if (702 persistSession &&703 message.type === 'system' &&704 message.subtype === 'compact_boundary'705 ) {706 const tailUuid = message.compactMetadata?.preservedSegment?.tailUuid707 if (tailUuid) {708 const tailIdx = this.mutableMessages.findLastIndex(709 m => m.uuid === tailUuid,710 )711 if (tailIdx !== -1) {712 await recordTranscript(this.mutableMessages.slice(0, tailIdx + 1))713 }714 }715 }716 messages.push(message)717 if (persistSession) {718 // Fire-and-forget for assistant messages. claude.ts yields one719 // assistant message per content block, then mutates the last720 // one's message.usage/stop_reason on message_delta — relying on721 // the write queue's 100ms lazy jsonStringify. Awaiting here722 // blocks ask()'s generator, so message_delta can't run until723 // every block is consumed; the drain timer (started at block 1)724 // elapses first. 
Interactive CC doesn't hit this because725 // useLogMessages.ts fire-and-forgets. enqueueWrite is726 // order-preserving so fire-and-forget here is safe.727 if (message.type === 'assistant') {728 void recordTranscript(messages)729 } else {730 await recordTranscript(messages)731 }732 }733734 // Acknowledge initial user messages after first transcript recording735 if (!hasAcknowledgedInitialMessages && messagesToAck.length > 0) {736 hasAcknowledgedInitialMessages = true737 for (const msgToAck of messagesToAck) {738 if (msgToAck.type === 'user') {739 yield {740 type: 'user',741 message: msgToAck.message,742 session_id: getSessionId(),743 parent_tool_use_id: null,744 uuid: msgToAck.uuid,745 timestamp: msgToAck.timestamp,746 isReplay: true,747 } as SDKUserMessageReplay748 }749 }750 }751 }752753 if (message.type === 'user') {754 turnCount++755 }756757 switch (message.type) {758 case 'tombstone':759 // Tombstone messages are control signals for removing messages, skip them760 break761 case 'assistant':762 // Capture stop_reason if already set (synthetic messages). For763 // streamed responses, this is null at content_block_stop time;764 // the real value arrives via message_delta (handled below).765 if (message.message.stop_reason != null) {766 lastStopReason = message.message.stop_reason767 }768 this.mutableMessages.push(message)769 yield* normalizeMessage(message)770 break771 case 'progress':772 this.mutableMessages.push(message)773 // Record inline so the dedup loop in the next ask() call sees it774 // as already-recorded. 
Without this, deferred progress interleaves775 // with already-recorded tool_results in mutableMessages, and the776 // dedup walk freezes startingParentUuid at the wrong message —777 // forking the chain and orphaning the conversation on resume.778 if (persistSession) {779 messages.push(message)780 void recordTranscript(messages)781 }782 yield* normalizeMessage(message)783 break784 case 'user':785 this.mutableMessages.push(message)786 yield* normalizeMessage(message)787 break788 case 'stream_event':789 if (message.event.type === 'message_start') {790 // Reset current message usage for new message791 currentMessageUsage = EMPTY_USAGE792 currentMessageUsage = updateUsage(793 currentMessageUsage,794 message.event.message.usage,795 )796 }797 if (message.event.type === 'message_delta') {798 currentMessageUsage = updateUsage(799 currentMessageUsage,800 message.event.usage,801 )802 // Capture stop_reason from message_delta. The assistant message803 // is yielded at content_block_stop with stop_reason=null; the804 // real value only arrives here (see claude.ts message_delta805 // handler). 
Without this, result.stop_reason is always null.806 if (message.event.delta.stop_reason != null) {807 lastStopReason = message.event.delta.stop_reason808 }809 }810 if (message.event.type === 'message_stop') {811 // Accumulate current message usage into total812 this.totalUsage = accumulateUsage(813 this.totalUsage,814 currentMessageUsage,815 )816 }817818 if (includePartialMessages) {819 yield {820 type: 'stream_event' as const,821 event: message.event,822 session_id: getSessionId(),823 parent_tool_use_id: null,824 uuid: randomUUID(),825 }826 }827828 break829 case 'attachment':830 this.mutableMessages.push(message)831 // Record inline (same reason as progress above).832 if (persistSession) {833 messages.push(message)834 void recordTranscript(messages)835 }836837 // Extract structured output from StructuredOutput tool calls838 if (message.attachment.type === 'structured_output') {839 structuredOutputFromTool = message.attachment.data840 }841 // Handle max turns reached signal from query.ts842 else if (message.attachment.type === 'max_turns_reached') {843 if (persistSession) {844 if (845 isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||846 isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)847 ) {848 await flushSessionStorage()849 }850 }851 yield {852 type: 'result',853 subtype: 'error_max_turns',854 duration_ms: Date.now() - startTime,855 duration_api_ms: getTotalAPIDuration(),856 is_error: true,857 num_turns: message.attachment.turnCount,858 stop_reason: lastStopReason,859 session_id: getSessionId(),860 total_cost_usd: getTotalCost(),861 usage: this.totalUsage,862 modelUsage: getModelUsage(),863 permission_denials: this.permissionDenials,864 fast_mode_state: getFastModeState(865 mainLoopModel,866 initialAppState.fastMode,867 ),868 uuid: randomUUID(),869 errors: [870 `Reached maximum number of turns (${message.attachment.maxTurns})`,871 ],872 }873 return874 }875 // Yield queued_command attachments as SDK user message replays876 else if (877 replayUserMessages &&878 
message.attachment.type === 'queued_command'879 ) {880 yield {881 type: 'user',882 message: {883 role: 'user' as const,884 content: message.attachment.prompt,885 },886 session_id: getSessionId(),887 parent_tool_use_id: null,888 uuid: message.attachment.source_uuid || message.uuid,889 timestamp: message.timestamp,890 isReplay: true,891 } as SDKUserMessageReplay892 }893 break894 case 'stream_request_start':895 // Don't yield stream request start messages896 break897 case 'system': {898 // Snip boundary: replay on our store to remove zombie messages and899 // stale markers. The yielded boundary is a signal, not data to push —900 // the replay produces its own equivalent boundary. Without this,901 // markers persist and re-trigger on every turn, and mutableMessages902 // never shrinks (memory leak in long SDK sessions). The subtype903 // check lives inside the injected callback so feature-gated strings904 // stay out of this file (excluded-strings check).905 const snipResult = this.config.snipReplay?.(906 message,907 this.mutableMessages,908 )909 if (snipResult !== undefined) {910 if (snipResult.executed) {911 this.mutableMessages.length = 0912 this.mutableMessages.push(...snipResult.messages)913 }914 break915 }916 this.mutableMessages.push(message)917 // Yield compact boundary messages to SDK918 if (919 message.subtype === 'compact_boundary' &&920 message.compactMetadata921 ) {922 // Release pre-compaction messages for GC. The boundary was just923 // pushed so it's the last element. 
query.ts already uses924 // getMessagesAfterCompactBoundary() internally, so only925 // post-boundary messages are needed going forward.926 const mutableBoundaryIdx = this.mutableMessages.length - 1927 if (mutableBoundaryIdx > 0) {928 this.mutableMessages.splice(0, mutableBoundaryIdx)929 }930 const localBoundaryIdx = messages.length - 1931 if (localBoundaryIdx > 0) {932 messages.splice(0, localBoundaryIdx)933 }934935 yield {936 type: 'system',937 subtype: 'compact_boundary' as const,938 session_id: getSessionId(),939 uuid: message.uuid,940 compact_metadata: toSDKCompactMetadata(message.compactMetadata),941 }942 }943 if (message.subtype === 'api_error') {944 yield {945 type: 'system',946 subtype: 'api_retry' as const,947 attempt: message.retryAttempt,948 max_retries: message.maxRetries,949 retry_delay_ms: message.retryInMs,950 error_status: message.error.status ?? null,951 error: categorizeRetryableAPIError(message.error),952 session_id: getSessionId(),953 uuid: message.uuid,954 }955 }956 // Don't yield other system messages in headless mode957 break958 }959 case 'tool_use_summary':960 // Yield tool use summary messages to SDK961 yield {962 type: 'tool_use_summary' as const,963 summary: message.summary,964 preceding_tool_use_ids: message.precedingToolUseIds,965 session_id: getSessionId(),966 uuid: message.uuid,967 }968 break969 }970971 // Check if USD budget has been exceeded972 if (maxBudgetUsd !== undefined && getTotalCost() >= maxBudgetUsd) {973 if (persistSession) {974 if (975 isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||976 isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)977 ) {978 await flushSessionStorage()979 }980 }981 yield {982 type: 'result',983 subtype: 'error_max_budget_usd',984 duration_ms: Date.now() - startTime,985 duration_api_ms: getTotalAPIDuration(),986 is_error: true,987 num_turns: turnCount,988 stop_reason: lastStopReason,989 session_id: getSessionId(),990 total_cost_usd: getTotalCost(),991 usage: this.totalUsage,992 modelUsage: 
getModelUsage(),993 permission_denials: this.permissionDenials,994 fast_mode_state: getFastModeState(995 mainLoopModel,996 initialAppState.fastMode,997 ),998 uuid: randomUUID(),999 errors: [`Reached maximum budget ($${maxBudgetUsd})`],1000 }1001 return1002 }10031004 // Check if structured output retry limit exceeded (only on user messages)1005 if (message.type === 'user' && jsonSchema) {1006 const currentCalls = countToolCalls(1007 this.mutableMessages,1008 SYNTHETIC_OUTPUT_TOOL_NAME,1009 )1010 const callsThisQuery = currentCalls - initialStructuredOutputCalls1011 const maxRetries = parseInt(1012 process.env.MAX_STRUCTURED_OUTPUT_RETRIES || '5',1013 10,1014 )1015 if (callsThisQuery >= maxRetries) {1016 if (persistSession) {1017 if (1018 isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||1019 isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)1020 ) {1021 await flushSessionStorage()1022 }1023 }1024 yield {1025 type: 'result',1026 subtype: 'error_max_structured_output_retries',1027 duration_ms: Date.now() - startTime,1028 duration_api_ms: getTotalAPIDuration(),1029 is_error: true,1030 num_turns: turnCount,1031 stop_reason: lastStopReason,1032 session_id: getSessionId(),1033 total_cost_usd: getTotalCost(),1034 usage: this.totalUsage,1035 modelUsage: getModelUsage(),1036 permission_denials: this.permissionDenials,1037 fast_mode_state: getFastModeState(1038 mainLoopModel,1039 initialAppState.fastMode,1040 ),1041 uuid: randomUUID(),1042 errors: [1043 `Failed to provide valid structured output after ${maxRetries} attempts`,1044 ],1045 }1046 return1047 }1048 }1049 }10501051 // Stop hooks yield progress/attachment messages AFTER the assistant1052 // response (via yield* handleStopHooks in query.ts). Since #23537 pushes1053 // those to `messages` inline, last(messages) can be a progress/attachment1054 // instead of the assistant — which makes textResult extraction below1055 // return '' and -p mode emit a blank line. 
Allowlist to assistant|user:1056 // isResultSuccessful handles both (user with all tool_result blocks is a1057 // valid successful terminal state).1058 const result = messages.findLast(1059 m => m.type === 'assistant' || m.type === 'user',1060 )1061 // Capture for the error_during_execution diagnostic — isResultSuccessful1062 // is a type predicate (message is Message), so inside the false branch1063 // `result` narrows to never and these accesses don't typecheck.1064 const edeResultType = result?.type ?? 'undefined'1065 const edeLastContentType =1066 result?.type === 'assistant'1067 ? (last(result.message.content)?.type ?? 'none')1068 : 'n/a'10691070 // Flush buffered transcript writes before yielding result.1071 // The desktop app kills the CLI process immediately after receiving the1072 // result message, so any unflushed writes would be lost.1073 if (persistSession) {1074 if (1075 isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||1076 isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)1077 ) {1078 await flushSessionStorage()1079 }1080 }10811082 if (!isResultSuccessful(result, lastStopReason)) {1083 yield {1084 type: 'result',1085 subtype: 'error_during_execution',1086 duration_ms: Date.now() - startTime,1087 duration_api_ms: getTotalAPIDuration(),1088 is_error: true,1089 num_turns: turnCount,1090 stop_reason: lastStopReason,1091 session_id: getSessionId(),1092 total_cost_usd: getTotalCost(),1093 usage: this.totalUsage,1094 modelUsage: getModelUsage(),1095 permission_denials: this.permissionDenials,1096 fast_mode_state: getFastModeState(1097 mainLoopModel,1098 initialAppState.fastMode,1099 ),1100 uuid: randomUUID(),1101 // Diagnostic prefix: these are what isResultSuccessful() checks — if1102 // the result type isn't assistant-with-text/thinking or user-with-1103 // tool_result, and stop_reason isn't end_turn, that's why this fired.1104 // errors[] is turn-scoped via the watermark; previously it dumped the1105 // entire process's logError buffer (ripgrep timeouts, 
// ENOENT, etc).
        errors: (() => {
          const all = getInMemoryErrors()
          const start = errorLogWatermark
            ? all.lastIndexOf(errorLogWatermark) + 1
            : 0
          return [
            `[ede_diagnostic] result_type=${edeResultType} last_content_type=${edeLastContentType} stop_reason=${lastStopReason}`,
            ...all.slice(start).map(_ => _.error),
          ]
        })(),
      }
      return
    }

    // Extract the text result based on message type
    let textResult = ''
    let isApiError = false

    if (result.type === 'assistant') {
      const lastContent = last(result.message.content)
      if (
        lastContent?.type === 'text' &&
        !SYNTHETIC_MESSAGES.has(lastContent.text)
      ) {
        textResult = lastContent.text
      }
      isApiError = Boolean(result.isApiErrorMessage)
    }

    yield {
      type: 'result',
      subtype: 'success',
      is_error: isApiError,
      duration_ms: Date.now() - startTime,
      duration_api_ms: getTotalAPIDuration(),
      num_turns: turnCount,
      result: textResult,
      stop_reason: lastStopReason,
      session_id: getSessionId(),
      total_cost_usd: getTotalCost(),
      usage: this.totalUsage,
      modelUsage: getModelUsage(),
      permission_denials: this.permissionDenials,
      structured_output: structuredOutputFromTool,
      fast_mode_state: getFastModeState(
        mainLoopModel,
        initialAppState.fastMode,
      ),
      uuid: randomUUID(),
    }
  }

  /**
   * Calls `abort()` on this engine's abort controller.
   */
  interrupt(): void {
    const { abortController } = this
    abortController.abort()
  }

  /**
   * Returns the engine's message store, typed as read-only.
   *
   * This is the same array instance the engine pushes to and splices in
   * place, not a copy, so its contents can change after it is returned.
   */
  getMessages(): readonly Message[] {
    const { mutableMessages } = this
    return mutableMessages
  }

  /**
   * Returns the read-file state cache held by this engine.
   */
  getReadFileState(): FileStateCache {
    const { readFileState } = this
    return readFileState
  }

  /**
   * Returns the current session id by delegating to the module-level
   * `getSessionId()` imported from `src/bootstrap/state.js`.
   */
  getSessionId(): string {
    const sessionId = getSessionId()
    return sessionId
  }

  /**
   * Stores `model` as `userSpecifiedModel` on the engine config.
   *
   * NOTE(review): presumably picked up by the next submitted message rather
   * than one already in flight; confirm against submitMessage.
   */
  setModel(model: string): void {
    const { config } = this
    config.userSpecifiedModel = model
  }
}

/**
 * Sends a single prompt to the Claude API and returns the response.
 * Assumes that claude is being used non-interactively -- will not
 * ask the user for permissions or further input.
 *
Convenience wrapper around QueryEngine for one-shot usage.
 *
 * Builds a QueryEngine from the supplied options, seeding it with
 * `mutableMessages` and a clone of the cache returned by
 * `getReadFileCache()`, then re-yields everything produced by
 * `engine.submitMessage(prompt, ...)`.
 *
 * The engine's read-file state is passed to `setReadFileCache` from a
 * `finally` block, so it is written back whether the delegated generator
 * completes, throws, or is closed by the consumer while suspended inside it.
 */
export async function* ask({
  commands,
  prompt,
  promptUuid,
  isMeta,
  cwd,
  tools,
  mcpClients,
  verbose = false,
  thinkingConfig,
  maxTurns,
  maxBudgetUsd,
  taskBudget,
  canUseTool,
  mutableMessages = [],
  getReadFileCache,
  setReadFileCache,
  customSystemPrompt,
  appendSystemPrompt,
  userSpecifiedModel,
  fallbackModel,
  jsonSchema,
  getAppState,
  setAppState,
  abortController,
  replayUserMessages = false,
  includePartialMessages = false,
  handleElicitation,
  agents = [],
  setSDKStatus,
  orphanedPermission,
}: {
  commands: Command[]
  prompt: string | Array<ContentBlockParam>
  promptUuid?: string
  isMeta?: boolean
  cwd: string
  tools: Tools
  verbose?: boolean
  mcpClients: MCPServerConnection[]
  thinkingConfig?: ThinkingConfig
  maxTurns?: number
  maxBudgetUsd?: number
  taskBudget?: { total: number }
  canUseTool: CanUseToolFn
  mutableMessages?: Message[]
  customSystemPrompt?: string
  appendSystemPrompt?: string
  userSpecifiedModel?: string
  fallbackModel?: string
  jsonSchema?: Record<string, unknown>
  getAppState: () => AppState
  setAppState: (f: (prev: AppState) => AppState) => void
  getReadFileCache: () => FileStateCache
  setReadFileCache: (cache: FileStateCache) => void
  abortController?: AbortController
  replayUserMessages?: boolean
  includePartialMessages?: boolean
  handleElicitation?: ToolUseContext['handleElicitation']
  agents?: AgentDefinition[]
  setSDKStatus?: (status: SDKStatus) => void
  orphanedPermission?: OrphanedPermission
}): AsyncGenerator<SDKMessage, void, unknown> {
  // The engine gets a clone of the caller's cache; the engine's own state is
  // handed back through setReadFileCache in the finally block below.
  const clonedReadFileCache = cloneFileStateCache(getReadFileCache())

  const queryEngine = new QueryEngine({
    cwd,
    tools,
    commands,
    mcpClients,
    agents,
    canUseTool,
    getAppState,
    setAppState,
    initialMessages: mutableMessages,
    readFileCache: clonedReadFileCache,
    customSystemPrompt,
    appendSystemPrompt,
    userSpecifiedModel,
    fallbackModel,
    thinkingConfig,
    maxTurns,
    maxBudgetUsd,
    taskBudget,
    jsonSchema,
    verbose,
    handleElicitation,
    replayUserMessages,
    includePartialMessages,
    setSDKStatus,
    abortController,
    orphanedPermission,
    // snipReplay is only supplied when the HISTORY_SNIP feature is enabled.
    // feature() is kept as the direct ternary condition inside the spread.
    ...(feature('HISTORY_SNIP')
      ? {
          // Returns undefined for anything that is not a snip boundary;
          // otherwise forces a snip compaction over the engine's store.
          snipReplay: (yielded: Message, store: Message[]) =>
            snipProjection!.isSnipBoundaryMessage(yielded)
              ? snipModule!.snipCompactIfNeeded(store, { force: true })
              : undefined,
        }
      : {}),
  })

  const submitOptions = { uuid: promptUuid, isMeta }

  try {
    yield* queryEngine.submitMessage(prompt, submitOptions)
  } finally {
    setReadFileCache(queryEngine.getReadFileState())
  }
}