// QueryEngine.ts
//
// 46 KB, 1296 lines
// src/QueryEngine.ts
1import { feature } from 'bun:bundle'
2import type { ContentBlockParam } from '@anthropic-ai/sdk/resources/messages.mjs'
3import { randomUUID } from 'crypto'
4import last from 'lodash-es/last.js'
5import {
6  getSessionId,
7  isSessionPersistenceDisabled,
8} from 'src/bootstrap/state.js'
9import type {
10  PermissionMode,
11  SDKCompactBoundaryMessage,
12  SDKMessage,
13  SDKPermissionDenial,
14  SDKStatus,
15  SDKUserMessageReplay,
16} from 'src/entrypoints/agentSdkTypes.js'
17import { accumulateUsage, updateUsage } from 'src/services/api/claude.js'
18import type { NonNullableUsage } from 'src/services/api/logging.js'
19import { EMPTY_USAGE } from 'src/services/api/logging.js'
20import stripAnsi from 'strip-ansi'
21import type { Command } from './commands.js'
22import { getSlashCommandToolSkills } from './commands.js'
23import {
24  LOCAL_COMMAND_STDERR_TAG,
25  LOCAL_COMMAND_STDOUT_TAG,
26} from './constants/xml.js'
27import {
28  getModelUsage,
29  getTotalAPIDuration,
30  getTotalCost,
31} from './cost-tracker.js'
32import type { CanUseToolFn } from './hooks/useCanUseTool.js'
33import { loadMemoryPrompt } from './memdir/memdir.js'
34import { hasAutoMemPathOverride } from './memdir/paths.js'
35import { query } from './query.js'
36import { categorizeRetryableAPIError } from './services/api/errors.js'
37import type { MCPServerConnection } from './services/mcp/types.js'
38import type { AppState } from './state/AppState.js'
39import { type Tools, type ToolUseContext, toolMatchesName } from './Tool.js'
40import type { AgentDefinition } from './tools/AgentTool/loadAgentsDir.js'
41import { SYNTHETIC_OUTPUT_TOOL_NAME } from './tools/SyntheticOutputTool/SyntheticOutputTool.js'
42import type { Message } from './types/message.js'
43import type { OrphanedPermission } from './types/textInputTypes.js'
44import { createAbortController } from './utils/abortController.js'
45import type { AttributionState } from './utils/commitAttribution.js'
46import { getGlobalConfig } from './utils/config.js'
47import { getCwd } from './utils/cwd.js'
48import { isBareMode, isEnvTruthy } from './utils/envUtils.js'
49import { getFastModeState } from './utils/fastMode.js'
50import {
51  type FileHistoryState,
52  fileHistoryEnabled,
53  fileHistoryMakeSnapshot,
54} from './utils/fileHistory.js'
55import {
56  cloneFileStateCache,
57  type FileStateCache,
58} from './utils/fileStateCache.js'
59import { headlessProfilerCheckpoint } from './utils/headlessProfiler.js'
60import { registerStructuredOutputEnforcement } from './utils/hooks/hookHelpers.js'
61import { getInMemoryErrors } from './utils/log.js'
62import { countToolCalls, SYNTHETIC_MESSAGES } from './utils/messages.js'
63import {
64  getMainLoopModel,
65  parseUserSpecifiedModel,
66} from './utils/model/model.js'
67import { loadAllPluginsCacheOnly } from './utils/plugins/pluginLoader.js'
68import {
69  type ProcessUserInputContext,
70  processUserInput,
71} from './utils/processUserInput/processUserInput.js'
72import { fetchSystemPromptParts } from './utils/queryContext.js'
73import { setCwd } from './utils/Shell.js'
74import {
75  flushSessionStorage,
76  recordTranscript,
77} from './utils/sessionStorage.js'
78import { asSystemPrompt } from './utils/systemPromptType.js'
79import { resolveThemeSetting } from './utils/systemTheme.js'
80import {
81  shouldEnableThinkingByDefault,
82  type ThinkingConfig,
83} from './utils/thinking.js'
84
// Deferred load: MessageSelector.tsx pulls in React/ink and is only needed for
// message filtering at query time, so the module is required when this thunk
// is called rather than when this file is evaluated.
/* eslint-disable @typescript-eslint/no-require-imports */
const messageSelector = (): typeof import('src/components/MessageSelector.js') => {
  return require('src/components/MessageSelector.js')
}
90
91import {
92  localCommandOutputToSDKAssistantMessage,
93  toSDKCompactMetadata,
94} from './utils/messages/mappers.js'
95import {
96  buildSystemInitMessage,
97  sdkCompatToolName,
98} from './utils/messages/systemInit.js'
99import {
100  getScratchpadDir,
101  isScratchpadEnabled,
102} from './utils/permissions/filesystem.js'
103/* eslint-enable @typescript-eslint/no-require-imports */
104import {
105  handleOrphanedPermission,
106  isResultSuccessful,
107  normalizeMessage,
108} from './utils/queryHelpers.js'
109
// Dead code elimination: the coordinator module is required only behind the
// COORDINATOR_MODE feature check. When the flag is off, a stub that returns an
// empty object stands in so callers can spread the result unconditionally.
/* eslint-disable @typescript-eslint/no-require-imports */
const getCoordinatorUserContext: (
  mcpClients: readonly { name: string }[],
  scratchpadDir?: string,
) => Record<string, string> = feature('COORDINATOR_MODE')
  ? require('./coordinator/coordinatorMode.js').getCoordinatorUserContext
  : () => ({})
/* eslint-enable @typescript-eslint/no-require-imports */
119
// Dead code elimination: both snip modules are required only behind the
// HISTORY_SNIP feature check; with the flag off each binding is null.
/* eslint-disable @typescript-eslint/no-require-imports */
const snipModule: typeof import('./services/compact/snipCompact.js') | null =
  feature('HISTORY_SNIP')
    ? require('./services/compact/snipCompact.js')
    : null
const snipProjection: typeof import('./services/compact/snipProjection.js') | null =
  feature('HISTORY_SNIP')
    ? require('./services/compact/snipProjection.js')
    : null
/* eslint-enable @typescript-eslint/no-require-imports */
129
/**
 * Construction-time settings for a QueryEngine. The engine keeps the object
 * and reads it again at the start of every submitMessage() call.
 */
export type QueryEngineConfig = {
  // --- Environment and capabilities ---

  /** Working directory; submitMessage() applies it with setCwd on each call. */
  cwd: string
  /** Tools used for system-prompt construction and the tool-use context. */
  tools: Tools
  /** Slash commands exposed through the tool-use context and the init message. */
  commands: Command[]
  /** MCP server connections forwarded to prompt building and the tool-use context. */
  mcpClients: MCPServerConnection[]
  /**
   * Agent definitions reported as the active agents. submitMessage() falls
   * back to an empty list if this is missing at runtime.
   */
  agents: AgentDefinition[]
  /**
   * Permission callback. submitMessage() wraps it so that every result whose
   * behavior is not 'allow' is recorded as a permission denial.
   */
  canUseTool: CanUseToolFn
  /** Handler for URL elicitations triggered by MCP tool -32042 errors. */
  handleElicitation?: ToolUseContext['handleElicitation']

  // --- Application and conversation state ---

  /** Reads the current application state. */
  getAppState: () => AppState
  /** Applies a functional update to the application state. */
  setAppState: (f: (prev: AppState) => AppState) => void
  /**
   * Messages the conversation starts with. The array is adopted as the
   * engine's message store without copying; defaults to an empty list.
   */
  initialMessages?: Message[]
  /** File state cache handed to each tool-use context as readFileState. */
  readFileCache: FileStateCache
  /** Used instead of a freshly created controller when provided. */
  abortController?: AbortController
  /** Handled once per engine lifetime, before user input is processed. */
  orphanedPermission?: OrphanedPermission

  // --- Prompt and model selection ---

  /** When a string, replaces the default system prompt. */
  customSystemPrompt?: string
  /** When non-empty, appended after the other system prompt parts. */
  appendSystemPrompt?: string
  /**
   * Parsed with parseUserSpecifiedModel when set; otherwise the main loop
   * model comes from getMainLoopModel().
   */
  userSpecifiedModel?: string
  /** Forwarded to query() as the fallback model. */
  fallbackModel?: string
  /**
   * Thinking configuration. When omitted, submitMessage() uses
   * `{ type: 'adaptive' }` unless shouldEnableThinkingByDefault() returns
   * false, in which case it uses `{ type: 'disabled' }`.
   */
  thinkingConfig?: ThinkingConfig

  // --- Limits ---

  /** Forwarded to query(). */
  maxTurns?: number
  /** Passed along in the tool-use context options. */
  maxBudgetUsd?: number
  /** Forwarded to query(). */
  taskBudget?: { total: number }

  // --- Output shaping ---

  /**
   * When set and the tool list contains the synthetic output tool,
   * structured output enforcement is registered for the session.
   */
  jsonSchema?: Record<string, unknown>
  /** Verbose flag for the tool-use context; treated as false when omitted. */
  verbose?: boolean
  /**
   * When true, the user-authored messages from the submitted input are
   * yielded back (marked isReplay) after the first transcript recording.
   * Treated as false when omitted.
   */
  replayUserMessages?: boolean
  /**
   * When true, raw stream events are yielded as 'stream_event' messages.
   * Treated as false when omitted.
   */
  includePartialMessages?: boolean
  /** Passed through to the tool-use context. */
  setSDKStatus?: (status: SDKStatus) => void

  /**
   * Snip-boundary handler. It is given each yielded system message together
   * with the current mutableMessages store, and returns undefined when the
   * message is not a snip boundary or the replayed snip result when it is.
   * ask() injects it when HISTORY_SNIP is enabled so that feature-gated
   * strings stay inside the gated module (this keeps QueryEngine free of
   * excluded strings and testable even though feature() returns false under
   * bun test). SDK-only: the REPL keeps full history for UI scrollback and
   * projects on demand via projectSnippedView, whereas QueryEngine truncates
   * here to bound memory in long headless sessions (no UI to preserve).
   */
  snipReplay?: (
    yieldedSystemMsg: Message,
    store: Message[],
  ) => { messages: Message[]; executed: boolean } | undefined
}
174
175/**
176 * QueryEngine owns the query lifecycle and session state for a conversation.
177 * It extracts the core logic from ask() into a standalone class that can be
178 * used by both the headless/SDK path and (in a future phase) the REPL.
179 *
180 * One QueryEngine per conversation. Each submitMessage() call starts a new
181 * turn within the same conversation. State (messages, file cache, usage, etc.)
182 * persists across turns.
183 */
184export class QueryEngine {
185  private config: QueryEngineConfig
186  private mutableMessages: Message[]
187  private abortController: AbortController
188  private permissionDenials: SDKPermissionDenial[]
189  private totalUsage: NonNullableUsage
190  private hasHandledOrphanedPermission = false
191  private readFileState: FileStateCache
192  // Turn-scoped skill discovery tracking (feeds was_discovered on
193  // tengu_skill_tool_invocation). Must persist across the two
194  // processUserInputContext rebuilds inside submitMessage, but is cleared
195  // at the start of each submitMessage to avoid unbounded growth across
196  // many turns in SDK mode.
197  private discoveredSkillNames = new Set<string>()
198  private loadedNestedMemoryPaths = new Set<string>()
199
200  constructor(config: QueryEngineConfig) {
201    this.config = config
202    this.mutableMessages = config.initialMessages ?? []
203    this.abortController = config.abortController ?? createAbortController()
204    this.permissionDenials = []
205    this.readFileState = config.readFileCache
206    this.totalUsage = EMPTY_USAGE
207  }
208
209  async *submitMessage(
210    prompt: string | ContentBlockParam[],
211    options?: { uuid?: string; isMeta?: boolean },
212  ): AsyncGenerator<SDKMessage, void, unknown> {
213    const {
214      cwd,
215      commands,
216      tools,
217      mcpClients,
218      verbose = false,
219      thinkingConfig,
220      maxTurns,
221      maxBudgetUsd,
222      taskBudget,
223      canUseTool,
224      customSystemPrompt,
225      appendSystemPrompt,
226      userSpecifiedModel,
227      fallbackModel,
228      jsonSchema,
229      getAppState,
230      setAppState,
231      replayUserMessages = false,
232      includePartialMessages = false,
233      agents = [],
234      setSDKStatus,
235      orphanedPermission,
236    } = this.config
237
238    this.discoveredSkillNames.clear()
239    setCwd(cwd)
240    const persistSession = !isSessionPersistenceDisabled()
241    const startTime = Date.now()
242
243    // Wrap canUseTool to track permission denials
244    const wrappedCanUseTool: CanUseToolFn = async (
245      tool,
246      input,
247      toolUseContext,
248      assistantMessage,
249      toolUseID,
250      forceDecision,
251    ) => {
252      const result = await canUseTool(
253        tool,
254        input,
255        toolUseContext,
256        assistantMessage,
257        toolUseID,
258        forceDecision,
259      )
260
261      // Track denials for SDK reporting
262      if (result.behavior !== 'allow') {
263        this.permissionDenials.push({
264          tool_name: sdkCompatToolName(tool.name),
265          tool_use_id: toolUseID,
266          tool_input: input,
267        })
268      }
269
270      return result
271    }
272
273    const initialAppState = getAppState()
274    const initialMainLoopModel = userSpecifiedModel
275      ? parseUserSpecifiedModel(userSpecifiedModel)
276      : getMainLoopModel()
277
278    const initialThinkingConfig: ThinkingConfig = thinkingConfig
279      ? thinkingConfig
280      : shouldEnableThinkingByDefault() !== false
281        ? { type: 'adaptive' }
282        : { type: 'disabled' }
283
284    headlessProfilerCheckpoint('before_getSystemPrompt')
285    // Narrow once so TS tracks the type through the conditionals below.
286    const customPrompt =
287      typeof customSystemPrompt === 'string' ? customSystemPrompt : undefined
288    const {
289      defaultSystemPrompt,
290      userContext: baseUserContext,
291      systemContext,
292    } = await fetchSystemPromptParts({
293      tools,
294      mainLoopModel: initialMainLoopModel,
295      additionalWorkingDirectories: Array.from(
296        initialAppState.toolPermissionContext.additionalWorkingDirectories.keys(),
297      ),
298      mcpClients,
299      customSystemPrompt: customPrompt,
300    })
301    headlessProfilerCheckpoint('after_getSystemPrompt')
302    const userContext = {
303      ...baseUserContext,
304      ...getCoordinatorUserContext(
305        mcpClients,
306        isScratchpadEnabled() ? getScratchpadDir() : undefined,
307      ),
308    }
309
310    // When an SDK caller provides a custom system prompt AND has set
311    // CLAUDE_COWORK_MEMORY_PATH_OVERRIDE, inject the memory-mechanics prompt.
312    // The env var is an explicit opt-in signal — the caller has wired up
313    // a memory directory and needs Claude to know how to use it (which
314    // Write/Edit tools to call, MEMORY.md filename, loading semantics).
315    // The caller can layer their own policy text via appendSystemPrompt.
316    const memoryMechanicsPrompt =
317      customPrompt !== undefined && hasAutoMemPathOverride()
318        ? await loadMemoryPrompt()
319        : null
320
321    const systemPrompt = asSystemPrompt([
322      ...(customPrompt !== undefined ? [customPrompt] : defaultSystemPrompt),
323      ...(memoryMechanicsPrompt ? [memoryMechanicsPrompt] : []),
324      ...(appendSystemPrompt ? [appendSystemPrompt] : []),
325    ])
326
327    // Register function hook for structured output enforcement
328    const hasStructuredOutputTool = tools.some(t =>
329      toolMatchesName(t, SYNTHETIC_OUTPUT_TOOL_NAME),
330    )
331    if (jsonSchema && hasStructuredOutputTool) {
332      registerStructuredOutputEnforcement(setAppState, getSessionId())
333    }
334
335    let processUserInputContext: ProcessUserInputContext = {
336      messages: this.mutableMessages,
      // Slash commands that mutate the message array (e.g. /force-snip)
      // call setMessages(fn).  In interactive mode this writes back to
      // AppState; in print mode we write back to mutableMessages so the
      // rest of submitMessage (the push of messagesFromUserInput and the
      // `messages` snapshot taken right after processUserInput) sees the
      // result.  The second processUserInputContext below (after
      // slash-command processing) keeps the no-op — nothing else calls
      // setMessages past that point.
344      setMessages: fn => {
345        this.mutableMessages = fn(this.mutableMessages)
346      },
347      onChangeAPIKey: () => {},
348      handleElicitation: this.config.handleElicitation,
349      options: {
350        commands,
351        debug: false, // we use stdout, so don't want to clobber it
352        tools,
353        verbose,
354        mainLoopModel: initialMainLoopModel,
355        thinkingConfig: initialThinkingConfig,
356        mcpClients,
357        mcpResources: {},
358        ideInstallationStatus: null,
359        isNonInteractiveSession: true,
360        customSystemPrompt,
361        appendSystemPrompt,
362        agentDefinitions: { activeAgents: agents, allAgents: [] },
363        theme: resolveThemeSetting(getGlobalConfig().theme),
364        maxBudgetUsd,
365      },
366      getAppState,
367      setAppState,
368      abortController: this.abortController,
369      readFileState: this.readFileState,
370      nestedMemoryAttachmentTriggers: new Set<string>(),
371      loadedNestedMemoryPaths: this.loadedNestedMemoryPaths,
372      dynamicSkillDirTriggers: new Set<string>(),
373      discoveredSkillNames: this.discoveredSkillNames,
374      setInProgressToolUseIDs: () => {},
375      setResponseLength: () => {},
376      updateFileHistoryState: (
377        updater: (prev: FileHistoryState) => FileHistoryState,
378      ) => {
379        setAppState(prev => {
380          const updated = updater(prev.fileHistory)
381          if (updated === prev.fileHistory) return prev
382          return { ...prev, fileHistory: updated }
383        })
384      },
385      updateAttributionState: (
386        updater: (prev: AttributionState) => AttributionState,
387      ) => {
388        setAppState(prev => {
389          const updated = updater(prev.attribution)
390          if (updated === prev.attribution) return prev
391          return { ...prev, attribution: updated }
392        })
393      },
394      setSDKStatus,
395    }
396
397    // Handle orphaned permission (only once per engine lifetime)
398    if (orphanedPermission && !this.hasHandledOrphanedPermission) {
399      this.hasHandledOrphanedPermission = true
400      for await (const message of handleOrphanedPermission(
401        orphanedPermission,
402        tools,
403        this.mutableMessages,
404        processUserInputContext,
405      )) {
406        yield message
407      }
408    }
409
410    const {
411      messages: messagesFromUserInput,
412      shouldQuery,
413      allowedTools,
414      model: modelFromUserInput,
415      resultText,
416    } = await processUserInput({
417      input: prompt,
418      mode: 'prompt',
419      setToolJSX: () => {},
420      context: {
421        ...processUserInputContext,
422        messages: this.mutableMessages,
423      },
424      messages: this.mutableMessages,
425      uuid: options?.uuid,
426      isMeta: options?.isMeta,
427      querySource: 'sdk',
428    })
429
430    // Push new messages, including user input and any attachments
431    this.mutableMessages.push(...messagesFromUserInput)
432
433    // Update params to reflect updates from processing /slash commands
434    const messages = [...this.mutableMessages]
435
436    // Persist the user's message(s) to transcript BEFORE entering the query
437    // loop. The for-await below only calls recordTranscript when ask() yields
438    // an assistant/user/compact_boundary message — which doesn't happen until
439    // the API responds. If the process is killed before that (e.g. user clicks
440    // Stop in cowork seconds after send), the transcript is left with only
441    // queue-operation entries; getLastSessionLog filters those out, returns
442    // null, and --resume fails with "No conversation found". Writing now makes
443    // the transcript resumable from the point the user message was accepted,
444    // even if no API response ever arrives.
445    //
446    // --bare / SIMPLE: fire-and-forget. Scripted calls don't --resume after
447    // kill-mid-request. The await is ~4ms on SSD, ~30ms under disk contention
448    // — the single largest controllable critical-path cost after module eval.
449    // Transcript is still written (for post-hoc debugging); just not blocking.
450    if (persistSession && messagesFromUserInput.length > 0) {
451      const transcriptPromise = recordTranscript(messages)
452      if (isBareMode()) {
453        void transcriptPromise
454      } else {
455        await transcriptPromise
456        if (
457          isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
458          isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
459        ) {
460          await flushSessionStorage()
461        }
462      }
463    }
464
465    // Filter messages that should be acknowledged after transcript
466    const replayableMessages = messagesFromUserInput.filter(
467      msg =>
468        (msg.type === 'user' &&
469          !msg.isMeta && // Skip synthetic caveat messages
470          !msg.toolUseResult && // Skip tool results (they'll be acked from query)
471          messageSelector().selectableUserMessagesFilter(msg)) || // Skip non-user-authored messages (task notifications, etc.)
472        (msg.type === 'system' && msg.subtype === 'compact_boundary'), // Always ack compact boundaries
473    )
474    const messagesToAck = replayUserMessages ? replayableMessages : []
475
476    // Update the ToolPermissionContext based on user input processing (as necessary)
477    setAppState(prev => ({
478      ...prev,
479      toolPermissionContext: {
480        ...prev.toolPermissionContext,
481        alwaysAllowRules: {
482          ...prev.toolPermissionContext.alwaysAllowRules,
483          command: allowedTools,
484        },
485      },
486    }))
487
488    const mainLoopModel = modelFromUserInput ?? initialMainLoopModel
489
490    // Recreate after processing the prompt to pick up updated messages and
491    // model (from slash commands).
492    processUserInputContext = {
493      messages,
494      setMessages: () => {},
495      onChangeAPIKey: () => {},
496      handleElicitation: this.config.handleElicitation,
497      options: {
498        commands,
499        debug: false,
500        tools,
501        verbose,
502        mainLoopModel,
503        thinkingConfig: initialThinkingConfig,
504        mcpClients,
505        mcpResources: {},
506        ideInstallationStatus: null,
507        isNonInteractiveSession: true,
508        customSystemPrompt,
509        appendSystemPrompt,
510        theme: resolveThemeSetting(getGlobalConfig().theme),
511        agentDefinitions: { activeAgents: agents, allAgents: [] },
512        maxBudgetUsd,
513      },
514      getAppState,
515      setAppState,
516      abortController: this.abortController,
517      readFileState: this.readFileState,
518      nestedMemoryAttachmentTriggers: new Set<string>(),
519      loadedNestedMemoryPaths: this.loadedNestedMemoryPaths,
520      dynamicSkillDirTriggers: new Set<string>(),
521      discoveredSkillNames: this.discoveredSkillNames,
522      setInProgressToolUseIDs: () => {},
523      setResponseLength: () => {},
524      updateFileHistoryState: processUserInputContext.updateFileHistoryState,
525      updateAttributionState: processUserInputContext.updateAttributionState,
526      setSDKStatus,
527    }
528
529    headlessProfilerCheckpoint('before_skills_plugins')
530    // Cache-only: headless/SDK/CCR startup must not block on network for
531    // ref-tracked plugins. CCR populates the cache via CLAUDE_CODE_SYNC_PLUGIN_INSTALL
532    // (headlessPluginInstall) or CLAUDE_CODE_PLUGIN_SEED_DIR before this runs;
533    // SDK callers that need fresh source can call /reload-plugins.
534    const [skills, { enabled: enabledPlugins }] = await Promise.all([
535      getSlashCommandToolSkills(getCwd()),
536      loadAllPluginsCacheOnly(),
537    ])
538    headlessProfilerCheckpoint('after_skills_plugins')
539
540    yield buildSystemInitMessage({
541      tools,
542      mcpClients,
543      model: mainLoopModel,
544      permissionMode: initialAppState.toolPermissionContext
545        .mode as PermissionMode, // TODO: avoid the cast
546      commands,
547      agents,
548      skills,
549      plugins: enabledPlugins,
550      fastMode: initialAppState.fastMode,
551    })
552
553    // Record when system message is yielded for headless latency tracking
554    headlessProfilerCheckpoint('system_message_yielded')
555
556    if (!shouldQuery) {
557      // Return the results of local slash commands.
558      // Use messagesFromUserInput (not replayableMessages) for command output
559      // because selectableUserMessagesFilter excludes local-command-stdout tags.
560      for (const msg of messagesFromUserInput) {
561        if (
562          msg.type === 'user' &&
563          typeof msg.message.content === 'string' &&
564          (msg.message.content.includes(`<${LOCAL_COMMAND_STDOUT_TAG}>`) ||
565            msg.message.content.includes(`<${LOCAL_COMMAND_STDERR_TAG}>`) ||
566            msg.isCompactSummary)
567        ) {
568          yield {
569            type: 'user',
570            message: {
571              ...msg.message,
572              content: stripAnsi(msg.message.content),
573            },
574            session_id: getSessionId(),
575            parent_tool_use_id: null,
576            uuid: msg.uuid,
577            timestamp: msg.timestamp,
578            isReplay: !msg.isCompactSummary,
579            isSynthetic: msg.isMeta || msg.isVisibleInTranscriptOnly,
580          } as SDKUserMessageReplay
581        }
582
583        // Local command output — yield as a synthetic assistant message so
584        // RC renders it as assistant-style text rather than a user bubble.
585        // Emitted as assistant (not the dedicated SDKLocalCommandOutputMessage
586        // system subtype) so mobile clients + session-ingress can parse it.
587        if (
588          msg.type === 'system' &&
589          msg.subtype === 'local_command' &&
590          typeof msg.content === 'string' &&
591          (msg.content.includes(`<${LOCAL_COMMAND_STDOUT_TAG}>`) ||
592            msg.content.includes(`<${LOCAL_COMMAND_STDERR_TAG}>`))
593        ) {
594          yield localCommandOutputToSDKAssistantMessage(msg.content, msg.uuid)
595        }
596
597        if (msg.type === 'system' && msg.subtype === 'compact_boundary') {
598          yield {
599            type: 'system',
600            subtype: 'compact_boundary' as const,
601            session_id: getSessionId(),
602            uuid: msg.uuid,
603            compact_metadata: toSDKCompactMetadata(msg.compactMetadata),
604          } as SDKCompactBoundaryMessage
605        }
606      }
607
608      if (persistSession) {
609        await recordTranscript(messages)
610        if (
611          isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
612          isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
613        ) {
614          await flushSessionStorage()
615        }
616      }
617
618      yield {
619        type: 'result',
620        subtype: 'success',
621        is_error: false,
622        duration_ms: Date.now() - startTime,
623        duration_api_ms: getTotalAPIDuration(),
624        num_turns: messages.length - 1,
625        result: resultText ?? '',
626        stop_reason: null,
627        session_id: getSessionId(),
628        total_cost_usd: getTotalCost(),
629        usage: this.totalUsage,
630        modelUsage: getModelUsage(),
631        permission_denials: this.permissionDenials,
632        fast_mode_state: getFastModeState(
633          mainLoopModel,
634          initialAppState.fastMode,
635        ),
636        uuid: randomUUID(),
637      }
638      return
639    }
640
641    if (fileHistoryEnabled() && persistSession) {
642      messagesFromUserInput
643        .filter(messageSelector().selectableUserMessagesFilter)
644        .forEach(message => {
645          void fileHistoryMakeSnapshot(
646            (updater: (prev: FileHistoryState) => FileHistoryState) => {
647              setAppState(prev => ({
648                ...prev,
649                fileHistory: updater(prev.fileHistory),
650              }))
651            },
652            message.uuid,
653          )
654        })
655    }
656
657    // Track current message usage (reset on each message_start)
658    let currentMessageUsage: NonNullableUsage = EMPTY_USAGE
659    let turnCount = 1
660    let hasAcknowledgedInitialMessages = false
661    // Track structured output from StructuredOutput tool calls
662    let structuredOutputFromTool: unknown
663    // Track the last stop_reason from assistant messages
664    let lastStopReason: string | null = null
665    // Reference-based watermark so error_during_execution's errors[] is
666    // turn-scoped. A length-based index breaks when the 100-entry ring buffer
667    // shift()s during the turn — the index slides. If this entry is rotated
668    // out, lastIndexOf returns -1 and we include everything (safe fallback).
669    const errorLogWatermark = getInMemoryErrors().at(-1)
670    // Snapshot count before this query for delta-based retry limiting
671    const initialStructuredOutputCalls = jsonSchema
672      ? countToolCalls(this.mutableMessages, SYNTHETIC_OUTPUT_TOOL_NAME)
673      : 0
674
675    for await (const message of query({
676      messages,
677      systemPrompt,
678      userContext,
679      systemContext,
680      canUseTool: wrappedCanUseTool,
681      toolUseContext: processUserInputContext,
682      fallbackModel,
683      querySource: 'sdk',
684      maxTurns,
685      taskBudget,
686    })) {
687      // Record assistant, user, and compact boundary messages
688      if (
689        message.type === 'assistant' ||
690        message.type === 'user' ||
691        (message.type === 'system' && message.subtype === 'compact_boundary')
692      ) {
693        // Before writing a compact boundary, flush any in-memory-only
694        // messages up through the preservedSegment tail. Attachments and
695        // progress are now recorded inline (their switch cases below), but
696        // this flush still matters for the preservedSegment tail walk.
697        // If the SDK subprocess restarts before then (claude-desktop kills
698        // between turns), tailUuid points to a never-written message →
699        // applyPreservedSegmentRelinks fails its tail→head walk → returns
700        // without pruning → resume loads full pre-compact history.
701        if (
702          persistSession &&
703          message.type === 'system' &&
704          message.subtype === 'compact_boundary'
705        ) {
706          const tailUuid = message.compactMetadata?.preservedSegment?.tailUuid
707          if (tailUuid) {
708            const tailIdx = this.mutableMessages.findLastIndex(
709              m => m.uuid === tailUuid,
710            )
711            if (tailIdx !== -1) {
712              await recordTranscript(this.mutableMessages.slice(0, tailIdx + 1))
713            }
714          }
715        }
716        messages.push(message)
717        if (persistSession) {
718          // Fire-and-forget for assistant messages. claude.ts yields one
719          // assistant message per content block, then mutates the last
720          // one's message.usage/stop_reason on message_delta — relying on
721          // the write queue's 100ms lazy jsonStringify. Awaiting here
722          // blocks ask()'s generator, so message_delta can't run until
723          // every block is consumed; the drain timer (started at block 1)
724          // elapses first. Interactive CC doesn't hit this because
725          // useLogMessages.ts fire-and-forgets. enqueueWrite is
726          // order-preserving so fire-and-forget here is safe.
727          if (message.type === 'assistant') {
728            void recordTranscript(messages)
729          } else {
730            await recordTranscript(messages)
731          }
732        }
733
734        // Acknowledge initial user messages after first transcript recording
735        if (!hasAcknowledgedInitialMessages && messagesToAck.length > 0) {
736          hasAcknowledgedInitialMessages = true
737          for (const msgToAck of messagesToAck) {
738            if (msgToAck.type === 'user') {
739              yield {
740                type: 'user',
741                message: msgToAck.message,
742                session_id: getSessionId(),
743                parent_tool_use_id: null,
744                uuid: msgToAck.uuid,
745                timestamp: msgToAck.timestamp,
746                isReplay: true,
747              } as SDKUserMessageReplay
748            }
749          }
750        }
751      }
752
753      if (message.type === 'user') {
754        turnCount++
755      }
756
757      switch (message.type) {
758        case 'tombstone':
759          // Tombstone messages are control signals for removing messages, skip them
760          break
761        case 'assistant':
762          // Capture stop_reason if already set (synthetic messages). For
763          // streamed responses, this is null at content_block_stop time;
764          // the real value arrives via message_delta (handled below).
765          if (message.message.stop_reason != null) {
766            lastStopReason = message.message.stop_reason
767          }
768          this.mutableMessages.push(message)
769          yield* normalizeMessage(message)
770          break
771        case 'progress':
772          this.mutableMessages.push(message)
773          // Record inline so the dedup loop in the next ask() call sees it
774          // as already-recorded. Without this, deferred progress interleaves
775          // with already-recorded tool_results in mutableMessages, and the
776          // dedup walk freezes startingParentUuid at the wrong message —
777          // forking the chain and orphaning the conversation on resume.
778          if (persistSession) {
779            messages.push(message)
780            void recordTranscript(messages)
781          }
782          yield* normalizeMessage(message)
783          break
784        case 'user':
785          this.mutableMessages.push(message)
786          yield* normalizeMessage(message)
787          break
788        case 'stream_event':
789          if (message.event.type === 'message_start') {
790            // Reset current message usage for new message
791            currentMessageUsage = EMPTY_USAGE
792            currentMessageUsage = updateUsage(
793              currentMessageUsage,
794              message.event.message.usage,
795            )
796          }
797          if (message.event.type === 'message_delta') {
798            currentMessageUsage = updateUsage(
799              currentMessageUsage,
800              message.event.usage,
801            )
802            // Capture stop_reason from message_delta. The assistant message
803            // is yielded at content_block_stop with stop_reason=null; the
804            // real value only arrives here (see claude.ts message_delta
805            // handler). Without this, result.stop_reason is always null.
806            if (message.event.delta.stop_reason != null) {
807              lastStopReason = message.event.delta.stop_reason
808            }
809          }
810          if (message.event.type === 'message_stop') {
811            // Accumulate current message usage into total
812            this.totalUsage = accumulateUsage(
813              this.totalUsage,
814              currentMessageUsage,
815            )
816          }
817
818          if (includePartialMessages) {
819            yield {
820              type: 'stream_event' as const,
821              event: message.event,
822              session_id: getSessionId(),
823              parent_tool_use_id: null,
824              uuid: randomUUID(),
825            }
826          }
827
828          break
829        case 'attachment':
830          this.mutableMessages.push(message)
831          // Record inline (same reason as progress above).
832          if (persistSession) {
833            messages.push(message)
834            void recordTranscript(messages)
835          }
836
837          // Extract structured output from StructuredOutput tool calls
838          if (message.attachment.type === 'structured_output') {
839            structuredOutputFromTool = message.attachment.data
840          }
841          // Handle max turns reached signal from query.ts
842          else if (message.attachment.type === 'max_turns_reached') {
843            if (persistSession) {
844              if (
845                isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
846                isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
847              ) {
848                await flushSessionStorage()
849              }
850            }
851            yield {
852              type: 'result',
853              subtype: 'error_max_turns',
854              duration_ms: Date.now() - startTime,
855              duration_api_ms: getTotalAPIDuration(),
856              is_error: true,
857              num_turns: message.attachment.turnCount,
858              stop_reason: lastStopReason,
859              session_id: getSessionId(),
860              total_cost_usd: getTotalCost(),
861              usage: this.totalUsage,
862              modelUsage: getModelUsage(),
863              permission_denials: this.permissionDenials,
864              fast_mode_state: getFastModeState(
865                mainLoopModel,
866                initialAppState.fastMode,
867              ),
868              uuid: randomUUID(),
869              errors: [
870                `Reached maximum number of turns (${message.attachment.maxTurns})`,
871              ],
872            }
873            return
874          }
875          // Yield queued_command attachments as SDK user message replays
876          else if (
877            replayUserMessages &&
878            message.attachment.type === 'queued_command'
879          ) {
880            yield {
881              type: 'user',
882              message: {
883                role: 'user' as const,
884                content: message.attachment.prompt,
885              },
886              session_id: getSessionId(),
887              parent_tool_use_id: null,
888              uuid: message.attachment.source_uuid || message.uuid,
889              timestamp: message.timestamp,
890              isReplay: true,
891            } as SDKUserMessageReplay
892          }
893          break
894        case 'stream_request_start':
895          // Don't yield stream request start messages
896          break
897        case 'system': {
898          // Snip boundary: replay on our store to remove zombie messages and
899          // stale markers. The yielded boundary is a signal, not data to push —
900          // the replay produces its own equivalent boundary. Without this,
901          // markers persist and re-trigger on every turn, and mutableMessages
902          // never shrinks (memory leak in long SDK sessions). The subtype
903          // check lives inside the injected callback so feature-gated strings
904          // stay out of this file (excluded-strings check).
905          const snipResult = this.config.snipReplay?.(
906            message,
907            this.mutableMessages,
908          )
909          if (snipResult !== undefined) {
910            if (snipResult.executed) {
911              this.mutableMessages.length = 0
912              this.mutableMessages.push(...snipResult.messages)
913            }
914            break
915          }
916          this.mutableMessages.push(message)
917          // Yield compact boundary messages to SDK
918          if (
919            message.subtype === 'compact_boundary' &&
920            message.compactMetadata
921          ) {
922            // Release pre-compaction messages for GC. The boundary was just
923            // pushed so it's the last element. query.ts already uses
924            // getMessagesAfterCompactBoundary() internally, so only
925            // post-boundary messages are needed going forward.
926            const mutableBoundaryIdx = this.mutableMessages.length - 1
927            if (mutableBoundaryIdx > 0) {
928              this.mutableMessages.splice(0, mutableBoundaryIdx)
929            }
930            const localBoundaryIdx = messages.length - 1
931            if (localBoundaryIdx > 0) {
932              messages.splice(0, localBoundaryIdx)
933            }
934
935            yield {
936              type: 'system',
937              subtype: 'compact_boundary' as const,
938              session_id: getSessionId(),
939              uuid: message.uuid,
940              compact_metadata: toSDKCompactMetadata(message.compactMetadata),
941            }
942          }
943          if (message.subtype === 'api_error') {
944            yield {
945              type: 'system',
946              subtype: 'api_retry' as const,
947              attempt: message.retryAttempt,
948              max_retries: message.maxRetries,
949              retry_delay_ms: message.retryInMs,
950              error_status: message.error.status ?? null,
951              error: categorizeRetryableAPIError(message.error),
952              session_id: getSessionId(),
953              uuid: message.uuid,
954            }
955          }
956          // Don't yield other system messages in headless mode
957          break
958        }
959        case 'tool_use_summary':
960          // Yield tool use summary messages to SDK
961          yield {
962            type: 'tool_use_summary' as const,
963            summary: message.summary,
964            preceding_tool_use_ids: message.precedingToolUseIds,
965            session_id: getSessionId(),
966            uuid: message.uuid,
967          }
968          break
969      }
970
971      // Check if USD budget has been exceeded
972      if (maxBudgetUsd !== undefined && getTotalCost() >= maxBudgetUsd) {
973        if (persistSession) {
974          if (
975            isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
976            isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
977          ) {
978            await flushSessionStorage()
979          }
980        }
981        yield {
982          type: 'result',
983          subtype: 'error_max_budget_usd',
984          duration_ms: Date.now() - startTime,
985          duration_api_ms: getTotalAPIDuration(),
986          is_error: true,
987          num_turns: turnCount,
988          stop_reason: lastStopReason,
989          session_id: getSessionId(),
990          total_cost_usd: getTotalCost(),
991          usage: this.totalUsage,
992          modelUsage: getModelUsage(),
993          permission_denials: this.permissionDenials,
994          fast_mode_state: getFastModeState(
995            mainLoopModel,
996            initialAppState.fastMode,
997          ),
998          uuid: randomUUID(),
999          errors: [`Reached maximum budget ($${maxBudgetUsd})`],
1000        }
1001        return
1002      }
1003
1004      // Check if structured output retry limit exceeded (only on user messages)
1005      if (message.type === 'user' && jsonSchema) {
1006        const currentCalls = countToolCalls(
1007          this.mutableMessages,
1008          SYNTHETIC_OUTPUT_TOOL_NAME,
1009        )
1010        const callsThisQuery = currentCalls - initialStructuredOutputCalls
1011        const maxRetries = parseInt(
1012          process.env.MAX_STRUCTURED_OUTPUT_RETRIES || '5',
1013          10,
1014        )
1015        if (callsThisQuery >= maxRetries) {
1016          if (persistSession) {
1017            if (
1018              isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
1019              isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
1020            ) {
1021              await flushSessionStorage()
1022            }
1023          }
1024          yield {
1025            type: 'result',
1026            subtype: 'error_max_structured_output_retries',
1027            duration_ms: Date.now() - startTime,
1028            duration_api_ms: getTotalAPIDuration(),
1029            is_error: true,
1030            num_turns: turnCount,
1031            stop_reason: lastStopReason,
1032            session_id: getSessionId(),
1033            total_cost_usd: getTotalCost(),
1034            usage: this.totalUsage,
1035            modelUsage: getModelUsage(),
1036            permission_denials: this.permissionDenials,
1037            fast_mode_state: getFastModeState(
1038              mainLoopModel,
1039              initialAppState.fastMode,
1040            ),
1041            uuid: randomUUID(),
1042            errors: [
1043              `Failed to provide valid structured output after ${maxRetries} attempts`,
1044            ],
1045          }
1046          return
1047        }
1048      }
1049    }
1050
1051    // Stop hooks yield progress/attachment messages AFTER the assistant
1052    // response (via yield* handleStopHooks in query.ts). Since #23537 pushes
1053    // those to `messages` inline, last(messages) can be a progress/attachment
1054    // instead of the assistant — which makes textResult extraction below
1055    // return '' and -p mode emit a blank line. Allowlist to assistant|user:
1056    // isResultSuccessful handles both (user with all tool_result blocks is a
1057    // valid successful terminal state).
1058    const result = messages.findLast(
1059      m => m.type === 'assistant' || m.type === 'user',
1060    )
1061    // Capture for the error_during_execution diagnostic — isResultSuccessful
1062    // is a type predicate (message is Message), so inside the false branch
1063    // `result` narrows to never and these accesses don't typecheck.
1064    const edeResultType = result?.type ?? 'undefined'
1065    const edeLastContentType =
1066      result?.type === 'assistant'
1067        ? (last(result.message.content)?.type ?? 'none')
1068        : 'n/a'
1069
1070    // Flush buffered transcript writes before yielding result.
1071    // The desktop app kills the CLI process immediately after receiving the
1072    // result message, so any unflushed writes would be lost.
1073    if (persistSession) {
1074      if (
1075        isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
1076        isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
1077      ) {
1078        await flushSessionStorage()
1079      }
1080    }
1081
1082    if (!isResultSuccessful(result, lastStopReason)) {
1083      yield {
1084        type: 'result',
1085        subtype: 'error_during_execution',
1086        duration_ms: Date.now() - startTime,
1087        duration_api_ms: getTotalAPIDuration(),
1088        is_error: true,
1089        num_turns: turnCount,
1090        stop_reason: lastStopReason,
1091        session_id: getSessionId(),
1092        total_cost_usd: getTotalCost(),
1093        usage: this.totalUsage,
1094        modelUsage: getModelUsage(),
1095        permission_denials: this.permissionDenials,
1096        fast_mode_state: getFastModeState(
1097          mainLoopModel,
1098          initialAppState.fastMode,
1099        ),
1100        uuid: randomUUID(),
1101        // Diagnostic prefix: these are what isResultSuccessful() checks — if
1102        // the result type isn't assistant-with-text/thinking or user-with-
1103        // tool_result, and stop_reason isn't end_turn, that's why this fired.
1104        // errors[] is turn-scoped via the watermark; previously it dumped the
1105        // entire process's logError buffer (ripgrep timeouts, ENOENT, etc).
1106        errors: (() => {
1107          const all = getInMemoryErrors()
1108          const start = errorLogWatermark
1109            ? all.lastIndexOf(errorLogWatermark) + 1
1110            : 0
1111          return [
1112            `[ede_diagnostic] result_type=${edeResultType} last_content_type=${edeLastContentType} stop_reason=${lastStopReason}`,
1113            ...all.slice(start).map(_ => _.error),
1114          ]
1115        })(),
1116      }
1117      return
1118    }
1119
1120    // Extract the text result based on message type
1121    let textResult = ''
1122    let isApiError = false
1123
1124    if (result.type === 'assistant') {
1125      const lastContent = last(result.message.content)
1126      if (
1127        lastContent?.type === 'text' &&
1128        !SYNTHETIC_MESSAGES.has(lastContent.text)
1129      ) {
1130        textResult = lastContent.text
1131      }
1132      isApiError = Boolean(result.isApiErrorMessage)
1133    }
1134
1135    yield {
1136      type: 'result',
1137      subtype: 'success',
1138      is_error: isApiError,
1139      duration_ms: Date.now() - startTime,
1140      duration_api_ms: getTotalAPIDuration(),
1141      num_turns: turnCount,
1142      result: textResult,
1143      stop_reason: lastStopReason,
1144      session_id: getSessionId(),
1145      total_cost_usd: getTotalCost(),
1146      usage: this.totalUsage,
1147      modelUsage: getModelUsage(),
1148      permission_denials: this.permissionDenials,
1149      structured_output: structuredOutputFromTool,
1150      fast_mode_state: getFastModeState(
1151        mainLoopModel,
1152        initialAppState.fastMode,
1153      ),
1154      uuid: randomUUID(),
1155    }
1156  }
1157
1158  interrupt(): void {
1159    this.abortController.abort()
1160  }
1161
1162  getMessages(): readonly Message[] {
1163    return this.mutableMessages
1164  }
1165
1166  getReadFileState(): FileStateCache {
1167    return this.readFileState
1168  }
1169
1170  getSessionId(): string {
1171    return getSessionId()
1172  }
1173
1174  setModel(model: string): void {
1175    this.config.userSpecifiedModel = model
1176  }
1177}
1178
/**
 * Sends a single prompt to the Claude API and streams back the resulting
 * SDK messages. Assumes that claude is being used non-interactively -- it
 * will not ask the user for permissions or further input.
 *
 * Convenience wrapper around QueryEngine for one-shot usage: it builds a
 * fresh engine from the supplied options, delegates to
 * `engine.submitMessage`, and forwards everything that call yields.
 *
 * Read-file cache handling: the engine is seeded with a clone of
 * `getReadFileCache()`, and the engine's read-file state is passed to
 * `setReadFileCache` in a `finally` block — so it is written back whether
 * the generator finishes, throws, or is closed early by the consumer.
 *
 * @returns An async generator of `SDKMessage` values produced by the engine.
 */
export async function* ask({
  commands,
  prompt,
  promptUuid,
  isMeta,
  cwd,
  tools,
  mcpClients,
  verbose = false,
  thinkingConfig,
  maxTurns,
  maxBudgetUsd,
  taskBudget,
  canUseTool,
  mutableMessages = [],
  getReadFileCache,
  setReadFileCache,
  customSystemPrompt,
  appendSystemPrompt,
  userSpecifiedModel,
  fallbackModel,
  jsonSchema,
  getAppState,
  setAppState,
  abortController,
  replayUserMessages = false,
  includePartialMessages = false,
  handleElicitation,
  agents = [],
  setSDKStatus,
  orphanedPermission,
}: {
  commands: Command[]
  prompt: string | Array<ContentBlockParam>
  promptUuid?: string
  isMeta?: boolean
  cwd: string
  tools: Tools
  verbose?: boolean
  mcpClients: MCPServerConnection[]
  thinkingConfig?: ThinkingConfig
  maxTurns?: number
  maxBudgetUsd?: number
  taskBudget?: { total: number }
  canUseTool: CanUseToolFn
  mutableMessages?: Message[]
  customSystemPrompt?: string
  appendSystemPrompt?: string
  userSpecifiedModel?: string
  fallbackModel?: string
  jsonSchema?: Record<string, unknown>
  getAppState: () => AppState
  setAppState: (f: (prev: AppState) => AppState) => void
  getReadFileCache: () => FileStateCache
  setReadFileCache: (cache: FileStateCache) => void
  abortController?: AbortController
  replayUserMessages?: boolean
  includePartialMessages?: boolean
  handleElicitation?: ToolUseContext['handleElicitation']
  agents?: AgentDefinition[]
  setSDKStatus?: (status: SDKStatus) => void
  orphanedPermission?: OrphanedPermission
}): AsyncGenerator<SDKMessage, void, unknown> {
  // The engine works on its own clone of the caller's read-file cache; the
  // caller's cache is only updated in the `finally` below.
  const readFileCache = cloneFileStateCache(getReadFileCache())

  const engine = new QueryEngine({
    cwd,
    tools,
    commands,
    mcpClients,
    agents,
    canUseTool,
    getAppState,
    setAppState,
    initialMessages: mutableMessages,
    readFileCache,
    customSystemPrompt,
    appendSystemPrompt,
    userSpecifiedModel,
    fallbackModel,
    thinkingConfig,
    maxTurns,
    maxBudgetUsd,
    taskBudget,
    jsonSchema,
    verbose,
    handleElicitation,
    replayUserMessages,
    includePartialMessages,
    setSDKStatus,
    abortController,
    orphanedPermission,
    // Only supply a snip-replay callback when the HISTORY_SNIP feature is on.
    // The `feature(...)` call is kept directly in the ternary condition.
    ...(feature('HISTORY_SNIP')
      ? {
          // Returns undefined for anything that is not a snip boundary;
          // otherwise forces a snip compaction over the engine's store.
          snipReplay: (yielded: Message, store: Message[]) =>
            snipProjection!.isSnipBoundaryMessage(yielded)
              ? snipModule!.snipCompactIfNeeded(store, { force: true })
              : undefined,
        }
      : {}),
  })

  const submitOptions = { uuid: promptUuid, isMeta }

  try {
    yield* engine.submitMessage(prompt, submitOptions)
  } finally {
    // Hand the engine's read-file state back to the caller on every exit path.
    const finalReadFileState = engine.getReadFileState()
    setReadFileCache(finalReadFileState)
  }
}
1296