// src/services/compact/compact.ts
1import { feature } from 'bun:bundle'2import type { UUID } from 'crypto'3import uniqBy from 'lodash-es/uniqBy.js'45/* eslint-disable @typescript-eslint/no-require-imports */6const sessionTranscriptModule = feature('KAIROS')7 ? (require('../sessionTranscript/sessionTranscript.js') as typeof import('../sessionTranscript/sessionTranscript.js'))8 : null910import { APIUserAbortError } from '@anthropic-ai/sdk'11import { markPostCompaction } from 'src/bootstrap/state.js'12import { getInvokedSkillsForAgent } from '../../bootstrap/state.js'13import type { QuerySource } from '../../constants/querySource.js'14import type { CanUseToolFn } from '../../hooks/useCanUseTool.js'15import type { Tool, ToolUseContext } from '../../Tool.js'16import type { LocalAgentTaskState } from '../../tasks/LocalAgentTask/LocalAgentTask.js'17import { FileReadTool } from '../../tools/FileReadTool/FileReadTool.js'18import {19 FILE_READ_TOOL_NAME,20 FILE_UNCHANGED_STUB,21} from '../../tools/FileReadTool/prompt.js'22import { ToolSearchTool } from '../../tools/ToolSearchTool/ToolSearchTool.js'23import type { AgentId } from '../../types/ids.js'24import type {25 AssistantMessage,26 AttachmentMessage,27 HookResultMessage,28 Message,29 PartialCompactDirection,30 SystemCompactBoundaryMessage,31 SystemMessage,32 UserMessage,33} from '../../types/message.js'34import {35 createAttachmentMessage,36 generateFileAttachment,37 getAgentListingDeltaAttachment,38 getDeferredToolsDeltaAttachment,39 getMcpInstructionsDeltaAttachment,40} from '../../utils/attachments.js'41import { getMemoryPath } from '../../utils/config.js'42import { COMPACT_MAX_OUTPUT_TOKENS } from '../../utils/context.js'43import {44 analyzeContext,45 tokenStatsToStatsigMetrics,46} from '../../utils/contextAnalysis.js'47import { logForDebugging } from '../../utils/debug.js'48import { hasExactErrorMessage } from '../../utils/errors.js'49import { cacheToObject } from '../../utils/fileStateCache.js'50import {51 type CacheSafeParams,52 runForkedAgent,53} 
from '../../utils/forkedAgent.js'54import {55 executePostCompactHooks,56 executePreCompactHooks,57} from '../../utils/hooks.js'58import { logError } from '../../utils/log.js'59import { MEMORY_TYPE_VALUES } from '../../utils/memory/types.js'60import {61 createCompactBoundaryMessage,62 createUserMessage,63 getAssistantMessageText,64 getLastAssistantMessage,65 getMessagesAfterCompactBoundary,66 isCompactBoundaryMessage,67 normalizeMessagesForAPI,68} from '../../utils/messages.js'69import { expandPath } from '../../utils/path.js'70import { getPlan, getPlanFilePath } from '../../utils/plans.js'71import {72 isSessionActivityTrackingActive,73 sendSessionActivitySignal,74} from '../../utils/sessionActivity.js'75import { processSessionStartHooks } from '../../utils/sessionStart.js'76import {77 getTranscriptPath,78 reAppendSessionMetadata,79} from '../../utils/sessionStorage.js'80import { sleep } from '../../utils/sleep.js'81import { jsonStringify } from '../../utils/slowOperations.js'82/* eslint-enable @typescript-eslint/no-require-imports */83import { asSystemPrompt } from '../../utils/systemPromptType.js'84import { getTaskOutputPath } from '../../utils/task/diskOutput.js'85import {86 getTokenUsage,87 tokenCountFromLastAPIResponse,88 tokenCountWithEstimation,89} from '../../utils/tokens.js'90import {91 extractDiscoveredToolNames,92 isToolSearchEnabled,93} from '../../utils/toolSearch.js'94import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'95import {96 type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,97 logEvent,98} from '../analytics/index.js'99import {100 getMaxOutputTokensForModel,101 queryModelWithStreaming,102} from '../api/claude.js'103import {104 getPromptTooLongTokenGap,105 PROMPT_TOO_LONG_ERROR_MESSAGE,106 startsWithApiErrorPrefix,107} from '../api/errors.js'108import { notifyCompaction } from '../api/promptCacheBreakDetection.js'109import { getRetryDelay } from '../api/withRetry.js'110import { logPermissionContextForAnts 
} from '../internalLogging.js'111import {112 roughTokenCountEstimation,113 roughTokenCountEstimationForMessages,114} from '../tokenEstimation.js'115import { groupMessagesByApiRound } from './grouping.js'116import {117 getCompactPrompt,118 getCompactUserSummaryMessage,119 getPartialCompactPrompt,120} from './prompt.js'121122export const POST_COMPACT_MAX_FILES_TO_RESTORE = 5123export const POST_COMPACT_TOKEN_BUDGET = 50_000124export const POST_COMPACT_MAX_TOKENS_PER_FILE = 5_000125// Skills can be large (verify=18.7KB, claude-api=20.1KB). Previously re-injected126// unbounded on every compact → 5-10K tok/compact. Per-skill truncation beats127// dropping — instructions at the top of a skill file are usually the critical128// part. Budget sized to hold ~5 skills at the per-skill cap.129export const POST_COMPACT_MAX_TOKENS_PER_SKILL = 5_000130export const POST_COMPACT_SKILLS_TOKEN_BUDGET = 25_000131const MAX_COMPACT_STREAMING_RETRIES = 2132133/**134 * Strip image blocks from user messages before sending for compaction.135 * Images are not needed for generating a conversation summary and can136 * cause the compaction API call itself to hit the prompt-too-long limit,137 * especially in CCD sessions where users frequently attach images.138 * Replaces image blocks with a text marker so the summary still notes139 * that an image was shared.140 *141 * Note: Only user messages contain images (either directly attached or within142 * tool_result content from tools). 
Assistant messages contain text, tool_use,143 * and thinking blocks but not images.144 */145export function stripImagesFromMessages(messages: Message[]): Message[] {146 return messages.map(message => {147 if (message.type !== 'user') {148 return message149 }150151 const content = message.message.content152 if (!Array.isArray(content)) {153 return message154 }155156 let hasMediaBlock = false157 const newContent = content.flatMap(block => {158 if (block.type === 'image') {159 hasMediaBlock = true160 return [{ type: 'text' as const, text: '[image]' }]161 }162 if (block.type === 'document') {163 hasMediaBlock = true164 return [{ type: 'text' as const, text: '[document]' }]165 }166 // Also strip images/documents nested inside tool_result content arrays167 if (block.type === 'tool_result' && Array.isArray(block.content)) {168 let toolHasMedia = false169 const newToolContent = block.content.map(item => {170 if (item.type === 'image') {171 toolHasMedia = true172 return { type: 'text' as const, text: '[image]' }173 }174 if (item.type === 'document') {175 toolHasMedia = true176 return { type: 'text' as const, text: '[document]' }177 }178 return item179 })180 if (toolHasMedia) {181 hasMediaBlock = true182 return [{ ...block, content: newToolContent }]183 }184 }185 return [block]186 })187188 if (!hasMediaBlock) {189 return message190 }191192 return {193 ...message,194 message: {195 ...message.message,196 content: newContent,197 },198 } as typeof message199 })200}201202/**203 * Strip attachment types that are re-injected post-compaction anyway.204 * skill_discovery/skill_listing are re-surfaced by resetSentSkillNames()205 * + the next turn's discovery signal, so feeding them to the summarizer206 * wastes tokens and pollutes the summary with stale skill suggestions.207 *208 * No-op when EXPERIMENTAL_SKILL_SEARCH is off (the attachment types209 * don't exist on external builds).210 */211export function stripReinjectedAttachments(messages: Message[]): Message[] {212 if 
(feature('EXPERIMENTAL_SKILL_SEARCH')) {213 return messages.filter(214 m =>215 !(216 m.type === 'attachment' &&217 (m.attachment.type === 'skill_discovery' ||218 m.attachment.type === 'skill_listing')219 ),220 )221 }222 return messages223}224225export const ERROR_MESSAGE_NOT_ENOUGH_MESSAGES =226 'Not enough messages to compact.'227const MAX_PTL_RETRIES = 3228const PTL_RETRY_MARKER = '[earlier conversation truncated for compaction retry]'229230/**231 * Drops the oldest API-round groups from messages until tokenGap is covered.232 * Falls back to dropping 20% of groups when the gap is unparseable (some233 * Vertex/Bedrock error formats). Returns null when nothing can be dropped234 * without leaving an empty summarize set.235 *236 * This is the last-resort escape hatch for CC-1180 — when the compact request237 * itself hits prompt-too-long, the user is otherwise stuck. Dropping the238 * oldest context is lossy but unblocks them. The reactive-compact path239 * (compactMessages.ts) has the proper retry loop that peels from the tail;240 * this helper is the dumb-but-safe fallback for the proactive/manual path241 * that wasn't migrated in bfdb472f's unification.242 */243export function truncateHeadForPTLRetry(244 messages: Message[],245 ptlResponse: AssistantMessage,246): Message[] | null {247 // Strip our own synthetic marker from a previous retry before grouping.248 // Otherwise it becomes its own group 0 and the 20% fallback stalls249 // (drops only the marker, re-adds it, zero progress on retry 2+).250 const input =251 messages[0]?.type === 'user' &&252 messages[0].isMeta &&253 messages[0].message.content === PTL_RETRY_MARKER254 ? 
messages.slice(1)255 : messages256257 const groups = groupMessagesByApiRound(input)258 if (groups.length < 2) return null259260 const tokenGap = getPromptTooLongTokenGap(ptlResponse)261 let dropCount: number262 if (tokenGap !== undefined) {263 let acc = 0264 dropCount = 0265 for (const g of groups) {266 acc += roughTokenCountEstimationForMessages(g)267 dropCount++268 if (acc >= tokenGap) break269 }270 } else {271 dropCount = Math.max(1, Math.floor(groups.length * 0.2))272 }273274 // Keep at least one group so there's something to summarize.275 dropCount = Math.min(dropCount, groups.length - 1)276 if (dropCount < 1) return null277278 const sliced = groups.slice(dropCount).flat()279 // groupMessagesByApiRound puts the preamble in group 0 and starts every280 // subsequent group with an assistant message. Dropping group 0 leaves an281 // assistant-first sequence which the API rejects (first message must be282 // role=user). Prepend a synthetic user marker — ensureToolResultPairing283 // already handles any orphaned tool_results this creates.284 if (sliced[0]?.type === 'assistant') {285 return [286 createUserMessage({ content: PTL_RETRY_MARKER, isMeta: true }),287 ...sliced,288 ]289 }290 return sliced291}292293export const ERROR_MESSAGE_PROMPT_TOO_LONG =294 'Conversation too long. 
Press esc twice to go up a few messages and try again.'295export const ERROR_MESSAGE_USER_ABORT = 'API Error: Request was aborted.'296export const ERROR_MESSAGE_INCOMPLETE_RESPONSE =297 'Compaction interrupted · This may be due to network issues — please try again.'298299export interface CompactionResult {300 boundaryMarker: SystemMessage301 summaryMessages: UserMessage[]302 attachments: AttachmentMessage[]303 hookResults: HookResultMessage[]304 messagesToKeep?: Message[]305 userDisplayMessage?: string306 preCompactTokenCount?: number307 postCompactTokenCount?: number308 truePostCompactTokenCount?: number309 compactionUsage?: ReturnType<typeof getTokenUsage>310}311312/**313 * Diagnosis context passed from autoCompactIfNeeded into compactConversation.314 * Lets the tengu_compact event disambiguate same-chain loops (H2) from315 * cross-agent (H1/H5) and manual-vs-auto (H3) compactions without joins.316 */317export type RecompactionInfo = {318 isRecompactionInChain: boolean319 turnsSincePreviousCompact: number320 previousCompactTurnId?: string321 autoCompactThreshold: number322 querySource?: QuerySource323}324325/**326 * Build the base post-compact messages array from a CompactionResult.327 * This ensures consistent ordering across all compaction paths.328 * Order: boundaryMarker, summaryMessages, messagesToKeep, attachments, hookResults329 */330export function buildPostCompactMessages(result: CompactionResult): Message[] {331 return [332 result.boundaryMarker,333 ...result.summaryMessages,334 ...(result.messagesToKeep ?? 
[]),335 ...result.attachments,336 ...result.hookResults,337 ]338}339340/**341 * Annotate a compact boundary with relink metadata for messagesToKeep.342 * Preserved messages keep their original parentUuids on disk (dedup-skipped);343 * the loader uses this to patch head→anchor and anchor's-other-children→tail.344 *345 * `anchorUuid` = what sits immediately before keep[0] in the desired chain:346 * - suffix-preserving (reactive/session-memory): last summary message347 * - prefix-preserving (partial compact): the boundary itself348 */349export function annotateBoundaryWithPreservedSegment(350 boundary: SystemCompactBoundaryMessage,351 anchorUuid: UUID,352 messagesToKeep: readonly Message[] | undefined,353): SystemCompactBoundaryMessage {354 const keep = messagesToKeep ?? []355 if (keep.length === 0) return boundary356 return {357 ...boundary,358 compactMetadata: {359 ...boundary.compactMetadata,360 preservedSegment: {361 headUuid: keep[0]!.uuid,362 anchorUuid,363 tailUuid: keep.at(-1)!.uuid,364 },365 },366 }367}368369/**370 * Merges user-supplied custom instructions with hook-provided instructions.371 * User instructions come first; hook instructions are appended.372 * Empty strings normalize to undefined.373 */374export function mergeHookInstructions(375 userInstructions: string | undefined,376 hookInstructions: string | undefined,377): string | undefined {378 if (!hookInstructions) return userInstructions || undefined379 if (!userInstructions) return hookInstructions380 return `${userInstructions}\n\n${hookInstructions}`381}382383/**384 * Creates a compact version of a conversation by summarizing older messages385 * and preserving recent conversation history.386 */387export async function compactConversation(388 messages: Message[],389 context: ToolUseContext,390 cacheSafeParams: CacheSafeParams,391 suppressFollowUpQuestions: boolean,392 customInstructions?: string,393 isAutoCompact: boolean = false,394 recompactionInfo?: RecompactionInfo,395): 
Promise<CompactionResult> {396 try {397 if (messages.length === 0) {398 throw new Error(ERROR_MESSAGE_NOT_ENOUGH_MESSAGES)399 }400401 const preCompactTokenCount = tokenCountWithEstimation(messages)402403 const appState = context.getAppState()404 void logPermissionContextForAnts(appState.toolPermissionContext, 'summary')405406 context.onCompactProgress?.({407 type: 'hooks_start',408 hookType: 'pre_compact',409 })410411 // Execute PreCompact hooks412 context.setSDKStatus?.('compacting')413 const hookResult = await executePreCompactHooks(414 {415 trigger: isAutoCompact ? 'auto' : 'manual',416 customInstructions: customInstructions ?? null,417 },418 context.abortController.signal,419 )420 customInstructions = mergeHookInstructions(421 customInstructions,422 hookResult.newCustomInstructions,423 )424 const userDisplayMessage = hookResult.userDisplayMessage425426 // Show requesting mode with up arrow and custom message427 context.setStreamMode?.('requesting')428 context.setResponseLength?.(() => 0)429 context.onCompactProgress?.({ type: 'compact_start' })430431 // 3P default: true — forked-agent path reuses main conversation's prompt cache.432 // Experiment (Jan 2026) confirmed: false path is 98% cache miss, costs ~0.76% of433 // fleet cache_creation (~38B tok/day), concentrated in ephemeral envs (CCR/GHA/SDK)434 // with cold GB cache and 3P providers where GB is disabled. 
GB gate kept as kill-switch.435 const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE(436 'tengu_compact_cache_prefix',437 true,438 )439440 const compactPrompt = getCompactPrompt(customInstructions)441 const summaryRequest = createUserMessage({442 content: compactPrompt,443 })444445 let messagesToSummarize = messages446 let retryCacheSafeParams = cacheSafeParams447 let summaryResponse: AssistantMessage448 let summary: string | null449 let ptlAttempts = 0450 for (;;) {451 summaryResponse = await streamCompactSummary({452 messages: messagesToSummarize,453 summaryRequest,454 appState,455 context,456 preCompactTokenCount,457 cacheSafeParams: retryCacheSafeParams,458 })459 summary = getAssistantMessageText(summaryResponse)460 if (!summary?.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) break461462 // CC-1180: compact request itself hit prompt-too-long. Truncate the463 // oldest API-round groups and retry rather than leaving the user stuck.464 ptlAttempts++465 const truncated =466 ptlAttempts <= MAX_PTL_RETRIES467 ? truncateHeadForPTLRetry(messagesToSummarize, summaryResponse)468 : null469 if (!truncated) {470 logEvent('tengu_compact_failed', {471 reason:472 'prompt_too_long' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,473 preCompactTokenCount,474 promptCacheSharingEnabled,475 ptlAttempts,476 })477 throw new Error(ERROR_MESSAGE_PROMPT_TOO_LONG)478 }479 logEvent('tengu_compact_ptl_retry', {480 attempt: ptlAttempts,481 droppedMessages: messagesToSummarize.length - truncated.length,482 remainingMessages: truncated.length,483 })484 messagesToSummarize = truncated485 // The forked-agent path reads from cacheSafeParams.forkContextMessages,486 // not the messages param — thread the truncated set through both paths.487 retryCacheSafeParams = {488 ...retryCacheSafeParams,489 forkContextMessages: truncated,490 }491 }492493 if (!summary) {494 logForDebugging(495 `Compact failed: no summary text in response. 
Response: ${jsonStringify(summaryResponse)}`,496 { level: 'error' },497 )498 logEvent('tengu_compact_failed', {499 reason:500 'no_summary' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,501 preCompactTokenCount,502 promptCacheSharingEnabled,503 })504 throw new Error(505 `Failed to generate conversation summary - response did not contain valid text content`,506 )507 } else if (startsWithApiErrorPrefix(summary)) {508 logEvent('tengu_compact_failed', {509 reason:510 'api_error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,511 preCompactTokenCount,512 promptCacheSharingEnabled,513 })514 throw new Error(summary)515 }516517 // Store the current file state before clearing518 const preCompactReadFileState = cacheToObject(context.readFileState)519520 // Clear the cache521 context.readFileState.clear()522 context.loadedNestedMemoryPaths?.clear()523524 // Intentionally NOT resetting sentSkillNames: re-injecting the full525 // skill_listing (~4K tokens) post-compact is pure cache_creation with526 // marginal benefit. The model still has SkillTool in its schema and527 // invoked_skills attachment (below) preserves used-skill content. 
Ants528 // with EXPERIMENTAL_SKILL_SEARCH already skip re-injection via the529 // early-return in getSkillListingAttachments.530531 // Run async attachment generation in parallel532 const [fileAttachments, asyncAgentAttachments] = await Promise.all([533 createPostCompactFileAttachments(534 preCompactReadFileState,535 context,536 POST_COMPACT_MAX_FILES_TO_RESTORE,537 ),538 createAsyncAgentAttachmentsIfNeeded(context),539 ])540541 const postCompactFileAttachments: AttachmentMessage[] = [542 ...fileAttachments,543 ...asyncAgentAttachments,544 ]545 const planAttachment = createPlanAttachmentIfNeeded(context.agentId)546 if (planAttachment) {547 postCompactFileAttachments.push(planAttachment)548 }549550 // Add plan mode instructions if currently in plan mode, so the model551 // continues operating in plan mode after compaction552 const planModeAttachment = await createPlanModeAttachmentIfNeeded(context)553 if (planModeAttachment) {554 postCompactFileAttachments.push(planModeAttachment)555 }556557 // Add skill attachment if skills were invoked in this session558 const skillAttachment = createSkillAttachmentIfNeeded(context.agentId)559 if (skillAttachment) {560 postCompactFileAttachments.push(skillAttachment)561 }562563 // Compaction ate prior delta attachments. Re-announce from the current564 // state so the model has tool/instruction context on the first565 // post-compact turn. 
Empty message history → diff against nothing →566 // announces the full set.567 for (const att of getDeferredToolsDeltaAttachment(568 context.options.tools,569 context.options.mainLoopModel,570 [],571 { callSite: 'compact_full' },572 )) {573 postCompactFileAttachments.push(createAttachmentMessage(att))574 }575 for (const att of getAgentListingDeltaAttachment(context, [])) {576 postCompactFileAttachments.push(createAttachmentMessage(att))577 }578 for (const att of getMcpInstructionsDeltaAttachment(579 context.options.mcpClients,580 context.options.tools,581 context.options.mainLoopModel,582 [],583 )) {584 postCompactFileAttachments.push(createAttachmentMessage(att))585 }586587 context.onCompactProgress?.({588 type: 'hooks_start',589 hookType: 'session_start',590 })591 // Execute SessionStart hooks after successful compaction592 const hookMessages = await processSessionStartHooks('compact', {593 model: context.options.mainLoopModel,594 })595596 // Create the compact boundary marker and summary messages before the597 // event so we can compute the true resulting-context size.598 const boundaryMarker = createCompactBoundaryMessage(599 isAutoCompact ? 'auto' : 'manual',600 preCompactTokenCount ?? 
0,601 messages.at(-1)?.uuid,602 )603 // Carry loaded-tool state — the summary doesn't preserve tool_reference604 // blocks, so the post-compact schema filter needs this to keep sending605 // already-loaded deferred tool schemas to the API.606 const preCompactDiscovered = extractDiscoveredToolNames(messages)607 if (preCompactDiscovered.size > 0) {608 boundaryMarker.compactMetadata.preCompactDiscoveredTools = [609 ...preCompactDiscovered,610 ].sort()611 }612613 const transcriptPath = getTranscriptPath()614 const summaryMessages: UserMessage[] = [615 createUserMessage({616 content: getCompactUserSummaryMessage(617 summary,618 suppressFollowUpQuestions,619 transcriptPath,620 ),621 isCompactSummary: true,622 isVisibleInTranscriptOnly: true,623 }),624 ]625626 // Previously "postCompactTokenCount" — renamed because this is the627 // compact API call's total usage (input_tokens ≈ preCompactTokenCount),628 // NOT the size of the resulting context. Kept for event-field continuity.629 const compactionCallTotalTokens = tokenCountFromLastAPIResponse([630 summaryResponse,631 ])632633 // Message-payload estimate of the resulting context. The next iteration's634 // shouldAutoCompact will see this PLUS ~20-40K for system prompt + tools +635 // userContext (via API usage.input_tokens). So `willRetriggerNextTurn: true`636 // is a strong signal; `false` may still retrigger when this is close to threshold.637 const truePostCompactTokenCount = roughTokenCountEstimationForMessages([638 boundaryMarker,639 ...summaryMessages,640 ...postCompactFileAttachments,641 ...hookMessages,642 ])643644 // Extract compaction API usage metrics645 const compactionUsage = getTokenUsage(summaryResponse)646647 const querySourceForEvent =648 recompactionInfo?.querySource ?? context.options.querySource ?? 
'unknown'649650 logEvent('tengu_compact', {651 preCompactTokenCount,652 // Kept for continuity — semantically the compact API call's total usage653 postCompactTokenCount: compactionCallTotalTokens,654 truePostCompactTokenCount,655 autoCompactThreshold: recompactionInfo?.autoCompactThreshold ?? -1,656 willRetriggerNextTurn:657 recompactionInfo !== undefined &&658 truePostCompactTokenCount >= recompactionInfo.autoCompactThreshold,659 isAutoCompact,660 querySource:661 querySourceForEvent as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,662 queryChainId: (context.queryTracking?.chainId ??663 '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,664 queryDepth: context.queryTracking?.depth ?? -1,665 isRecompactionInChain: recompactionInfo?.isRecompactionInChain ?? false,666 turnsSincePreviousCompact:667 recompactionInfo?.turnsSincePreviousCompact ?? -1,668 previousCompactTurnId: (recompactionInfo?.previousCompactTurnId ??669 '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,670 compactionInputTokens: compactionUsage?.input_tokens,671 compactionOutputTokens: compactionUsage?.output_tokens,672 compactionCacheReadTokens: compactionUsage?.cache_read_input_tokens ?? 0,673 compactionCacheCreationTokens:674 compactionUsage?.cache_creation_input_tokens ?? 0,675 compactionTotalTokens: compactionUsage676 ? compactionUsage.input_tokens +677 (compactionUsage.cache_creation_input_tokens ?? 0) +678 (compactionUsage.cache_read_input_tokens ?? 0) +679 compactionUsage.output_tokens680 : 0,681 promptCacheSharingEnabled,682 // analyzeContext walks every content block (~11ms on a 4.5K-message683 // session) purely for this telemetry breakdown. Computed here, past684 // the compaction-API await, so the sync walk doesn't starve the685 // render loop before compaction even starts. 
Same deferral pattern686 // as reactiveCompact.ts.687 ...(() => {688 try {689 return tokenStatsToStatsigMetrics(analyzeContext(messages))690 } catch (error) {691 logError(error as Error)692 return {}693 }694 })(),695 })696697 // Reset cache read baseline so the post-compact drop isn't flagged as a break698 if (feature('PROMPT_CACHE_BREAK_DETECTION')) {699 notifyCompaction(700 context.options.querySource ?? 'compact',701 context.agentId,702 )703 }704 markPostCompaction()705706 // Re-append session metadata (custom title, tag) so it stays within707 // the 16KB tail window that readLiteMetadata reads for --resume display.708 // Without this, enough post-compaction messages push the metadata entry709 // out of the window, causing --resume to show the auto-generated title710 // instead of the user-set session name.711 reAppendSessionMetadata()712713 // Write a reduced transcript segment for the pre-compaction messages714 // (assistant mode only). Fire-and-forget — errors are logged internally.715 if (feature('KAIROS')) {716 void sessionTranscriptModule?.writeSessionTranscriptSegment(messages)717 }718719 context.onCompactProgress?.({720 type: 'hooks_start',721 hookType: 'post_compact',722 })723 const postCompactHookResult = await executePostCompactHooks(724 {725 trigger: isAutoCompact ? 
'auto' : 'manual',726 compactSummary: summary,727 },728 context.abortController.signal,729 )730731 const combinedUserDisplayMessage = [732 userDisplayMessage,733 postCompactHookResult.userDisplayMessage,734 ]735 .filter(Boolean)736 .join('\n')737738 return {739 boundaryMarker,740 summaryMessages,741 attachments: postCompactFileAttachments,742 hookResults: hookMessages,743 userDisplayMessage: combinedUserDisplayMessage || undefined,744 preCompactTokenCount,745 postCompactTokenCount: compactionCallTotalTokens,746 truePostCompactTokenCount,747 compactionUsage,748 }749 } catch (error) {750 // Only show the error notification for manual /compact.751 // Auto-compact failures are retried on the next turn and the752 // notification is confusing when compaction eventually succeeds.753 if (!isAutoCompact) {754 addErrorNotificationIfNeeded(error, context)755 }756 throw error757 } finally {758 context.setStreamMode?.('requesting')759 context.setResponseLength?.(() => 0)760 context.onCompactProgress?.({ type: 'compact_end' })761 context.setSDKStatus?.(null)762 }763}764765/**766 * Performs a partial compaction around the selected message index.767 * Direction 'from': summarizes messages after the index, keeps earlier ones.768 * Prompt cache for kept (earlier) messages is preserved.769 * Direction 'up_to': summarizes messages before the index, keeps later ones.770 * Prompt cache is invalidated since the summary precedes the kept messages.771 */772export async function partialCompactConversation(773 allMessages: Message[],774 pivotIndex: number,775 context: ToolUseContext,776 cacheSafeParams: CacheSafeParams,777 userFeedback?: string,778 direction: PartialCompactDirection = 'from',779): Promise<CompactionResult> {780 try {781 const messagesToSummarize =782 direction === 'up_to'783 ? 
allMessages.slice(0, pivotIndex)784 : allMessages.slice(pivotIndex)785 // 'up_to' must strip old compact boundaries/summaries: for 'up_to',786 // summary_B sits BEFORE kept, so a stale boundary_A in kept wins787 // findLastCompactBoundaryIndex's backward scan and drops summary_B.788 // 'from' keeps them: summary_B sits AFTER kept (backward scan still789 // works), and removing an old summary would lose its covered history.790 const messagesToKeep =791 direction === 'up_to'792 ? allMessages793 .slice(pivotIndex)794 .filter(795 m =>796 m.type !== 'progress' &&797 !isCompactBoundaryMessage(m) &&798 !(m.type === 'user' && m.isCompactSummary),799 )800 : allMessages.slice(0, pivotIndex).filter(m => m.type !== 'progress')801802 if (messagesToSummarize.length === 0) {803 throw new Error(804 direction === 'up_to'805 ? 'Nothing to summarize before the selected message.'806 : 'Nothing to summarize after the selected message.',807 )808 }809810 const preCompactTokenCount = tokenCountWithEstimation(allMessages)811812 context.onCompactProgress?.({813 type: 'hooks_start',814 hookType: 'pre_compact',815 })816817 context.setSDKStatus?.('compacting')818 const hookResult = await executePreCompactHooks(819 {820 trigger: 'manual',821 customInstructions: null,822 },823 context.abortController.signal,824 )825826 // Merge hook instructions with user feedback827 let customInstructions: string | undefined828 if (hookResult.newCustomInstructions && userFeedback) {829 customInstructions = `${hookResult.newCustomInstructions}\n\nUser context: ${userFeedback}`830 } else if (hookResult.newCustomInstructions) {831 customInstructions = hookResult.newCustomInstructions832 } else if (userFeedback) {833 customInstructions = `User context: ${userFeedback}`834 }835836 context.setStreamMode?.('requesting')837 context.setResponseLength?.(() => 0)838 context.onCompactProgress?.({ type: 'compact_start' })839840 const compactPrompt = getPartialCompactPrompt(customInstructions, direction)841 const 
summaryRequest = createUserMessage({842 content: compactPrompt,843 })844845 const failureMetadata = {846 preCompactTokenCount,847 direction:848 direction as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,849 messagesSummarized: messagesToSummarize.length,850 }851852 // 'up_to' prefix hits cache directly; 'from' sends all (tail wouldn't cache).853 // PTL retry breaks the cache prefix but unblocks the user (CC-1180).854 let apiMessages = direction === 'up_to' ? messagesToSummarize : allMessages855 let retryCacheSafeParams =856 direction === 'up_to'857 ? { ...cacheSafeParams, forkContextMessages: messagesToSummarize }858 : cacheSafeParams859 let summaryResponse: AssistantMessage860 let summary: string | null861 let ptlAttempts = 0862 for (;;) {863 summaryResponse = await streamCompactSummary({864 messages: apiMessages,865 summaryRequest,866 appState: context.getAppState(),867 context,868 preCompactTokenCount,869 cacheSafeParams: retryCacheSafeParams,870 })871 summary = getAssistantMessageText(summaryResponse)872 if (!summary?.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) break873874 ptlAttempts++875 const truncated =876 ptlAttempts <= MAX_PTL_RETRIES877 ? 
truncateHeadForPTLRetry(apiMessages, summaryResponse)878 : null879 if (!truncated) {880 logEvent('tengu_partial_compact_failed', {881 reason:882 'prompt_too_long' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,883 ...failureMetadata,884 ptlAttempts,885 })886 throw new Error(ERROR_MESSAGE_PROMPT_TOO_LONG)887 }888 logEvent('tengu_compact_ptl_retry', {889 attempt: ptlAttempts,890 droppedMessages: apiMessages.length - truncated.length,891 remainingMessages: truncated.length,892 path: 'partial' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,893 })894 apiMessages = truncated895 retryCacheSafeParams = {896 ...retryCacheSafeParams,897 forkContextMessages: truncated,898 }899 }900 if (!summary) {901 logEvent('tengu_partial_compact_failed', {902 reason:903 'no_summary' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,904 ...failureMetadata,905 })906 throw new Error(907 'Failed to generate conversation summary - response did not contain valid text content',908 )909 } else if (startsWithApiErrorPrefix(summary)) {910 logEvent('tengu_partial_compact_failed', {911 reason:912 'api_error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,913 ...failureMetadata,914 })915 throw new Error(summary)916 }917918 // Store the current file state before clearing919 const preCompactReadFileState = cacheToObject(context.readFileState)920 context.readFileState.clear()921 context.loadedNestedMemoryPaths?.clear()922 // Intentionally NOT resetting sentSkillNames — see compactConversation()923 // for rationale (~4K tokens saved per compact event).924925 const [fileAttachments, asyncAgentAttachments] = await Promise.all([926 createPostCompactFileAttachments(927 preCompactReadFileState,928 context,929 POST_COMPACT_MAX_FILES_TO_RESTORE,930 messagesToKeep,931 ),932 createAsyncAgentAttachmentsIfNeeded(context),933 ])934935 const postCompactFileAttachments: AttachmentMessage[] = [936 ...fileAttachments,937 ...asyncAgentAttachments,938 ]939 const 
planAttachment = createPlanAttachmentIfNeeded(context.agentId)940 if (planAttachment) {941 postCompactFileAttachments.push(planAttachment)942 }943944 // Add plan mode instructions if currently in plan mode945 const planModeAttachment = await createPlanModeAttachmentIfNeeded(context)946 if (planModeAttachment) {947 postCompactFileAttachments.push(planModeAttachment)948 }949950 const skillAttachment = createSkillAttachmentIfNeeded(context.agentId)951 if (skillAttachment) {952 postCompactFileAttachments.push(skillAttachment)953 }954955 // Re-announce only what was in the summarized portion — messagesToKeep956 // is scanned, so anything already announced there is skipped.957 for (const att of getDeferredToolsDeltaAttachment(958 context.options.tools,959 context.options.mainLoopModel,960 messagesToKeep,961 { callSite: 'compact_partial' },962 )) {963 postCompactFileAttachments.push(createAttachmentMessage(att))964 }965 for (const att of getAgentListingDeltaAttachment(context, messagesToKeep)) {966 postCompactFileAttachments.push(createAttachmentMessage(att))967 }968 for (const att of getMcpInstructionsDeltaAttachment(969 context.options.mcpClients,970 context.options.tools,971 context.options.mainLoopModel,972 messagesToKeep,973 )) {974 postCompactFileAttachments.push(createAttachmentMessage(att))975 }976977 context.onCompactProgress?.({978 type: 'hooks_start',979 hookType: 'session_start',980 })981 const hookMessages = await processSessionStartHooks('compact', {982 model: context.options.mainLoopModel,983 })984985 const postCompactTokenCount = tokenCountFromLastAPIResponse([986 summaryResponse,987 ])988 const compactionUsage = getTokenUsage(summaryResponse)989990 logEvent('tengu_partial_compact', {991 preCompactTokenCount,992 postCompactTokenCount,993 messagesKept: messagesToKeep.length,994 messagesSummarized: messagesToSummarize.length,995 direction:996 direction as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,997 hasUserFeedback: !!userFeedback,998 
trigger:999 'message_selector' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,1000 compactionInputTokens: compactionUsage?.input_tokens,1001 compactionOutputTokens: compactionUsage?.output_tokens,1002 compactionCacheReadTokens: compactionUsage?.cache_read_input_tokens ?? 0,1003 compactionCacheCreationTokens:1004 compactionUsage?.cache_creation_input_tokens ?? 0,1005 })10061007 // Progress messages aren't loggable, so forkSessionImpl would null out1008 // a logicalParentUuid pointing at one. Both directions skip them.1009 const lastPreCompactUuid =1010 direction === 'up_to'1011 ? allMessages.slice(0, pivotIndex).findLast(m => m.type !== 'progress')1012 ?.uuid1013 : messagesToKeep.at(-1)?.uuid1014 const boundaryMarker = createCompactBoundaryMessage(1015 'manual',1016 preCompactTokenCount ?? 0,1017 lastPreCompactUuid,1018 userFeedback,1019 messagesToSummarize.length,1020 )1021 // allMessages not just messagesToSummarize — set union is idempotent,1022 // simpler than tracking which half each tool lived in.1023 const preCompactDiscovered = extractDiscoveredToolNames(allMessages)1024 if (preCompactDiscovered.size > 0) {1025 boundaryMarker.compactMetadata.preCompactDiscoveredTools = [1026 ...preCompactDiscovered,1027 ].sort()1028 }10291030 const transcriptPath = getTranscriptPath()1031 const summaryMessages: UserMessage[] = [1032 createUserMessage({1033 content: getCompactUserSummaryMessage(summary, false, transcriptPath),1034 isCompactSummary: true,1035 ...(messagesToKeep.length > 01036 ? {1037 summarizeMetadata: {1038 messagesSummarized: messagesToSummarize.length,1039 userContext: userFeedback,1040 direction,1041 },1042 }1043 : { isVisibleInTranscriptOnly: true as const }),1044 }),1045 ]10461047 if (feature('PROMPT_CACHE_BREAK_DETECTION')) {1048 notifyCompaction(1049 context.options.querySource ?? 
        'compact',
        context.agentId,
      )
    }
    markPostCompaction()

    // Re-append session metadata (custom title, tag) so it stays within
    // the 16KB tail window that readLiteMetadata reads for --resume display.
    reAppendSessionMetadata()

    // KAIROS: persist the summarized slice to the session transcript before it
    // drops out of the live message list.
    if (feature('KAIROS')) {
      void sessionTranscriptModule?.writeSessionTranscriptSegment(
        messagesToSummarize,
      )
    }

    context.onCompactProgress?.({
      type: 'hooks_start',
      hookType: 'post_compact',
    })
    const postCompactHookResult = await executePostCompactHooks(
      {
        trigger: 'manual',
        compactSummary: summary,
      },
      context.abortController.signal,
    )

    // 'from': prefix-preserving → boundary; 'up_to': suffix → last summary
    const anchorUuid =
      direction === 'up_to'
        ? (summaryMessages.at(-1)?.uuid ?? boundaryMarker.uuid)
        : boundaryMarker.uuid
    return {
      boundaryMarker: annotateBoundaryWithPreservedSegment(
        boundaryMarker,
        anchorUuid,
        messagesToKeep,
      ),
      summaryMessages,
      messagesToKeep,
      attachments: postCompactFileAttachments,
      hookResults: hookMessages,
      userDisplayMessage: postCompactHookResult.userDisplayMessage,
      preCompactTokenCount,
      postCompactTokenCount,
      compactionUsage,
    }
  } catch (error) {
    addErrorNotificationIfNeeded(error, context)
    throw error
  } finally {
    // Always restore UI/stream state, even on abort or failure.
    context.setStreamMode?.('requesting')
    context.setResponseLength?.(() => 0)
    context.onCompactProgress?.({ type: 'compact_end' })
    context.setSDKStatus?.(null)
  }
}

/**
 * Shows an "Error compacting conversation" notification unless the failure is
 * one the user expects and initiated or already understands: an explicit user
 * abort, or the known "not enough messages" case.
 */
function addErrorNotificationIfNeeded(
  error: unknown,
  context: Pick<ToolUseContext, 'addNotification'>,
) {
  if (
    !hasExactErrorMessage(error, ERROR_MESSAGE_USER_ABORT) &&
    !hasExactErrorMessage(error, ERROR_MESSAGE_NOT_ENOUGH_MESSAGES)
  ) {
    context.addNotification?.({
      key: 'error-compacting-conversation',
      text: 'Error compacting conversation',
      priority: 'immediate',
      color: 'error',
    })
  }
}

/**
 * Permission callback for the compaction agent: denies every tool request so
 * the summarizer can only produce a text summary.
 */
export function createCompactCanUseTool(): CanUseToolFn {
  return async () => ({
    behavior: 'deny' as const,
    message: 'Tool use is not allowed during compaction',
    decisionReason: {
      type: 'other' as const,
      reason: 'compaction agent should only produce text summary',
    },
  })
}

/**
 * Generates the compaction summary for `messages` by sending `summaryRequest`
 * to the model. Two paths:
 *  1. Forked-agent path (when `tengu_compact_cache_prefix` is on): reuses the
 *     main conversation's prompt-cache prefix via runForkedAgent.
 *  2. Direct streaming fallback, optionally retried when
 *     `tengu_compact_streaming_retry` is on.
 *
 * May return an assistant message whose text starts with
 * PROMPT_TOO_LONG_ERROR_MESSAGE — the caller's PTL retry loop handles that.
 *
 * @throws Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE) when streaming completes
 *   without ever yielding an assistant message.
 */
async function streamCompactSummary({
  messages,
  summaryRequest,
  appState,
  context,
  preCompactTokenCount,
  cacheSafeParams,
}: {
  messages: Message[]
  summaryRequest: UserMessage
  appState: Awaited<ReturnType<ToolUseContext['getAppState']>>
  context: ToolUseContext
  preCompactTokenCount: number
  cacheSafeParams: CacheSafeParams
}): Promise<AssistantMessage> {
  // When prompt cache sharing is enabled, use forked agent to reuse the
  // main conversation's cached prefix (system prompt, tools, context messages).
  // Falls back to regular streaming path on failure.
  // 3P default: true — see comment at the other tengu_compact_cache_prefix read above.
  const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_compact_cache_prefix',
    true,
  )
  // Send keep-alive signals during compaction to prevent remote session
  // WebSocket idle timeouts from dropping bridge connections. Compaction
  // API calls can take 5-10+ seconds, during which no other messages
  // flow through the transport — without keep-alives, the server may
  // close the WebSocket for inactivity.
  // Two signals: (1) PUT /worker heartbeat via sessionActivity, and
  // (2) re-emit 'compacting' status so the SDK event stream stays active
  // and the server doesn't consider the session stale.
  // Note: setInterval forwards trailing arguments to the callback, so
  // `statusSetter` below receives `context.setSDKStatus`.
  const activityInterval = isSessionActivityTrackingActive()
    ? setInterval(
        (statusSetter?: (status: 'compacting' | null) => void) => {
          sendSessionActivitySignal()
          statusSetter?.('compacting')
        },
        30_000,
        context.setSDKStatus,
      )
    : undefined

  try {
    if (promptCacheSharingEnabled) {
      try {
        // DO NOT set maxOutputTokens here. The fork piggybacks on the main thread's
        // prompt cache by sending identical cache-key params (system, tools, model,
        // messages prefix, thinking config). Setting maxOutputTokens would clamp
        // budget_tokens via Math.min(budget, maxOutputTokens-1) in claude.ts,
        // creating a thinking config mismatch that invalidates the cache.
        // The streaming fallback path (below) can safely set maxOutputTokensOverride
        // since it doesn't share cache with the main thread.
        const result = await runForkedAgent({
          promptMessages: [summaryRequest],
          cacheSafeParams,
          canUseTool: createCompactCanUseTool(),
          querySource: 'compact',
          forkLabel: 'compact',
          maxTurns: 1,
          skipCacheWrite: true,
          // Pass the compact context's abortController so user Esc aborts the
          // fork — same signal the streaming fallback uses at
          // `signal: context.abortController.signal` below.
          overrides: { abortController: context.abortController },
        })
        const assistantMsg = getLastAssistantMessage(result.messages)
        const assistantText = assistantMsg
          ? getAssistantMessageText(assistantMsg)
          : null
        // Guard isApiErrorMessage: query() catches API errors (including
        // APIUserAbortError on ESC) and yields them as synthetic assistant
        // messages. Without this check, an aborted compact "succeeds" with
        // "Request was aborted." as the summary — the text doesn't start with
        // "API Error" so the caller's startsWithApiErrorPrefix guard misses it.
        if (assistantMsg && assistantText && !assistantMsg.isApiErrorMessage) {
          // Skip success logging for PTL error text — it's returned so the
          // caller's retry loop catches it, but it's not a successful summary.
          if (!assistantText.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) {
            logEvent('tengu_compact_cache_sharing_success', {
              preCompactTokenCount,
              outputTokens: result.totalUsage.output_tokens,
              cacheReadInputTokens: result.totalUsage.cache_read_input_tokens,
              cacheCreationInputTokens:
                result.totalUsage.cache_creation_input_tokens,
              cacheHitRate:
                result.totalUsage.cache_read_input_tokens > 0
                  ? result.totalUsage.cache_read_input_tokens /
                    (result.totalUsage.cache_read_input_tokens +
                      result.totalUsage.cache_creation_input_tokens +
                      result.totalUsage.input_tokens)
                  : 0,
            })
          }
          return assistantMsg
        }
        // Fork produced no usable text — fall through to the streaming path.
        logForDebugging(
          `Compact cache sharing: no text in response, falling back. Response: ${jsonStringify(assistantMsg)}`,
          { level: 'warn' },
        )
        logEvent('tengu_compact_cache_sharing_fallback', {
          reason:
            'no_text_response' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          preCompactTokenCount,
        })
      } catch (error) {
        logError(error)
        logEvent('tengu_compact_cache_sharing_fallback', {
          reason:
            'error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          preCompactTokenCount,
        })
      }
    }

    // Regular streaming path (fallback when cache sharing fails or is disabled)
    const retryEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_compact_streaming_retry',
      false,
    )
    const maxAttempts = retryEnabled ? MAX_COMPACT_STREAMING_RETRIES : 1

    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      // Reset state for retry
      let hasStartedStreaming = false
      let response: AssistantMessage | undefined
      context.setResponseLength?.(() => 0)

      // Check if tool search is enabled using the main loop's tools list.
      // context.options.tools includes MCP tools merged via useMergedTools.
      const useToolSearch = await isToolSearchEnabled(
        context.options.mainLoopModel,
        context.options.tools,
        async () => appState.toolPermissionContext,
        context.options.agentDefinitions.activeAgents,
        'compact',
      )

      // When tool search is enabled, include ToolSearchTool and MCP tools. They get
      // defer_loading: true and don't count against context - the API filters them out
      // of system_prompt_tools before token counting (see api/token_count_api/counting.py:188
      // and api/public_api/messages/handler.py:324).
      // Filter MCP tools from context.options.tools (not appState.mcp.tools) so we
      // get the permission-filtered set from useMergedTools — same source used for
      // isToolSearchEnabled above and normalizeMessagesForAPI below.
      // Deduplicate by name to avoid API errors when MCP tools share names with built-in tools.
      const tools: Tool[] = useToolSearch
        ? uniqBy(
            [
              FileReadTool,
              ToolSearchTool,
              ...context.options.tools.filter(t => t.isMcp),
            ],
            'name',
          )
        : [FileReadTool]

      const streamingGen = queryModelWithStreaming({
        messages: normalizeMessagesForAPI(
          stripImagesFromMessages(
            stripReinjectedAttachments([
              ...getMessagesAfterCompactBoundary(messages),
              summaryRequest,
            ]),
          ),
          context.options.tools,
        ),
        systemPrompt: asSystemPrompt([
          'You are a helpful AI assistant tasked with summarizing conversations.',
        ]),
        thinkingConfig: { type: 'disabled' as const },
        tools,
        signal: context.abortController.signal,
        options: {
          async getToolPermissionContext() {
            const appState = context.getAppState()
            return appState.toolPermissionContext
          },
          model: context.options.mainLoopModel,
          toolChoice: undefined,
          isNonInteractiveSession: context.options.isNonInteractiveSession,
          hasAppendSystemPrompt: !!context.options.appendSystemPrompt,
          maxOutputTokensOverride: Math.min(
            COMPACT_MAX_OUTPUT_TOKENS,
            getMaxOutputTokensForModel(context.options.mainLoopModel),
          ),
          querySource: 'compact',
          agents: context.options.agentDefinitions.activeAgents,
          mcpTools: [],
          effortValue: appState.effortValue,
        },
      })
      const streamIter = streamingGen[Symbol.asyncIterator]()
      let next = await streamIter.next()

      // Drain the stream: flip the UI to 'responding' on the first text block,
      // accumulate streamed length for the progress display, and capture the
      // final assistant message.
      while (!next.done) {
        const event = next.value

        if (
          !hasStartedStreaming &&
          event.type === 'stream_event' &&
          event.event.type === 'content_block_start' &&
          event.event.content_block.type === 'text'
        ) {
          hasStartedStreaming = true
          context.setStreamMode?.('responding')
        }

        if (
          event.type === 'stream_event' &&
          event.event.type === 'content_block_delta' &&
          event.event.delta.type === 'text_delta'
        ) {
          const charactersStreamed = event.event.delta.text.length
          context.setResponseLength?.(length => length + charactersStreamed)
        }

        if (event.type === 'assistant') {
          response = event
        }

        next = await streamIter.next()
      }

      if (response) {
        return response
      }

      if (attempt < maxAttempts) {
        logEvent('tengu_compact_streaming_retry', {
          attempt,
          preCompactTokenCount,
          hasStartedStreaming,
        })
        await sleep(getRetryDelay(attempt), context.abortController.signal, {
          abortError: () => new APIUserAbortError(),
        })
        continue
      }

      logForDebugging(
        `Compact streaming failed after ${attempt} attempts. hasStartedStreaming=${hasStartedStreaming}`,
        { level: 'error' },
      )
      logEvent('tengu_compact_failed', {
        reason:
          'no_streaming_response' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        preCompactTokenCount,
        hasStartedStreaming,
        retryEnabled,
        attempts: attempt,
        promptCacheSharingEnabled,
      })
      throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
    }

    // This should never be reached due to the throw above, but TypeScript needs it
    throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
  } finally {
    clearInterval(activityInterval)
  }
}

/**
 * Creates attachment messages for recently accessed files to restore them after compaction.
 * This prevents the model from having to re-read files that were recently accessed.
 * Re-reads files using FileReadTool to get fresh content with proper validation.
 * Files are selected based on recency, but constrained by both file count and token budget limits.
 *
 * Files already present as Read tool results in preservedMessages are skipped —
 * re-injecting identical content the model can already see in the preserved tail
 * is pure waste (up to 25K tok/compact). 
Mirrors the diff-against-preserved1407 * pattern that getDeferredToolsDeltaAttachment uses at the same call sites.1408 *1409 * @param readFileState The current file state tracking recently read files1410 * @param toolUseContext The tool use context for calling FileReadTool1411 * @param maxFiles Maximum number of files to restore (default: 5)1412 * @param preservedMessages Messages kept post-compact; Read results here are skipped1413 * @returns Array of attachment messages for the most recently accessed files that fit within token budget1414 */1415export async function createPostCompactFileAttachments(1416 readFileState: Record<string, { content: string; timestamp: number }>,1417 toolUseContext: ToolUseContext,1418 maxFiles: number,1419 preservedMessages: Message[] = [],1420): Promise<AttachmentMessage[]> {1421 const preservedReadPaths = collectReadToolFilePaths(preservedMessages)1422 const recentFiles = Object.entries(readFileState)1423 .map(([filename, state]) => ({ filename, ...state }))1424 .filter(1425 file =>1426 !shouldExcludeFromPostCompactRestore(1427 file.filename,1428 toolUseContext.agentId,1429 ) && !preservedReadPaths.has(expandPath(file.filename)),1430 )1431 .sort((a, b) => b.timestamp - a.timestamp)1432 .slice(0, maxFiles)14331434 const results = await Promise.all(1435 recentFiles.map(async file => {1436 const attachment = await generateFileAttachment(1437 file.filename,1438 {1439 ...toolUseContext,1440 fileReadingLimits: {1441 maxTokens: POST_COMPACT_MAX_TOKENS_PER_FILE,1442 },1443 },1444 'tengu_post_compact_file_restore_success',1445 'tengu_post_compact_file_restore_error',1446 'compact',1447 )1448 return attachment ? 
createAttachmentMessage(attachment) : null1449 }),1450 )14511452 let usedTokens = 01453 return results.filter((result): result is AttachmentMessage => {1454 if (result === null) {1455 return false1456 }1457 const attachmentTokens = roughTokenCountEstimation(jsonStringify(result))1458 if (usedTokens + attachmentTokens <= POST_COMPACT_TOKEN_BUDGET) {1459 usedTokens += attachmentTokens1460 return true1461 }1462 return false1463 })1464}14651466/**1467 * Creates a plan file attachment if a plan file exists for the current session.1468 * This ensures the plan is preserved after compaction.1469 */1470export function createPlanAttachmentIfNeeded(1471 agentId?: AgentId,1472): AttachmentMessage | null {1473 const planContent = getPlan(agentId)14741475 if (!planContent) {1476 return null1477 }14781479 const planFilePath = getPlanFilePath(agentId)14801481 return createAttachmentMessage({1482 type: 'plan_file_reference',1483 planFilePath,1484 planContent,1485 })1486}14871488/**1489 * Creates an attachment for invoked skills to preserve their content across compaction.1490 * Only includes skills scoped to the given agent (or main session when agentId is null/undefined).1491 * This ensures skill guidelines remain available after the conversation is summarized1492 * without leaking skills from other agent contexts.1493 */1494export function createSkillAttachmentIfNeeded(1495 agentId?: string,1496): AttachmentMessage | null {1497 const invokedSkills = getInvokedSkillsForAgent(agentId)14981499 if (invokedSkills.size === 0) {1500 return null1501 }15021503 // Sorted most-recent-first so budget pressure drops the least-relevant skills.1504 // Per-skill truncation keeps the head of each file (where setup/usage1505 // instructions typically live) rather than dropping whole skills.1506 let usedTokens = 01507 const skills = Array.from(invokedSkills.values())1508 .sort((a, b) => b.invokedAt - a.invokedAt)1509 .map(skill => ({1510 name: skill.skillName,1511 path: skill.skillPath,1512 
content: truncateToTokens(1513 skill.content,1514 POST_COMPACT_MAX_TOKENS_PER_SKILL,1515 ),1516 }))1517 .filter(skill => {1518 const tokens = roughTokenCountEstimation(skill.content)1519 if (usedTokens + tokens > POST_COMPACT_SKILLS_TOKEN_BUDGET) {1520 return false1521 }1522 usedTokens += tokens1523 return true1524 })15251526 if (skills.length === 0) {1527 return null1528 }15291530 return createAttachmentMessage({1531 type: 'invoked_skills',1532 skills,1533 })1534}15351536/**1537 * Creates a plan_mode attachment if the user is currently in plan mode.1538 * This ensures the model continues to operate in plan mode after compaction1539 * (otherwise it would lose the plan mode instructions since those are1540 * normally only injected on tool-use turns via getAttachmentMessages).1541 */1542export async function createPlanModeAttachmentIfNeeded(1543 context: ToolUseContext,1544): Promise<AttachmentMessage | null> {1545 const appState = context.getAppState()1546 if (appState.toolPermissionContext.mode !== 'plan') {1547 return null1548 }15491550 const planFilePath = getPlanFilePath(context.agentId)1551 const planExists = getPlan(context.agentId) !== null15521553 return createAttachmentMessage({1554 type: 'plan_mode',1555 reminderType: 'full',1556 isSubAgent: !!context.agentId,1557 planFilePath,1558 planExists,1559 })1560}15611562/**1563 * Creates attachments for async agents so the model knows about them after1564 * compaction. 
Covers both agents still running in the background (so the model1565 * doesn't spawn a duplicate) and agents that have finished but whose results1566 * haven't been retrieved yet.1567 */1568export async function createAsyncAgentAttachmentsIfNeeded(1569 context: ToolUseContext,1570): Promise<AttachmentMessage[]> {1571 const appState = context.getAppState()1572 const asyncAgents = Object.values(appState.tasks).filter(1573 (task): task is LocalAgentTaskState => task.type === 'local_agent',1574 )15751576 return asyncAgents.flatMap(agent => {1577 if (1578 agent.retrieved ||1579 agent.status === 'pending' ||1580 agent.agentId === context.agentId1581 ) {1582 return []1583 }1584 return [1585 createAttachmentMessage({1586 type: 'task_status',1587 taskId: agent.agentId,1588 taskType: 'local_agent',1589 description: agent.description,1590 status: agent.status,1591 deltaSummary:1592 agent.status === 'running'1593 ? (agent.progress?.summary ?? null)1594 : (agent.error ?? null),1595 outputFilePath: getTaskOutputPath(agent.agentId),1596 }),1597 ]1598 })1599}16001601/**1602 * Scan messages for Read tool_use blocks and collect their file_path inputs1603 * (normalized via expandPath). 
Used to dedup post-compact file restoration1604 * against what's already visible in the preserved tail.1605 *1606 * Skips Reads whose tool_result is a dedup stub — the stub points at an1607 * earlier full Read that may have been compacted away, so we want1608 * createPostCompactFileAttachments to re-inject the real content.1609 */1610function collectReadToolFilePaths(messages: Message[]): Set<string> {1611 const stubIds = new Set<string>()1612 for (const message of messages) {1613 if (message.type !== 'user' || !Array.isArray(message.message.content)) {1614 continue1615 }1616 for (const block of message.message.content) {1617 if (1618 block.type === 'tool_result' &&1619 typeof block.content === 'string' &&1620 block.content.startsWith(FILE_UNCHANGED_STUB)1621 ) {1622 stubIds.add(block.tool_use_id)1623 }1624 }1625 }16261627 const paths = new Set<string>()1628 for (const message of messages) {1629 if (1630 message.type !== 'assistant' ||1631 !Array.isArray(message.message.content)1632 ) {1633 continue1634 }1635 for (const block of message.message.content) {1636 if (1637 block.type !== 'tool_use' ||1638 block.name !== FILE_READ_TOOL_NAME ||1639 stubIds.has(block.id)1640 ) {1641 continue1642 }1643 const input = block.input1644 if (1645 input &&1646 typeof input === 'object' &&1647 'file_path' in input &&1648 typeof input.file_path === 'string'1649 ) {1650 paths.add(expandPath(input.file_path))1651 }1652 }1653 }1654 return paths1655}16561657const SKILL_TRUNCATION_MARKER =1658 '\n\n[... skill content truncated for compaction; use Read on the skill path if you need the full text]'16591660/**1661 * Truncate content to roughly maxTokens, keeping the head. roughTokenCountEstimation1662 * uses ~4 chars/token (its default bytesPerToken), so char budget = maxTokens * 41663 * minus the marker so the result stays within budget. 
Marker tells the model it1664 * can Read the full file if needed.1665 */1666function truncateToTokens(content: string, maxTokens: number): string {1667 if (roughTokenCountEstimation(content) <= maxTokens) {1668 return content1669 }1670 const charBudget = maxTokens * 4 - SKILL_TRUNCATION_MARKER.length1671 return content.slice(0, charBudget) + SKILL_TRUNCATION_MARKER1672}16731674function shouldExcludeFromPostCompactRestore(1675 filename: string,1676 agentId?: AgentId,1677): boolean {1678 const normalizedFilename = expandPath(filename)1679 // Exclude plan files1680 try {1681 const planFilePath = expandPath(getPlanFilePath(agentId))1682 if (normalizedFilename === planFilePath) {1683 return true1684 }1685 } catch {1686 // If we can't get plan file path, continue with other checks1687 }16881689 // Exclude all types of claude.md files1690 // TODO: Refactor to use isMemoryFilePath() from claudemd.ts for consistency1691 // and to also match child directory memory files (.claude/rules/*.md, etc.)1692 try {1693 const normalizedMemoryPaths = new Set(1694 MEMORY_TYPE_VALUES.map(type => expandPath(getMemoryPath(type))),1695 )16961697 if (normalizedMemoryPaths.has(normalizedFilename)) {1698 return true1699 }1700 } catch {1701 // If we can't get memory paths, continue1702 }17031704 return false1705}1706