// src/services/compact/compact.ts
1import { feature } from 'bun:bundle'2import type { UUID } from 'crypto'3import uniqBy from 'lodash-es/uniqBy.js'45/* eslint-disable @typescript-eslint/no-require-imports */6const sessionTranscriptModule = feature('KAIROS')7 ? (require('../sessionTranscript/sessionTranscript.js') as typeof import('../sessionTranscript/sessionTranscript.js'))8 : null910import { APIUserAbortError } from '@anthropic-ai/sdk'11import { markPostCompaction } from 'src/bootstrap/state.js'12import { getInvokedSkillsForAgent } from '../../bootstrap/state.js'13import type { QuerySource } from '../../constants/querySource.js'14import type { CanUseToolFn } from '../../hooks/useCanUseTool.js'15import type { Tool, ToolUseContext } from '../../Tool.js'16import type { LocalAgentTaskState } from '../../tasks/LocalAgentTask/LocalAgentTask.js'17import { FileReadTool } from '../../tools/FileReadTool/FileReadTool.js'18import {19 FILE_READ_TOOL_NAME,20 FILE_UNCHANGED_STUB,21} from '../../tools/FileReadTool/prompt.js'22import { ToolSearchTool } from '../../tools/ToolSearchTool/ToolSearchTool.js'23import type { AgentId } from '../../types/ids.js'24import type {25 AssistantMessage,26 AttachmentMessage,27 HookResultMessage,28 Message,29 PartialCompactDirection,30 SystemCompactBoundaryMessage,31 SystemMessage,32 UserMessage,33} from '../../types/message.js'34import {35 createAttachmentMessage,36 generateFileAttachment,37 getAgentListingDeltaAttachment,38 getDeferredToolsDeltaAttachment,39 getMcpInstructionsDeltaAttachment,40} from '../../utils/attachments.js'41import { getMemoryPath } from '../../utils/config.js'42import { COMPACT_MAX_OUTPUT_TOKENS } from '../../utils/context.js'43import {44 analyzeContext,45 tokenStatsToStatsigMetrics,46} from '../../utils/contextAnalysis.js'47import { logForDebugging } from '../../utils/debug.js'48import { hasExactErrorMessage } from '../../utils/errors.js'49import { cacheToObject } from '../../utils/fileStateCache.js'50import {51 type CacheSafeParams,52 runForkedAgent,53} 
from '../../utils/forkedAgent.js'54import {55 executePostCompactHooks,56 executePreCompactHooks,57} from '../../utils/hooks.js'58import { logError } from '../../utils/log.js'59import { MEMORY_TYPE_VALUES } from '../../utils/memory/types.js'60import {61 createCompactBoundaryMessage,62 createUserMessage,63 getAssistantMessageText,64 getLastAssistantMessage,65 getMessagesAfterCompactBoundary,66 isCompactBoundaryMessage,67 normalizeMessagesForAPI,68} from '../../utils/messages.js'69import { expandPath } from '../../utils/path.js'70import { getPlan, getPlanFilePath } from '../../utils/plans.js'71import {72 isSessionActivityTrackingActive,73 sendSessionActivitySignal,74} from '../../utils/sessionActivity.js'75import { processSessionStartHooks } from '../../utils/sessionStart.js'76import {77 getTranscriptPath,78 reAppendSessionMetadata,79} from '../../utils/sessionStorage.js'80import { sleep } from '../../utils/sleep.js'81import { jsonStringify } from '../../utils/slowOperations.js'82/* eslint-enable @typescript-eslint/no-require-imports */83import { asSystemPrompt } from '../../utils/systemPromptType.js'84import { getTaskOutputPath } from '../../utils/task/diskOutput.js'85import {86 getTokenUsage,87 tokenCountFromLastAPIResponse,88 tokenCountWithEstimation,89} from '../../utils/tokens.js'90import {91 extractDiscoveredToolNames,92 isToolSearchEnabled,93} from '../../utils/toolSearch.js'94import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'95import {96 type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,97 logEvent,98} from '../analytics/index.js'99import {100 getMaxOutputTokensForModel,101 queryModelWithStreaming,102} from '../api/claude.js'103import {104 getPromptTooLongTokenGap,105 PROMPT_TOO_LONG_ERROR_MESSAGE,106 startsWithApiErrorPrefix,107} from '../api/errors.js'108import { notifyCompaction } from '../api/promptCacheBreakDetection.js'109import { getRetryDelay } from '../api/withRetry.js'110import { logPermissionContextForAnts 
} from '../internalLogging.js'111import {112 roughTokenCountEstimation,113 roughTokenCountEstimationForMessages,114} from '../tokenEstimation.js'115import { groupMessagesByApiRound } from './grouping.js'116import {117 getCompactPrompt,118 getCompactUserSummaryMessage,119 getPartialCompactPrompt,120} from './prompt.js'121122export const POST_COMPACT_MAX_FILES_TO_RESTORE = 5123export const POST_COMPACT_TOKEN_BUDGET = 50_000124export const POST_COMPACT_MAX_TOKENS_PER_FILE = 5_000125// Skills can be large (verify=18.7KB, claude-api=20.1KB). Previously re-injected126// unbounded on every compact → 5-10K tok/compact. Per-skill truncation beats127// dropping — instructions at the top of a skill file are usually the critical128// part. Budget sized to hold ~5 skills at the per-skill cap.129export const POST_COMPACT_MAX_TOKENS_PER_SKILL = 5_000130export const POST_COMPACT_SKILLS_TOKEN_BUDGET = 25_000131const MAX_COMPACT_STREAMING_RETRIES = 2132133/**134 * Strip image blocks from user messages before sending for compaction.135 * Images are not needed for generating a conversation summary and can136 * cause the compaction API call itself to hit the prompt-too-long limit,137 * especially in CCD sessions where users frequently attach images.138 * Replaces image blocks with a text marker so the summary still notes139 * that an image was shared.140 *141 * Note: Only user messages contain images (either directly attached or within142 * tool_result content from tools). 
Assistant messages contain text, tool_use,143 * and thinking blocks but not images.144 */145export function stripImagesFromMessages(messages: Message[]): Message[] {146 return messages.map(message => {147 if (message.type !== 'user') {148 return message149 }150151 const content = message.message.content152 if (!Array.isArray(content)) {153 return message154 }155156 let hasMediaBlock = false157 const newContent = content.flatMap(block => {158 if (block.type === 'image') {159 hasMediaBlock = true160 return [{ type: 'text' as const, text: '[image]' }]161 }162 if (block.type === 'document') {163 hasMediaBlock = true164 return [{ type: 'text' as const, text: '[document]' }]165 }166 // Also strip images/documents nested inside tool_result content arrays167 if (block.type === 'tool_result' && Array.isArray(block.content)) {168 let toolHasMedia = false169 const newToolContent = block.content.map(item => {170 if (item.type === 'image') {171 toolHasMedia = true172 return { type: 'text' as const, text: '[image]' }173 }174 if (item.type === 'document') {175 toolHasMedia = true176 return { type: 'text' as const, text: '[document]' }177 }178 return item179 })180 if (toolHasMedia) {181 hasMediaBlock = true182 return [{ ...block, content: newToolContent }]183 }184 }185 return [block]186 })187188 if (!hasMediaBlock) {189 return message190 }191192 return {193 ...message,194 message: {195 ...message.message,196 content: newContent,197 },198 } as typeof message199 })200}201202/**203 * Strip attachment types that are re-injected post-compaction anyway.204 * skill_discovery/skill_listing are re-surfaced by resetSentSkillNames()205 * + the next turn's discovery signal, so feeding them to the summarizer206 * wastes tokens and pollutes the summary with stale skill suggestions.207 *208 * No-op when EXPERIMENTAL_SKILL_SEARCH is off (the attachment types209 * don't exist on external builds).210 */211export function stripReinjectedAttachments(messages: Message[]): Message[] {212 if 
(feature('EXPERIMENTAL_SKILL_SEARCH')) {213 return messages.filter(214 m =>215 !(216 m.type === 'attachment' &&217 (m.attachment.type === 'skill_discovery' ||218 m.attachment.type === 'skill_listing')219 ),220 )221 }222 return messages223}224225export const ERROR_MESSAGE_NOT_ENOUGH_MESSAGES =226 'Not enough messages to compact.'227const MAX_PTL_RETRIES = 3228const PTL_RETRY_MARKER = '[earlier conversation truncated for compaction retry]'229230/**231 * Drops the oldest API-round groups from messages until tokenGap is covered.232 * Falls back to dropping 20% of groups when the gap is unparseable (some233 * Vertex/Bedrock error formats). Returns null when nothing can be dropped234 * without leaving an empty summarize set.235 *236 * This is the last-resort escape hatch for CC-1180 — when the compact request237 * itself hits prompt-too-long, the user is otherwise stuck. Dropping the238 * oldest context is lossy but unblocks them. The reactive-compact path239 * (compactMessages.ts) has the proper retry loop that peels from the tail;240 * this helper is the dumb-but-safe fallback for the proactive/manual path241 * that wasn't migrated in bfdb472f's unification.242 */243export function truncateHeadForPTLRetry(244 messages: Message[],245 ptlResponse: AssistantMessage,246): Message[] | null {247 // Strip our own synthetic marker from a previous retry before grouping.248 // Otherwise it becomes its own group 0 and the 20% fallback stalls249 // (drops only the marker, re-adds it, zero progress on retry 2+).250 const input =251 messages[0]?.type === 'user' &&252 messages[0].isMeta &&253 messages[0].message.content === PTL_RETRY_MARKER254 ? 
messages.slice(1)255 : messages256257 const groups = groupMessagesByApiRound(input)258 if (groups.length < 2) return null259260 const tokenGap = getPromptTooLongTokenGap(ptlResponse)261 let dropCount: number262 if (tokenGap !== undefined) {263 let acc = 0264 dropCount = 0265 for (const g of groups) {266 acc += roughTokenCountEstimationForMessages(g)267 dropCount++268 if (acc >= tokenGap) break269 }270 } else {271 dropCount = Math.max(1, Math.floor(groups.length * 0.2))272 }273274 // Keep at least one group so there's something to summarize.275 dropCount = Math.min(dropCount, groups.length - 1)276 if (dropCount < 1) return null277278 const sliced = groups.slice(dropCount).flat()279 // groupMessagesByApiRound puts the preamble in group 0 and starts every280 // subsequent group with an assistant message. Dropping group 0 leaves an281 // assistant-first sequence which the API rejects (first message must be282 // role=user). Prepend a synthetic user marker — ensureToolResultPairing283 // already handles any orphaned tool_results this creates.284 if (sliced[0]?.type === 'assistant') {285 return [286 createUserMessage({ content: PTL_RETRY_MARKER, isMeta: true }),287 ...sliced,288 ]289 }290 return sliced291}292293export const ERROR_MESSAGE_PROMPT_TOO_LONG =294 'Conversation too long. 
Press esc twice to go up a few messages and try again.'295export const ERROR_MESSAGE_USER_ABORT = 'API Error: Request was aborted.'296export const ERROR_MESSAGE_INCOMPLETE_RESPONSE =297 'Compaction interrupted · This may be due to network issues — please try again.'298299export interface CompactionResult {300 boundaryMarker: SystemMessage301 summaryMessages: UserMessage[]302 attachments: AttachmentMessage[]303 hookResults: HookResultMessage[]304 messagesToKeep?: Message[]305 userDisplayMessage?: string306 preCompactTokenCount?: number307 postCompactTokenCount?: number308 truePostCompactTokenCount?: number309 compactionUsage?: ReturnType<typeof getTokenUsage>310}311312/**313 * Diagnosis context passed from autoCompactIfNeeded into compactConversation.314 * Lets the tengu_compact event disambiguate same-chain loops (H2) from315 * cross-agent (H1/H5) and manual-vs-auto (H3) compactions without joins.316 */317export type RecompactionInfo = {318 isRecompactionInChain: boolean319 turnsSincePreviousCompact: number320 previousCompactTurnId?: string321 autoCompactThreshold: number322 querySource?: QuerySource323}324325/**326 * Build the base post-compact messages array from a CompactionResult.327 * This ensures consistent ordering across all compaction paths.328 * Order: boundaryMarker, summaryMessages, messagesToKeep, attachments, hookResults329 */330export function buildPostCompactMessages(result: CompactionResult): Message[] {331 return [332 result.boundaryMarker,333 ...result.summaryMessages,334 ...(result.messagesToKeep ?? 
[]),335 ...result.attachments,336 ...result.hookResults,337 ]338}339340/**341 * Annotate a compact boundary with relink metadata for messagesToKeep.342 * Preserved messages keep their original parentUuids on disk (dedup-skipped);343 * the loader uses this to patch head→anchor and anchor's-other-children→tail.344 *345 * `anchorUuid` = what sits immediately before keep[0] in the desired chain:346 * - suffix-preserving (reactive/session-memory): last summary message347 * - prefix-preserving (partial compact): the boundary itself348 */349export function annotateBoundaryWithPreservedSegment(350 boundary: SystemCompactBoundaryMessage,351 anchorUuid: UUID,352 messagesToKeep: readonly Message[] | undefined,353): SystemCompactBoundaryMessage {354 const keep = messagesToKeep ?? []355 if (keep.length === 0) return boundary356 return {357 ...boundary,358 compactMetadata: {359 ...boundary.compactMetadata,360 preservedSegment: {361 headUuid: keep[0]!.uuid,362 anchorUuid,363 tailUuid: keep.at(-1)!.uuid,364 },365 },366 }367}368369/**370 * Merges user-supplied custom instructions with hook-provided instructions.371 * User instructions come first; hook instructions are appended.372 * Empty strings normalize to undefined.373 */374export function mergeHookInstructions(375 userInstructions: string | undefined,376 hookInstructions: string | undefined,377): string | undefined {378 if (!hookInstructions) return userInstructions || undefined379 if (!userInstructions) return hookInstructions380 return `${userInstructions}\n\n${hookInstructions}`381}382383/**384 * Creates a compact version of a conversation by summarizing older messages385 * and preserving recent conversation history.386 */387export async function compactConversation(388 messages: Message[],389 context: ToolUseContext,390 cacheSafeParams: CacheSafeParams,391 suppressFollowUpQuestions: boolean,392 customInstructions?: string,393 isAutoCompact: boolean = false,394 recompactionInfo?: RecompactionInfo,395): 
Promise<CompactionResult> {396 try {397 if (messages.length === 0) {398 throw new Error(ERROR_MESSAGE_NOT_ENOUGH_MESSAGES)399 }400401 const preCompactTokenCount = tokenCountWithEstimation(messages)402403 const appState = context.getAppState()404 void logPermissionContextForAnts(appState.toolPermissionContext, 'summary')405406 context.onCompactProgress?.({407 type: 'hooks_start',408 hookType: 'pre_compact',409 })410411 // Execute PreCompact hooks412 context.setSDKStatus?.('compacting')413 const hookResult = await executePreCompactHooks(414 {415 trigger: isAutoCompact ? 'auto' : 'manual',416 customInstructions: customInstructions ?? null,417 },418 context.abortController.signal,419 )420 customInstructions = mergeHookInstructions(421 customInstructions,422 hookResult.newCustomInstructions,423 )424 const userDisplayMessage = hookResult.userDisplayMessage425426 // Show requesting mode with up arrow and custom message427 context.setStreamMode?.('requesting')428 context.setResponseLength?.(() => 0)429 context.onCompactProgress?.({ type: 'compact_start' })430431 // 3P default: true — forked-agent path reuses main conversation's prompt cache.432 // Experiment (Jan 2026) confirmed: false path is 98% cache miss, costs ~0.76% of433 // fleet cache_creation (~38B tok/day), concentrated in ephemeral envs (CCR/GHA/SDK)434 // with cold GB cache and 3P providers where GB is disabled. 
GB gate kept as kill-switch.435 const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE(436 'tengu_compact_cache_prefix',437 true,438 )439440 const compactPrompt = getCompactPrompt(customInstructions)441 const summaryRequest = createUserMessage({442 content: compactPrompt,443 })444445 let messagesToSummarize = messages446 let retryCacheSafeParams = cacheSafeParams447 let summaryResponse: AssistantMessage448 let summary: string | null449 let ptlAttempts = 0450 for (;;) {451 summaryResponse = await streamCompactSummary({452 messages: messagesToSummarize,453 summaryRequest,454 appState,455 context,456 preCompactTokenCount,457 cacheSafeParams: retryCacheSafeParams,458 })459 summary = getAssistantMessageText(summaryResponse)460 if (!summary?.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) break461462 // CC-1180: compact request itself hit prompt-too-long. Truncate the463 // oldest API-round groups and retry rather than leaving the user stuck.464 ptlAttempts++465 const truncated =466 ptlAttempts <= MAX_PTL_RETRIES467 ? truncateHeadForPTLRetry(messagesToSummarize, summaryResponse)468 : null469 if (!truncated) {470 logEvent('tengu_compact_failed', {471 reason:472 'prompt_too_long' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,473 preCompactTokenCount,474 promptCacheSharingEnabled,475 ptlAttempts,476 })477 throw new Error(ERROR_MESSAGE_PROMPT_TOO_LONG)478 }479 logEvent('tengu_compact_ptl_retry', {480 attempt: ptlAttempts,481 droppedMessages: messagesToSummarize.length - truncated.length,482 remainingMessages: truncated.length,483 })484 messagesToSummarize = truncated485 // The forked-agent path reads from cacheSafeParams.forkContextMessages,486 // not the messages param — thread the truncated set through both paths.487 retryCacheSafeParams = {488 ...retryCacheSafeParams,489 forkContextMessages: truncated,490 }491 }492493 if (!summary) {494 logForDebugging(495 `Compact failed: no summary text in response. 
Response: ${jsonStringify(summaryResponse)}`,496 { level: 'error' },497 )498 logEvent('tengu_compact_failed', {499 reason:500 'no_summary' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,501 preCompactTokenCount,502 promptCacheSharingEnabled,503 })504 throw new Error(505 `Failed to generate conversation summary - response did not contain valid text content`,506 )507 } else if (startsWithApiErrorPrefix(summary)) {508 logEvent('tengu_compact_failed', {509 reason:510 'api_error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,511 preCompactTokenCount,512 promptCacheSharingEnabled,513 })514 throw new Error(summary)515 }516517 // Store the current file state before clearing518 const preCompactReadFileState = cacheToObject(context.readFileState)519520 // Clear the cache521 context.readFileState.clear()522 context.loadedNestedMemoryPaths?.clear()523524 // Intentionally NOT resetting sentSkillNames: re-injecting the full525 // skill_listing (~4K tokens) post-compact is pure cache_creation with526 // marginal benefit. The model still has SkillTool in its schema and527 // invoked_skills attachment (below) preserves used-skill content. 
Ants528 // with EXPERIMENTAL_SKILL_SEARCH already skip re-injection via the529 // early-return in getSkillListingAttachments.530531 // Run async attachment generation in parallel532 const [fileAttachments, asyncAgentAttachments] = await Promise.all([533 createPostCompactFileAttachments(534 preCompactReadFileState,535 context,536 POST_COMPACT_MAX_FILES_TO_RESTORE,537 ),538 createAsyncAgentAttachmentsIfNeeded(context),539 ])540541 const postCompactFileAttachments: AttachmentMessage[] = [542 ...fileAttachments,543 ...asyncAgentAttachments,544 ]545 const planAttachment = createPlanAttachmentIfNeeded(context.agentId)546 if (planAttachment) {547 postCompactFileAttachments.push(planAttachment)548 }549550 // Add plan mode instructions if currently in plan mode, so the model551 // continues operating in plan mode after compaction552 const planModeAttachment = await createPlanModeAttachmentIfNeeded(context)553 if (planModeAttachment) {554 postCompactFileAttachments.push(planModeAttachment)555 }556557 // Add skill attachment if skills were invoked in this session558 const skillAttachment = createSkillAttachmentIfNeeded(context.agentId)559 if (skillAttachment) {560 postCompactFileAttachments.push(skillAttachment)561 }562563 // Compaction ate prior delta attachments. Re-announce from the current564 // state so the model has tool/instruction context on the first565 // post-compact turn. 
Empty message history → diff against nothing →566 // announces the full set.567 for (const att of getDeferredToolsDeltaAttachment(568 context.options.tools,569 context.options.mainLoopModel,570 [],571 { callSite: 'compact_full' },572 )) {573 postCompactFileAttachments.push(createAttachmentMessage(att))574 }575 for (const att of getAgentListingDeltaAttachment(context, [])) {576 postCompactFileAttachments.push(createAttachmentMessage(att))577 }578 for (const att of getMcpInstructionsDeltaAttachment(579 context.options.mcpClients,580 context.options.tools,581 context.options.mainLoopModel,582 [],583 )) {584 postCompactFileAttachments.push(createAttachmentMessage(att))585 }586587 context.onCompactProgress?.({588 type: 'hooks_start',589 hookType: 'session_start',590 })591 // Execute SessionStart hooks after successful compaction592 const hookMessages = await processSessionStartHooks('compact', {593 model: context.options.mainLoopModel,594 })595596 // Create the compact boundary marker and summary messages before the597 // event so we can compute the true resulting-context size.598 const boundaryMarker = createCompactBoundaryMessage(599 isAutoCompact ? 'auto' : 'manual',600 preCompactTokenCount ?? 
0,601 messages.at(-1)?.uuid,602 )603 // Carry loaded-tool state — the summary doesn't preserve tool_reference604 // blocks, so the post-compact schema filter needs this to keep sending605 // already-loaded deferred tool schemas to the API.606 const preCompactDiscovered = extractDiscoveredToolNames(messages)607 if (preCompactDiscovered.size > 0) {608 boundaryMarker.compactMetadata.preCompactDiscoveredTools = [609 ...preCompactDiscovered,610 ].sort()611 }612613 const transcriptPath = getTranscriptPath()614 const summaryMessages: UserMessage[] = [615 createUserMessage({616 content: getCompactUserSummaryMessage(617 summary,618 suppressFollowUpQuestions,619 transcriptPath,620 ),621 isCompactSummary: true,622 isVisibleInTranscriptOnly: true,623 }),624 ]625626 // Previously "postCompactTokenCount" — renamed because this is the627 // compact API call's total usage (input_tokens ≈ preCompactTokenCount),628 // NOT the size of the resulting context. Kept for event-field continuity.629 const compactionCallTotalTokens = tokenCountFromLastAPIResponse([630 summaryResponse,631 ])632633 // Message-payload estimate of the resulting context. The next iteration's634 // shouldAutoCompact will see this PLUS ~20-40K for system prompt + tools +635 // userContext (via API usage.input_tokens). So `willRetriggerNextTurn: true`636 // is a strong signal; `false` may still retrigger when this is close to threshold.637 const truePostCompactTokenCount = roughTokenCountEstimationForMessages([638 boundaryMarker,639 ...summaryMessages,640 ...postCompactFileAttachments,641 ...hookMessages,642 ])643644 // Extract compaction API usage metrics645 const compactionUsage = getTokenUsage(summaryResponse)646647 const querySourceForEvent =648 recompactionInfo?.querySource ?? context.options.querySource ?? 
'unknown'649650 logEvent('tengu_compact', {651 preCompactTokenCount,652 // Kept for continuity — semantically the compact API call's total usage653 postCompactTokenCount: compactionCallTotalTokens,654 truePostCompactTokenCount,655 autoCompactThreshold: recompactionInfo?.autoCompactThreshold ?? -1,656 willRetriggerNextTurn:657 recompactionInfo !== undefined &&658 truePostCompactTokenCount >= recompactionInfo.autoCompactThreshold,659 isAutoCompact,660 querySource:661 querySourceForEvent as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,662 queryChainId: (context.queryTracking?.chainId ??663 '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,664 queryDepth: context.queryTracking?.depth ?? -1,665 isRecompactionInChain: recompactionInfo?.isRecompactionInChain ?? false,666 turnsSincePreviousCompact:667 recompactionInfo?.turnsSincePreviousCompact ?? -1,668 previousCompactTurnId: (recompactionInfo?.previousCompactTurnId ??669 '') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,670 compactionInputTokens: compactionUsage?.input_tokens,671 compactionOutputTokens: compactionUsage?.output_tokens,672 compactionCacheReadTokens: compactionUsage?.cache_read_input_tokens ?? 0,673 compactionCacheCreationTokens:674 compactionUsage?.cache_creation_input_tokens ?? 0,675 compactionTotalTokens: compactionUsage676 ? compactionUsage.input_tokens +677 (compactionUsage.cache_creation_input_tokens ?? 0) +678 (compactionUsage.cache_read_input_tokens ?? 0) +679 compactionUsage.output_tokens680 : 0,681 promptCacheSharingEnabled,682 // analyzeContext walks every content block (~11ms on a 4.5K-message683 // session) purely for this telemetry breakdown. Computed here, past684 // the compaction-API await, so the sync walk doesn't starve the685 // render loop before compaction even starts. 
Same deferral pattern686 // as reactiveCompact.ts.687 ...(() => {688 try {689 return tokenStatsToStatsigMetrics(analyzeContext(messages))690 } catch (error) {691 logError(error as Error)692 return {}693 }694 })(),695 })696697 // Reset cache read baseline so the post-compact drop isn't flagged as a break698 if (feature('PROMPT_CACHE_BREAK_DETECTION')) {699 notifyCompaction(700 context.options.querySource ?? 'compact',701 context.agentId,702 )703 }704 markPostCompaction()705706 // Re-append session metadata (custom title, tag) so it stays within707 // the 16KB tail window that readLiteMetadata reads for --resume display.708 // Without this, enough post-compaction messages push the metadata entry709 // out of the window, causing --resume to show the auto-generated title710 // instead of the user-set session name.711 reAppendSessionMetadata()712713 // Write a reduced transcript segment for the pre-compaction messages714 // (assistant mode only). Fire-and-forget — errors are logged internally.715 if (feature('KAIROS')) {716 void sessionTranscriptModule?.writeSessionTranscriptSegment(messages)717 }718719 context.onCompactProgress?.({720 type: 'hooks_start',721 hookType: 'post_compact',722 })723 const postCompactHookResult = await executePostCompactHooks(724 {725 trigger: isAutoCompact ? 
'auto' : 'manual',726 compactSummary: summary,727 },728 context.abortController.signal,729 )730731 const combinedUserDisplayMessage = [732 userDisplayMessage,733 postCompactHookResult.userDisplayMessage,734 ]735 .filter(Boolean)736 .join('\n')737738 return {739 boundaryMarker,740 summaryMessages,741 attachments: postCompactFileAttachments,742 hookResults: hookMessages,743 userDisplayMessage: combinedUserDisplayMessage || undefined,744 preCompactTokenCount,745 postCompactTokenCount: compactionCallTotalTokens,746 truePostCompactTokenCount,747 compactionUsage,748 }749 } catch (error) {750 // Only show the error notification for manual /compact.751 // Auto-compact failures are retried on the next turn and the752 // notification is confusing when compaction eventually succeeds.753 if (!isAutoCompact) {754 addErrorNotificationIfNeeded(error, context)755 }756 throw error757 } finally {758 context.setStreamMode?.('requesting')759 context.setResponseLength?.(() => 0)760 context.onCompactProgress?.({ type: 'compact_end' })761 context.setSDKStatus?.(null)762 }763}764765/**766 * Performs a partial compaction around the selected message index.767 * Direction 'from': summarizes messages after the index, keeps earlier ones.768 * Prompt cache for kept (earlier) messages is preserved.769 * Direction 'up_to': summarizes messages before the index, keeps later ones.770 * Prompt cache is invalidated since the summary precedes the kept messages.771 */772export async function partialCompactConversation(773 allMessages: Message[],774 pivotIndex: number,775 context: ToolUseContext,776 cacheSafeParams: CacheSafeParams,777 userFeedback?: string,778 direction: PartialCompactDirection = 'from',779): Promise<CompactionResult> {780 try {781 const messagesToSummarize =782 direction === 'up_to'783 ? 
allMessages.slice(0, pivotIndex)784 : allMessages.slice(pivotIndex)785 // 'up_to' must strip old compact boundaries/summaries: for 'up_to',786 // summary_B sits BEFORE kept, so a stale boundary_A in kept wins787 // findLastCompactBoundaryIndex's backward scan and drops summary_B.788 // 'from' keeps them: summary_B sits AFTER kept (backward scan still789 // works), and removing an old summary would lose its covered history.790 const messagesToKeep =791 direction === 'up_to'792 ? allMessages793 .slice(pivotIndex)794 .filter(795 m =>796 m.type !== 'progress' &&797 !isCompactBoundaryMessage(m) &&798 !(m.type === 'user' && m.isCompactSummary),799 )800 : allMessages.slice(0, pivotIndex).filter(m => m.type !== 'progress')801802 if (messagesToSummarize.length === 0) {803 throw new Error(804 direction === 'up_to'805 ? 'Nothing to summarize before the selected message.'806 : 'Nothing to summarize after the selected message.',807 )808 }809810 const preCompactTokenCount = tokenCountWithEstimation(allMessages)811812 context.onCompactProgress?.({813 type: 'hooks_start',814 hookType: 'pre_compact',815 })816817 context.setSDKStatus?.('compacting')818 const hookResult = await executePreCompactHooks(819 {820 trigger: 'manual',821 customInstructions: null,822 },823 context.abortController.signal,824 )825826 // Merge hook instructions with user feedback827 let customInstructions: string | undefined828 if (hookResult.newCustomInstructions && userFeedback) {829 customInstructions = `${hookResult.newCustomInstructions}\n\nUser context: ${userFeedback}`830 } else if (hookResult.newCustomInstructions) {831 customInstructions = hookResult.newCustomInstructions832 } else if (userFeedback) {833 customInstructions = `User context: ${userFeedback}`834 }835836 context.setStreamMode?.('requesting')837 context.setResponseLength?.(() => 0)838 context.onCompactProgress?.({ type: 'compact_start' })839840 const compactPrompt = getPartialCompactPrompt(customInstructions, direction)841 const 
summaryRequest = createUserMessage({842 content: compactPrompt,843 })844845 const failureMetadata = {846 preCompactTokenCount,847 direction:848 direction as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,849 messagesSummarized: messagesToSummarize.length,850 }851852 // 'up_to' prefix hits cache directly; 'from' sends all (tail wouldn't cache).853 // PTL retry breaks the cache prefix but unblocks the user (CC-1180).854 let apiMessages = direction === 'up_to' ? messagesToSummarize : allMessages855 let retryCacheSafeParams =856 direction === 'up_to'857 ? { ...cacheSafeParams, forkContextMessages: messagesToSummarize }858 : cacheSafeParams859 let summaryResponse: AssistantMessage860 let summary: string | null861 let ptlAttempts = 0862 for (;;) {863 summaryResponse = await streamCompactSummary({864 messages: apiMessages,865 summaryRequest,866 appState: context.getAppState(),867 context,868 preCompactTokenCount,869 cacheSafeParams: retryCacheSafeParams,870 })871 summary = getAssistantMessageText(summaryResponse)872 if (!summary?.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) break873874 ptlAttempts++875 const truncated =876 ptlAttempts <= MAX_PTL_RETRIES877 ? 
truncateHeadForPTLRetry(apiMessages, summaryResponse)878 : null879 if (!truncated) {880 logEvent('tengu_partial_compact_failed', {881 reason:882 'prompt_too_long' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,883 ...failureMetadata,884 ptlAttempts,885 })886 throw new Error(ERROR_MESSAGE_PROMPT_TOO_LONG)887 }888 logEvent('tengu_compact_ptl_retry', {889 attempt: ptlAttempts,890 droppedMessages: apiMessages.length - truncated.length,891 remainingMessages: truncated.length,892 path: 'partial' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,893 })894 apiMessages = truncated895 retryCacheSafeParams = {896 ...retryCacheSafeParams,897 forkContextMessages: truncated,898 }899 }900 if (!summary) {901 logEvent('tengu_partial_compact_failed', {902 reason:903 'no_summary' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,904 ...failureMetadata,905 })906 throw new Error(907 'Failed to generate conversation summary - response did not contain valid text content',908 )909 } else if (startsWithApiErrorPrefix(summary)) {910 logEvent('tengu_partial_compact_failed', {911 reason:912 'api_error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,913 ...failureMetadata,914 })915 throw new Error(summary)916 }917918 // Store the current file state before clearing919 const preCompactReadFileState = cacheToObject(context.readFileState)920 context.readFileState.clear()921 context.loadedNestedMemoryPaths?.clear()922 // Intentionally NOT resetting sentSkillNames — see compactConversation()923 // for rationale (~4K tokens saved per compact event).924925 const [fileAttachments, asyncAgentAttachments] = await Promise.all([926 createPostCompactFileAttachments(927 preCompactReadFileState,928 context,929 POST_COMPACT_MAX_FILES_TO_RESTORE,930 messagesToKeep,931 ),932 createAsyncAgentAttachmentsIfNeeded(context),933 ])934935 const postCompactFileAttachments: AttachmentMessage[] = [936 ...fileAttachments,937 ...asyncAgentAttachments,938 ]939 const 
planAttachment = createPlanAttachmentIfNeeded(context.agentId)940 if (planAttachment) {941 postCompactFileAttachments.push(planAttachment)942 }943944 // Add plan mode instructions if currently in plan mode945 const planModeAttachment = await createPlanModeAttachmentIfNeeded(context)946 if (planModeAttachment) {947 postCompactFileAttachments.push(planModeAttachment)948 }949950 const skillAttachment = createSkillAttachmentIfNeeded(context.agentId)951 if (skillAttachment) {952 postCompactFileAttachments.push(skillAttachment)953 }954955 // Re-announce only what was in the summarized portion — messagesToKeep956 // is scanned, so anything already announced there is skipped.957 for (const att of getDeferredToolsDeltaAttachment(958 context.options.tools,959 context.options.mainLoopModel,960 messagesToKeep,961 { callSite: 'compact_partial' },962 )) {963 postCompactFileAttachments.push(createAttachmentMessage(att))964 }965 for (const att of getAgentListingDeltaAttachment(context, messagesToKeep)) {966 postCompactFileAttachments.push(createAttachmentMessage(att))967 }968 for (const att of getMcpInstructionsDeltaAttachment(969 context.options.mcpClients,970 context.options.tools,971 context.options.mainLoopModel,972 messagesToKeep,973 )) {974 postCompactFileAttachments.push(createAttachmentMessage(att))975 }976977 context.onCompactProgress?.({978 type: 'hooks_start',979 hookType: 'session_start',980 })981 const hookMessages = await processSessionStartHooks('compact', {982 model: context.options.mainLoopModel,983 })984985 const postCompactTokenCount = tokenCountFromLastAPIResponse([986 summaryResponse,987 ])988 const compactionUsage = getTokenUsage(summaryResponse)989990 logEvent('tengu_partial_compact', {991 preCompactTokenCount,992 postCompactTokenCount,993 messagesKept: messagesToKeep.length,994 messagesSummarized: messagesToSummarize.length,995 direction:996 direction as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,997 hasUserFeedback: !!userFeedback,998 
trigger:999 'message_selector' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,1000 compactionInputTokens: compactionUsage?.input_tokens,1001 compactionOutputTokens: compactionUsage?.output_tokens,1002 compactionCacheReadTokens: compactionUsage?.cache_read_input_tokens ?? 0,1003 compactionCacheCreationTokens:1004 compactionUsage?.cache_creation_input_tokens ?? 0,1005 })10061007 // Progress messages aren't loggable, so forkSessionImpl would null out1008 // a logicalParentUuid pointing at one. Both directions skip them.1009 const lastPreCompactUuid =1010 direction === 'up_to'1011 ? allMessages.slice(0, pivotIndex).findLast(m => m.type !== 'progress')1012 ?.uuid1013 : messagesToKeep.at(-1)?.uuid1014 const boundaryMarker = createCompactBoundaryMessage(1015 'manual',1016 preCompactTokenCount ?? 0,1017 lastPreCompactUuid,1018 userFeedback,1019 messagesToSummarize.length,1020 )1021 // allMessages not just messagesToSummarize — set union is idempotent,1022 // simpler than tracking which half each tool lived in.1023 const preCompactDiscovered = extractDiscoveredToolNames(allMessages)1024 if (preCompactDiscovered.size > 0) {1025 boundaryMarker.compactMetadata.preCompactDiscoveredTools = [1026 ...preCompactDiscovered,1027 ].sort()1028 }10291030 const transcriptPath = getTranscriptPath()1031 const summaryMessages: UserMessage[] = [1032 createUserMessage({1033 content: getCompactUserSummaryMessage(summary, false, transcriptPath),1034 isCompactSummary: true,1035 ...(messagesToKeep.length > 01036 ? {1037 summarizeMetadata: {1038 messagesSummarized: messagesToSummarize.length,1039 userContext: userFeedback,1040 direction,1041 },1042 }1043 : { isVisibleInTranscriptOnly: true as const }),1044 }),1045 ]10461047 if (feature('PROMPT_CACHE_BREAK_DETECTION')) {1048 notifyCompaction(1049 context.options.querySource ?? 
        'compact',
        context.agentId,
      )
    }
    markPostCompaction()

    // Re-append session metadata (custom title, tag) so it stays within
    // the 16KB tail window that readLiteMetadata reads for --resume display.
    reAppendSessionMetadata()

    // KAIROS: persist the summarized slice to the session transcript before it
    // drops out of the live message list.
    if (feature('KAIROS')) {
      void sessionTranscriptModule?.writeSessionTranscriptSegment(
        messagesToSummarize,
      )
    }

    context.onCompactProgress?.({
      type: 'hooks_start',
      hookType: 'post_compact',
    })
    const postCompactHookResult = await executePostCompactHooks(
      {
        trigger: 'manual',
        compactSummary: summary,
      },
      context.abortController.signal,
    )

    // 'from': prefix-preserving → boundary; 'up_to': suffix → last summary
    const anchorUuid =
      direction === 'up_to'
        ? (summaryMessages.at(-1)?.uuid ?? boundaryMarker.uuid)
        : boundaryMarker.uuid
    return {
      boundaryMarker: annotateBoundaryWithPreservedSegment(
        boundaryMarker,
        anchorUuid,
        messagesToKeep,
      ),
      summaryMessages,
      messagesToKeep,
      attachments: postCompactFileAttachments,
      hookResults: hookMessages,
      userDisplayMessage: postCompactHookResult.userDisplayMessage,
      preCompactTokenCount,
      postCompactTokenCount,
      compactionUsage,
    }
  } catch (error) {
    addErrorNotificationIfNeeded(error, context)
    throw error
  } finally {
    // Always restore UI/stream state, even on abort or failure.
    context.setStreamMode?.('requesting')
    context.setResponseLength?.(() => 0)
    context.onCompactProgress?.({ type: 'compact_end' })
    context.setSDKStatus?.(null)
  }
}

/**
 * Shows an "Error compacting conversation" notification unless the failure is
 * one the user expects and initiated or already understands: an explicit user
 * abort, or the known "not enough messages" case.
 */
function addErrorNotificationIfNeeded(
  error: unknown,
  context: Pick<ToolUseContext, 'addNotification'>,
) {
  if (
    !hasExactErrorMessage(error, ERROR_MESSAGE_USER_ABORT) &&
    !hasExactErrorMessage(error, ERROR_MESSAGE_NOT_ENOUGH_MESSAGES)
  ) {
    context.addNotification?.({
      key: 'error-compacting-conversation',
      text: 'Error compacting conversation',
      priority: 'immediate',
      color: 'error',
    })
  }
}

/**
 * Permission callback for the compaction agent: denies every tool request so
 * the summarizer can only produce a text summary.
 */
export function createCompactCanUseTool(): CanUseToolFn {
  return async () => ({
    behavior: 'deny' as const,
    message: 'Tool use is not allowed during compaction',
    decisionReason: {
      type: 'other' as const,
      reason: 'compaction agent should only produce text summary',
    },
  })
}

/**
 * Generates the compaction summary for `messages` by sending `summaryRequest`
 * to the model. Two paths:
 *  1. Forked-agent path (when `tengu_compact_cache_prefix` is on): reuses the
 *     main conversation's prompt-cache prefix via runForkedAgent.
 *  2. Direct streaming fallback, optionally retried when
 *     `tengu_compact_streaming_retry` is on.
 *
 * May return an assistant message whose text starts with
 * PROMPT_TOO_LONG_ERROR_MESSAGE — the caller's PTL retry loop handles that.
 *
 * @throws Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE) when streaming completes
 *   without ever yielding an assistant message.
 */
async function streamCompactSummary({
  messages,
  summaryRequest,
  appState,
  context,
  preCompactTokenCount,
  cacheSafeParams,
}: {
  messages: Message[]
  summaryRequest: UserMessage
  appState: Awaited<ReturnType<ToolUseContext['getAppState']>>
  context: ToolUseContext
  preCompactTokenCount: number
  cacheSafeParams: CacheSafeParams
}): Promise<AssistantMessage> {
  // When prompt cache sharing is enabled, use forked agent to reuse the
  // main conversation's cached prefix (system prompt, tools, context messages).
  // Falls back to regular streaming path on failure.
  // 3P default: true — see comment at the other tengu_compact_cache_prefix read above.
  const promptCacheSharingEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_compact_cache_prefix',
    true,
  )
  // Send keep-alive signals during compaction to prevent remote session
  // WebSocket idle timeouts from dropping bridge connections. Compaction
  // API calls can take 5-10+ seconds, during which no other messages
  // flow through the transport — without keep-alives, the server may
  // close the WebSocket for inactivity.
  // Two signals: (1) PUT /worker heartbeat via sessionActivity, and
  // (2) re-emit 'compacting' status so the SDK event stream stays active
  // and the server doesn't consider the session stale.
  // Note: setInterval forwards trailing arguments to the callback, so
  // `statusSetter` below receives `context.setSDKStatus`.
  const activityInterval = isSessionActivityTrackingActive()
    ? setInterval(
        (statusSetter?: (status: 'compacting' | null) => void) => {
          sendSessionActivitySignal()
          statusSetter?.('compacting')
        },
        30_000,
        context.setSDKStatus,
      )
    : undefined

  try {
    if (promptCacheSharingEnabled) {
      try {
        // DO NOT set maxOutputTokens here. The fork piggybacks on the main thread's
        // prompt cache by sending identical cache-key params (system, tools, model,
        // messages prefix, thinking config). Setting maxOutputTokens would clamp
        // budget_tokens via Math.min(budget, maxOutputTokens-1) in claude.ts,
        // creating a thinking config mismatch that invalidates the cache.
        // The streaming fallback path (below) can safely set maxOutputTokensOverride
        // since it doesn't share cache with the main thread.
        const result = await runForkedAgent({
          promptMessages: [summaryRequest],
          cacheSafeParams,
          canUseTool: createCompactCanUseTool(),
          querySource: 'compact',
          forkLabel: 'compact',
          maxTurns: 1,
          skipCacheWrite: true,
          // Pass the compact context's abortController so user Esc aborts the
          // fork — same signal the streaming fallback uses at
          // `signal: context.abortController.signal` below.
          overrides: { abortController: context.abortController },
        })
        const assistantMsg = getLastAssistantMessage(result.messages)
        const assistantText = assistantMsg
          ? getAssistantMessageText(assistantMsg)
          : null
        // Guard isApiErrorMessage: query() catches API errors (including
        // APIUserAbortError on ESC) and yields them as synthetic assistant
        // messages. Without this check, an aborted compact "succeeds" with
        // "Request was aborted." as the summary — the text doesn't start with
        // "API Error" so the caller's startsWithApiErrorPrefix guard misses it.
        if (assistantMsg && assistantText && !assistantMsg.isApiErrorMessage) {
          // Skip success logging for PTL error text — it's returned so the
          // caller's retry loop catches it, but it's not a successful summary.
          if (!assistantText.startsWith(PROMPT_TOO_LONG_ERROR_MESSAGE)) {
            logEvent('tengu_compact_cache_sharing_success', {
              preCompactTokenCount,
              outputTokens: result.totalUsage.output_tokens,
              cacheReadInputTokens: result.totalUsage.cache_read_input_tokens,
              cacheCreationInputTokens:
                result.totalUsage.cache_creation_input_tokens,
              cacheHitRate:
                result.totalUsage.cache_read_input_tokens > 0
                  ? result.totalUsage.cache_read_input_tokens /
                    (result.totalUsage.cache_read_input_tokens +
                      result.totalUsage.cache_creation_input_tokens +
                      result.totalUsage.input_tokens)
                  : 0,
            })
          }
          return assistantMsg
        }
        // Fork produced no usable text — fall through to the streaming path.
        logForDebugging(
          `Compact cache sharing: no text in response, falling back. Response: ${jsonStringify(assistantMsg)}`,
          { level: 'warn' },
        )
        logEvent('tengu_compact_cache_sharing_fallback', {
          reason:
            'no_text_response' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          preCompactTokenCount,
        })
      } catch (error) {
        logError(error)
        logEvent('tengu_compact_cache_sharing_fallback', {
          reason:
            'error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          preCompactTokenCount,
        })
      }
    }

    // Regular streaming path (fallback when cache sharing fails or is disabled)
    const retryEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_compact_streaming_retry',
      false,
    )
    const maxAttempts = retryEnabled ? MAX_COMPACT_STREAMING_RETRIES : 1

    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      // Reset state for retry
      let hasStartedStreaming = false
      let response: AssistantMessage | undefined
      context.setResponseLength?.(() => 0)

      // Check if tool search is enabled using the main loop's tools list.
      // context.options.tools includes MCP tools merged via useMergedTools.
      const useToolSearch = await isToolSearchEnabled(
        context.options.mainLoopModel,
        context.options.tools,
        async () => appState.toolPermissionContext,
        context.options.agentDefinitions.activeAgents,
        'compact',
      )

      // When tool search is enabled, include ToolSearchTool and MCP tools. They get
      // defer_loading: true and don't count against context - the API filters them out
      // of system_prompt_tools before token counting (see api/token_count_api/counting.py:188
      // and api/public_api/messages/handler.py:324).
      // Filter MCP tools from context.options.tools (not appState.mcp.tools) so we
      // get the permission-filtered set from useMergedTools — same source used for
      // isToolSearchEnabled above and normalizeMessagesForAPI below.
      // Deduplicate by name to avoid API errors when MCP tools share names with built-in tools.
      const tools: Tool[] = useToolSearch
        ? uniqBy(
            [
              FileReadTool,
              ToolSearchTool,
              ...context.options.tools.filter(t => t.isMcp),
            ],
            'name',
          )
        : [FileReadTool]

      const streamingGen = queryModelWithStreaming({
        messages: normalizeMessagesForAPI(
          stripImagesFromMessages(
            stripReinjectedAttachments([
              ...getMessagesAfterCompactBoundary(messages),
              summaryRequest,
            ]),
          ),
          context.options.tools,
        ),
        systemPrompt: asSystemPrompt([
          'You are a helpful AI assistant tasked with summarizing conversations.',
        ]),
        thinkingConfig: { type: 'disabled' as const },
        tools,
        signal: context.abortController.signal,
        options: {
          async getToolPermissionContext() {
            const appState = context.getAppState()
            return appState.toolPermissionContext
          },
          model: context.options.mainLoopModel,
          toolChoice: undefined,
          isNonInteractiveSession: context.options.isNonInteractiveSession,
          hasAppendSystemPrompt: !!context.options.appendSystemPrompt,
          maxOutputTokensOverride: Math.min(
            COMPACT_MAX_OUTPUT_TOKENS,
            getMaxOutputTokensForModel(context.options.mainLoopModel),
          ),
          querySource: 'compact',
          agents: context.options.agentDefinitions.activeAgents,
          mcpTools: [],
          effortValue: appState.effortValue,
        },
      })
      const streamIter = streamingGen[Symbol.asyncIterator]()
      let next = await streamIter.next()

      // Drain the stream: flip the UI to 'responding' on the first text block,
      // accumulate streamed length for the progress display, and capture the
      // final assistant message.
      while (!next.done) {
        const event = next.value

        if (
          !hasStartedStreaming &&
          event.type === 'stream_event' &&
          event.event.type === 'content_block_start' &&
          event.event.content_block.type === 'text'
        ) {
          hasStartedStreaming = true
          context.setStreamMode?.('responding')
        }

        if (
          event.type === 'stream_event' &&
          event.event.type === 'content_block_delta' &&
          event.event.delta.type === 'text_delta'
        ) {
          const charactersStreamed = event.event.delta.text.length
          context.setResponseLength?.(length => length + charactersStreamed)
        }

        if (event.type === 'assistant') {
          response = event
        }

        next = await streamIter.next()
      }

      if (response) {
        return response
      }

      if (attempt < maxAttempts) {
        logEvent('tengu_compact_streaming_retry', {
          attempt,
          preCompactTokenCount,
          hasStartedStreaming,
        })
        await sleep(getRetryDelay(attempt), context.abortController.signal, {
          abortError: () => new APIUserAbortError(),
        })
        continue
      }

      logForDebugging(
        `Compact streaming failed after ${attempt} attempts. hasStartedStreaming=${hasStartedStreaming}`,
        { level: 'error' },
      )
      logEvent('tengu_compact_failed', {
        reason:
          'no_streaming_response' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        preCompactTokenCount,
        hasStartedStreaming,
        retryEnabled,
        attempts: attempt,
        promptCacheSharingEnabled,
      })
      throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
    }

    // This should never be reached due to the throw above, but TypeScript needs it
    throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
  } finally {
    clearInterval(activityInterval)
  }
}

/**
 * Creates attachment messages for recently accessed files to restore them after compaction.
 * This prevents the model from having to re-read files that were recently accessed.
 * Re-reads files using FileReadTool to get fresh content with proper validation.
 * Files are selected based on recency, but constrained by both file count and token budget limits.
 *
 * Files already present as Read tool results in preservedMessages are skipped —
 * re-injecting identical content the model can already see in the preserved tail
 * is pure waste (up to 25K tok/compact). 
Mirrors the diff-against-preserved1407 * pattern that getDeferredToolsDeltaAttachment uses at the same call sites.1408 *1409 * @param readFileState The current file state tracking recently read files1410 * @param toolUseContext The tool use context for calling FileReadTool1411 * @param maxFiles Maximum number of files to restore (default: 5)1412 * @param preservedMessages Messages kept post-compact; Read results here are skipped1413 * @returns Array of attachment messages for the most recently accessed files that fit within token budget1414 */1415export async function createPostCompactFileAttachments(1416 readFileState: Record<string, { content: string; timestamp: number }>,1417 toolUseContext: ToolUseContext,1418 maxFiles: number,1419 preservedMessages: Message[] = [],1420): Promise<AttachmentMessage[]> {1421 const preservedReadPaths = collectReadToolFilePaths(preservedMessages)1422 const recentFiles = Object.entries(readFileState)1423 .map(([filename, state]) => ({ filename, ...state }))1424 .filter(1425 file =>1426 !shouldExcludeFromPostCompactRestore(1427 file.filename,1428 toolUseContext.agentId,1429 ) && !preservedReadPaths.has(expandPath(file.filename)),1430 )1431 .sort((a, b) => b.timestamp - a.timestamp)1432 .slice(0, maxFiles)14331434 const results = await Promise.all(1435 recentFiles.map(async file => {1436 const attachment = await generateFileAttachment(1437 file.filename,1438 {1439 ...toolUseContext,1440 fileReadingLimits: {1441 maxTokens: POST_COMPACT_MAX_TOKENS_PER_FILE,1442 },1443 },1444 'tengu_post_compact_file_restore_success',1445 'tengu_post_compact_file_restore_error',1446 'compact',1447 )1448 return attachment ? 
createAttachmentMessage(attachment) : null1449 }),1450 )14511452 let usedTokens = 01453 return results.filter((result): result is AttachmentMessage => {1454 if (result === null) {1455 return false1456 }1457 const attachmentTokens = roughTokenCountEstimation(jsonStringify(result))1458 if (usedTokens + attachmentTokens <= POST_COMPACT_TOKEN_BUDGET) {1459 usedTokens += attachmentTokens1460 return true1461 }1462 return false1463 })1464}14651466/**1467 * Creates a plan file attachment if a plan file exists for the current session.1468 * This ensures the plan is preserved after compaction.1469 */1470export function createPlanAttachmentIfNeeded(1471 agentId?: AgentId,1472): AttachmentMessage | null {1473 const planContent = getPlan(agentId)14741475 if (!planContent) {1476 return null1477 }14781479 const planFilePath = getPlanFilePath(agentId)14801481 return createAttachmentMessage({1482 type: 'plan_file_reference',1483 planFilePath,1484 planContent,1485 })1486}14871488/**1489 * Creates an attachment for invoked skills to preserve their content across compaction.1490 * Only includes skills scoped to the given agent (or main session when agentId is null/undefined).1491 * This ensures skill guidelines remain available after the conversation is summarized1492 * without leaking skills from other agent contexts.1493 */1494export function createSkillAttachmentIfNeeded(1495 agentId?: string,1496): AttachmentMessage | null {1497 const invokedSkills = getInvokedSkillsForAgent(agentId)14981499 if (invokedSkills.size === 0) {1500 return null1501 }15021503 // Sorted most-recent-first so budget pressure drops the least-relevant skills.1504 // Per-skill truncation keeps the head of each file (where setup/usage1505 // instructions typically live) rather than dropping whole skills.1506 let usedTokens = 01507 const skills = Array.from(invokedSkills.values())1508 .sort((a, b) => b.invokedAt - a.invokedAt)1509 .map(skill => ({1510 name: skill.skillName,1511 path: skill.skillPath,1512 
content: truncateToTokens(1513 skill.content,1514 POST_COMPACT_MAX_TOKENS_PER_SKILL,1515 ),1516 }))1517 .filter(skill => {1518 const tokens = roughTokenCountEstimation(skill.content)1519 if (usedTokens + tokens > POST_COMPACT_SKILLS_TOKEN_BUDGET) {1520 return false1521 }1522 usedTokens += tokens1523 return true1524 })15251526 if (skills.length === 0) {1527 return null1528 }15291530 return createAttachmentMessage({1531 type: 'invoked_skills',1532 skills,1533 })1534}15351536/**1537 * Creates a plan_mode attachment if the user is currently in plan mode.1538 * This ensures the model continues to operate in plan mode after compaction1539 * (otherwise it would lose the plan mode instructions since those are1540 * normally only injected on tool-use turns via getAttachmentMessages).1541 */1542export async function createPlanModeAttachmentIfNeeded(1543 context: ToolUseContext,1544): Promise<AttachmentMessage | null> {1545 const appState = context.getAppState()1546 if (appState.toolPermissionContext.mode !== 'plan') {1547 return null1548 }15491550 const planFilePath = getPlanFilePath(context.agentId)1551 const planExists = getPlan(context.agentId) !== null15521553 return createAttachmentMessage({1554 type: 'plan_mode',1555 reminderType: 'full',1556 isSubAgent: !!context.agentId,1557 planFilePath,1558 planExists,1559 })1560}15611562/**1563 * Creates attachments for async agents so the model knows about them after1564 * compaction. 
Covers both agents still running in the background (so the model1565 * doesn't spawn a duplicate) and agents that have finished but whose results1566 * haven't been retrieved yet.1567 */1568export async function createAsyncAgentAttachmentsIfNeeded(1569 context: ToolUseContext,1570): Promise<AttachmentMessage[]> {1571 const appState = context.getAppState()1572 const asyncAgents = Object.values(appState.tasks).filter(1573 (task): task is LocalAgentTaskState => task.type === 'local_agent',1574 )15751576 return asyncAgents.flatMap(agent => {1577 if (1578 agent.retrieved ||1579 agent.status === 'pending' ||1580 agent.agentId === context.agentId1581 ) {1582 return []1583 }1584 return [1585 createAttachmentMessage({1586 type: 'task_status',1587 taskId: agent.agentId,1588 taskType: 'local_agent',1589 description: agent.description,1590 status: agent.status,1591 deltaSummary:1592 agent.status === 'running'1593 ? (agent.progress?.summary ?? null)1594 : (agent.error ?? null),1595 outputFilePath: getTaskOutputPath(agent.agentId),1596 }),1597 ]1598 })1599}16001601/**1602 * Scan messages for Read tool_use blocks and collect their file_path inputs1603 * (normalized via expandPath). 
Used to dedup post-compact file restoration1604 * against what's already visible in the preserved tail.1605 *1606 * Skips Reads whose tool_result is a dedup stub — the stub points at an1607 * earlier full Read that may have been compacted away, so we want1608 * createPostCompactFileAttachments to re-inject the real content.1609 */1610function collectReadToolFilePaths(messages: Message[]): Set<string> {1611 const stubIds = new Set<string>()1612 for (const message of messages) {1613 if (message.type !== 'user' || !Array.isArray(message.message.content)) {1614 continue1615 }1616 for (const block of message.message.content) {1617 if (1618 block.type === 'tool_result' &&1619 typeof block.content === 'string' &&1620 block.content.startsWith(FILE_UNCHANGED_STUB)1621 ) {1622 stubIds.add(block.tool_use_id)1623 }1624 }1625 }16261627 const paths = new Set<string>()1628 for (const message of messages) {1629 if (1630 message.type !== 'assistant' ||1631 !Array.isArray(message.message.content)1632 ) {1633 continue1634 }1635 for (const block of message.message.content) {1636 if (1637 block.type !== 'tool_use' ||1638 block.name !== FILE_READ_TOOL_NAME ||1639 stubIds.has(block.id)1640 ) {1641 continue1642 }1643 const input = block.input1644 if (1645 input &&1646 typeof input === 'object' &&1647 'file_path' in input &&1648 typeof input.file_path === 'string'1649 ) {1650 paths.add(expandPath(input.file_path))1651 }1652 }1653 }1654 return paths1655}16561657const SKILL_TRUNCATION_MARKER =1658 '\n\n[... skill content truncated for compaction; use Read on the skill path if you need the full text]'16591660/**1661 * Truncate content to roughly maxTokens, keeping the head. roughTokenCountEstimation1662 * uses ~4 chars/token (its default bytesPerToken), so char budget = maxTokens * 41663 * minus the marker so the result stays within budget. 
Marker tells the model it1664 * can Read the full file if needed.1665 */1666function truncateToTokens(content: string, maxTokens: number): string {1667 if (roughTokenCountEstimation(content) <= maxTokens) {1668 return content1669 }1670 const charBudget = maxTokens * 4 - SKILL_TRUNCATION_MARKER.length1671 return content.slice(0, charBudget) + SKILL_TRUNCATION_MARKER1672}16731674function shouldExcludeFromPostCompactRestore(1675 filename: string,1676 agentId?: AgentId,1677): boolean {1678 const normalizedFilename = expandPath(filename)1679 // Exclude plan files1680 try {1681 const planFilePath = expandPath(getPlanFilePath(agentId))1682 if (normalizedFilename === planFilePath) {1683 return true1684 }1685 } catch {1686 // If we can't get plan file path, continue with other checks1687 }16881689 // Exclude all types of claude.md files1690 // TODO: Refactor to use isMemoryFilePath() from claudemd.ts for consistency1691 // and to also match child directory memory files (.claude/rules/*.md, etc.)1692 try {1693 const normalizedMemoryPaths = new Set(1694 MEMORY_TYPE_VALUES.map(type => expandPath(getMemoryPath(type))),1695 )16961697 if (normalizedMemoryPaths.has(normalizedFilename)) {1698 return true1699 }1700 } catch {1701 // If we can't get memory paths, continue1702 }17031704 return false1705}1706