// services/mcp/client.ts
// 116 KB · 3349 lines
// src/services/mcp/client.ts
1import { feature } from 'bun:bundle'2import type {3 Base64ImageSource,4 ContentBlockParam,5 MessageParam,6} from '@anthropic-ai/sdk/resources/index.mjs'7import { Client } from '@modelcontextprotocol/sdk/client/index.js'8import {9 SSEClientTransport,10 type SSEClientTransportOptions,11} from '@modelcontextprotocol/sdk/client/sse.js'12import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js'13import {14 StreamableHTTPClientTransport,15 type StreamableHTTPClientTransportOptions,16} from '@modelcontextprotocol/sdk/client/streamableHttp.js'17import {18 createFetchWithInit,19 type FetchLike,20 type Transport,21} from '@modelcontextprotocol/sdk/shared/transport.js'22import {23 CallToolResultSchema,24 ElicitRequestSchema,25 type ElicitRequestURLParams,26 type ElicitResult,27 ErrorCode,28 type JSONRPCMessage,29 type ListPromptsResult,30 ListPromptsResultSchema,31 ListResourcesResultSchema,32 ListRootsRequestSchema,33 type ListToolsResult,34 ListToolsResultSchema,35 McpError,36 type PromptMessage,37 type ResourceLink,38} from '@modelcontextprotocol/sdk/types.js'39import mapValues from 'lodash-es/mapValues.js'40import memoize from 'lodash-es/memoize.js'41import zipObject from 'lodash-es/zipObject.js'42import pMap from 'p-map'43import { getOriginalCwd, getSessionId } from '../../bootstrap/state.js'44import type { Command } from '../../commands.js'45import { getOauthConfig } from '../../constants/oauth.js'46import { PRODUCT_URL } from '../../constants/product.js'47import type { AppState } from '../../state/AppState.js'48import {49 type Tool,50 type ToolCallProgress,51 toolMatchesName,52} from '../../Tool.js'53import { ListMcpResourcesTool } from '../../tools/ListMcpResourcesTool/ListMcpResourcesTool.js'54import { type MCPProgress, MCPTool } from '../../tools/MCPTool/MCPTool.js'55import { createMcpAuthTool } from '../../tools/McpAuthTool/McpAuthTool.js'56import { ReadMcpResourceTool } from '../../tools/ReadMcpResourceTool/ReadMcpResourceTool.js'57import { 
createAbortController } from '../../utils/abortController.js'58import { count } from '../../utils/array.js'59import {60 checkAndRefreshOAuthTokenIfNeeded,61 getClaudeAIOAuthTokens,62 handleOAuth401Error,63} from '../../utils/auth.js'64import { registerCleanup } from '../../utils/cleanupRegistry.js'65import { detectCodeIndexingFromMcpServerName } from '../../utils/codeIndexing.js'66import { logForDebugging } from '../../utils/debug.js'67import { isEnvDefinedFalsy, isEnvTruthy } from '../../utils/envUtils.js'68import {69 errorMessage,70 TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,71} from '../../utils/errors.js'72import { getMCPUserAgent } from '../../utils/http.js'73import { maybeNotifyIDEConnected } from '../../utils/ide.js'74import { maybeResizeAndDownsampleImageBuffer } from '../../utils/imageResizer.js'75import { logMCPDebug, logMCPError } from '../../utils/log.js'76import {77 getBinaryBlobSavedMessage,78 getFormatDescription,79 getLargeOutputInstructions,80 persistBinaryContent,81} from '../../utils/mcpOutputStorage.js'82import {83 getContentSizeEstimate,84 type MCPToolResult,85 mcpContentNeedsTruncation,86 truncateMcpContentIfNeeded,87} from '../../utils/mcpValidation.js'88import { WebSocketTransport } from '../../utils/mcpWebSocketTransport.js'89import { memoizeWithLRU } from '../../utils/memoize.js'90import { getWebSocketTLSOptions } from '../../utils/mtls.js'91import {92 getProxyFetchOptions,93 getWebSocketProxyAgent,94 getWebSocketProxyUrl,95} from '../../utils/proxy.js'96import { recursivelySanitizeUnicode } from '../../utils/sanitization.js'97import { getSessionIngressAuthToken } from '../../utils/sessionIngressAuth.js'98import { subprocessEnv } from '../../utils/subprocessEnv.js'99import {100 isPersistError,101 persistToolResult,102} from '../../utils/toolResultStorage.js'103import {104 type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,105 logEvent,106} from '../analytics/index.js'107import {108 type 
import {
  type ElicitationWaitingState,
  runElicitationHooks,
  runElicitationResultHooks,
} from './elicitationHandler.js'
import { buildMcpToolName } from './mcpStringUtils.js'
import { normalizeNameForMCP } from './normalization.js'
import { getLoggingSafeMcpBaseUrl } from './utils.js'

// Resolved once at module load: the skills fetcher when the MCP_SKILLS feature
// is on, otherwise null. Uses require() so the module is only loaded when the
// feature check passes.
/* eslint-disable @typescript-eslint/no-require-imports */
const fetchMcpSkillsForClient = feature('MCP_SKILLS')
  ? (
      require('../../skills/mcpSkills.js') as typeof import('../../skills/mcpSkills.js')
    ).fetchMcpSkillsForClient
  : null

import { UnauthorizedError } from '@modelcontextprotocol/sdk/client/auth.js'
import type { AssistantMessage } from 'src/types/message.js'
/* eslint-enable @typescript-eslint/no-require-imports */
import { classifyMcpToolForCollapse } from '../../tools/MCPTool/classifyForCollapse.js'
import { clearKeychainCache } from '../../utils/secureStorage/macOsKeychainHelpers.js'
import { sleep } from '../../utils/sleep.js'
import {
  ClaudeAuthProvider,
  hasMcpDiscoveryButNoToken,
  wrapFetchWithStepUpDetection,
} from './auth.js'
import { markClaudeAiMcpConnected } from './claudeai.js'
import { getAllMcpConfigs, isMcpServerDisabled } from './config.js'
import { getMcpServerHeaders } from './headersHelper.js'
import { SdkControlClientTransport } from './SdkControlTransport.js'
import type {
  ConnectedMCPServer,
  MCPServerConnection,
  McpSdkServerConfig,
  ScopedMcpServerConfig,
  ServerResource,
} from './types.js'

/**
 * Custom error class to indicate that an MCP tool call failed due to
 * authentication issues (e.g., expired OAuth token returning 401).
 * This error should be caught at the tool execution layer to update
 * the client's status to 'needs-auth'.
 */
export class McpAuthError extends Error {
  /** Name of the MCP server whose call failed authentication. */
  serverName: string

  /**
   * @param serverName - Name of the MCP server the failure belongs to
   * @param message - Human-readable error message
   */
  constructor(serverName: string, message: string) {
    super(message)
    this.name = 'McpAuthError'
    this.serverName = serverName
  }
}
/**
 * Thrown when an MCP session has expired and the connection cache has been cleared.
 * The caller should get a fresh client via ensureConnectedClient and retry.
 */
class McpSessionExpiredError extends Error {
  /**
   * @param serverName - Name of the MCP server, embedded in the error message
   */
  constructor(serverName: string) {
    super(`MCP server "${serverName}" session expired`)
    this.name = 'McpSessionExpiredError'
  }
}

/**
 * Thrown when an MCP tool returns `isError: true`. Carries the result's `_meta`
 * so SDK consumers can still receive it — per the MCP spec, `_meta` is on the
 * base Result type and is valid on error results.
 */
export class McpToolCallError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS extends TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS {
  /**
   * @param message - Full error message
   * @param telemetryMessage - Message forwarded to the telemetry-safe base class
   * @param mcpMeta - Optional wrapper holding the tool result's `_meta`
   */
  constructor(
    message: string,
    telemetryMessage: string,
    readonly mcpMeta?: { _meta?: Record<string, unknown> },
  ) {
    super(message, telemetryMessage)
    // The reported name is the short form, not the verbose class identifier.
    this.name = 'McpToolCallError'
  }
}
/**
 * Detects whether an error is an MCP "Session not found" error (HTTP 404 + JSON-RPC code -32001).
 * Per the MCP spec, servers return 404 when a session ID is no longer valid.
 * We check both signals to avoid false positives from generic 404s (wrong URL, server gone, etc.).
 *
 * @param error - Error to inspect; its optional numeric `code` property is read as the HTTP status
 * @returns true only when `code` is 404 and the message carries JSON-RPC code -32001
 */
export function isMcpSessionExpiredError(error: Error): boolean {
  const httpStatus =
    'code' in error ? (error as Error & { code?: number }).code : undefined
  if (httpStatus !== 404) {
    return false
  }
  // The SDK embeds the response body text in the error message.
  // MCP servers return: {"error":{"code":-32001,"message":"Session not found"},...}
  // Check for the JSON-RPC error code to distinguish from generic web server 404s.
  // Whitespace around the colon is tolerated (serializers differ), and the
  // lookahead keeps longer codes such as -320010 from matching.
  return /"code"\s*:\s*-32001(?!\d)/.test(error.message)
}

/**
 * Default timeout for MCP tool calls (effectively infinite - ~27.8 hours).
 */
const DEFAULT_MCP_TOOL_TIMEOUT_MS = 100_000_000

/**
 * Cap on MCP tool descriptions and server instructions sent to the model.
 * OpenAPI-generated MCP servers have been observed dumping 15-60KB of endpoint
 * docs into tool.description; this caps the p95 tail without losing the intent.
 */
const MAX_MCP_DESCRIPTION_LENGTH = 2048

/**
 * Gets the timeout for MCP tool calls in milliseconds.
 * Uses MCP_TOOL_TIMEOUT environment variable if set, otherwise defaults to ~27.8 hours.
 * Unset, non-numeric, zero and negative values all fall back to the default.
 *
 * @returns Timeout in milliseconds, always positive
 */
function getMcpToolTimeoutMs(): number {
  const configured = parseInt(process.env.MCP_TOOL_TIMEOUT || '', 10)
  // NaN fails the comparison too, so it falls back to the default.
  return configured > 0 ? configured : DEFAULT_MCP_TOOL_TIMEOUT_MS
}

import { isClaudeInChromeMCPServer } from '../../utils/claudeInChrome/common.js'

// Lazy: toolRendering.tsx pulls React/ink; only needed when Claude-in-Chrome MCP server is connected
/* eslint-disable @typescript-eslint/no-require-imports */
const claudeInChromeToolRendering =
  (): typeof import('../../utils/claudeInChrome/toolRendering.js') =>
    require('../../utils/claudeInChrome/toolRendering.js')
// Lazy: wrapper.tsx → hostAdapter.ts → executor.ts pulls both native modules
// (@ant/computer-use-input + @ant/computer-use-swift). Runtime-gated by
// GrowthBook tengu_malort_pedway (see gates.ts).
const computerUseWrapper = feature('CHICAGO_MCP')
  ? (): typeof import('../../utils/computerUse/wrapper.js') =>
      require('../../utils/computerUse/wrapper.js')
  : undefined
// Unlike the wrapper above, this require() runs at module load when the
// CHICAGO_MCP feature is on; otherwise the binding is undefined.
const isComputerUseMCPServer = feature('CHICAGO_MCP')
  ? (
      require('../../utils/computerUse/common.js') as typeof import('../../utils/computerUse/common.js')
    ).isComputerUseMCPServer
  : undefined

import { mkdir, readFile, unlink, writeFile } from 'fs/promises'
import { dirname, join } from 'path'
import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
/* eslint-enable @typescript-eslint/no-require-imports */
import { jsonParse, jsonStringify } from '../../utils/slowOperations.js'

/** How long a needs-auth cache entry counts as fresh. */
const MCP_AUTH_CACHE_TTL_MS = 15 * 60 * 1000 // 15 min

/** On-disk shape of the needs-auth cache: server id → time the entry was written (ms since epoch). */
type McpAuthCacheData = Record<string, { timestamp: number }>

/**
 * Location of the needs-auth cache file.
 *
 * @returns `mcp-needs-auth-cache.json` inside the directory returned by getClaudeConfigHomeDir()
 */
function getMcpAuthCachePath(): string {
  return join(getClaudeConfigHomeDir(), 'mcp-needs-auth-cache.json')
}
// Memoized so N concurrent isMcpAuthCached() calls during batched connection
// share a single file read instead of N reads of the same file. Invalidated
// on write (setMcpAuthCacheEntry) and clear (clearMcpAuthCache). Not using
// lodash memoize because we need to null out the cache, not delete by key.
let authCachePromise: Promise<McpAuthCacheData> | null = null

/**
 * Reads the needs-auth cache file, sharing one in-flight read between callers.
 * A missing/unreadable file, a parse failure, or JSON that is not a plain
 * object (e.g. `null` or an array) all yield an empty cache.
 *
 * @returns The cached data; never rejects
 */
function getMcpAuthCache(): Promise<McpAuthCacheData> {
  if (!authCachePromise) {
    authCachePromise = readFile(getMcpAuthCachePath(), 'utf-8')
      .then((data): McpAuthCacheData => {
        const parsed: unknown = jsonParse(data)
        // Valid JSON is not necessarily an object: indexing `null` by server id
        // would throw in the callers, so anything else is treated as empty.
        return parsed !== null &&
          typeof parsed === 'object' &&
          !Array.isArray(parsed)
          ? (parsed as McpAuthCacheData)
          : {}
      })
      .catch(() => ({}))
  }
  return authCachePromise
}

/**
 * Whether `serverId` has a needs-auth cache entry younger than
 * MCP_AUTH_CACHE_TTL_MS.
 *
 * @param serverId - Key to look up in the cache file
 * @returns true when a fresh entry exists, false otherwise
 */
async function isMcpAuthCached(serverId: string): Promise<boolean> {
  const cache = await getMcpAuthCache()
  const entry = cache[serverId]
  if (!entry) {
    return false
  }
  return Date.now() - entry.timestamp < MCP_AUTH_CACHE_TTL_MS
}

// Serialize cache writes through a promise chain to prevent concurrent
// read-modify-write races when multiple servers return 401 in the same batch
let writeChain = Promise.resolve()

/**
 * Records `serverId` in the needs-auth cache with the current time.
 * Fire-and-forget: the write is queued on writeChain and failures are ignored.
 *
 * @param serverId - Key to stamp in the cache file
 */
function setMcpAuthCacheEntry(serverId: string): void {
  writeChain = writeChain
    .then(async () => {
      const cache = await getMcpAuthCache()
      cache[serverId] = { timestamp: Date.now() }
      const cachePath = getMcpAuthCachePath()
      await mkdir(dirname(cachePath), { recursive: true })
      await writeFile(cachePath, jsonStringify(cache))
      // Invalidate the read cache so subsequent reads see the new entry.
      // Safe because writeChain serializes writes: the next write's
      // getMcpAuthCache() call will re-read the file with this entry present.
      authCachePromise = null
    })
    .catch(() => {
      // Best-effort cache write
    })
}

/**
 * Drops the in-memory read cache and deletes the cache file (best-effort).
 * NOTE(review): the unlink is not queued on writeChain, so a write already in
 * flight may recreate the file afterwards — confirm whether that matters to callers.
 */
export function clearMcpAuthCache(): void {
  authCachePromise = null
  void unlink(getMcpAuthCachePath()).catch(() => {
    // Cache file may not exist
  })
}
/**
 * Spread-ready analytics field for the server's base URL. Calls
 * getLoggingSafeMcpBaseUrl once (not twice like the inline ternary it replaces).
 * Typed as AnalyticsMetadata since the URL is query-stripped and safe to log.
 *
 * @param serverRef - Server configuration passed to getLoggingSafeMcpBaseUrl
 * @returns `{ mcpServerBaseUrl }` when a URL is returned, otherwise `{}`
 */
function mcpBaseUrlAnalytics(serverRef: ScopedMcpServerConfig): {
  mcpServerBaseUrl?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
} {
  const url = getLoggingSafeMcpBaseUrl(serverRef)
  return url
    ? {
        mcpServerBaseUrl:
          url as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      }
    : {}
}

/**
 * Shared handler for sse/http/claudeai-proxy auth failures during connect:
 * emits tengu_mcp_server_needs_auth, caches the needs-auth entry, and returns
 * the needs-auth connection result.
 *
 * @param name - Server name; used for the debug log, the cache key and the result
 * @param serverRef - Server configuration echoed back in the result
 * @param transportType - Transport the failure occurred on
 * @returns A connection result of type 'needs-auth'
 */
function handleRemoteAuthFailure(
  name: string,
  serverRef: ScopedMcpServerConfig,
  transportType: 'sse' | 'http' | 'claudeai-proxy',
): MCPServerConnection {
  logEvent('tengu_mcp_server_needs_auth', {
    transportType:
      transportType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    ...mcpBaseUrlAnalytics(serverRef),
  })
  // Display names for the debug message below.
  const label: Record<typeof transportType, string> = {
    sse: 'SSE',
    http: 'HTTP',
    'claudeai-proxy': 'claude.ai proxy',
  }
  logMCPDebug(
    name,
    `Authentication required for ${label[transportType]} server`,
  )
  setMcpAuthCacheEntry(name)
  return { name, type: 'needs-auth', config: serverRef }
}
/**
 * Fetch wrapper for claude.ai proxy connections. Attaches the OAuth bearer
 * token and retries once on 401 via handleOAuth401Error (force-refresh).
 *
 * The Anthropic API path has this retry (withRetry.ts, grove.ts) to handle
 * memoize-cache staleness and clock drift. Without the same here, a single
 * stale token mass-401s every claude.ai connector and sticks them all in the
 * 15-min needs-auth cache.
 *
 * @param innerFetch - Fetch implementation that performs the actual request
 * @returns A fetch that adds `Authorization: Bearer <token>` and retries at most once on 401
 * @throws Error when no claude.ai OAuth token is available for the first request
 */
export function createClaudeAiProxyFetch(innerFetch: FetchLike): FetchLike {
  return async (url, init) => {
    const doRequest = async () => {
      await checkAndRefreshOAuthTokenIfNeeded()
      const currentTokens = getClaudeAIOAuthTokens()
      if (!currentTokens) {
        throw new Error('No claude.ai OAuth token available')
      }
      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
      const headers = new Headers(init?.headers)
      headers.set('Authorization', `Bearer ${currentTokens.accessToken}`)
      const response = await innerFetch(url, { ...init, headers })
      // Return the exact token that was sent. Reading getClaudeAIOAuthTokens()
      // again after the request is wrong under concurrent 401s: another
      // connector's handleOAuth401Error clears the memoize cache, so we'd read
      // the NEW token from keychain, pass it to handleOAuth401Error, which
      // finds same-as-keychain → returns false → skips retry. Same pattern as
      // bridgeApi.ts withOAuthRetry (token passed as fn param).
      return { response, sentToken: currentTokens.accessToken }
    }

    const { response, sentToken } = await doRequest()
    if (response.status !== 401) {
      return response
    }
    // handleOAuth401Error returns true only if the token actually changed
    // (keychain had a newer one, or force-refresh succeeded). Gate retry on
    // that — otherwise we double round-trip time for every connector whose
    // downstream service genuinely needs auth (the common case: 30+ servers
    // with "MCP server requires authentication but no OAuth token configured").
    const tokenChanged = await handleOAuth401Error(sentToken).catch(() => false)
    logEvent('tengu_mcp_claudeai_proxy_401', {
      tokenChanged:
        tokenChanged as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    if (!tokenChanged) {
      // ELOCKED contention: another connector may have won the lockfile and refreshed — check if token changed underneath us
      const now = getClaudeAIOAuthTokens()?.accessToken
      if (!now || now === sentToken) {
        return response
      }
    }

    let retried: Awaited<ReturnType<FetchLike>>
    try {
      retried = (await doRequest()).response
    } catch {
      // Retry itself failed (network error). Return the original 401 so the
      // outer handler can classify it.
      return response
    }
    // The original 401 is now superseded and nobody will read its body.
    // Cancel it (best-effort) so the underlying connection is released
    // promptly instead of waiting for garbage collection.
    try {
      void response.body?.cancel().catch(() => {})
    } catch {
      // Non-standard response object without a cancellable body
    }
    return retried
  }
}

// Minimal interface for WebSocket instances passed to mcpWebSocketTransport
type WsClientLike = {
  readonly readyState: number
  close(): void
  send(data: string): void
}

/**
 * Create a ws.WebSocket client with the MCP protocol.
 * Bun's ws shim types lack the 3-arg constructor (url, protocols, options)
 * that the real ws package supports, so we cast the constructor here.
 *
 * @param url - WebSocket URL to connect to
 * @param options - Options object passed through as the constructor's third argument
 * @returns The constructed client, opened with the `mcp` subprotocol
 */
async function createNodeWsClient(
  url: string,
  options: Record<string, unknown>,
): Promise<WsClientLike> {
  const wsModule = await import('ws')
  const WS = wsModule.default as unknown as new (
    url: string,
    protocols: string[],
    options: Record<string, unknown>,
  ) => WsClientLike
  return new WS(url, ['mcp'], options)
}

/** Image MIME types listed here: JPEG, PNG, GIF and WebP. */
const IMAGE_MIME_TYPES = new Set([
  'image/jpeg',
  'image/png',
  'image/gif',
  'image/webp',
])

/**
 * Connection timeout in milliseconds. Uses the MCP_TIMEOUT environment
 * variable when it parses to a positive integer, otherwise 30000.
 *
 * @returns Timeout in milliseconds, always positive
 */
function getConnectionTimeoutMs(): number {
  const configured = parseInt(process.env.MCP_TIMEOUT || '', 10)
  // NaN, zero and negative values fall back to the default.
  return configured > 0 ? configured : 30000
}
/**
 * Default timeout for individual MCP requests (auth, tool calls, etc.)
 */
const MCP_REQUEST_TIMEOUT_MS = 60000

/**
 * MCP Streamable HTTP spec requires clients to advertise acceptance of both
 * JSON and SSE on every POST. Servers that enforce this strictly reject
 * requests without it (HTTP 406).
 * https://modelcontextprotocol.io/specification/2025-03-26/basic/transports#sending-messages-to-the-server
 */
const MCP_STREAMABLE_HTTP_ACCEPT = 'application/json, text/event-stream'

/**
 * Wraps a fetch function to apply a fresh timeout signal to each request.
 * This avoids the bug where a single AbortSignal.timeout() created at connection
 * time becomes stale after 60 seconds, causing all subsequent requests to fail
 * immediately with "The operation timed out." Uses a 60-second timeout.
 *
 * Also ensures the Accept header required by the MCP Streamable HTTP spec is
 * present on POSTs. The MCP SDK sets this inside StreamableHTTPClientTransport.send(),
 * but it is attached to a Headers instance that passes through an object spread here,
 * and some runtimes/agents have been observed dropping it before it reaches the wire.
 * See https://github.com/anthropics/claude-agent-sdk-typescript/issues/202.
 * Normalizing here (the last wrapper before fetch()) guarantees it is sent.
 *
 * GET requests are excluded from the timeout since, for MCP transports, they are
 * long-lived SSE streams meant to stay open indefinitely. (Auth-related GETs use
 * a separate fetch wrapper with its own timeout in auth.ts.)
 *
 * The timer and the parent-signal listener are released as soon as baseFetch
 * settles, whether it resolved or rejected.
 *
 * @param baseFetch - The fetch function to wrap
 * @returns A fetch that passes GETs through unchanged and adds the timeout and Accept header to every other method
 */
export function wrapFetchWithTimeout(baseFetch: FetchLike): FetchLike {
  return async (url: string | URL, init?: RequestInit) => {
    const method = (init?.method ?? 'GET').toUpperCase()

    // Skip timeout for GET requests - in MCP transports, these are long-lived SSE streams.
    // (OAuth discovery GETs in auth.ts use a separate createAuthFetch() with its own timeout.)
    if (method === 'GET') {
      return baseFetch(url, init)
    }

    // Normalize headers and guarantee the Streamable-HTTP Accept value. new Headers()
    // accepts HeadersInit | undefined and copies from plain objects, tuple arrays,
    // and existing Headers instances — so whatever shape the SDK handed us, the
    // Accept value survives the spread below as an own property of a concrete object.
    // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
    const headers = new Headers(init?.headers)
    if (!headers.has('accept')) {
      headers.set('accept', MCP_STREAMABLE_HTTP_ACCEPT)
    }

    // Use setTimeout instead of AbortSignal.timeout() so we can clearTimeout on
    // completion. AbortSignal.timeout's internal timer is only released when the
    // signal is GC'd, which in Bun is lazy — ~2.4KB of native memory per request
    // lingers for the full 60s even when the request completes in milliseconds.
    const controller = new AbortController()
    const timer = setTimeout(
      c =>
        c.abort(new DOMException('The operation timed out.', 'TimeoutError')),
      MCP_REQUEST_TIMEOUT_MS,
      controller,
    )
    timer.unref?.()

    // Mirror the caller's signal onto ours, including the already-aborted case
    // (an 'abort' listener never fires for a signal that aborted earlier).
    const parentSignal = init?.signal
    const abort = () => controller.abort(parentSignal?.reason)
    parentSignal?.addEventListener('abort', abort)
    if (parentSignal?.aborted) {
      controller.abort(parentSignal.reason)
    }

    try {
      // `return await` (not a bare return) so the finally block runs only
      // after baseFetch has settled.
      return await baseFetch(url, {
        ...init,
        headers,
        signal: controller.signal,
      })
    } finally {
      clearTimeout(timer)
      parentSignal?.removeEventListener('abort', abort)
    }
  }
}

/**
 * Connection batch size from the MCP_SERVER_CONNECTION_BATCH_SIZE environment
 * variable. Unset, non-numeric, zero and negative values fall back to 3, since
 * a batch size below 1 is not usable.
 *
 * @returns Batch size, always a positive integer
 */
export function getMcpServerConnectionBatchSize(): number {
  const configured = parseInt(
    process.env.MCP_SERVER_CONNECTION_BATCH_SIZE || '',
    10,
  )
  return configured > 0 ? configured : 3
}
return parseInt(process.env.MCP_SERVER_CONNECTION_BATCH_SIZE || '', 10) || 3554}555556function getRemoteMcpServerConnectionBatchSize(): number {557 return (558 parseInt(process.env.MCP_REMOTE_SERVER_CONNECTION_BATCH_SIZE || '', 10) ||559 20560 )561}562563function isLocalMcpServer(config: ScopedMcpServerConfig): boolean {564 return !config.type || config.type === 'stdio' || config.type === 'sdk'565}566567// For the IDE MCP servers, we only include specific tools568const ALLOWED_IDE_TOOLS = ['mcp__ide__executeCode', 'mcp__ide__getDiagnostics']569function isIncludedMcpTool(tool: Tool): boolean {570 return (571 !tool.name.startsWith('mcp__ide__') || ALLOWED_IDE_TOOLS.includes(tool.name)572 )573}574575/**576 * Generates the cache key for a server connection577 * @param name Server name578 * @param serverRef Server configuration579 * @returns Cache key string580 */581export function getServerCacheKey(582 name: string,583 serverRef: ScopedMcpServerConfig,584): string {585 return `${name}-${jsonStringify(serverRef)}`586}587588/**589 * TODO (ollie): The memoization here increases complexity by a lot, and im not sure it really improves performance590 * Attempts to connect to a single MCP server591 * @param name Server name592 * @param serverRef Scoped server configuration593 * @returns A wrapped client (either connected or failed)594 */595export const connectToServer = memoize(596 async (597 name: string,598 serverRef: ScopedMcpServerConfig,599 serverStats?: {600 totalServers: number601 stdioCount: number602 sseCount: number603 httpCount: number604 sseIdeCount: number605 wsIdeCount: number606 },607 ): Promise<MCPServerConnection> => {608 const connectStartTime = Date.now()609 let inProcessServer:610 | { connect(t: Transport): Promise<void>; close(): Promise<void> }611 | undefined612 try {613 let transport614615 // If we have the session ingress JWT, we will connect via the session ingress rather than616 // to remote MCP's directly.617 const sessionIngressToken = 
getSessionIngressAuthToken()618619 if (serverRef.type === 'sse') {620 // Create an auth provider for this server621 const authProvider = new ClaudeAuthProvider(name, serverRef)622623 // Get combined headers (static + dynamic)624 const combinedHeaders = await getMcpServerHeaders(name, serverRef)625626 // Use the auth provider with SSEClientTransport627 const transportOptions: SSEClientTransportOptions = {628 authProvider,629 // Use fresh timeout per request to avoid stale AbortSignal bug.630 // Step-up detection wraps innermost so the 403 is seen before the631 // SDK's handler calls auth() → tokens().632 fetch: wrapFetchWithTimeout(633 wrapFetchWithStepUpDetection(createFetchWithInit(), authProvider),634 ),635 requestInit: {636 headers: {637 'User-Agent': getMCPUserAgent(),638 ...combinedHeaders,639 },640 },641 }642643 // IMPORTANT: Always set eventSourceInit with a fetch that does NOT use the644 // timeout wrapper. The EventSource connection is long-lived (stays open indefinitely645 // to receive server-sent events), so applying a 60-second timeout would kill it.646 // The timeout is only meant for individual API requests (POST, auth refresh), not647 // the persistent SSE stream.648 transportOptions.eventSourceInit = {649 fetch: async (url: string | URL, init?: RequestInit) => {650 // Get auth headers from the auth provider651 const authHeaders: Record<string, string> = {}652 const tokens = await authProvider.tokens()653 if (tokens) {654 authHeaders.Authorization = `Bearer ${tokens.access_token}`655 }656657 const proxyOptions = getProxyFetchOptions()658 // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins659 return fetch(url, {660 ...init,661 ...proxyOptions,662 headers: {663 'User-Agent': getMCPUserAgent(),664 ...authHeaders,665 ...init?.headers,666 ...combinedHeaders,667 Accept: 'text/event-stream',668 },669 })670 },671 }672673 transport = new SSEClientTransport(674 new URL(serverRef.url),675 transportOptions,676 )677 
logMCPDebug(name, `SSE transport initialized, awaiting connection`)678 } else if (serverRef.type === 'sse-ide') {679 logMCPDebug(name, `Setting up SSE-IDE transport to ${serverRef.url}`)680 // IDE servers don't need authentication681 // TODO: Use the auth token provided in the lockfile682 const proxyOptions = getProxyFetchOptions()683 const transportOptions: SSEClientTransportOptions =684 proxyOptions.dispatcher685 ? {686 eventSourceInit: {687 fetch: async (url: string | URL, init?: RequestInit) => {688 // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins689 return fetch(url, {690 ...init,691 ...proxyOptions,692 headers: {693 'User-Agent': getMCPUserAgent(),694 ...init?.headers,695 },696 })697 },698 },699 }700 : {}701702 transport = new SSEClientTransport(703 new URL(serverRef.url),704 Object.keys(transportOptions).length > 0705 ? transportOptions706 : undefined,707 )708 } else if (serverRef.type === 'ws-ide') {709 const tlsOptions = getWebSocketTLSOptions()710 const wsHeaders = {711 'User-Agent': getMCPUserAgent(),712 ...(serverRef.authToken && {713 'X-Claude-Code-Ide-Authorization': serverRef.authToken,714 }),715 }716717 let wsClient: WsClientLike718 if (typeof Bun !== 'undefined') {719 // Bun's WebSocket supports headers/proxy/tls options but the DOM typings don't720 // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins721 wsClient = new globalThis.WebSocket(serverRef.url, {722 protocols: ['mcp'],723 headers: wsHeaders,724 proxy: getWebSocketProxyUrl(serverRef.url),725 tls: tlsOptions || undefined,726 } as unknown as string[])727 } else {728 wsClient = await createNodeWsClient(serverRef.url, {729 headers: wsHeaders,730 agent: getWebSocketProxyAgent(serverRef.url),731 ...(tlsOptions || {}),732 })733 }734 transport = new WebSocketTransport(wsClient)735 } else if (serverRef.type === 'ws') {736 logMCPDebug(737 name,738 `Initializing WebSocket transport to ${serverRef.url}`,739 )740741 const combinedHeaders = 
await getMcpServerHeaders(name, serverRef)742743 const tlsOptions = getWebSocketTLSOptions()744 const wsHeaders = {745 'User-Agent': getMCPUserAgent(),746 ...(sessionIngressToken && {747 Authorization: `Bearer ${sessionIngressToken}`,748 }),749 ...combinedHeaders,750 }751752 // Redact sensitive headers before logging753 const wsHeadersForLogging = mapValues(wsHeaders, (value, key) =>754 key.toLowerCase() === 'authorization' ? '[REDACTED]' : value,755 )756757 logMCPDebug(758 name,759 `WebSocket transport options: ${jsonStringify({760 url: serverRef.url,761 headers: wsHeadersForLogging,762 hasSessionAuth: !!sessionIngressToken,763 })}`,764 )765766 let wsClient: WsClientLike767 if (typeof Bun !== 'undefined') {768 // Bun's WebSocket supports headers/proxy/tls options but the DOM typings don't769 // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins770 wsClient = new globalThis.WebSocket(serverRef.url, {771 protocols: ['mcp'],772 headers: wsHeaders,773 proxy: getWebSocketProxyUrl(serverRef.url),774 tls: tlsOptions || undefined,775 } as unknown as string[])776 } else {777 wsClient = await createNodeWsClient(serverRef.url, {778 headers: wsHeaders,779 agent: getWebSocketProxyAgent(serverRef.url),780 ...(tlsOptions || {}),781 })782 }783 transport = new WebSocketTransport(wsClient)784 } else if (serverRef.type === 'http') {785 logMCPDebug(name, `Initializing HTTP transport to ${serverRef.url}`)786 logMCPDebug(787 name,788 `Node version: ${process.version}, Platform: ${process.platform}`,789 )790 logMCPDebug(791 name,792 `Environment: ${jsonStringify({793 NODE_OPTIONS: process.env.NODE_OPTIONS || 'not set',794 UV_THREADPOOL_SIZE: process.env.UV_THREADPOOL_SIZE || 'default',795 HTTP_PROXY: process.env.HTTP_PROXY || 'not set',796 HTTPS_PROXY: process.env.HTTPS_PROXY || 'not set',797 NO_PROXY: process.env.NO_PROXY || 'not set',798 })}`,799 )800801 // Create an auth provider for this server802 const authProvider = new ClaudeAuthProvider(name, 
serverRef)803804 // Get combined headers (static + dynamic)805 const combinedHeaders = await getMcpServerHeaders(name, serverRef)806807 // Check if this server has stored OAuth tokens. If so, the SDK's808 // authProvider will set Authorization — don't override with the809 // session ingress token (SDK merges requestInit AFTER authProvider).810 // CCR proxy URLs (ccr_shttp_mcp) have no stored OAuth, so they still811 // get the ingress token. See PR #24454 discussion.812 const hasOAuthTokens = !!(await authProvider.tokens())813814 // Use the auth provider with StreamableHTTPClientTransport815 const proxyOptions = getProxyFetchOptions()816 logMCPDebug(817 name,818 `Proxy options: ${proxyOptions.dispatcher ? 'custom dispatcher' : 'default'}`,819 )820821 const transportOptions: StreamableHTTPClientTransportOptions = {822 authProvider,823 // Use fresh timeout per request to avoid stale AbortSignal bug.824 // Step-up detection wraps innermost so the 403 is seen before the825 // SDK's handler calls auth() → tokens().826 fetch: wrapFetchWithTimeout(827 wrapFetchWithStepUpDetection(createFetchWithInit(), authProvider),828 ),829 requestInit: {830 ...proxyOptions,831 headers: {832 'User-Agent': getMCPUserAgent(),833 ...(sessionIngressToken &&834 !hasOAuthTokens && {835 Authorization: `Bearer ${sessionIngressToken}`,836 }),837 ...combinedHeaders,838 },839 },840 }841842 // Redact sensitive headers before logging843 const headersForLogging = transportOptions.requestInit?.headers844 ? mapValues(845 transportOptions.requestInit.headers as Record<string, string>,846 (value, key) =>847 key.toLowerCase() === 'authorization' ? 
'[REDACTED]' : value,848 )849 : undefined850851 logMCPDebug(852 name,853 `HTTP transport options: ${jsonStringify({854 url: serverRef.url,855 headers: headersForLogging,856 hasAuthProvider: !!authProvider,857 timeoutMs: MCP_REQUEST_TIMEOUT_MS,858 })}`,859 )860861 transport = new StreamableHTTPClientTransport(862 new URL(serverRef.url),863 transportOptions,864 )865 logMCPDebug(name, `HTTP transport created successfully`)866 } else if (serverRef.type === 'sdk') {867 throw new Error('SDK servers should be handled in print.ts')868 } else if (serverRef.type === 'claudeai-proxy') {869 logMCPDebug(870 name,871 `Initializing claude.ai proxy transport for server ${serverRef.id}`,872 )873874 const tokens = getClaudeAIOAuthTokens()875 if (!tokens) {876 throw new Error('No claude.ai OAuth token found')877 }878879 const oauthConfig = getOauthConfig()880 const proxyUrl = `${oauthConfig.MCP_PROXY_URL}${oauthConfig.MCP_PROXY_PATH.replace('{server_id}', serverRef.id)}`881882 logMCPDebug(name, `Using claude.ai proxy at ${proxyUrl}`)883884 // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins885 const fetchWithAuth = createClaudeAiProxyFetch(globalThis.fetch)886887 const proxyOptions = getProxyFetchOptions()888 const transportOptions: StreamableHTTPClientTransportOptions = {889 // Wrap fetchWithAuth with fresh timeout per request890 fetch: wrapFetchWithTimeout(fetchWithAuth),891 requestInit: {892 ...proxyOptions,893 headers: {894 'User-Agent': getMCPUserAgent(),895 'X-Mcp-Client-Session-Id': getSessionId(),896 },897 },898 }899900 transport = new StreamableHTTPClientTransport(901 new URL(proxyUrl),902 transportOptions,903 )904 logMCPDebug(name, `claude.ai proxy transport created successfully`)905 } else if (906 (serverRef.type === 'stdio' || !serverRef.type) &&907 isClaudeInChromeMCPServer(name)908 ) {909 // Run the Chrome MCP server in-process to avoid spawning a ~325 MB subprocess910 const { createChromeContext } = await import(911 
'../../utils/claudeInChrome/mcpServer.js'912 )913 const { createClaudeForChromeMcpServer } = await import(914 '@ant/claude-for-chrome-mcp'915 )916 const { createLinkedTransportPair } = await import(917 './InProcessTransport.js'918 )919 const context = createChromeContext(serverRef.env)920 inProcessServer = createClaudeForChromeMcpServer(context)921 const [clientTransport, serverTransport] = createLinkedTransportPair()922 await inProcessServer.connect(serverTransport)923 transport = clientTransport924 logMCPDebug(name, `In-process Chrome MCP server started`)925 } else if (926 feature('CHICAGO_MCP') &&927 (serverRef.type === 'stdio' || !serverRef.type) &&928 isComputerUseMCPServer!(name)929 ) {930 // Run the Computer Use MCP server in-process — same rationale as931 // Chrome above. The package's CallTool handler is a stub; real932 // dispatch goes through wrapper.tsx's .call() override.933 const { createComputerUseMcpServerForCli } = await import(934 '../../utils/computerUse/mcpServer.js'935 )936 const { createLinkedTransportPair } = await import(937 './InProcessTransport.js'938 )939 inProcessServer = await createComputerUseMcpServerForCli()940 const [clientTransport, serverTransport] = createLinkedTransportPair()941 await inProcessServer.connect(serverTransport)942 transport = clientTransport943 logMCPDebug(name, `In-process Computer Use MCP server started`)944 } else if (serverRef.type === 'stdio' || !serverRef.type) {945 const finalCommand =946 process.env.CLAUDE_CODE_SHELL_PREFIX || serverRef.command947 const finalArgs = process.env.CLAUDE_CODE_SHELL_PREFIX948 ? 
[[serverRef.command, ...serverRef.args].join(' ')]949 : serverRef.args950 transport = new StdioClientTransport({951 command: finalCommand,952 args: finalArgs,953 env: {954 ...subprocessEnv(),955 ...serverRef.env,956 } as Record<string, string>,957 stderr: 'pipe', // prevents error output from the MCP server from printing to the UI958 })959 } else {960 throw new Error(`Unsupported server type: ${serverRef.type}`)961 }962963 // Set up stderr logging for stdio transport before connecting in case there are any stderr964 // outputs emitted during the connection start (this can be useful for debugging failed connections).965 // Store handler reference for cleanup to prevent memory leaks966 let stderrHandler: ((data: Buffer) => void) | undefined967 let stderrOutput = ''968 if (serverRef.type === 'stdio' || !serverRef.type) {969 const stdioTransport = transport as StdioClientTransport970 if (stdioTransport.stderr) {971 stderrHandler = (data: Buffer) => {972 // Cap stderr accumulation to prevent unbounded memory growth973 if (stderrOutput.length < 64 * 1024 * 1024) {974 try {975 stderrOutput += data.toString()976 } catch {977 // Ignore errors from exceeding max string length978 }979 }980 }981 stdioTransport.stderr.on('data', stderrHandler)982 }983 }984985 const client = new Client(986 {987 name: 'claude-code',988 title: 'Claude Code',989 version: MACRO.VERSION ?? 'unknown',990 description: "Anthropic's agentic coding tool",991 websiteUrl: PRODUCT_URL,992 },993 {994 capabilities: {995 roots: {},996 // Empty object declares the capability. 
Sending {form:{},url:{}}997 // breaks Java MCP SDK servers (Spring AI) whose Elicitation class998 // has zero fields and fails on unknown properties.999 elicitation: {},1000 },1001 },1002 )10031004 // Add debug logging for client events if available1005 if (serverRef.type === 'http') {1006 logMCPDebug(name, `Client created, setting up request handler`)1007 }10081009 client.setRequestHandler(ListRootsRequestSchema, async () => {1010 logMCPDebug(name, `Received ListRoots request from server`)1011 return {1012 roots: [1013 {1014 uri: `file://${getOriginalCwd()}`,1015 },1016 ],1017 }1018 })10191020 // Add a timeout to connection attempts to prevent tests from hanging indefinitely1021 logMCPDebug(1022 name,1023 `Starting connection with timeout of ${getConnectionTimeoutMs()}ms`,1024 )10251026 // For HTTP transport, try a basic connectivity test first1027 if (serverRef.type === 'http') {1028 logMCPDebug(name, `Testing basic HTTP connectivity to ${serverRef.url}`)1029 try {1030 const testUrl = new URL(serverRef.url)1031 logMCPDebug(1032 name,1033 `Parsed URL: host=${testUrl.hostname}, port=${testUrl.port || 'default'}, protocol=${testUrl.protocol}`,1034 )10351036 // Log DNS resolution attempt1037 if (1038 testUrl.hostname === '127.0.0.1' ||1039 testUrl.hostname === 'localhost'1040 ) {1041 logMCPDebug(name, `Using loopback address: ${testUrl.hostname}`)1042 }1043 } catch (urlError) {1044 logMCPDebug(name, `Failed to parse URL: ${urlError}`)1045 }1046 }10471048 const connectPromise = client.connect(transport)1049 const timeoutPromise = new Promise<never>((_, reject) => {1050 const timeoutId = setTimeout(() => {1051 const elapsed = Date.now() - connectStartTime1052 logMCPDebug(1053 name,1054 `Connection timeout triggered after ${elapsed}ms (limit: ${getConnectionTimeoutMs()}ms)`,1055 )1056 if (inProcessServer) {1057 inProcessServer.close().catch(() => {})1058 }1059 transport.close().catch(() => {})1060 reject(1061 new 
TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(1062 `MCP server "${name}" connection timed out after ${getConnectionTimeoutMs()}ms`,1063 'MCP connection timeout',1064 ),1065 )1066 }, getConnectionTimeoutMs())10671068 // Clean up timeout if connect resolves or rejects1069 connectPromise.then(1070 () => {1071 clearTimeout(timeoutId)1072 },1073 _error => {1074 clearTimeout(timeoutId)1075 },1076 )1077 })10781079 try {1080 await Promise.race([connectPromise, timeoutPromise])1081 if (stderrOutput) {1082 logMCPError(name, `Server stderr: ${stderrOutput}`)1083 stderrOutput = '' // Release accumulated string to prevent memory growth1084 }1085 const elapsed = Date.now() - connectStartTime1086 logMCPDebug(1087 name,1088 `Successfully connected (transport: ${serverRef.type || 'stdio'}) in ${elapsed}ms`,1089 )1090 } catch (error) {1091 const elapsed = Date.now() - connectStartTime1092 // SSE-specific error logging1093 if (serverRef.type === 'sse' && error instanceof Error) {1094 logMCPDebug(1095 name,1096 `SSE Connection failed after ${elapsed}ms: ${jsonStringify({1097 url: serverRef.url,1098 error: error.message,1099 errorType: error.constructor.name,1100 stack: error.stack,1101 })}`,1102 )1103 logMCPError(name, error)11041105 if (error instanceof UnauthorizedError) {1106 return handleRemoteAuthFailure(name, serverRef, 'sse')1107 }1108 } else if (serverRef.type === 'http' && error instanceof Error) {1109 const errorObj = error as Error & {1110 cause?: unknown1111 code?: string1112 errno?: string | number1113 syscall?: string1114 }1115 logMCPDebug(1116 name,1117 `HTTP Connection failed after ${elapsed}ms: ${error.message} (code: ${errorObj.code || 'none'}, errno: ${errorObj.errno || 'none'})`,1118 )1119 logMCPError(name, error)11201121 if (error instanceof UnauthorizedError) {1122 return handleRemoteAuthFailure(name, serverRef, 'http')1123 }1124 } else if (1125 serverRef.type === 'claudeai-proxy' &&1126 error instanceof Error1127 ) {1128 logMCPDebug(1129 name,1130 
`claude.ai proxy connection failed after ${elapsed}ms: ${error.message}`,1131 )1132 logMCPError(name, error)11331134 // StreamableHTTPError has a `code` property with the HTTP status1135 const errorCode = (error as Error & { code?: number }).code1136 if (errorCode === 401) {1137 return handleRemoteAuthFailure(name, serverRef, 'claudeai-proxy')1138 }1139 } else if (1140 serverRef.type === 'sse-ide' ||1141 serverRef.type === 'ws-ide'1142 ) {1143 logEvent('tengu_mcp_ide_server_connection_failed', {1144 connectionDurationMs: elapsed,1145 })1146 }1147 if (inProcessServer) {1148 inProcessServer.close().catch(() => {})1149 }1150 transport.close().catch(() => {})1151 if (stderrOutput) {1152 logMCPError(name, `Server stderr: ${stderrOutput}`)1153 }1154 throw error1155 }11561157 const capabilities = client.getServerCapabilities()1158 const serverVersion = client.getServerVersion()1159 const rawInstructions = client.getInstructions()1160 let instructions = rawInstructions1161 if (1162 rawInstructions &&1163 rawInstructions.length > MAX_MCP_DESCRIPTION_LENGTH1164 ) {1165 instructions =1166 rawInstructions.slice(0, MAX_MCP_DESCRIPTION_LENGTH) + '… [truncated]'1167 logMCPDebug(1168 name,1169 `Server instructions truncated from ${rawInstructions.length} to ${MAX_MCP_DESCRIPTION_LENGTH} chars`,1170 )1171 }11721173 // Log successful connection details1174 logMCPDebug(1175 name,1176 `Connection established with capabilities: ${jsonStringify({1177 hasTools: !!capabilities?.tools,1178 hasPrompts: !!capabilities?.prompts,1179 hasResources: !!capabilities?.resources,1180 hasResourceSubscribe: !!capabilities?.resources?.subscribe,1181 serverVersion: serverVersion || 'unknown',1182 })}`,1183 )1184 logForDebugging(1185 `[MCP] Server "${name}" connected with subscribe=${!!capabilities?.resources?.subscribe}`,1186 )11871188 // Register default elicitation handler that returns cancel during the1189 // window before registerElicitationHandler overwrites it in1190 // onConnectionAttempt 
(useManageMCPConnections).1191 client.setRequestHandler(ElicitRequestSchema, async request => {1192 logMCPDebug(1193 name,1194 `Elicitation request received during initialization: ${jsonStringify(request)}`,1195 )1196 return { action: 'cancel' as const }1197 })11981199 if (serverRef.type === 'sse-ide' || serverRef.type === 'ws-ide') {1200 const ideConnectionDurationMs = Date.now() - connectStartTime1201 logEvent('tengu_mcp_ide_server_connection_succeeded', {1202 connectionDurationMs: ideConnectionDurationMs,1203 serverVersion:1204 serverVersion as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,1205 })1206 try {1207 void maybeNotifyIDEConnected(client)1208 } catch (error) {1209 logMCPError(1210 name,1211 `Failed to send ide_connected notification: ${error}`,1212 )1213 }1214 }12151216 // Enhanced connection drop detection and logging for all transport types1217 const connectionStartTime = Date.now()1218 let hasErrorOccurred = false12191220 // Store original handlers1221 const originalOnerror = client.onerror1222 const originalOnclose = client.onclose12231224 // The SDK's transport calls onerror on connection failures but doesn't call onclose,1225 // which CC uses to trigger reconnection. 
We bridge this gap by tracking consecutive1226 // terminal errors and manually closing after MAX_ERRORS_BEFORE_RECONNECT failures.1227 let consecutiveConnectionErrors = 01228 const MAX_ERRORS_BEFORE_RECONNECT = 312291230 // Guard against re-entry: close() aborts in-flight streams which may fire1231 // onerror again before the close chain completes.1232 let hasTriggeredClose = false12331234 // client.close() → transport.close() → transport.onclose → SDK's _onclose():1235 // rejects all pending request handlers (so hung callTool() promises fail with1236 // McpError -32000 "Connection closed") and then invokes our client.onclose1237 // handler below (which clears the memo cache so the next call reconnects).1238 // Calling client.onclose?.() directly would only clear the cache — pending1239 // tool calls would stay hung.1240 const closeTransportAndRejectPending = (reason: string) => {1241 if (hasTriggeredClose) return1242 hasTriggeredClose = true1243 logMCPDebug(name, `Closing transport (${reason})`)1244 void client.close().catch(e => {1245 logMCPDebug(name, `Error during close: ${errorMessage(e)}`)1246 })1247 }12481249 const isTerminalConnectionError = (msg: string): boolean => {1250 return (1251 msg.includes('ECONNRESET') ||1252 msg.includes('ETIMEDOUT') ||1253 msg.includes('EPIPE') ||1254 msg.includes('EHOSTUNREACH') ||1255 msg.includes('ECONNREFUSED') ||1256 msg.includes('Body Timeout Error') ||1257 msg.includes('terminated') ||1258 // SDK SSE reconnection intermediate errors — may be wrapped around the1259 // actual network error, so the substrings above won't match1260 msg.includes('SSE stream disconnected') ||1261 msg.includes('Failed to reconnect SSE stream')1262 )1263 }12641265 // Enhanced error handler with detailed logging1266 client.onerror = (error: Error) => {1267 const uptime = Date.now() - connectionStartTime1268 hasErrorOccurred = true1269 const transportType = serverRef.type || 'stdio'12701271 // Log the connection drop with context1272 
logMCPDebug(1273 name,1274 `${transportType.toUpperCase()} connection dropped after ${Math.floor(uptime / 1000)}s uptime`,1275 )12761277 // Log specific error details for debugging1278 if (error.message) {1279 if (error.message.includes('ECONNRESET')) {1280 logMCPDebug(1281 name,1282 `Connection reset - server may have crashed or restarted`,1283 )1284 } else if (error.message.includes('ETIMEDOUT')) {1285 logMCPDebug(1286 name,1287 `Connection timeout - network issue or server unresponsive`,1288 )1289 } else if (error.message.includes('ECONNREFUSED')) {1290 logMCPDebug(name, `Connection refused - server may be down`)1291 } else if (error.message.includes('EPIPE')) {1292 logMCPDebug(1293 name,1294 `Broken pipe - server closed connection unexpectedly`,1295 )1296 } else if (error.message.includes('EHOSTUNREACH')) {1297 logMCPDebug(name, `Host unreachable - network connectivity issue`)1298 } else if (error.message.includes('ESRCH')) {1299 logMCPDebug(1300 name,1301 `Process not found - stdio server process terminated`,1302 )1303 } else if (error.message.includes('spawn')) {1304 logMCPDebug(1305 name,1306 `Failed to spawn process - check command and permissions`,1307 )1308 } else {1309 logMCPDebug(name, `Connection error: ${error.message}`)1310 }1311 }13121313 // For HTTP transports, detect session expiry (404 + JSON-RPC -32001)1314 // and close the transport so pending tool calls reject and the next1315 // call reconnects with a fresh session ID.1316 if (1317 (transportType === 'http' || transportType === 'claudeai-proxy') &&1318 isMcpSessionExpiredError(error)1319 ) {1320 logMCPDebug(1321 name,1322 `MCP session expired (server returned 404 with session-not-found), triggering reconnection`,1323 )1324 closeTransportAndRejectPending('session expired')1325 if (originalOnerror) {1326 originalOnerror(error)1327 }1328 return1329 }13301331 // For remote transports (SSE/HTTP), track terminal connection errors1332 // and trigger reconnection via close if we see repeated 
failures.1333 if (1334 transportType === 'sse' ||1335 transportType === 'http' ||1336 transportType === 'claudeai-proxy'1337 ) {1338 // The SDK's StreamableHTTP transport fires this after exhausting its1339 // own SSE reconnect attempts (default maxRetries: 2) — but it never1340 // calls onclose, so pending callTool() promises hang indefinitely.1341 // This is the definitive "transport gave up" signal.1342 if (error.message.includes('Maximum reconnection attempts')) {1343 closeTransportAndRejectPending('SSE reconnection exhausted')1344 if (originalOnerror) {1345 originalOnerror(error)1346 }1347 return1348 }13491350 if (isTerminalConnectionError(error.message)) {1351 consecutiveConnectionErrors++1352 logMCPDebug(1353 name,1354 `Terminal connection error ${consecutiveConnectionErrors}/${MAX_ERRORS_BEFORE_RECONNECT}`,1355 )13561357 if (consecutiveConnectionErrors >= MAX_ERRORS_BEFORE_RECONNECT) {1358 consecutiveConnectionErrors = 01359 closeTransportAndRejectPending('max consecutive terminal errors')1360 }1361 } else {1362 // Non-terminal error (e.g., transient issue), reset counter1363 consecutiveConnectionErrors = 01364 }1365 }13661367 // Call original handler1368 if (originalOnerror) {1369 originalOnerror(error)1370 }1371 }13721373 // Enhanced close handler with connection drop context1374 client.onclose = () => {1375 const uptime = Date.now() - connectionStartTime1376 const transportType = serverRef.type ?? 'unknown'13771378 logMCPDebug(1379 name,1380 `${transportType.toUpperCase()} connection closed after ${Math.floor(uptime / 1000)}s (${hasErrorOccurred ? 'with errors' : 'cleanly'})`,1381 )13821383 // Clear the memoization cache so next operation reconnects1384 const key = getServerCacheKey(name, serverRef)13851386 // Also clear fetch caches (keyed by server name). 
Reconnection1387 // creates a new connection object; without clearing, the next1388 // fetch would return stale tools/resources from the old connection.1389 fetchToolsForClient.cache.delete(name)1390 fetchResourcesForClient.cache.delete(name)1391 fetchCommandsForClient.cache.delete(name)1392 if (feature('MCP_SKILLS')) {1393 fetchMcpSkillsForClient!.cache.delete(name)1394 }13951396 connectToServer.cache.delete(key)1397 logMCPDebug(name, `Cleared connection cache for reconnection`)13981399 if (originalOnclose) {1400 originalOnclose()1401 }1402 }14031404 const cleanup = async () => {1405 // In-process servers (e.g. Chrome MCP) don't have child processes or stderr1406 if (inProcessServer) {1407 try {1408 await inProcessServer.close()1409 } catch (error) {1410 logMCPDebug(name, `Error closing in-process server: ${error}`)1411 }1412 try {1413 await client.close()1414 } catch (error) {1415 logMCPDebug(name, `Error closing client: ${error}`)1416 }1417 return1418 }14191420 // Remove stderr event listener to prevent memory leaks1421 if (stderrHandler && (serverRef.type === 'stdio' || !serverRef.type)) {1422 const stdioTransport = transport as StdioClientTransport1423 stdioTransport.stderr?.off('data', stderrHandler)1424 }14251426 // For stdio transports, explicitly terminate the child process with proper signals1427 // NOTE: StdioClientTransport.close() only sends an abort signal, but many MCP servers1428 // (especially Docker containers) need explicit SIGINT/SIGTERM signals to trigger graceful shutdown1429 if (serverRef.type === 'stdio') {1430 try {1431 const stdioTransport = transport as StdioClientTransport1432 const childPid = stdioTransport.pid14331434 if (childPid) {1435 logMCPDebug(name, 'Sending SIGINT to MCP server process')14361437 // First try SIGINT (like Ctrl+C)1438 try {1439 process.kill(childPid, 'SIGINT')1440 } catch (error) {1441 logMCPDebug(name, `Error sending SIGINT: ${error}`)1442 return1443 }14441445 // Wait for graceful shutdown with rapid escalation 
(total 500ms to keep CLI responsive)1446 await new Promise<void>(async resolve => {1447 let resolved = false14481449 // Set up a timer to check if process still exists1450 const checkInterval = setInterval(() => {1451 try {1452 // process.kill(pid, 0) checks if process exists without killing it1453 process.kill(childPid, 0)1454 } catch {1455 // Process no longer exists1456 if (!resolved) {1457 resolved = true1458 clearInterval(checkInterval)1459 clearTimeout(failsafeTimeout)1460 logMCPDebug(name, 'MCP server process exited cleanly')1461 resolve()1462 }1463 }1464 }, 50)14651466 // Absolute failsafe: clear interval after 600ms no matter what1467 const failsafeTimeout = setTimeout(() => {1468 if (!resolved) {1469 resolved = true1470 clearInterval(checkInterval)1471 logMCPDebug(1472 name,1473 'Cleanup timeout reached, stopping process monitoring',1474 )1475 resolve()1476 }1477 }, 600)14781479 try {1480 // Wait 100ms for SIGINT to work (usually much faster)1481 await sleep(100)14821483 if (!resolved) {1484 // Check if process still exists1485 try {1486 process.kill(childPid, 0)1487 // Process still exists, SIGINT failed, try SIGTERM1488 logMCPDebug(1489 name,1490 'SIGINT failed, sending SIGTERM to MCP server process',1491 )1492 try {1493 process.kill(childPid, 'SIGTERM')1494 } catch (termError) {1495 logMCPDebug(name, `Error sending SIGTERM: ${termError}`)1496 resolved = true1497 clearInterval(checkInterval)1498 clearTimeout(failsafeTimeout)1499 resolve()1500 return1501 }1502 } catch {1503 // Process already exited1504 resolved = true1505 clearInterval(checkInterval)1506 clearTimeout(failsafeTimeout)1507 resolve()1508 return1509 }15101511 // Wait 400ms for SIGTERM to work (slower than SIGINT, often used for cleanup)1512 await sleep(400)15131514 if (!resolved) {1515 // Check if process still exists1516 try {1517 process.kill(childPid, 0)1518 // Process still exists, SIGTERM failed, force kill with SIGKILL1519 logMCPDebug(1520 name,1521 'SIGTERM failed, sending SIGKILL to 
MCP server process',1522 )1523 try {1524 process.kill(childPid, 'SIGKILL')1525 } catch (killError) {1526 logMCPDebug(1527 name,1528 `Error sending SIGKILL: ${killError}`,1529 )1530 }1531 } catch {1532 // Process already exited1533 resolved = true1534 clearInterval(checkInterval)1535 clearTimeout(failsafeTimeout)1536 resolve()1537 }1538 }1539 }15401541 // Final timeout - always resolve after 500ms max (total cleanup time)1542 if (!resolved) {1543 resolved = true1544 clearInterval(checkInterval)1545 clearTimeout(failsafeTimeout)1546 resolve()1547 }1548 } catch {1549 // Handle any errors in the escalation sequence1550 if (!resolved) {1551 resolved = true1552 clearInterval(checkInterval)1553 clearTimeout(failsafeTimeout)1554 resolve()1555 }1556 }1557 })1558 }1559 } catch (processError) {1560 logMCPDebug(name, `Error terminating process: ${processError}`)1561 }1562 }15631564 // Close the client connection (which also closes the transport)1565 try {1566 await client.close()1567 } catch (error) {1568 logMCPDebug(name, `Error closing client: ${error}`)1569 }1570 }15711572 // Register cleanup for all transport types - even network transports might need cleanup1573 // This ensures all MCP servers get properly terminated, not just stdio ones1574 const cleanupUnregister = registerCleanup(cleanup)15751576 // Create the wrapped cleanup that includes unregistering1577 const wrappedCleanup = async () => {1578 cleanupUnregister?.()1579 await cleanup()1580 }15811582 const connectionDurationMs = Date.now() - connectStartTime1583 logEvent('tengu_mcp_server_connection_succeeded', {1584 connectionDurationMs,1585 transportType: (serverRef.type ??1586 'stdio') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,1587 totalServers: serverStats?.totalServers,1588 stdioCount: serverStats?.stdioCount,1589 sseCount: serverStats?.sseCount,1590 httpCount: serverStats?.httpCount,1591 sseIdeCount: serverStats?.sseIdeCount,1592 wsIdeCount: serverStats?.wsIdeCount,1593 
...mcpBaseUrlAnalytics(serverRef),1594 })1595 return {1596 name,1597 client,1598 type: 'connected' as const,1599 capabilities: capabilities ?? {},1600 serverInfo: serverVersion,1601 instructions,1602 config: serverRef,1603 cleanup: wrappedCleanup,1604 }1605 } catch (error) {1606 const connectionDurationMs = Date.now() - connectStartTime1607 logEvent('tengu_mcp_server_connection_failed', {1608 connectionDurationMs,1609 totalServers: serverStats?.totalServers || 1,1610 stdioCount:1611 serverStats?.stdioCount || (serverRef.type === 'stdio' ? 1 : 0),1612 sseCount: serverStats?.sseCount || (serverRef.type === 'sse' ? 1 : 0),1613 httpCount:1614 serverStats?.httpCount || (serverRef.type === 'http' ? 1 : 0),1615 sseIdeCount:1616 serverStats?.sseIdeCount || (serverRef.type === 'sse-ide' ? 1 : 0),1617 wsIdeCount:1618 serverStats?.wsIdeCount || (serverRef.type === 'ws-ide' ? 1 : 0),1619 transportType: (serverRef.type ??1620 'stdio') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,1621 ...mcpBaseUrlAnalytics(serverRef),1622 })1623 logMCPDebug(1624 name,1625 `Connection failed after ${connectionDurationMs}ms: ${errorMessage(error)}`,1626 )1627 logMCPError(name, `Connection failed: ${errorMessage(error)}`)16281629 if (inProcessServer) {1630 inProcessServer.close().catch(() => {})1631 }1632 return {1633 name,1634 type: 'failed' as const,1635 config: serverRef,1636 error: errorMessage(error),1637 }1638 }1639 },1640 getServerCacheKey,1641)16421643/**1644 * Clears the memoize cache for a specific server1645 * @param name Server name1646 * @param serverRef Server configuration1647 */1648export async function clearServerCache(1649 name: string,1650 serverRef: ScopedMcpServerConfig,1651): Promise<void> {1652 const key = getServerCacheKey(name, serverRef)16531654 try {1655 const wrappedClient = await connectToServer(name, serverRef)16561657 if (wrappedClient.type === 'connected') {1658 await wrappedClient.cleanup()1659 }1660 } catch {1661 // Ignore errors - server might 
have failed to connect1662 }16631664 // Clear from cache (both connection and fetch caches so reconnect1665 // fetches fresh tools/resources/commands instead of stale ones)1666 connectToServer.cache.delete(key)1667 fetchToolsForClient.cache.delete(name)1668 fetchResourcesForClient.cache.delete(name)1669 fetchCommandsForClient.cache.delete(name)1670 if (feature('MCP_SKILLS')) {1671 fetchMcpSkillsForClient!.cache.delete(name)1672 }1673}16741675/**1676 * Ensures a valid connected client for an MCP server.1677 * For most server types, uses the memoization cache if available, or reconnects1678 * if the cache was cleared (e.g., after onclose). This ensures tool/resource1679 * calls always use a valid connection.1680 *1681 * SDK MCP servers run in-process and are handled separately via setupSdkMcpClients,1682 * so they are returned as-is without going through connectToServer.1683 *1684 * @param client The connected MCP server client1685 * @returns Connected MCP server client (same or reconnected)1686 * @throws Error if server cannot be connected1687 */1688export async function ensureConnectedClient(1689 client: ConnectedMCPServer,1690): Promise<ConnectedMCPServer> {1691 // SDK MCP servers run in-process and are handled separately via setupSdkMcpClients1692 if (client.config.type === 'sdk') {1693 return client1694 }16951696 const connectedClient = await connectToServer(client.name, client.config)1697 if (connectedClient.type !== 'connected') {1698 throw new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(1699 `MCP server "${client.name}" is not connected`,1700 'MCP server not connected',1701 )1702 }1703 return connectedClient1704}17051706/**1707 * Compares two MCP server configurations to determine if they are equivalent.1708 * Used to detect when a server needs to be reconnected due to config changes.1709 */1710export function areMcpConfigsEqual(1711 a: ScopedMcpServerConfig,1712 b: ScopedMcpServerConfig,1713): boolean {1714 // Quick type check first1715 if 
(a.type !== b.type) return false17161717 // Compare by serializing - this handles all config variations1718 // We exclude 'scope' from comparison since it's metadata, not connection config1719 const { scope: _scopeA, ...configA } = a1720 const { scope: _scopeB, ...configB } = b1721 return jsonStringify(configA) === jsonStringify(configB)1722}17231724// Max cache size for fetch* caches. Keyed by server name (stable across1725// reconnects), bounded to prevent unbounded growth with many MCP servers.1726const MCP_FETCH_CACHE_SIZE = 2017271728/**1729 * Encode MCP tool input for the auto-mode security classifier.1730 * Exported so the auto-mode eval scripts can mirror production encoding1731 * for `mcp__*` tool stubs without duplicating this logic.1732 */1733export function mcpToolInputToAutoClassifierInput(1734 input: Record<string, unknown>,1735 toolName: string,1736): string {1737 const keys = Object.keys(input)1738 return keys.length > 01739 ? keys.map(k => `${k}=${String(input[k])}`).join(' ')1740 : toolName1741}17421743export const fetchToolsForClient = memoizeWithLRU(1744 async (client: MCPServerConnection): Promise<Tool[]> => {1745 if (client.type !== 'connected') return []17461747 try {1748 if (!client.capabilities?.tools) {1749 return []1750 }17511752 const result = (await client.client.request(1753 { method: 'tools/list' },1754 ListToolsResultSchema,1755 )) as ListToolsResult17561757 // Sanitize tool data from MCP server1758 const toolsToProcess = recursivelySanitizeUnicode(result.tools)17591760 // Check if we should skip the mcp__ prefix for SDK MCP servers1761 const skipPrefix =1762 client.config.type === 'sdk' &&1763 isEnvTruthy(process.env.CLAUDE_AGENT_SDK_MCP_NO_PREFIX)17641765 // Convert MCP tools to our Tool format1766 return toolsToProcess1767 .map((tool): Tool => {1768 const fullyQualifiedName = buildMcpToolName(client.name, tool.name)1769 return {1770 ...MCPTool,1771 // In skip-prefix mode, use the original name for model invocation so MCP tools1772 
// can override builtins by name. mcpInfo is used for permission checking.1773 name: skipPrefix ? tool.name : fullyQualifiedName,1774 mcpInfo: { serverName: client.name, toolName: tool.name },1775 isMcp: true,1776 // Collapse whitespace: _meta is open to external MCP servers, and1777 // a newline here would inject orphan lines into the deferred-tool1778 // list (formatDeferredToolLine joins on '\n').1779 searchHint:1780 typeof tool._meta?.['anthropic/searchHint'] === 'string'1781 ? tool._meta['anthropic/searchHint']1782 .replace(/\s+/g, ' ')1783 .trim() || undefined1784 : undefined,1785 alwaysLoad: tool._meta?.['anthropic/alwaysLoad'] === true,1786 async description() {1787 return tool.description ?? ''1788 },1789 async prompt() {1790 const desc = tool.description ?? ''1791 return desc.length > MAX_MCP_DESCRIPTION_LENGTH1792 ? desc.slice(0, MAX_MCP_DESCRIPTION_LENGTH) + '… [truncated]'1793 : desc1794 },1795 isConcurrencySafe() {1796 return tool.annotations?.readOnlyHint ?? false1797 },1798 isReadOnly() {1799 return tool.annotations?.readOnlyHint ?? false1800 },1801 toAutoClassifierInput(input) {1802 return mcpToolInputToAutoClassifierInput(input, tool.name)1803 },1804 isDestructive() {1805 return tool.annotations?.destructiveHint ?? false1806 },1807 isOpenWorld() {1808 return tool.annotations?.openWorldHint ?? 
false1809 },1810 isSearchOrReadCommand() {1811 return classifyMcpToolForCollapse(client.name, tool.name)1812 },1813 inputJSONSchema: tool.inputSchema as Tool['inputJSONSchema'],1814 async checkPermissions() {1815 return {1816 behavior: 'passthrough' as const,1817 message: 'MCPTool requires permission.',1818 suggestions: [1819 {1820 type: 'addRules' as const,1821 rules: [1822 {1823 toolName: fullyQualifiedName,1824 ruleContent: undefined,1825 },1826 ],1827 behavior: 'allow' as const,1828 destination: 'localSettings' as const,1829 },1830 ],1831 }1832 },1833 async call(1834 args: Record<string, unknown>,1835 context,1836 _canUseTool,1837 parentMessage,1838 onProgress?: ToolCallProgress<MCPProgress>,1839 ) {1840 const toolUseId = extractToolUseId(parentMessage)1841 const meta = toolUseId1842 ? { 'claudecode/toolUseId': toolUseId }1843 : {}18441845 // Emit progress when tool starts1846 if (onProgress && toolUseId) {1847 onProgress({1848 toolUseID: toolUseId,1849 data: {1850 type: 'mcp_progress',1851 status: 'started',1852 serverName: client.name,1853 toolName: tool.name,1854 },1855 })1856 }18571858 const startTime = Date.now()1859 const MAX_SESSION_RETRIES = 11860 for (let attempt = 0; ; attempt++) {1861 try {1862 const connectedClient = await ensureConnectedClient(client)1863 const mcpResult = await callMCPToolWithUrlElicitationRetry({1864 client: connectedClient,1865 clientConnection: client,1866 tool: tool.name,1867 args,1868 meta,1869 signal: context.abortController.signal,1870 setAppState: context.setAppState,1871 onProgress:1872 onProgress && toolUseId1873 ? 
progressData => {1874 onProgress({1875 toolUseID: toolUseId,1876 data: progressData,1877 })1878 }1879 : undefined,1880 handleElicitation: context.handleElicitation,1881 })18821883 // Emit progress when tool completes successfully1884 if (onProgress && toolUseId) {1885 onProgress({1886 toolUseID: toolUseId,1887 data: {1888 type: 'mcp_progress',1889 status: 'completed',1890 serverName: client.name,1891 toolName: tool.name,1892 elapsedTimeMs: Date.now() - startTime,1893 },1894 })1895 }18961897 return {1898 data: mcpResult.content,1899 ...((mcpResult._meta || mcpResult.structuredContent) && {1900 mcpMeta: {1901 ...(mcpResult._meta && {1902 _meta: mcpResult._meta,1903 }),1904 ...(mcpResult.structuredContent && {1905 structuredContent: mcpResult.structuredContent,1906 }),1907 },1908 }),1909 }1910 } catch (error) {1911 // Session expired — the connection cache has been1912 // cleared, so retry with a fresh client.1913 if (1914 error instanceof McpSessionExpiredError &&1915 attempt < MAX_SESSION_RETRIES1916 ) {1917 logMCPDebug(1918 client.name,1919 `Retrying tool '${tool.name}' after session recovery`,1920 )1921 continue1922 }19231924 // Emit progress when tool fails1925 if (onProgress && toolUseId) {1926 onProgress({1927 toolUseID: toolUseId,1928 data: {1929 type: 'mcp_progress',1930 status: 'failed',1931 serverName: client.name,1932 toolName: tool.name,1933 elapsedTimeMs: Date.now() - startTime,1934 },1935 })1936 }1937 // Wrap MCP SDK errors so telemetry gets useful context1938 // instead of just "Error" or "McpError" (the constructor1939 // name). 
MCP SDK errors are protocol-level messages and1940 // don't contain user file paths or code.1941 if (1942 error instanceof Error &&1943 !(1944 error instanceof1945 TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS1946 )1947 ) {1948 const name = error.constructor.name1949 if (name === 'Error') {1950 throw new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(1951 error.message,1952 error.message.slice(0, 200),1953 )1954 }1955 // McpError has a numeric `code` with the JSON-RPC error1956 // code (e.g. -32000 ConnectionClosed, -32001 RequestTimeout)1957 if (1958 name === 'McpError' &&1959 'code' in error &&1960 typeof error.code === 'number'1961 ) {1962 throw new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(1963 error.message,1964 `McpError ${error.code}`,1965 )1966 }1967 }1968 throw error1969 }1970 }1971 },1972 userFacingName() {1973 // Prefer title annotation if available, otherwise use tool name1974 const displayName = tool.annotations?.title || tool.name1975 return `${client.name} - ${displayName} (MCP)`1976 },1977 ...(isClaudeInChromeMCPServer(client.name) &&1978 (client.config.type === 'stdio' || !client.config.type)1979 ? claudeInChromeToolRendering().getClaudeInChromeMCPToolOverrides(1980 tool.name,1981 )1982 : {}),1983 ...(feature('CHICAGO_MCP') &&1984 (client.config.type === 'stdio' || !client.config.type) &&1985 isComputerUseMCPServer!(client.name)1986 ? 
computerUseWrapper!().getComputerUseMCPToolOverrides(tool.name)1987 : {}),1988 }1989 })1990 .filter(isIncludedMcpTool)1991 } catch (error) {1992 logMCPError(client.name, `Failed to fetch tools: ${errorMessage(error)}`)1993 return []1994 }1995 },1996 (client: MCPServerConnection) => client.name,1997 MCP_FETCH_CACHE_SIZE,1998)19992000export const fetchResourcesForClient = memoizeWithLRU(2001 async (client: MCPServerConnection): Promise<ServerResource[]> => {2002 if (client.type !== 'connected') return []20032004 try {2005 if (!client.capabilities?.resources) {2006 return []2007 }20082009 const result = await client.client.request(2010 { method: 'resources/list' },2011 ListResourcesResultSchema,2012 )20132014 if (!result.resources) return []20152016 // Add server name to each resource2017 return result.resources.map(resource => ({2018 ...resource,2019 server: client.name,2020 }))2021 } catch (error) {2022 logMCPError(2023 client.name,2024 `Failed to fetch resources: ${errorMessage(error)}`,2025 )2026 return []2027 }2028 },2029 (client: MCPServerConnection) => client.name,2030 MCP_FETCH_CACHE_SIZE,2031)20322033export const fetchCommandsForClient = memoizeWithLRU(2034 async (client: MCPServerConnection): Promise<Command[]> => {2035 if (client.type !== 'connected') return []20362037 try {2038 if (!client.capabilities?.prompts) {2039 return []2040 }20412042 // Request prompts list from client2043 const result = (await client.client.request(2044 { method: 'prompts/list' },2045 ListPromptsResultSchema,2046 )) as ListPromptsResult20472048 if (!result.prompts) return []20492050 // Sanitize prompt data from MCP server2051 const promptsToProcess = recursivelySanitizeUnicode(result.prompts)20522053 // Convert MCP prompts to our Command format2054 return promptsToProcess.map(prompt => {2055 const argNames = Object.values(prompt.arguments ?? 
{}).map(k => k.name)2056 return {2057 type: 'prompt' as const,2058 name: 'mcp__' + normalizeNameForMCP(client.name) + '__' + prompt.name,2059 description: prompt.description ?? '',2060 hasUserSpecifiedDescription: !!prompt.description,2061 contentLength: 0, // Dynamic MCP content2062 isEnabled: () => true,2063 isHidden: false,2064 isMcp: true,2065 progressMessage: 'running',2066 userFacingName() {2067 // Use prompt.name (programmatic identifier) not prompt.title (display name)2068 // to avoid spaces breaking slash command parsing2069 return `${client.name}:${prompt.name} (MCP)`2070 },2071 argNames,2072 source: 'mcp',2073 async getPromptForCommand(args: string) {2074 const argsArray = args.split(' ')2075 try {2076 const connectedClient = await ensureConnectedClient(client)2077 const result = await connectedClient.client.getPrompt({2078 name: prompt.name,2079 arguments: zipObject(argNames, argsArray),2080 })2081 const transformed = await Promise.all(2082 result.messages.map(message =>2083 transformResultContent(message.content, connectedClient.name),2084 ),2085 )2086 return transformed.flat()2087 } catch (error) {2088 logMCPError(2089 client.name,2090 `Error running command '${prompt.name}': ${errorMessage(error)}`,2091 )2092 throw error2093 }2094 },2095 }2096 })2097 } catch (error) {2098 logMCPError(2099 client.name,2100 `Failed to fetch commands: ${errorMessage(error)}`,2101 )2102 return []2103 }2104 },2105 (client: MCPServerConnection) => client.name,2106 MCP_FETCH_CACHE_SIZE,2107)21082109/**2110 * Call an IDE tool directly as an RPC2111 * @param toolName The name of the tool to call2112 * @param args The arguments to pass to the tool2113 * @param client The IDE client to use for the RPC call2114 * @returns The result of the tool call2115 */2116export async function callIdeRpc(2117 toolName: string,2118 args: Record<string, unknown>,2119 client: ConnectedMCPServer,2120): Promise<string | ContentBlockParam[] | undefined> {2121 const result = await callMCPTool({2122 
client,2123 tool: toolName,2124 args,2125 signal: createAbortController().signal,2126 })2127 return result.content2128}21292130/**2131 * Note: This should not be called by UI components directly, they should use the reconnectMcpServer2132 * function from useManageMcpConnections.2133 * @param name Server name2134 * @param config Server configuration2135 * @returns Object containing the client connection and its resources2136 */2137export async function reconnectMcpServerImpl(2138 name: string,2139 config: ScopedMcpServerConfig,2140): Promise<{2141 client: MCPServerConnection2142 tools: Tool[]2143 commands: Command[]2144 resources?: ServerResource[]2145}> {2146 try {2147 // Invalidate the keychain cache so we read fresh credentials from disk.2148 // This is necessary when another process (e.g. the VS Code extension host)2149 // has modified stored tokens (cleared auth, saved new OAuth tokens) and then2150 // asks the CLI subprocess to reconnect. Without this, the subprocess would2151 // use stale cached data and never notice the tokens were removed.2152 clearKeychainCache()21532154 await clearServerCache(name, config)2155 const client = await connectToServer(name, config)21562157 if (client.type !== 'connected') {2158 return {2159 client,2160 tools: [],2161 commands: [],2162 }2163 }21642165 if (config.type === 'claudeai-proxy') {2166 markClaudeAiMcpConnected(name)2167 }21682169 const supportsResources = !!client.capabilities?.resources21702171 const [tools, mcpCommands, mcpSkills, resources] = await Promise.all([2172 fetchToolsForClient(client),2173 fetchCommandsForClient(client),2174 feature('MCP_SKILLS') && supportsResources2175 ? fetchMcpSkillsForClient!(client)2176 : Promise.resolve([]),2177 supportsResources ? 
fetchResourcesForClient(client) : Promise.resolve([]),2178 ])2179 const commands = [...mcpCommands, ...mcpSkills]21802181 // Check if we need to add resource tools2182 const resourceTools: Tool[] = []2183 if (supportsResources) {2184 // Only add resource tools if no other server has them2185 const hasResourceTools = [ListMcpResourcesTool, ReadMcpResourceTool].some(2186 tool => tools.some(t => toolMatchesName(t, tool.name)),2187 )2188 if (!hasResourceTools) {2189 resourceTools.push(ListMcpResourcesTool, ReadMcpResourceTool)2190 }2191 }21922193 return {2194 client,2195 tools: [...tools, ...resourceTools],2196 commands,2197 resources: resources.length > 0 ? resources : undefined,2198 }2199 } catch (error) {2200 // Handle errors gracefully - connection might have closed during fetch2201 logMCPError(name, `Error during reconnection: ${errorMessage(error)}`)22022203 // Return with failed status2204 return {2205 client: { name, type: 'failed' as const, config },2206 tools: [],2207 commands: [],2208 }2209 }2210}22112212// Replaced 2026-03: previous implementation ran fixed-size sequential batches2213// (await batch 1 fully, then start batch 2). That meant one slow server in2214// batch N held up ALL servers in batch N+1, even if the other 19 slots were2215// idle. pMap frees each slot as soon as its server completes, so a single2216// slow server only occupies one slot instead of blocking an entire batch2217// boundary. 
// Same concurrency ceiling, same results, better scheduling.
/**
 * Runs `processor` once per item through pMap, with at most `concurrency`
 * invocations in flight at a time.
 *
 * @param items Work items to process.
 * @param concurrency Forwarded to pMap's `concurrency` option.
 * @param processor Async handler invoked for each item.
 */
async function processBatched<T>(
  items: T[],
  concurrency: number,
  processor: (item: T) => Promise<void>,
): Promise<void> {
  await pMap(items, processor, { concurrency })
}

/**
 * Connects to the configured MCP servers and reports each server's outcome
 * through `onConnectionAttempt`.
 *
 * Disabled servers are reported immediately and never connected. The rest are
 * split into local and remote groups that are processed concurrently, each
 * group with its own concurrency limit. For every connected server the tools,
 * commands, skills (behind the `MCP_SKILLS` feature) and resources are fetched
 * in parallel. The shared resource tools are attached to the first server
 * that supports resources.
 *
 * @param onConnectionAttempt Receives the client state plus whatever was
 *   fetched for it; called for each config entry.
 * @param mcpConfigs Servers to process. Defaults to `getAllMcpConfigs()`.
 * @returns Resolves once both server groups have been processed.
 */
export async function getMcpToolsCommandsAndResources(
  onConnectionAttempt: (params: {
    client: MCPServerConnection
    tools: Tool[]
    commands: Command[]
    resources?: ServerResource[]
  }) => void,
  mcpConfigs?: Record<string, ScopedMcpServerConfig>,
): Promise<void> {
  let resourceToolsAdded = false

  const allConfigEntries = Object.entries(
    mcpConfigs ?? (await getAllMcpConfigs()).servers,
  )

  // Partition into disabled and active entries — disabled servers should
  // never generate HTTP connections or flow through batch processing
  const configEntries: typeof allConfigEntries = []
  for (const entry of allConfigEntries) {
    if (isMcpServerDisabled(entry[0])) {
      onConnectionAttempt({
        client: { name: entry[0], type: 'disabled', config: entry[1] },
        tools: [],
        commands: [],
      })
    } else {
      configEntries.push(entry)
    }
  }

  // Calculate transport counts for logging
  const totalServers = configEntries.length
  // A config without an explicit `type` is a stdio server (the tool-override
  // checks in this file use the same `=== 'stdio' || !type` test), so count
  // it as one instead of dropping it from the stats.
  const stdioCount = count(
    configEntries,
    ([_, c]) => c.type === 'stdio' || !c.type,
  )
  const sseCount = count(configEntries, ([_, c]) => c.type === 'sse')
  const httpCount = count(configEntries, ([_, c]) => c.type === 'http')
  const sseIdeCount = count(configEntries, ([_, c]) => c.type === 'sse-ide')
  const wsIdeCount = count(configEntries, ([_, c]) => c.type === 'ws-ide')

  // Split servers by type: local (stdio/sdk) need lower concurrency due to
  // process spawning, remote servers can connect with higher concurrency
  const localServers = configEntries.filter(([_, config]) =>
    isLocalMcpServer(config),
  )
  const remoteServers = configEntries.filter(
    ([_, config]) => !isLocalMcpServer(config),
  )

  const serverStats = {
    totalServers,
    stdioCount,
    sseCount,
    httpCount,
    sseIdeCount,
    wsIdeCount,
  }

  const processServer = async ([name, config]: [
    string,
    ScopedMcpServerConfig,
  ]): Promise<void> => {
    try {
      // Check if server is disabled - if so, just add it to state without connecting.
      // (Disabled entries were already filtered out above; this is a second check.)
      if (isMcpServerDisabled(name)) {
        onConnectionAttempt({
          client: {
            name,
            type: 'disabled',
            config,
          },
          tools: [],
          commands: [],
        })
        return
      }

      // Skip connection for servers that recently returned 401 (15min TTL),
      // or that we have probed before but hold no token for. The second
      // check closes the gap the TTL leaves open: without it, every 15min
      // we re-probe servers that cannot succeed until the user runs /mcp.
      // Each probe is a network round-trip for connect-401 plus OAuth
      // discovery, and print mode awaits the whole batch (main.tsx:3503).
      if (
        (config.type === 'claudeai-proxy' ||
          config.type === 'http' ||
          config.type === 'sse') &&
        ((await isMcpAuthCached(name)) ||
          ((config.type === 'http' || config.type === 'sse') &&
            hasMcpDiscoveryButNoToken(name, config)))
      ) {
        logMCPDebug(name, `Skipping connection (cached needs-auth)`)
        onConnectionAttempt({
          client: { name, type: 'needs-auth' as const, config },
          tools: [createMcpAuthTool(name, config)],
          commands: [],
        })
        return
      }

      const client = await connectToServer(name, config, serverStats)

      if (client.type !== 'connected') {
        onConnectionAttempt({
          client,
          tools:
            client.type === 'needs-auth'
              ? [createMcpAuthTool(name, config)]
              : [],
          commands: [],
        })
        return
      }

      if (config.type === 'claudeai-proxy') {
        markClaudeAiMcpConnected(name)
      }

      const supportsResources = !!client.capabilities?.resources

      const [tools, mcpCommands, mcpSkills, resources] = await Promise.all([
        fetchToolsForClient(client),
        fetchCommandsForClient(client),
        // Discover skills from skill:// resources
        feature('MCP_SKILLS') && supportsResources
          ? fetchMcpSkillsForClient!(client)
          : Promise.resolve([]),
        // Fetch resources if supported
        supportsResources
          ? fetchResourcesForClient(client)
          : Promise.resolve([]),
      ])
      const commands = [...mcpCommands, ...mcpSkills]

      // If this server supports resources and we haven't added resource tools
      // yet, include our resource tools with this client's tools
      const resourceTools: Tool[] = []
      if (supportsResources && !resourceToolsAdded) {
        resourceToolsAdded = true
        resourceTools.push(ListMcpResourcesTool, ReadMcpResourceTool)
      }

      onConnectionAttempt({
        client,
        tools: [...tools, ...resourceTools],
        commands,
        resources: resources.length > 0 ? resources : undefined,
      })
    } catch (error) {
      // Handle errors gracefully - connection might have closed during fetch
      logMCPError(
        name,
        `Error fetching tools/commands/resources: ${errorMessage(error)}`,
      )

      // Still update with the client but no tools/commands
      onConnectionAttempt({
        client: { name, type: 'failed' as const, config },
        tools: [],
        commands: [],
      })
    }
  }

  // Process both groups concurrently, each with their own concurrency limits:
  // - Local servers (stdio/sdk): lower concurrency to avoid process spawning resource contention
  // - Remote servers: higher concurrency since they're just network connections
  await Promise.all([
    processBatched(
      localServers,
      getMcpServerConnectionBatchSize(),
      processServer,
    ),
    processBatched(
      remoteServers,
      getRemoteMcpServerConnectionBatchSize(),
      processServer,
    ),
  ])
}

// Not memoized: called only 2-3 times at startup/reconfig. The inner work
// (connectToServer, fetch*ForClient) is already cached.
// Memoizing here by mcpConfigs object ref leaked — main.tsx creates fresh
// config objects each call.
/**
 * Connects to every server in `mcpConfigs` and collects the resulting
 * clients, tools and commands into flat lists.
 *
 * Completion is taken from the promise returned by
 * `getMcpToolsCommandsAndResources` rather than from counting callbacks, so
 * the result settles exactly when the connection pass does.
 *
 * @param mcpConfigs Servers to connect to, keyed by server name.
 * @returns The collected clients, tools and commands. Empty lists when no
 *   servers are configured or when the connection pass rejects (the failure
 *   is logged, not thrown).
 */
export async function prefetchAllMcpResources(
  mcpConfigs: Record<string, ScopedMcpServerConfig>,
): Promise<{
  clients: MCPServerConnection[]
  tools: Tool[]
  commands: Command[]
}> {
  // Nothing configured: skip the connection pass and its telemetry event.
  if (Object.keys(mcpConfigs).length === 0) {
    return { clients: [], tools: [], commands: [] }
  }

  const clients: MCPServerConnection[] = []
  const tools: Tool[] = []
  const commands: Command[] = []

  try {
    await getMcpToolsCommandsAndResources(result => {
      clients.push(result.client)
      tools.push(...result.tools)
      commands.push(...result.commands)
    }, mcpConfigs)
  } catch (error) {
    logMCPError(
      'prefetchAllMcpResources',
      `Failed to get MCP resources: ${errorMessage(error)}`,
    )
    // Still resolve with empty results
    return { clients: [], tools: [], commands: [] }
  }

  const commandsMetadataLength = commands.reduce(
    (sum, command) =>
      sum +
      command.name.length +
      (command.description ?? '').length +
      (command.argumentHint ?? '').length,
    0,
  )
  logEvent('tengu_mcp_tools_commands_loaded', {
    tools_count: tools.length,
    commands_count: commands.length,
    commands_metadata_length: commandsMetadataLength,
  })

  return { clients, tools, commands }
}

/**
 * Resizes and compresses a base64-encoded image (enforcing API dimension
 * limits) and wraps the result in an image content block.
 *
 * @param base64Data Base64-encoded image bytes.
 * @param mimeType MIME type reported for the image; its sub-type is passed to
 *   the resizer as the extension, defaulting to `png`.
 */
async function toResizedImageBlock(
  base64Data: string,
  mimeType: string | undefined,
): Promise<ContentBlockParam> {
  const imageBuffer = Buffer.from(base64Data, 'base64')
  const ext = mimeType?.split('/')[1] || 'png'
  const resized = await maybeResizeAndDownsampleImageBuffer(
    imageBuffer,
    imageBuffer.length,
    ext,
  )
  return {
    type: 'image',
    source: {
      data: resized.buffer.toString('base64'),
      media_type:
        `image/${resized.mediaType}` as Base64ImageSource['media_type'],
      type: 'base64',
    },
  }
}

/**
 * Transform result content from an MCP tool or MCP prompt into message blocks.
 *
 * - `text` passes through as a text block.
 * - `audio` and non-image resource blobs are persisted via
 *   `persistBlobToTextBlock` and replaced by its text block.
 * - `image` and image resource blobs are resized and returned as image blocks
 *   (resource images are preceded by a text block naming their origin).
 * - `resource` text and `resource_link` become descriptive text blocks.
 * - Any other content type yields no blocks.
 *
 * @param resultContent A single content item from the MCP response.
 * @param serverName Name of the server that produced the content; used in
 *   the generated prefixes.
 */
export async function transformResultContent(
  resultContent: PromptMessage['content'],
  serverName: string,
): Promise<Array<ContentBlockParam>> {
  switch (resultContent.type) {
    case 'text':
      return [
        {
          type: 'text',
          text: resultContent.text,
        },
      ]
    case 'audio': {
      const audioData = resultContent as {
        type: 'audio'
        data: string
        mimeType?: string
      }
      return await persistBlobToTextBlock(
        Buffer.from(audioData.data, 'base64'),
        audioData.mimeType,
        serverName,
        `[Audio from ${serverName}] `,
      )
    }
    case 'image':
      return [
        await toResizedImageBlock(
          String(resultContent.data),
          resultContent.mimeType,
        ),
      ]
    case 'resource': {
      const resource = resultContent.resource
      const prefix = `[Resource from ${serverName} at ${resource.uri}] `

      if ('text' in resource) {
        return [
          {
            type: 'text',
            text: `${prefix}${resource.text}`,
          },
        ]
      }
      if ('blob' in resource) {
        if (IMAGE_MIME_TYPES.has(resource.mimeType ?? '')) {
          // Name the origin first, then the resized image itself.
          return [
            { type: 'text', text: prefix },
            await toResizedImageBlock(resource.blob, resource.mimeType),
          ]
        }
        return await persistBlobToTextBlock(
          Buffer.from(resource.blob, 'base64'),
          resource.mimeType,
          serverName,
          prefix,
        )
      }
      return []
    }
    case 'resource_link': {
      const resourceLink = resultContent as ResourceLink
      let text = `[Resource link: ${resourceLink.name}] ${resourceLink.uri}`
      if (resourceLink.description) {
        text += ` (${resourceLink.description})`
      }
      return [
        {
          type: 'text',
          text,
        },
      ]
    }
    default:
      return []
  }
}

/**
 * Decode base64 binary content, write it to disk with the proper extension,
 * and return a small text block with the file path.
/**
 * Persists binary MCP content to disk via `persistBinaryContent` and returns
 * a single text block describing the outcome. Replaces the old behavior
 * of dumping raw base64 into the context.
 *
 * @param bytes Binary payload to persist.
 * @param mimeType MIME type reported for the payload, if any.
 * @param serverName Server the content came from; part of the persist ID.
 * @param sourceDescription Prefix identifying the content's origin in the
 *   returned text.
 * @returns One text block: the saved-file message on success, or a message
 *   with the persist error when saving failed.
 */
async function persistBlobToTextBlock(
  bytes: Buffer,
  mimeType: string | undefined,
  serverName: string,
  sourceDescription: string,
): Promise<Array<ContentBlockParam>> {
  const persistId = `mcp-${normalizeNameForMCP(serverName)}-blob-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
  const result = await persistBinaryContent(bytes, mimeType, persistId)

  if ('error' in result) {
    return [
      {
        type: 'text',
        text: `${sourceDescription}Binary content (${mimeType || 'unknown type'}, ${bytes.length} bytes) could not be saved to disk: ${result.error}`,
      },
    ]
  }

  return [
    {
      type: 'text',
      text: getBinaryBlobSavedMessage(
        result.filepath,
        mimeType,
        result.size,
        sourceDescription,
      ),
    },
  ]
}

/** Which shape of raw MCP tool result `transformMCPResult` recognized. */
export type MCPResultType = 'toolResult' | 'structuredContent' | 'contentArray'

/** An MCP tool result after normalization by `transformMCPResult`. */
export type TransformedMCPResult = {
  content: MCPToolResult
  type: MCPResultType
  schema?: string
}

/**
 * Generates a compact, jq-friendly type signature for a value.
 * e.g. `{title: "t", items: [{id: 1}]}` gives `"{title: string, items: [{...}]}"`
 * at the default depth, and `"{title: string, items: [{id: number}]}"` at depth 3.
 *
 * Arrays are described by their first element only. Objects list at most
 * their first 10 keys, followed by `, ...` when there are more. Each level of
 * nesting (array or object) consumes one unit of `depth`; an object reached
 * with no depth left is rendered as `{...}`.
 *
 * @param value Value to describe.
 * @param depth Remaining nesting levels to expand (default 2).
 */
export function inferCompactSchema(value: unknown, depth = 2): string {
  if (value === null) return 'null'
  if (Array.isArray(value)) {
    if (value.length === 0) return '[]'
    return `[${inferCompactSchema(value[0], depth - 1)}]`
  }
  if (typeof value === 'object') {
    if (depth <= 0) return '{...}'
    const allEntries = Object.entries(value)
    const props = allEntries
      .slice(0, 10)
      .map(([k, v]) => `${k}: ${inferCompactSchema(v, depth - 1)}`)
    const suffix = allEntries.length > 10 ? ', ...' : ''
    return `{${props.join(', ')}${suffix}}`
  }
  return typeof value
}

/**
 * Processes MCP tool result into a normalized format.
 *
 * Recognizes, in order: a legacy `toolResult` field, a defined
 * `structuredContent` field (serialized to JSON), and a `content` array (each
 * item run through `transformResultContent`).
 *
 * @param result Raw result returned by the MCP server.
 * @param tool Tool name, used in the error message (e.g., "search").
 * @param name Server name, used for transformation and logging (e.g., "slack").
 * @throws TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS when the
 *   result matches none of the recognized shapes.
 */
export async function transformMCPResult(
  result: unknown,
  tool: string, // Tool name for validation (e.g., "search")
  name: string, // Server name for transformation (e.g., "slack")
): Promise<TransformedMCPResult> {
  if (result && typeof result === 'object') {
    if ('toolResult' in result) {
      const { toolResult } = result
      return {
        // Serialize structured values; String() would collapse them to
        // "[object Object]". Primitives keep their String() form.
        content:
          typeof toolResult === 'object' && toolResult !== null
            ? jsonStringify(toolResult)
            : String(toolResult),
        type: 'toolResult',
      }
    }

    if (
      'structuredContent' in result &&
      result.structuredContent !== undefined
    ) {
      return {
        content: jsonStringify(result.structuredContent),
        type: 'structuredContent',
        schema: inferCompactSchema(result.structuredContent),
      }
    }

    if ('content' in result && Array.isArray(result.content)) {
      const transformedContent = (
        await Promise.all(
          result.content.map(item => transformResultContent(item, name)),
        )
      ).flat()
      return {
        content: transformedContent,
        type: 'contentArray',
        schema: inferCompactSchema(transformedContent),
      }
    }
  }

  const errorMessage = `MCP server "${name}" tool "${tool}": unexpected response format`
  logMCPError(name, errorMessage)
  throw new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
    errorMessage,
    'MCP tool unexpected response format',
  )
}

/**
 * Check if MCP content contains any image blocks.
 * Used to decide whether to persist to file (images should use truncation instead
 * to preserve image compression and viewability).
 */
function contentContainsImages(content: MCPToolResult): boolean {
  if (!content || typeof content === 'string') {
    return false
  }
  return content.some(block => block.type === 'image')
}

/**
 * Normalizes an MCP tool result and, when it is too large, swaps it for
 * something smaller.
 *
 * Results from the `ide` server and results that do not need truncation are
 * returned as-is. Oversized results are truncated when
 * `ENABLE_MCP_LARGE_OUTPUT_FILES` is defined-falsy or when they contain
 * images. Otherwise they are saved to a file and replaced by instructions for
 * reading it. If saving fails, an error string is returned instead.
 *
 * @param result Raw result returned by the MCP server.
 * @param tool Tool name (e.g., "search").
 * @param name Server name (e.g., "slack").
 */
export async function processMCPResult(
  result: unknown,
  tool: string, // Tool name for validation (e.g., "search")
  name: string, // Server name for IDE check and transformation (e.g., "slack")
): Promise<MCPToolResult> {
  const { content, type, schema } = await transformMCPResult(result, tool, name)

  // IDE tools are not going to the model directly, so we don't need to
  // handle large output.
  if (name === 'ide') {
    return content
  }

  // Check if content needs truncation (i.e., is too large)
  if (!(await mcpContentNeedsTruncation(content))) {
    return content
  }

  const sizeEstimateTokens = getContentSizeEstimate(content)

  // If large output files feature is disabled, fall back to old truncation behavior
  if (isEnvDefinedFalsy(process.env.ENABLE_MCP_LARGE_OUTPUT_FILES)) {
    logEvent('tengu_mcp_large_result_handled', {
      outcome: 'truncated',
      reason: 'env_disabled',
      sizeEstimateTokens,
    } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
    return await truncateMcpContentIfNeeded(content)
  }

  // Save large output to file and return instructions for reading it
  // Content is guaranteed to exist at this point (we checked mcpContentNeedsTruncation)
  if (!content) {
    return content
  }

  // If content contains images, fall back to truncation - persisting images as JSON
  // defeats the image compression logic and makes them non-viewable
  if (contentContainsImages(content)) {
    logEvent('tengu_mcp_large_result_handled', {
      outcome: 'truncated',
      reason: 'contains_images',
      sizeEstimateTokens,
    } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
    return await truncateMcpContentIfNeeded(content)
  }

  // Generate a unique ID for the persisted file (server-tool-timestamp-random).
  // The random suffix keeps two results from the same tool in the same
  // millisecond from sharing an ID, matching persistBlobToTextBlock.
  const timestamp = Date.now()
  const persistId = `mcp-${normalizeNameForMCP(name)}-${normalizeNameForMCP(tool)}-${timestamp}-${Math.random().toString(36).slice(2, 8)}`
  // Convert to string for persistence (persistToolResult expects string or specific block types)
  const contentStr =
    typeof content === 'string' ? content : jsonStringify(content, null, 2)
  const persistResult = await persistToolResult(contentStr, persistId)

  if (isPersistError(persistResult)) {
    // If file save failed, fall back to returning truncated content info
    const contentLength = contentStr.length
    logEvent('tengu_mcp_large_result_handled', {
      outcome: 'truncated',
      reason: 'persist_failed',
      sizeEstimateTokens,
    } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
    return `Error: result (${contentLength.toLocaleString()} characters) exceeds maximum allowed tokens. Failed to save output to file: ${persistResult.error}. If this MCP server provides pagination or filtering tools, use them to retrieve specific portions of the data.`
  }

  logEvent('tengu_mcp_large_result_handled', {
    outcome: 'persisted',
    reason: 'file_saved',
    sizeEstimateTokens,
    persistedSizeChars: persistResult.originalSize,
  } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)

  const formatDescription = getFormatDescription(type, schema)
  return getLargeOutputInstructions(
    persistResult.filepath,
    persistResult.originalSize,
    formatDescription,
  )
}

/** Result of one MCP tool call: the content plus optional metadata. */
type MCPToolCallResult = {
  content: MCPToolResult
  _meta?: Record<string, unknown>
  structuredContent?: Record<string, unknown>
}

/**
 * Call an MCP tool, handling UrlElicitationRequiredError (-32042) by
 * displaying the URL elicitation to the user, waiting for the completion
 * notification, and retrying the tool call.
 *
 * @internal Exported for testing.
*/
export async function callMCPToolWithUrlElicitationRetry({
  client: connectedClient,
  clientConnection,
  tool,
  args,
  meta,
  signal,
  setAppState,
  onProgress,
  callToolFn = callMCPTool,
  handleElicitation,
}: {
  client: ConnectedMCPServer
  clientConnection: MCPServerConnection
  tool: string
  args: Record<string, unknown>
  meta?: Record<string, unknown>
  signal: AbortSignal
  setAppState: (f: (prev: AppState) => AppState) => void
  onProgress?: (data: MCPProgress) => void
  /** Injectable for testing. Defaults to callMCPTool. */
  callToolFn?: (opts: {
    client: ConnectedMCPServer
    tool: string
    args: Record<string, unknown>
    meta?: Record<string, unknown>
    signal: AbortSignal
    onProgress?: (data: MCPProgress) => void
  }) => Promise<MCPToolCallResult>
  /** Handler for URL elicitations when no hook handles them.
   * In print/SDK mode, delegates to structuredIO. In REPL, falls back to queue. */
  handleElicitation?: (
    serverName: string,
    params: ElicitRequestURLParams,
    signal: AbortSignal,
  ) => Promise<ElicitResult>
}): Promise<MCPToolCallResult> {
  const MAX_URL_ELICITATION_RETRIES = 3
  // Each iteration is one tool call: the initial call plus up to
  // MAX_URL_ELICITATION_RETRIES retries. The loop only exits via return/throw.
  for (let attempt = 0; ; attempt++) {
    try {
      return await callToolFn({
        client: connectedClient,
        tool,
        args,
        meta,
        signal,
        onProgress,
      })
    } catch (error) {
      // The MCP SDK's Protocol creates plain McpError (not UrlElicitationRequiredError)
      // for error responses, so we check the error code instead of instanceof.
      if (
        !(error instanceof McpError) ||
        error.code !== ErrorCode.UrlElicitationRequired
      ) {
        throw error
      }

      // Limit the number of URL elicitation retries
      if (attempt >= MAX_URL_ELICITATION_RETRIES) {
        throw error
      }

      const errorData = error.data
      const rawElicitations =
        errorData != null &&
        typeof errorData === 'object' &&
        'elicitations' in errorData &&
        Array.isArray(errorData.elicitations)
          ? (errorData.elicitations as unknown[])
          : []

      // Validate each element has the required fields for ElicitRequestURLParams
      const elicitations = rawElicitations.filter(
        (e): e is ElicitRequestURLParams => {
          if (e == null || typeof e !== 'object') return false
          const obj = e as Record<string, unknown>
          return (
            obj.mode === 'url' &&
            typeof obj.url === 'string' &&
            typeof obj.elicitationId === 'string' &&
            typeof obj.message === 'string'
          )
        },
      )

      const serverName =
        clientConnection.type === 'connected'
          ? clientConnection.name
          : 'unknown'

      if (elicitations.length === 0) {
        logMCPDebug(
          serverName,
          `Tool '${tool}' returned -32042 but no valid elicitations in error data`,
        )
        throw error
      }

      logMCPDebug(
        serverName,
        `Tool '${tool}' requires URL elicitation (error -32042, attempt ${attempt + 1}), processing ${elicitations.length} elicitation(s)`,
      )

      // Process each URL elicitation from the error.
      // The completion notification handler (in registerElicitationHandler) sets
      // `completed: true` on the matching queue event; the dialog reacts to this flag.
      for (const elicitation of elicitations) {
        const { elicitationId } = elicitation

        // Run elicitation hooks — they can resolve URL elicitations programmatically
        const hookResponse = await runElicitationHooks(
          serverName,
          elicitation,
          signal,
        )
        if (hookResponse) {
          logMCPDebug(
            serverName,
            `URL elicitation ${elicitationId} resolved by hook: ${jsonStringify(hookResponse)}`,
          )
          if (hookResponse.action !== 'accept') {
            return {
              content: `URL elicitation was ${hookResponse.action === 'decline' ? 'declined' : hookResponse.action + 'ed'} by a hook. The tool "${tool}" could not complete because it requires the user to open a URL.`,
            }
          }
          // Hook accepted — skip the UI and proceed to retry
          continue
        }

        // Resolve the URL elicitation via callback (print/SDK mode) or queue (REPL mode).
        let userResult: ElicitResult
        if (handleElicitation) {
          // Print/SDK mode: delegate to structuredIO which sends a control request
          userResult = await handleElicitation(serverName, elicitation, signal)
        } else {
          // REPL mode: queue for ElicitationDialog with two-phase consent/waiting flow
          const waitingState: ElicitationWaitingState = {
            actionLabel: 'Retry now',
            showCancel: true,
          }
          userResult = await new Promise<ElicitResult>(resolve => {
            // An abort is reported to the retry loop as a cancel.
            const onAbort = () => {
              void resolve({ action: 'cancel' })
            }
            if (signal.aborted) {
              // Already aborted: resolve immediately without queueing a dialog.
              onAbort()
              return
            }
            signal.addEventListener('abort', onAbort, { once: true })

            setAppState(prev => ({
              ...prev,
              elicitation: {
                queue: [
                  ...prev.elicitation.queue,
                  {
                    serverName,
                    requestId: `error-elicit-${elicitationId}`,
                    params: elicitation,
                    signal,
                    waitingState,
                    respond: result => {
                      // Phase 1 consent: accept is a no-op (doesn't resolve retry Promise)
                      if (result.action === 'accept') {
                        return
                      }
                      // Decline or cancel: resolve the retry Promise
                      signal.removeEventListener('abort', onAbort)
                      void resolve(result)
                    },
                    onWaitingDismiss: action => {
                      signal.removeEventListener('abort', onAbort)
                      if (action === 'retry') {
                        void resolve({ action: 'accept' })
                      } else {
                        void resolve({ action: 'cancel' })
                      }
                    },
                  },
                ],
              },
            }))
          })
        }

        // Run ElicitationResult hooks — they can modify or block the response
        const finalResult = await runElicitationResultHooks(
          serverName,
          userResult,
          signal,
          'url',
          elicitationId,
        )

        if (finalResult.action !== 'accept') {
          logMCPDebug(
            serverName,
            `User ${finalResult.action === 'decline' ? 'declined' : finalResult.action + 'ed'} URL elicitation ${elicitationId}`,
          )
          return {
            content: `URL elicitation was ${finalResult.action === 'decline' ? 'declined' : finalResult.action + 'ed'} by the user. The tool "${tool}" could not complete because it requires the user to open a URL.`,
          }
        }

        logMCPDebug(
          serverName,
          `Elicitation ${elicitationId} completed, retrying tool call`,
        )
      }

      // Loop back to retry the tool call
    }
  }
}

/**
 * Invokes a single tool on a connected MCP server and post-processes the result.
 *
 * The SDK call is raced against a local timer of `getMcpToolTimeoutMs()`
 * milliseconds, and a "still running" debug line is logged every 30 seconds
 * while the call is in flight.
 *
 * @param client - The connected server; its `client`, `name` and `config` are used
 * @param tool - Name of the tool to call
 * @param args - Arguments forwarded as the tool call's `arguments`
 * @param meta - Optional value forwarded as the tool call's `_meta`
 * @param signal - Abort signal passed through to the SDK call
 * @param onProgress - Optional callback receiving SDK progress notifications
 *   re-shaped as `mcp_progress` events
 * @returns The processed content plus the result's `_meta` and
 *   `structuredContent`; `{ content: undefined }` when the call failed with an
 *   error named `AbortError`
 * @throws McpToolCallError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS when the
 *   result is flagged `isError`
 * @throws McpAuthError when the failure carries code 401 or is an UnauthorizedError
 * @throws McpSessionExpiredError when the session expired or an HTTP-based
 *   connection was closed (the server's connection cache is cleared first)
 */
async function callMCPTool({
  client: { client, name, config },
  tool,
  args,
  meta,
  signal,
  onProgress,
}: {
  client: ConnectedMCPServer
  tool: string
  args: Record<string, unknown>
  meta?: Record<string, unknown>
  signal: AbortSignal
  onProgress?: (data: MCPProgress) => void
}): Promise<{
  content: MCPToolResult
  _meta?: Record<string, unknown>
  structuredContent?: Record<string, unknown>
}> {
  const toolStartTime = Date.now()
  let progressInterval: NodeJS.Timeout | undefined

  try {
    logMCPDebug(name, `Calling MCP tool: ${tool}`)

    // Set up progress logging for long-running tools (every 30 seconds)
    progressInterval = setInterval(
      (startTime, name, tool) => {
        const elapsed = Date.now() - startTime
        const elapsedSeconds = Math.floor(elapsed / 1000)
        const duration = `${elapsedSeconds}s`
        logMCPDebug(name, `Tool '${tool}' still running (${duration} elapsed)`)
      },
      30000, // Log every 30 seconds
      toolStartTime,
      name,
      tool,
    )

    // Use Promise.race with our own timeout to handle cases where SDK's
    // internal timeout doesn't work (e.g., SSE stream breaks mid-request)
    const timeoutMs = getMcpToolTimeoutMs()
    let timeoutId: NodeJS.Timeout | undefined

    const timeoutPromise = new Promise<never>((_, reject) => {
      timeoutId = setTimeout(
        (reject, name, tool, timeoutMs) => {
          reject(
            new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
              `MCP server "${name}" tool "${tool}" timed out after ${Math.floor(timeoutMs / 1000)}s`,
              'MCP tool timeout',
            ),
          )
        },
        timeoutMs,
        reject,
        name,
        tool,
        timeoutMs,
      )
    })

    const result = await Promise.race([
      client.callTool(
        {
          name: tool,
          arguments: args,
          _meta: meta,
        },
        CallToolResultSchema,
        {
          signal,
          timeout: timeoutMs,
          onprogress: onProgress
            ? sdkProgress => {
                onProgress({
                  type: 'mcp_progress',
                  status: 'progress',
                  serverName: name,
                  toolName: tool,
                  progress: sdkProgress.progress,
                  total: sdkProgress.total,
                  progressMessage: sdkProgress.message,
                })
              }
            : undefined,
        },
      ),
      timeoutPromise,
    ]).finally(() => {
      // Whichever side settles first, the local timer is no longer needed.
      if (timeoutId) {
        clearTimeout(timeoutId)
      }
    })

    if ('isError' in result && result.isError) {
      let errorDetails = 'Unknown error'
      if (
        'content' in result &&
        Array.isArray(result.content) &&
        result.content.length > 0
      ) {
        const firstContent = result.content[0]
        if (
          firstContent &&
          typeof firstContent === 'object' &&
          'text' in firstContent
        ) {
          errorDetails = firstContent.text
        }
      } else if ('error' in result) {
        // Fallback for legacy error format
        errorDetails = String(result.error)
      }
      logMCPError(name, errorDetails)
      throw new McpToolCallError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
        errorDetails,
        'MCP tool returned error',
        '_meta' in result && result._meta ? { _meta: result._meta } : undefined,
      )
    }
    const elapsed = Date.now() - toolStartTime
    const duration =
      elapsed < 1000
        ? `${elapsed}ms`
        : elapsed < 60000
          ? `${Math.floor(elapsed / 1000)}s`
          : `${Math.floor(elapsed / 60000)}m ${Math.floor((elapsed % 60000) / 1000)}s`

    logMCPDebug(name, `Tool '${tool}' completed successfully in ${duration}`)

    // Log code indexing tool usage
    const codeIndexingTool = detectCodeIndexingFromMcpServerName(name)
    if (codeIndexingTool) {
      logEvent('tengu_code_indexing_tool_used', {
        tool: codeIndexingTool as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        source:
          'mcp' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        success: true,
      })
    }

    const content = await processMCPResult(result, tool, name)
    return {
      content,
      _meta: result._meta as Record<string, unknown> | undefined,
      structuredContent: result.structuredContent as
        | Record<string, unknown>
        | undefined,
    }
  } catch (e) {
    // Clear intervals on error (before the awaited cache clear below, so the
    // "still running" log cannot fire while the failure is being handled)
    if (progressInterval !== undefined) {
      clearInterval(progressInterval)
    }

    const elapsed = Date.now() - toolStartTime

    if (e instanceof Error && e.name !== 'AbortError') {
      logMCPDebug(
        name,
        `Tool '${tool}' failed after ${Math.floor(elapsed / 1000)}s: ${e.message}`,
      )
    }

    // Check for 401 errors indicating expired/invalid OAuth tokens
    // The MCP SDK's StreamableHTTPError has a `code` property with the HTTP status
    if (e instanceof Error) {
      const errorCode = 'code' in e ? (e.code as number | undefined) : undefined
      if (errorCode === 401 || e instanceof UnauthorizedError) {
        logMCPDebug(
          name,
          `Tool call returned 401 Unauthorized - token may have expired`,
        )
        logEvent('tengu_mcp_tool_call_auth_error', {})
        throw new McpAuthError(
          name,
          `MCP server "${name}" requires re-authorization (token expired)`,
        )
      }

      // Check for session expiry — two error shapes can surface here:
      // 1. Direct 404 + JSON-RPC -32001 from the server (StreamableHTTPError)
      // 2. -32000 "Connection closed" (McpError) — the SDK closes the transport
      //    after the onerror handler fires, so the pending callTool() rejects
      //    with this derived error instead of the original 404.
      // In both cases, clear the connection cache so the next tool call
      // creates a fresh session.
      const isSessionExpired = isMcpSessionExpiredError(e)
      const isConnectionClosedOnHttp =
        'code' in e &&
        (e as Error & { code?: number }).code === -32000 &&
        e.message.includes('Connection closed') &&
        (config.type === 'http' || config.type === 'claudeai-proxy')
      if (isSessionExpired || isConnectionClosedOnHttp) {
        logMCPDebug(
          name,
          `MCP session expired during tool call (${isSessionExpired ? '404/-32001' : 'connection closed'}), clearing connection cache for re-initialization`,
        )
        logEvent('tengu_mcp_session_expired', {})
        await clearServerCache(name, config)
        throw new McpSessionExpiredError(name)
      }
    }

    // When the user hits esc, avoid logspew
    if (!(e instanceof Error) || e.name !== 'AbortError') {
      throw e
    }
    return { content: undefined }
  } finally {
    // Always clear intervals
    if (progressInterval !== undefined) {
      clearInterval(progressInterval)
    }
  }
}

/**
 * Returns the id of the first content block of an assistant message when that
 * block is a `tool_use` block, and `undefined` otherwise (including when the
 * message has no content blocks).
 */
function extractToolUseId(message: AssistantMessage): string | undefined {
  if (message.message.content[0]?.type !== 'tool_use') {
    return undefined
  }
  return message.message.content[0].id
}

/**
 * Sets up SDK MCP clients by creating transports and connecting them.
 * This is used for SDK MCP servers that run in the same process as the SDK.
 *
 * Servers are connected in parallel. A server whose connection or tool fetch
 * throws is returned as a `failed` entry with no tools rather than aborting
 * the whole setup.
 *
 * @param sdkMcpConfigs - The SDK MCP server configurations
 * @param sendMcpMessage - Callback to send MCP messages through the control channel
 * @returns The server connections (connected or failed) and the tools fetched
 *   from the connected ones
 */
export async function setupSdkMcpClients(
  sdkMcpConfigs: Record<string, McpSdkServerConfig>,
  sendMcpMessage: (
    serverName: string,
    message: JSONRPCMessage,
  ) => Promise<JSONRPCMessage>,
): Promise<{
  clients: MCPServerConnection[]
  tools: Tool[]
}> {
  const clients: MCPServerConnection[] = []
  const tools: Tool[] = []

  // Kept so a rejected result can be mapped back to its server name below.
  const entries = Object.entries(sdkMcpConfigs)

  // Connect to all servers in parallel
  const results = await Promise.allSettled(
    entries.map(async ([name, config]) => {
      const transport = new SdkControlClientTransport(name, sendMcpMessage)

      const client = new Client(
        {
          name: 'claude-code',
          title: 'Claude Code',
          version: MACRO.VERSION ?? 'unknown',
          description: "Anthropic's agentic coding tool",
          websiteUrl: PRODUCT_URL,
        },
        {
          capabilities: {},
        },
      )

      try {
        // Connect the client
        await client.connect(transport)

        // Get capabilities from the server
        const capabilities = client.getServerCapabilities()

        // Create the connected client object
        const connectedClient: MCPServerConnection = {
          type: 'connected',
          name,
          capabilities: capabilities || {},
          client,
          config: { ...config, scope: 'dynamic' as const },
          cleanup: async () => {
            await client.close()
          },
        }

        // Fetch tools if the server has them
        const serverTools: Tool[] = []
        if (capabilities?.tools) {
          const sdkTools = await fetchToolsForClient(connectedClient)
          serverTools.push(...sdkTools)
        }

        return {
          client: connectedClient,
          tools: serverTools,
        }
      } catch (error) {
        // If connection fails, return failed server
        logMCPError(name, `Failed to connect SDK MCP server: ${error}`)
        return {
          client: {
            type: 'failed' as const,
            name,
            // NOTE(review): the success path uses scope 'dynamic' while this
            // path uses 'user' — confirm the difference is intentional.
            config: { ...config, scope: 'user' as const },
          },
          tools: [],
        }
      }
    }),
  )

  // Process results and collect clients and tools
  results.forEach((result, index) => {
    if (result.status === 'fulfilled') {
      clients.push(result.value.client)
      tools.push(...result.value.tools)
      return
    }
    // A rejection means something threw in code the try block above does not
    // guard (constructing the transport/client, or the catch handler itself),
    // so nothing has logged it yet. Log it instead of dropping it silently.
    const serverName = entries[index]?.[0] ?? 'unknown'
    const reason: unknown = result.reason
    const detail = reason instanceof Error ? reason.message : String(reason)
    logMCPError(
      serverName,
      `Unexpected failure setting up SDK MCP server: ${detail}`,
    )
  })

  return { clients, tools }
}