DDCC

services/mcp/client.ts

116 KB3349 lines
src/services/mcp/client.ts
1import { feature } from 'bun:bundle'
2import type {
3 Base64ImageSource,
4 ContentBlockParam,
5 MessageParam,
6} from '@anthropic-ai/sdk/resources/index.mjs'
7import { Client } from '@modelcontextprotocol/sdk/client/index.js'
8import {
9 SSEClientTransport,
10 type SSEClientTransportOptions,
11} from '@modelcontextprotocol/sdk/client/sse.js'
12import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js'
13import {
14 StreamableHTTPClientTransport,
15 type StreamableHTTPClientTransportOptions,
16} from '@modelcontextprotocol/sdk/client/streamableHttp.js'
17import {
18 createFetchWithInit,
19 type FetchLike,
20 type Transport,
21} from '@modelcontextprotocol/sdk/shared/transport.js'
22import {
23 CallToolResultSchema,
24 ElicitRequestSchema,
25 type ElicitRequestURLParams,
26 type ElicitResult,
27 ErrorCode,
28 type JSONRPCMessage,
29 type ListPromptsResult,
30 ListPromptsResultSchema,
31 ListResourcesResultSchema,
32 ListRootsRequestSchema,
33 type ListToolsResult,
34 ListToolsResultSchema,
35 McpError,
36 type PromptMessage,
37 type ResourceLink,
38} from '@modelcontextprotocol/sdk/types.js'
39import mapValues from 'lodash-es/mapValues.js'
40import memoize from 'lodash-es/memoize.js'
41import zipObject from 'lodash-es/zipObject.js'
42import pMap from 'p-map'
43import { getOriginalCwd, getSessionId } from '../../bootstrap/state.js'
44import type { Command } from '../../commands.js'
45import { getOauthConfig } from '../../constants/oauth.js'
46import { PRODUCT_URL } from '../../constants/product.js'
47import type { AppState } from '../../state/AppState.js'
48import {
49 type Tool,
50 type ToolCallProgress,
51 toolMatchesName,
52} from '../../Tool.js'
53import { ListMcpResourcesTool } from '../../tools/ListMcpResourcesTool/ListMcpResourcesTool.js'
54import { type MCPProgress, MCPTool } from '../../tools/MCPTool/MCPTool.js'
55import { createMcpAuthTool } from '../../tools/McpAuthTool/McpAuthTool.js'
56import { ReadMcpResourceTool } from '../../tools/ReadMcpResourceTool/ReadMcpResourceTool.js'
57import { createAbortController } from '../../utils/abortController.js'
58import { count } from '../../utils/array.js'
59import {
60 checkAndRefreshOAuthTokenIfNeeded,
61 getClaudeAIOAuthTokens,
62 handleOAuth401Error,
63} from '../../utils/auth.js'
64import { registerCleanup } from '../../utils/cleanupRegistry.js'
65import { detectCodeIndexingFromMcpServerName } from '../../utils/codeIndexing.js'
66import { logForDebugging } from '../../utils/debug.js'
67import { isEnvDefinedFalsy, isEnvTruthy } from '../../utils/envUtils.js'
68import {
69 errorMessage,
70 TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
71} from '../../utils/errors.js'
72import { getMCPUserAgent } from '../../utils/http.js'
73import { maybeNotifyIDEConnected } from '../../utils/ide.js'
74import { maybeResizeAndDownsampleImageBuffer } from '../../utils/imageResizer.js'
75import { logMCPDebug, logMCPError } from '../../utils/log.js'
76import {
77 getBinaryBlobSavedMessage,
78 getFormatDescription,
79 getLargeOutputInstructions,
80 persistBinaryContent,
81} from '../../utils/mcpOutputStorage.js'
82import {
83 getContentSizeEstimate,
84 type MCPToolResult,
85 mcpContentNeedsTruncation,
86 truncateMcpContentIfNeeded,
87} from '../../utils/mcpValidation.js'
88import { WebSocketTransport } from '../../utils/mcpWebSocketTransport.js'
89import { memoizeWithLRU } from '../../utils/memoize.js'
90import { getWebSocketTLSOptions } from '../../utils/mtls.js'
91import {
92 getProxyFetchOptions,
93 getWebSocketProxyAgent,
94 getWebSocketProxyUrl,
95} from '../../utils/proxy.js'
96import { recursivelySanitizeUnicode } from '../../utils/sanitization.js'
97import { getSessionIngressAuthToken } from '../../utils/sessionIngressAuth.js'
98import { subprocessEnv } from '../../utils/subprocessEnv.js'
99import {
100 isPersistError,
101 persistToolResult,
102} from '../../utils/toolResultStorage.js'
103import {
104 type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
105 logEvent,
106} from '../analytics/index.js'
107import {
108 type ElicitationWaitingState,
109 runElicitationHooks,
110 runElicitationResultHooks,
111} from './elicitationHandler.js'
112import { buildMcpToolName } from './mcpStringUtils.js'
113import { normalizeNameForMCP } from './normalization.js'
114import { getLoggingSafeMcpBaseUrl } from './utils.js'
115
116/* eslint-disable @typescript-eslint/no-require-imports */
// Resolved through a synchronous require() guarded by the MCP_SKILLS feature
// flag. When the flag is off this is null, so call sites have to null-check
// before invoking it.
const fetchMcpSkillsForClient = feature('MCP_SKILLS')
  ? (
      require('../../skills/mcpSkills.js') as typeof import('../../skills/mcpSkills.js')
    ).fetchMcpSkillsForClient
  : null
122
123import { UnauthorizedError } from '@modelcontextprotocol/sdk/client/auth.js'
124import type { AssistantMessage } from 'src/types/message.js'
125/* eslint-enable @typescript-eslint/no-require-imports */
126import { classifyMcpToolForCollapse } from '../../tools/MCPTool/classifyForCollapse.js'
127import { clearKeychainCache } from '../../utils/secureStorage/macOsKeychainHelpers.js'
128import { sleep } from '../../utils/sleep.js'
129import {
130 ClaudeAuthProvider,
131 hasMcpDiscoveryButNoToken,
132 wrapFetchWithStepUpDetection,
133} from './auth.js'
134import { markClaudeAiMcpConnected } from './claudeai.js'
135import { getAllMcpConfigs, isMcpServerDisabled } from './config.js'
136import { getMcpServerHeaders } from './headersHelper.js'
137import { SdkControlClientTransport } from './SdkControlTransport.js'
138import type {
139 ConnectedMCPServer,
140 MCPServerConnection,
141 McpSdkServerConfig,
142 ScopedMcpServerConfig,
143 ServerResource,
144} from './types.js'
145
/**
 * Raised when an MCP tool call fails because the server rejected our
 * credentials (for example, an expired OAuth token answered with a 401).
 * The tool execution layer is expected to catch it and move the named
 * server's client into the 'needs-auth' state.
 */
export class McpAuthError extends Error {
  /**
   * @param serverName Name of the MCP server that demanded authentication.
   * @param message Human-readable description of the failure.
   */
  constructor(
    public serverName: string,
    message: string,
  ) {
    super(message)
    this.name = 'McpAuthError'
  }
}
160
/**
 * Signals that the server no longer recognizes our MCP session and that the
 * cached connection has already been dropped. Callers are expected to obtain
 * a fresh client through ensureConnectedClient and retry the operation.
 */
class McpSessionExpiredError extends Error {
  /** @param serverName Name of the MCP server whose session expired. */
  constructor(serverName: string) {
    const description = `MCP server "${serverName}" session expired`
    super(description)
    this.name = 'McpSessionExpiredError'
  }
}
171
/**
 * Error raised for an MCP tool result flagged with `isError: true`. The
 * result's `_meta` travels with the error so SDK consumers can still read it:
 * the MCP spec places `_meta` on the base Result type, which makes it valid
 * on error results as well.
 */
export class McpToolCallError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS extends TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS {
  /** Wrapper holding the `_meta` payload of the failed tool result, if any. */
  readonly mcpMeta?: { _meta?: Record<string, unknown> }

  /**
   * @param message Full error message.
   * @param telemetryMessage Message variant forwarded to the telemetry-safe base class.
   * @param mcpMeta Optional `_meta` wrapper taken from the tool result.
   */
  constructor(
    message: string,
    telemetryMessage: string,
    mcpMeta?: { _meta?: Record<string, unknown> },
  ) {
    super(message, telemetryMessage)
    this.mcpMeta = mcpMeta
    this.name = 'McpToolCallError'
  }
}
187
/**
 * Detects whether an error is an MCP "Session not found" error
 * (HTTP 404 + JSON-RPC code -32001).
 *
 * Per the MCP spec, servers return 404 when a session ID is no longer valid.
 * Both signals are required so that generic 404s (wrong URL, server gone,
 * etc.) are not mistaken for an expired session.
 *
 * @param error Error thrown by the transport; its optional numeric `code`
 *   property is read as the HTTP status.
 * @returns true only when the status is 404 and the message embeds the
 *   JSON-RPC error code -32001.
 */
export function isMcpSessionExpiredError(error: Error): boolean {
  const httpStatus =
    'code' in error ? (error as Error & { code?: number }).code : undefined
  if (httpStatus !== 404) {
    return false
  }
  // The SDK embeds the response body text in the error message.
  // MCP servers return: {"error":{"code":-32001,"message":"Session not found"},...}
  // JSON permits arbitrary whitespace around the colon, so match it loosely
  // rather than enumerating spacings. The trailing lookahead keeps longer
  // numbers such as -320010 from being mistaken for -32001.
  return /"code"\s*:\s*-32001(?!\d)/.test(error.message)
}
207
/**
 * Default timeout for MCP tool calls (effectively infinite - ~27.8 hours).
 */
const DEFAULT_MCP_TOOL_TIMEOUT_MS = 100_000_000

/**
 * Cap on MCP tool descriptions and server instructions sent to the model.
 * OpenAPI-generated MCP servers have been observed dumping 15-60KB of endpoint
 * docs into tool.description; this caps the p95 tail without losing the intent.
 */
const MAX_MCP_DESCRIPTION_LENGTH = 2048

/**
 * Gets the timeout for MCP tool calls in milliseconds.
 *
 * Reads the MCP_TOOL_TIMEOUT environment variable. Only a positive integer is
 * honoured; an unset, non-numeric, zero, or negative value falls back to the
 * default (~27.8 hours). A negative timeout is never meaningful, so it is
 * treated like any other invalid value instead of being returned.
 *
 * @returns Timeout in milliseconds, always greater than zero.
 */
function getMcpToolTimeoutMs(): number {
  const configured = parseInt(process.env.MCP_TOOL_TIMEOUT || '', 10)
  // NaN fails the comparison, so unparsable input also takes the default.
  return configured > 0 ? configured : DEFAULT_MCP_TOOL_TIMEOUT_MS
}
230
231import { isClaudeInChromeMCPServer } from '../../utils/claudeInChrome/common.js'
232
// Deferred load: toolRendering.tsx pulls in React/ink, which is only needed
// once a Claude-in-Chrome MCP server is connected.
/* eslint-disable @typescript-eslint/no-require-imports */
const claudeInChromeToolRendering =
  (): typeof import('../../utils/claudeInChrome/toolRendering.js') => {
    return require('../../utils/claudeInChrome/toolRendering.js')
  }
// Deferred load: wrapper.tsx → hostAdapter.ts → executor.ts pulls in both
// native modules (@ant/computer-use-input + @ant/computer-use-swift).
// Runtime-gated by GrowthBook tengu_malort_pedway (see gates.ts). Both
// bindings below are undefined when the CHICAGO_MCP feature is off.
const computerUseWrapper = feature('CHICAGO_MCP')
  ? (): typeof import('../../utils/computerUse/wrapper.js') => {
      return require('../../utils/computerUse/wrapper.js')
    }
  : undefined
const isComputerUseMCPServer = feature('CHICAGO_MCP')
  ? (
      require('../../utils/computerUse/common.js') as typeof import('../../utils/computerUse/common.js')
    ).isComputerUseMCPServer
  : undefined
250
251import { mkdir, readFile, unlink, writeFile } from 'fs/promises'
252import { dirname, join } from 'path'
253import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
254/* eslint-enable @typescript-eslint/no-require-imports */
255import { jsonParse, jsonStringify } from '../../utils/slowOperations.js'
256
// How long a recorded "needs auth" entry stays valid: 15 minutes.
const MCP_AUTH_CACHE_TTL_MS = 15 * 60 * 1_000

// On-disk cache shape: one record per server id, stamped with the time
// (ms since epoch) at which the entry was written.
type McpAuthCacheData = Record<string, { timestamp: number }>

/**
 * Location of the needs-auth cache file inside the Claude config home
 * directory.
 */
function getMcpAuthCachePath(): string {
  const configHome = getClaudeConfigHomeDir()
  return join(configHome, 'mcp-needs-auth-cache.json')
}
264
// Memoized so N concurrent isMcpAuthCached() calls during batched connection
// share a single file read instead of N reads of the same file. Invalidated
// on write (setMcpAuthCacheEntry) and clear (clearMcpAuthCache). Not using
// lodash memoize because we need to null out the cache, not delete by key.
let authCachePromise: Promise<McpAuthCacheData> | null = null

/**
 * Loads the needs-auth cache from disk, sharing one in-flight read between
 * concurrent callers.
 *
 * @returns The parsed cache. Resolves to an empty object when the file is
 *   missing, unreadable, not valid JSON, or valid JSON that is not a plain
 *   object. The promise never rejects.
 */
function getMcpAuthCache(): Promise<McpAuthCacheData> {
  if (!authCachePromise) {
    authCachePromise = readFile(getMcpAuthCachePath(), 'utf-8')
      .then((data): McpAuthCacheData => {
        const parsed: unknown = jsonParse(data)
        // A file holding `null`, an array, or a primitive is still valid JSON.
        // Indexing `null` by server id would throw in callers, and entries
        // added to an array are dropped on serialization, so treat anything
        // that is not a plain object as an empty cache.
        if (
          parsed === null ||
          typeof parsed !== 'object' ||
          Array.isArray(parsed)
        ) {
          return {}
        }
        return parsed as McpAuthCacheData
      })
      .catch((): McpAuthCacheData => ({}))
  }
  return authCachePromise
}
279
/**
 * Reports whether a still-fresh needs-auth entry exists for the given server.
 *
 * @param serverId Key under which the entry was stored.
 * @returns true when an entry exists and is younger than
 *   MCP_AUTH_CACHE_TTL_MS; false when it is absent or has expired.
 */
async function isMcpAuthCached(serverId: string): Promise<boolean> {
  const entry = (await getMcpAuthCache())[serverId]
  if (entry) {
    const ageMs = Date.now() - entry.timestamp
    return ageMs < MCP_AUTH_CACHE_TTL_MS
  }
  return false
}
288
// All cache writes are funnelled through this promise chain so that several
// servers returning 401 in the same batch cannot interleave their
// read-modify-write cycles.
let writeChain = Promise.resolve()

/**
 * Records that the given server needs authentication, stamped with the
 * current time. The write is queued behind any pending writes and is
 * best-effort: failures are swallowed and never reach the caller.
 *
 * @param serverId Key to store the entry under.
 */
function setMcpAuthCacheEntry(serverId: string): void {
  const persistEntry = async (): Promise<void> => {
    const entries = await getMcpAuthCache()
    entries[serverId] = { timestamp: Date.now() }
    const cacheFile = getMcpAuthCachePath()
    await mkdir(dirname(cacheFile), { recursive: true })
    await writeFile(cacheFile, jsonStringify(entries))
    // Drop the memoized read so later readers load the file again and see
    // this entry. Writes are serialized, so the next queued write re-reads a
    // file that already contains it.
    authCachePromise = null
  }

  writeChain = writeChain.then(persistEntry).catch(() => {
    // Best-effort cache write
  })
}
310
/**
 * Forgets every needs-auth entry: drops the memoized read and deletes the
 * cache file. The deletion is fire-and-forget; a missing file is not an error.
 *
 * NOTE(review): the unlink is not queued on writeChain, so a write already
 * pending there could presumably recreate the file afterwards — confirm
 * whether callers rely on ordering here.
 */
export function clearMcpAuthCache(): void {
  authCachePromise = null
  const removal = unlink(getMcpAuthCachePath())
  void removal.catch(() => {
    // Cache file may not exist
  })
}
317
/**
 * Builds a spread-ready analytics fragment carrying the server's base URL.
 * getLoggingSafeMcpBaseUrl is evaluated exactly once. The value is typed as
 * AnalyticsMetadata because the URL is query-stripped and safe to log.
 *
 * @param serverRef Server configuration to derive the URL from.
 * @returns `{ mcpServerBaseUrl }` when a URL is available, otherwise `{}`.
 */
function mcpBaseUrlAnalytics(serverRef: ScopedMcpServerConfig): {
  mcpServerBaseUrl?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
} {
  const baseUrl = getLoggingSafeMcpBaseUrl(serverRef)
  if (!baseUrl) {
    return {}
  }
  return {
    mcpServerBaseUrl:
      baseUrl as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  }
}
334
/**
 * Common path for sse/http/claudeai-proxy authentication failures during
 * connect. In order, it emits the tengu_mcp_server_needs_auth event, writes a
 * debug log line, records a needs-auth cache entry for the server, and
 * produces the needs-auth connection result.
 *
 * @param name Server name; also used as the needs-auth cache key.
 * @param serverRef Configuration of the server that rejected the connection.
 * @param transportType Transport over which the failure occurred.
 * @returns A connection object of type 'needs-auth' for this server.
 */
function handleRemoteAuthFailure(
  name: string,
  serverRef: ScopedMcpServerConfig,
  transportType: 'sse' | 'http' | 'claudeai-proxy',
): MCPServerConnection {
  const analyticsTransport =
    transportType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  logEvent('tengu_mcp_server_needs_auth', {
    transportType: analyticsTransport,
    ...mcpBaseUrlAnalytics(serverRef),
  })

  // Human-readable transport names for the debug log line.
  const displayNames: Record<typeof transportType, string> = {
    sse: 'SSE',
    http: 'HTTP',
    'claudeai-proxy': 'claude.ai proxy',
  }
  const transportLabel = displayNames[transportType]
  logMCPDebug(name, `Authentication required for ${transportLabel} server`)

  setMcpAuthCacheEntry(name)

  return { name, type: 'needs-auth', config: serverRef }
}
362
/**
 * Fetch wrapper for claude.ai proxy connections. Attaches the OAuth bearer
 * token and retries once on 401 via handleOAuth401Error (force-refresh).
 *
 * The Anthropic API path has this retry (withRetry.ts, grove.ts) to handle
 * memoize-cache staleness and clock drift. Without the same here, a single
 * stale token mass-401s every claude.ai connector and sticks them all in the
 * 15-min needs-auth cache.
 *
 * @param innerFetch Fetch implementation that performs the actual request.
 * @returns A fetch function that adds the Authorization header to every
 *   request. It rejects when no claude.ai OAuth token is available for the
 *   first attempt. If the retry itself fails, it resolves with the original
 *   401 response instead.
 */
export function createClaudeAiProxyFetch(innerFetch: FetchLike): FetchLike {
  return async (url, init) => {
    const doRequest = async () => {
      await checkAndRefreshOAuthTokenIfNeeded()
      const currentTokens = getClaudeAIOAuthTokens()
      if (!currentTokens) {
        throw new Error('No claude.ai OAuth token available')
      }
      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
      const headers = new Headers(init?.headers)
      headers.set('Authorization', `Bearer ${currentTokens.accessToken}`)
      const response = await innerFetch(url, { ...init, headers })
      // Return the exact token that was sent. Reading getClaudeAIOAuthTokens()
      // again after the request is wrong under concurrent 401s: another
      // connector's handleOAuth401Error clears the memoize cache, so we'd read
      // the NEW token from keychain, pass it to handleOAuth401Error, which
      // finds same-as-keychain → returns false → skips retry. Same pattern as
      // bridgeApi.ts withOAuthRetry (token passed as fn param).
      return { response, sentToken: currentTokens.accessToken }
    }

    const { response, sentToken } = await doRequest()
    if (response.status !== 401) {
      return response
    }
    // handleOAuth401Error returns true only if the token actually changed
    // (keychain had a newer one, or force-refresh succeeded). Gate retry on
    // that — otherwise we double round-trip time for every connector whose
    // downstream service genuinely needs auth (the common case: 30+ servers
    // with "MCP server requires authentication but no OAuth token configured").
    const tokenChanged = await handleOAuth401Error(sentToken).catch(() => false)
    logEvent('tengu_mcp_claudeai_proxy_401', {
      tokenChanged:
        tokenChanged as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    if (!tokenChanged) {
      // ELOCKED contention: another connector may have won the lockfile and
      // refreshed — check if the token changed underneath us.
      const now = getClaudeAIOAuthTokens()?.accessToken
      if (!now || now === sentToken) {
        return response
      }
    }

    let retried: Response
    try {
      retried = (await doRequest()).response
    } catch {
      // Retry itself failed (network error). Return the original 401 so the
      // outer handler can classify it.
      return response
    }
    // The stale 401 is now being discarded. Cancel its unread body so the
    // underlying connection can be released promptly instead of waiting for
    // garbage collection. Best-effort: a locked or already-consumed body makes
    // cancel() reject, which is ignored. Kept outside the try above so a
    // failure here can never replace the successful retry with the 401.
    void response.body?.cancel().catch(() => {})
    return retried
  }
}
423
// The small slice of a WebSocket instance that mcpWebSocketTransport relies on.
type WsClientLike = {
  readonly readyState: number
  close(): void
  send(data: string): void
}

/**
 * Opens a `ws` WebSocket client that requests the 'mcp' subprotocol.
 *
 * Bun's ws shim typings do not declare the three-argument constructor
 * (url, protocols, options) that the real ws package supports, so the
 * module's default export is cast to that constructor shape before use.
 *
 * @param url WebSocket endpoint to connect to.
 * @param options Options object handed to the ws constructor unchanged.
 * @returns The newly constructed client.
 */
async function createNodeWsClient(
  url: string,
  options: Record<string, unknown>,
): Promise<WsClientLike> {
  type WsConstructor = new (
    url: string,
    protocols: string[],
    options: Record<string, unknown>,
  ) => WsClientLike

  const { default: wsDefaultExport } = await import('ws')
  const NodeWebSocket = wsDefaultExport as unknown as WsConstructor
  return new NodeWebSocket(url, ['mcp'], options)
}
448
// Image MIME types recognized by this module.
// NOTE(review): the consumers of this set are outside the visible chunk —
// confirm how it is used before changing the list.
const IMAGE_MIME_TYPES = new Set([
  'image/jpeg',
  'image/png',
  'image/gif',
  'image/webp',
])

/**
 * Gets the MCP server connection timeout in milliseconds.
 *
 * Reads the MCP_TIMEOUT environment variable. Only a positive integer is
 * honoured; an unset, non-numeric, zero, or negative value falls back to
 * 30 seconds. A negative timeout is never meaningful, so it is treated like
 * any other invalid value instead of being returned.
 *
 * @returns Timeout in milliseconds, always greater than zero.
 */
function getConnectionTimeoutMs(): number {
  const configured = parseInt(process.env.MCP_TIMEOUT || '', 10)
  // NaN fails the comparison, so unparsable input also takes the default.
  return configured > 0 ? configured : 30000
}
459
/**
 * Timeout applied to each individual MCP request (auth, tool calls, etc.).
 */
const MCP_REQUEST_TIMEOUT_MS = 60000

/**
 * Accept value the MCP Streamable HTTP spec requires on every POST: clients
 * must advertise both JSON and SSE. Servers that enforce this strictly answer
 * requests lacking it with HTTP 406.
 * https://modelcontextprotocol.io/specification/2025-03-26/basic/transports#sending-messages-to-the-server
 */
const MCP_STREAMABLE_HTTP_ACCEPT = 'application/json, text/event-stream'

/**
 * Wraps a fetch function so that every non-GET request gets its own fresh
 * 60-second timeout. A single AbortSignal.timeout() created at connection
 * time goes stale after 60 seconds and makes every later request fail
 * immediately with "The operation timed out."; a per-request timer avoids that.
 *
 * The wrapper also guarantees the Accept header required by the MCP
 * Streamable HTTP spec. The MCP SDK sets it inside
 * StreamableHTTPClientTransport.send(), but on a Headers instance that then
 * passes through an object spread, and some runtimes/agents have been observed
 * dropping it before it reaches the wire. See
 * https://github.com/anthropics/claude-agent-sdk-typescript/issues/202.
 * Normalizing here (the last wrapper before fetch()) guarantees it is sent.
 *
 * GET requests are passed through untouched: for MCP transports they are
 * long-lived SSE streams meant to stay open indefinitely. (Auth-related GETs
 * use a separate fetch wrapper with its own timeout in auth.ts.)
 *
 * @param baseFetch - The fetch function to wrap
 * @returns A fetch function with the timeout and Accept handling applied.
 */
export function wrapFetchWithTimeout(baseFetch: FetchLike): FetchLike {
  return async (url: string | URL, init?: RequestInit) => {
    // GETs are long-lived SSE streams in MCP transports: no timeout, no edits.
    // (OAuth discovery GETs in auth.ts go through createAuthFetch() instead.)
    const isGet = (init?.method ?? 'GET').toUpperCase() === 'GET'
    if (isGet) {
      return baseFetch(url, init)
    }

    // new Headers() copies from plain objects, tuple arrays, and existing
    // Headers instances alike, so whatever shape the SDK supplied, the Accept
    // value ends up on a concrete object that survives the spread below.
    // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
    const headers = new Headers(init?.headers)
    if (!headers.has('accept')) {
      headers.set('accept', MCP_STREAMABLE_HTTP_ACCEPT)
    }

    // setTimeout rather than AbortSignal.timeout() so the timer can be cleared
    // as soon as the request settles. AbortSignal.timeout's internal timer is
    // only released when the signal is GC'd, which in Bun is lazy — ~2.4KB of
    // native memory per request lingers for the full 60s even when the request
    // completes in milliseconds.
    const controller = new AbortController()
    const timer = setTimeout(
      c =>
        c.abort(new DOMException('The operation timed out.', 'TimeoutError')),
      MCP_REQUEST_TIMEOUT_MS,
      controller,
    )
    timer.unref?.()

    // Mirror the caller's signal onto ours, including the already-aborted case.
    const parentSignal = init?.signal
    const forwardAbort = () => controller.abort(parentSignal?.reason)
    parentSignal?.addEventListener('abort', forwardAbort)
    if (parentSignal?.aborted) {
      controller.abort(parentSignal.reason)
    }

    try {
      return await baseFetch(url, {
        ...init,
        headers,
        signal: controller.signal,
      })
    } finally {
      // Runs on success and failure alike: stop the timer and detach from the
      // caller's signal.
      clearTimeout(timer)
      parentSignal?.removeEventListener('abort', forwardAbort)
    }
  }
}
551
/**
 * Gets the batch size for MCP server connections.
 *
 * Reads the MCP_SERVER_CONNECTION_BATCH_SIZE environment variable. Only an
 * integer of at least 1 is honoured; an unset, non-numeric, zero, or negative
 * value falls back to 3. A batch size below 1 is never meaningful, so it is
 * treated like any other invalid value instead of being returned.
 *
 * @returns Batch size, always at least 1.
 */
export function getMcpServerConnectionBatchSize(): number {
  const configured = parseInt(
    process.env.MCP_SERVER_CONNECTION_BATCH_SIZE || '',
    10,
  )
  // NaN fails the comparison, so unparsable input also takes the default.
  return configured >= 1 ? configured : 3
}
555
/**
 * Gets the batch size for remote MCP server connections.
 *
 * Reads the MCP_REMOTE_SERVER_CONNECTION_BATCH_SIZE environment variable.
 * Only an integer of at least 1 is honoured; an unset, non-numeric, zero, or
 * negative value falls back to 20. A batch size below 1 is never meaningful,
 * so it is treated like any other invalid value instead of being returned.
 *
 * @returns Batch size, always at least 1.
 */
function getRemoteMcpServerConnectionBatchSize(): number {
  const configured = parseInt(
    process.env.MCP_REMOTE_SERVER_CONNECTION_BATCH_SIZE || '',
    10,
  )
  // NaN fails the comparison, so unparsable input also takes the default.
  return configured >= 1 ? configured : 20
}
562
/**
 * Tells whether a server configuration counts as local: its `type` is absent
 * (or otherwise falsy), 'stdio', or 'sdk'.
 *
 * @param config Server configuration to classify.
 * @returns true for local configurations, false for every other type.
 */
function isLocalMcpServer(config: ScopedMcpServerConfig): boolean {
  const { type } = config
  if (!type) {
    return true
  }
  return type === 'stdio' || type === 'sdk'
}
566
// Only these tools from the IDE MCP servers are exposed; every other
// mcp__ide__* tool is filtered out.
const ALLOWED_IDE_TOOLS = ['mcp__ide__executeCode', 'mcp__ide__getDiagnostics']

/**
 * Decides whether an MCP tool should be kept. Tools whose name does not start
 * with 'mcp__ide__' always pass; IDE tools pass only when allow-listed.
 *
 * @param tool Tool to check; only its `name` is inspected.
 * @returns true when the tool should be included.
 */
function isIncludedMcpTool(tool: Tool): boolean {
  if (tool.name.startsWith('mcp__ide__')) {
    return ALLOWED_IDE_TOOLS.includes(tool.name)
  }
  return true
}
574
/**
 * Builds the cache key that identifies a server connection: the server name,
 * a hyphen, then the serialized configuration.
 *
 * @param name Server name
 * @param serverRef Server configuration
 * @returns Cache key string
 */
export function getServerCacheKey(
  name: string,
  serverRef: ScopedMcpServerConfig,
): string {
  const serializedConfig = jsonStringify(serverRef)
  return name + '-' + serializedConfig
}
587
588/**
589 * TODO (ollie): The memoization here increases complexity by a lot, and im not sure it really improves performance
590 * Attempts to connect to a single MCP server
591 * @param name Server name
592 * @param serverRef Scoped server configuration
593 * @returns A wrapped client (either connected or failed)
594 */
595export const connectToServer = memoize(
596 async (
597 name: string,
598 serverRef: ScopedMcpServerConfig,
599 serverStats?: {
600 totalServers: number
601 stdioCount: number
602 sseCount: number
603 httpCount: number
604 sseIdeCount: number
605 wsIdeCount: number
606 },
607 ): Promise<MCPServerConnection> => {
608 const connectStartTime = Date.now()
609 let inProcessServer:
610 | { connect(t: Transport): Promise<void>; close(): Promise<void> }
611 | undefined
612 try {
613 let transport
614
615 // If we have the session ingress JWT, we will connect via the session ingress rather than
616 // to remote MCP's directly.
617 const sessionIngressToken = getSessionIngressAuthToken()
618
619 if (serverRef.type === 'sse') {
620 // Create an auth provider for this server
621 const authProvider = new ClaudeAuthProvider(name, serverRef)
622
623 // Get combined headers (static + dynamic)
624 const combinedHeaders = await getMcpServerHeaders(name, serverRef)
625
626 // Use the auth provider with SSEClientTransport
627 const transportOptions: SSEClientTransportOptions = {
628 authProvider,
629 // Use fresh timeout per request to avoid stale AbortSignal bug.
630 // Step-up detection wraps innermost so the 403 is seen before the
631 // SDK's handler calls auth() → tokens().
632 fetch: wrapFetchWithTimeout(
633 wrapFetchWithStepUpDetection(createFetchWithInit(), authProvider),
634 ),
635 requestInit: {
636 headers: {
637 'User-Agent': getMCPUserAgent(),
638 ...combinedHeaders,
639 },
640 },
641 }
642
643 // IMPORTANT: Always set eventSourceInit with a fetch that does NOT use the
644 // timeout wrapper. The EventSource connection is long-lived (stays open indefinitely
645 // to receive server-sent events), so applying a 60-second timeout would kill it.
646 // The timeout is only meant for individual API requests (POST, auth refresh), not
647 // the persistent SSE stream.
648 transportOptions.eventSourceInit = {
649 fetch: async (url: string | URL, init?: RequestInit) => {
650 // Get auth headers from the auth provider
651 const authHeaders: Record<string, string> = {}
652 const tokens = await authProvider.tokens()
653 if (tokens) {
654 authHeaders.Authorization = `Bearer ${tokens.access_token}`
655 }
656
657 const proxyOptions = getProxyFetchOptions()
658 // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
659 return fetch(url, {
660 ...init,
661 ...proxyOptions,
662 headers: {
663 'User-Agent': getMCPUserAgent(),
664 ...authHeaders,
665 ...init?.headers,
666 ...combinedHeaders,
667 Accept: 'text/event-stream',
668 },
669 })
670 },
671 }
672
673 transport = new SSEClientTransport(
674 new URL(serverRef.url),
675 transportOptions,
676 )
677 logMCPDebug(name, `SSE transport initialized, awaiting connection`)
678 } else if (serverRef.type === 'sse-ide') {
679 logMCPDebug(name, `Setting up SSE-IDE transport to ${serverRef.url}`)
680 // IDE servers don't need authentication
681 // TODO: Use the auth token provided in the lockfile
682 const proxyOptions = getProxyFetchOptions()
683 const transportOptions: SSEClientTransportOptions =
684 proxyOptions.dispatcher
685 ? {
686 eventSourceInit: {
687 fetch: async (url: string | URL, init?: RequestInit) => {
688 // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
689 return fetch(url, {
690 ...init,
691 ...proxyOptions,
692 headers: {
693 'User-Agent': getMCPUserAgent(),
694 ...init?.headers,
695 },
696 })
697 },
698 },
699 }
700 : {}
701
702 transport = new SSEClientTransport(
703 new URL(serverRef.url),
704 Object.keys(transportOptions).length > 0
705 ? transportOptions
706 : undefined,
707 )
708 } else if (serverRef.type === 'ws-ide') {
709 const tlsOptions = getWebSocketTLSOptions()
710 const wsHeaders = {
711 'User-Agent': getMCPUserAgent(),
712 ...(serverRef.authToken && {
713 'X-Claude-Code-Ide-Authorization': serverRef.authToken,
714 }),
715 }
716
717 let wsClient: WsClientLike
718 if (typeof Bun !== 'undefined') {
719 // Bun's WebSocket supports headers/proxy/tls options but the DOM typings don't
720 // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
721 wsClient = new globalThis.WebSocket(serverRef.url, {
722 protocols: ['mcp'],
723 headers: wsHeaders,
724 proxy: getWebSocketProxyUrl(serverRef.url),
725 tls: tlsOptions || undefined,
726 } as unknown as string[])
727 } else {
728 wsClient = await createNodeWsClient(serverRef.url, {
729 headers: wsHeaders,
730 agent: getWebSocketProxyAgent(serverRef.url),
731 ...(tlsOptions || {}),
732 })
733 }
734 transport = new WebSocketTransport(wsClient)
735 } else if (serverRef.type === 'ws') {
736 logMCPDebug(
737 name,
738 `Initializing WebSocket transport to ${serverRef.url}`,
739 )
740
741 const combinedHeaders = await getMcpServerHeaders(name, serverRef)
742
743 const tlsOptions = getWebSocketTLSOptions()
744 const wsHeaders = {
745 'User-Agent': getMCPUserAgent(),
746 ...(sessionIngressToken && {
747 Authorization: `Bearer ${sessionIngressToken}`,
748 }),
749 ...combinedHeaders,
750 }
751
752 // Redact sensitive headers before logging
753 const wsHeadersForLogging = mapValues(wsHeaders, (value, key) =>
754 key.toLowerCase() === 'authorization' ? '[REDACTED]' : value,
755 )
756
757 logMCPDebug(
758 name,
759 `WebSocket transport options: ${jsonStringify({
760 url: serverRef.url,
761 headers: wsHeadersForLogging,
762 hasSessionAuth: !!sessionIngressToken,
763 })}`,
764 )
765
766 let wsClient: WsClientLike
767 if (typeof Bun !== 'undefined') {
768 // Bun's WebSocket supports headers/proxy/tls options but the DOM typings don't
769 // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
770 wsClient = new globalThis.WebSocket(serverRef.url, {
771 protocols: ['mcp'],
772 headers: wsHeaders,
773 proxy: getWebSocketProxyUrl(serverRef.url),
774 tls: tlsOptions || undefined,
775 } as unknown as string[])
776 } else {
777 wsClient = await createNodeWsClient(serverRef.url, {
778 headers: wsHeaders,
779 agent: getWebSocketProxyAgent(serverRef.url),
780 ...(tlsOptions || {}),
781 })
782 }
783 transport = new WebSocketTransport(wsClient)
784 } else if (serverRef.type === 'http') {
785 logMCPDebug(name, `Initializing HTTP transport to ${serverRef.url}`)
786 logMCPDebug(
787 name,
788 `Node version: ${process.version}, Platform: ${process.platform}`,
789 )
790 logMCPDebug(
791 name,
792 `Environment: ${jsonStringify({
793 NODE_OPTIONS: process.env.NODE_OPTIONS || 'not set',
794 UV_THREADPOOL_SIZE: process.env.UV_THREADPOOL_SIZE || 'default',
795 HTTP_PROXY: process.env.HTTP_PROXY || 'not set',
796 HTTPS_PROXY: process.env.HTTPS_PROXY || 'not set',
797 NO_PROXY: process.env.NO_PROXY || 'not set',
798 })}`,
799 )
800
801 // Create an auth provider for this server
802 const authProvider = new ClaudeAuthProvider(name, serverRef)
803
804 // Get combined headers (static + dynamic)
805 const combinedHeaders = await getMcpServerHeaders(name, serverRef)
806
807 // Check if this server has stored OAuth tokens. If so, the SDK's
808 // authProvider will set Authorization — don't override with the
809 // session ingress token (SDK merges requestInit AFTER authProvider).
810 // CCR proxy URLs (ccr_shttp_mcp) have no stored OAuth, so they still
811 // get the ingress token. See PR #24454 discussion.
812 const hasOAuthTokens = !!(await authProvider.tokens())
813
814 // Use the auth provider with StreamableHTTPClientTransport
815 const proxyOptions = getProxyFetchOptions()
816 logMCPDebug(
817 name,
818 `Proxy options: ${proxyOptions.dispatcher ? 'custom dispatcher' : 'default'}`,
819 )
820
821 const transportOptions: StreamableHTTPClientTransportOptions = {
822 authProvider,
823 // Use fresh timeout per request to avoid stale AbortSignal bug.
824 // Step-up detection wraps innermost so the 403 is seen before the
825 // SDK's handler calls auth() → tokens().
826 fetch: wrapFetchWithTimeout(
827 wrapFetchWithStepUpDetection(createFetchWithInit(), authProvider),
828 ),
829 requestInit: {
830 ...proxyOptions,
831 headers: {
832 'User-Agent': getMCPUserAgent(),
833 ...(sessionIngressToken &&
834 !hasOAuthTokens && {
835 Authorization: `Bearer ${sessionIngressToken}`,
836 }),
837 ...combinedHeaders,
838 },
839 },
840 }
841
842 // Redact sensitive headers before logging
843 const headersForLogging = transportOptions.requestInit?.headers
844 ? mapValues(
845 transportOptions.requestInit.headers as Record<string, string>,
846 (value, key) =>
847 key.toLowerCase() === 'authorization' ? '[REDACTED]' : value,
848 )
849 : undefined
850
851 logMCPDebug(
852 name,
853 `HTTP transport options: ${jsonStringify({
854 url: serverRef.url,
855 headers: headersForLogging,
856 hasAuthProvider: !!authProvider,
857 timeoutMs: MCP_REQUEST_TIMEOUT_MS,
858 })}`,
859 )
860
861 transport = new StreamableHTTPClientTransport(
862 new URL(serverRef.url),
863 transportOptions,
864 )
865 logMCPDebug(name, `HTTP transport created successfully`)
866 } else if (serverRef.type === 'sdk') {
867 throw new Error('SDK servers should be handled in print.ts')
868 } else if (serverRef.type === 'claudeai-proxy') {
869 logMCPDebug(
870 name,
871 `Initializing claude.ai proxy transport for server ${serverRef.id}`,
872 )
873
874 const tokens = getClaudeAIOAuthTokens()
875 if (!tokens) {
876 throw new Error('No claude.ai OAuth token found')
877 }
878
879 const oauthConfig = getOauthConfig()
880 const proxyUrl = `${oauthConfig.MCP_PROXY_URL}${oauthConfig.MCP_PROXY_PATH.replace('{server_id}', serverRef.id)}`
881
882 logMCPDebug(name, `Using claude.ai proxy at ${proxyUrl}`)
883
884 // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
885 const fetchWithAuth = createClaudeAiProxyFetch(globalThis.fetch)
886
887 const proxyOptions = getProxyFetchOptions()
888 const transportOptions: StreamableHTTPClientTransportOptions = {
889 // Wrap fetchWithAuth with fresh timeout per request
890 fetch: wrapFetchWithTimeout(fetchWithAuth),
891 requestInit: {
892 ...proxyOptions,
893 headers: {
894 'User-Agent': getMCPUserAgent(),
895 'X-Mcp-Client-Session-Id': getSessionId(),
896 },
897 },
898 }
899
900 transport = new StreamableHTTPClientTransport(
901 new URL(proxyUrl),
902 transportOptions,
903 )
904 logMCPDebug(name, `claude.ai proxy transport created successfully`)
905 } else if (
906 (serverRef.type === 'stdio' || !serverRef.type) &&
907 isClaudeInChromeMCPServer(name)
908 ) {
909 // Run the Chrome MCP server in-process to avoid spawning a ~325 MB subprocess
910 const { createChromeContext } = await import(
911 '../../utils/claudeInChrome/mcpServer.js'
912 )
913 const { createClaudeForChromeMcpServer } = await import(
914 '@ant/claude-for-chrome-mcp'
915 )
916 const { createLinkedTransportPair } = await import(
917 './InProcessTransport.js'
918 )
919 const context = createChromeContext(serverRef.env)
920 inProcessServer = createClaudeForChromeMcpServer(context)
921 const [clientTransport, serverTransport] = createLinkedTransportPair()
922 await inProcessServer.connect(serverTransport)
923 transport = clientTransport
924 logMCPDebug(name, `In-process Chrome MCP server started`)
925 } else if (
926 feature('CHICAGO_MCP') &&
927 (serverRef.type === 'stdio' || !serverRef.type) &&
928 isComputerUseMCPServer!(name)
929 ) {
930 // Run the Computer Use MCP server in-process — same rationale as
931 // Chrome above. The package's CallTool handler is a stub; real
932 // dispatch goes through wrapper.tsx's .call() override.
933 const { createComputerUseMcpServerForCli } = await import(
934 '../../utils/computerUse/mcpServer.js'
935 )
936 const { createLinkedTransportPair } = await import(
937 './InProcessTransport.js'
938 )
939 inProcessServer = await createComputerUseMcpServerForCli()
940 const [clientTransport, serverTransport] = createLinkedTransportPair()
941 await inProcessServer.connect(serverTransport)
942 transport = clientTransport
943 logMCPDebug(name, `In-process Computer Use MCP server started`)
944 } else if (serverRef.type === 'stdio' || !serverRef.type) {
945 const finalCommand =
946 process.env.CLAUDE_CODE_SHELL_PREFIX || serverRef.command
947 const finalArgs = process.env.CLAUDE_CODE_SHELL_PREFIX
948 ? [[serverRef.command, ...serverRef.args].join(' ')]
949 : serverRef.args
950 transport = new StdioClientTransport({
951 command: finalCommand,
952 args: finalArgs,
953 env: {
954 ...subprocessEnv(),
955 ...serverRef.env,
956 } as Record<string, string>,
957 stderr: 'pipe', // prevents error output from the MCP server from printing to the UI
958 })
959 } else {
960 throw new Error(`Unsupported server type: ${serverRef.type}`)
961 }
962
963 // Set up stderr logging for stdio transport before connecting in case there are any stderr
964 // outputs emitted during the connection start (this can be useful for debugging failed connections).
965 // Store handler reference for cleanup to prevent memory leaks
966 let stderrHandler: ((data: Buffer) => void) | undefined
967 let stderrOutput = ''
968 if (serverRef.type === 'stdio' || !serverRef.type) {
969 const stdioTransport = transport as StdioClientTransport
970 if (stdioTransport.stderr) {
971 stderrHandler = (data: Buffer) => {
972 // Cap stderr accumulation to prevent unbounded memory growth
973 if (stderrOutput.length < 64 * 1024 * 1024) {
974 try {
975 stderrOutput += data.toString()
976 } catch {
977 // Ignore errors from exceeding max string length
978 }
979 }
980 }
981 stdioTransport.stderr.on('data', stderrHandler)
982 }
983 }
984
985 const client = new Client(
986 {
987 name: 'claude-code',
988 title: 'Claude Code',
989 version: MACRO.VERSION ?? 'unknown',
990 description: "Anthropic's agentic coding tool",
991 websiteUrl: PRODUCT_URL,
992 },
993 {
994 capabilities: {
995 roots: {},
996 // Empty object declares the capability. Sending {form:{},url:{}}
997 // breaks Java MCP SDK servers (Spring AI) whose Elicitation class
998 // has zero fields and fails on unknown properties.
999 elicitation: {},
1000 },
1001 },
1002 )
1003
1004 // Add debug logging for client events if available
1005 if (serverRef.type === 'http') {
1006 logMCPDebug(name, `Client created, setting up request handler`)
1007 }
1008
1009 client.setRequestHandler(ListRootsRequestSchema, async () => {
1010 logMCPDebug(name, `Received ListRoots request from server`)
1011 return {
1012 roots: [
1013 {
1014 uri: `file://${getOriginalCwd()}`,
1015 },
1016 ],
1017 }
1018 })
1019
1020 // Add a timeout to connection attempts to prevent tests from hanging indefinitely
1021 logMCPDebug(
1022 name,
1023 `Starting connection with timeout of ${getConnectionTimeoutMs()}ms`,
1024 )
1025
1026 // For HTTP transport, try a basic connectivity test first
1027 if (serverRef.type === 'http') {
1028 logMCPDebug(name, `Testing basic HTTP connectivity to ${serverRef.url}`)
1029 try {
1030 const testUrl = new URL(serverRef.url)
1031 logMCPDebug(
1032 name,
1033 `Parsed URL: host=${testUrl.hostname}, port=${testUrl.port || 'default'}, protocol=${testUrl.protocol}`,
1034 )
1035
1036 // Log DNS resolution attempt
1037 if (
1038 testUrl.hostname === '127.0.0.1' ||
1039 testUrl.hostname === 'localhost'
1040 ) {
1041 logMCPDebug(name, `Using loopback address: ${testUrl.hostname}`)
1042 }
1043 } catch (urlError) {
1044 logMCPDebug(name, `Failed to parse URL: ${urlError}`)
1045 }
1046 }
1047
1048 const connectPromise = client.connect(transport)
1049 const timeoutPromise = new Promise<never>((_, reject) => {
1050 const timeoutId = setTimeout(() => {
1051 const elapsed = Date.now() - connectStartTime
1052 logMCPDebug(
1053 name,
1054 `Connection timeout triggered after ${elapsed}ms (limit: ${getConnectionTimeoutMs()}ms)`,
1055 )
1056 if (inProcessServer) {
1057 inProcessServer.close().catch(() => {})
1058 }
1059 transport.close().catch(() => {})
1060 reject(
1061 new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
1062 `MCP server "${name}" connection timed out after ${getConnectionTimeoutMs()}ms`,
1063 'MCP connection timeout',
1064 ),
1065 )
1066 }, getConnectionTimeoutMs())
1067
1068 // Clean up timeout if connect resolves or rejects
1069 connectPromise.then(
1070 () => {
1071 clearTimeout(timeoutId)
1072 },
1073 _error => {
1074 clearTimeout(timeoutId)
1075 },
1076 )
1077 })
1078
1079 try {
1080 await Promise.race([connectPromise, timeoutPromise])
1081 if (stderrOutput) {
1082 logMCPError(name, `Server stderr: ${stderrOutput}`)
1083 stderrOutput = '' // Release accumulated string to prevent memory growth
1084 }
1085 const elapsed = Date.now() - connectStartTime
1086 logMCPDebug(
1087 name,
1088 `Successfully connected (transport: ${serverRef.type || 'stdio'}) in ${elapsed}ms`,
1089 )
1090 } catch (error) {
1091 const elapsed = Date.now() - connectStartTime
1092 // SSE-specific error logging
1093 if (serverRef.type === 'sse' && error instanceof Error) {
1094 logMCPDebug(
1095 name,
1096 `SSE Connection failed after ${elapsed}ms: ${jsonStringify({
1097 url: serverRef.url,
1098 error: error.message,
1099 errorType: error.constructor.name,
1100 stack: error.stack,
1101 })}`,
1102 )
1103 logMCPError(name, error)
1104
1105 if (error instanceof UnauthorizedError) {
1106 return handleRemoteAuthFailure(name, serverRef, 'sse')
1107 }
1108 } else if (serverRef.type === 'http' && error instanceof Error) {
1109 const errorObj = error as Error & {
1110 cause?: unknown
1111 code?: string
1112 errno?: string | number
1113 syscall?: string
1114 }
1115 logMCPDebug(
1116 name,
1117 `HTTP Connection failed after ${elapsed}ms: ${error.message} (code: ${errorObj.code || 'none'}, errno: ${errorObj.errno || 'none'})`,
1118 )
1119 logMCPError(name, error)
1120
1121 if (error instanceof UnauthorizedError) {
1122 return handleRemoteAuthFailure(name, serverRef, 'http')
1123 }
1124 } else if (
1125 serverRef.type === 'claudeai-proxy' &&
1126 error instanceof Error
1127 ) {
1128 logMCPDebug(
1129 name,
1130 `claude.ai proxy connection failed after ${elapsed}ms: ${error.message}`,
1131 )
1132 logMCPError(name, error)
1133
1134 // StreamableHTTPError has a `code` property with the HTTP status
1135 const errorCode = (error as Error & { code?: number }).code
1136 if (errorCode === 401) {
1137 return handleRemoteAuthFailure(name, serverRef, 'claudeai-proxy')
1138 }
1139 } else if (
1140 serverRef.type === 'sse-ide' ||
1141 serverRef.type === 'ws-ide'
1142 ) {
1143 logEvent('tengu_mcp_ide_server_connection_failed', {
1144 connectionDurationMs: elapsed,
1145 })
1146 }
1147 if (inProcessServer) {
1148 inProcessServer.close().catch(() => {})
1149 }
1150 transport.close().catch(() => {})
1151 if (stderrOutput) {
1152 logMCPError(name, `Server stderr: ${stderrOutput}`)
1153 }
1154 throw error
1155 }
1156
1157 const capabilities = client.getServerCapabilities()
1158 const serverVersion = client.getServerVersion()
1159 const rawInstructions = client.getInstructions()
1160 let instructions = rawInstructions
1161 if (
1162 rawInstructions &&
1163 rawInstructions.length > MAX_MCP_DESCRIPTION_LENGTH
1164 ) {
1165 instructions =
1166 rawInstructions.slice(0, MAX_MCP_DESCRIPTION_LENGTH) + '… [truncated]'
1167 logMCPDebug(
1168 name,
1169 `Server instructions truncated from ${rawInstructions.length} to ${MAX_MCP_DESCRIPTION_LENGTH} chars`,
1170 )
1171 }
1172
1173 // Log successful connection details
1174 logMCPDebug(
1175 name,
1176 `Connection established with capabilities: ${jsonStringify({
1177 hasTools: !!capabilities?.tools,
1178 hasPrompts: !!capabilities?.prompts,
1179 hasResources: !!capabilities?.resources,
1180 hasResourceSubscribe: !!capabilities?.resources?.subscribe,
1181 serverVersion: serverVersion || 'unknown',
1182 })}`,
1183 )
1184 logForDebugging(
1185 `[MCP] Server "${name}" connected with subscribe=${!!capabilities?.resources?.subscribe}`,
1186 )
1187
1188 // Register default elicitation handler that returns cancel during the
1189 // window before registerElicitationHandler overwrites it in
1190 // onConnectionAttempt (useManageMCPConnections).
1191 client.setRequestHandler(ElicitRequestSchema, async request => {
1192 logMCPDebug(
1193 name,
1194 `Elicitation request received during initialization: ${jsonStringify(request)}`,
1195 )
1196 return { action: 'cancel' as const }
1197 })
1198
1199 if (serverRef.type === 'sse-ide' || serverRef.type === 'ws-ide') {
1200 const ideConnectionDurationMs = Date.now() - connectStartTime
1201 logEvent('tengu_mcp_ide_server_connection_succeeded', {
1202 connectionDurationMs: ideConnectionDurationMs,
1203 serverVersion:
1204 serverVersion as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1205 })
1206 try {
1207 void maybeNotifyIDEConnected(client)
1208 } catch (error) {
1209 logMCPError(
1210 name,
1211 `Failed to send ide_connected notification: ${error}`,
1212 )
1213 }
1214 }
1215
1216 // Enhanced connection drop detection and logging for all transport types
1217 const connectionStartTime = Date.now()
1218 let hasErrorOccurred = false
1219
1220 // Store original handlers
1221 const originalOnerror = client.onerror
1222 const originalOnclose = client.onclose
1223
1224 // The SDK's transport calls onerror on connection failures but doesn't call onclose,
1225 // which CC uses to trigger reconnection. We bridge this gap by tracking consecutive
1226 // terminal errors and manually closing after MAX_ERRORS_BEFORE_RECONNECT failures.
1227 let consecutiveConnectionErrors = 0
1228 const MAX_ERRORS_BEFORE_RECONNECT = 3
1229
1230 // Guard against re-entry: close() aborts in-flight streams which may fire
1231 // onerror again before the close chain completes.
1232 let hasTriggeredClose = false
1233
1234 // client.close() → transport.close() → transport.onclose → SDK's _onclose():
1235 // rejects all pending request handlers (so hung callTool() promises fail with
1236 // McpError -32000 "Connection closed") and then invokes our client.onclose
1237 // handler below (which clears the memo cache so the next call reconnects).
1238 // Calling client.onclose?.() directly would only clear the cache — pending
1239 // tool calls would stay hung.
1240 const closeTransportAndRejectPending = (reason: string) => {
1241 if (hasTriggeredClose) return
1242 hasTriggeredClose = true
1243 logMCPDebug(name, `Closing transport (${reason})`)
1244 void client.close().catch(e => {
1245 logMCPDebug(name, `Error during close: ${errorMessage(e)}`)
1246 })
1247 }
1248
1249 const isTerminalConnectionError = (msg: string): boolean => {
1250 return (
1251 msg.includes('ECONNRESET') ||
1252 msg.includes('ETIMEDOUT') ||
1253 msg.includes('EPIPE') ||
1254 msg.includes('EHOSTUNREACH') ||
1255 msg.includes('ECONNREFUSED') ||
1256 msg.includes('Body Timeout Error') ||
1257 msg.includes('terminated') ||
1258 // SDK SSE reconnection intermediate errors — may be wrapped around the
1259 // actual network error, so the substrings above won't match
1260 msg.includes('SSE stream disconnected') ||
1261 msg.includes('Failed to reconnect SSE stream')
1262 )
1263 }
1264
1265 // Enhanced error handler with detailed logging
1266 client.onerror = (error: Error) => {
1267 const uptime = Date.now() - connectionStartTime
1268 hasErrorOccurred = true
1269 const transportType = serverRef.type || 'stdio'
1270
1271 // Log the connection drop with context
1272 logMCPDebug(
1273 name,
1274 `${transportType.toUpperCase()} connection dropped after ${Math.floor(uptime / 1000)}s uptime`,
1275 )
1276
1277 // Log specific error details for debugging
1278 if (error.message) {
1279 if (error.message.includes('ECONNRESET')) {
1280 logMCPDebug(
1281 name,
1282 `Connection reset - server may have crashed or restarted`,
1283 )
1284 } else if (error.message.includes('ETIMEDOUT')) {
1285 logMCPDebug(
1286 name,
1287 `Connection timeout - network issue or server unresponsive`,
1288 )
1289 } else if (error.message.includes('ECONNREFUSED')) {
1290 logMCPDebug(name, `Connection refused - server may be down`)
1291 } else if (error.message.includes('EPIPE')) {
1292 logMCPDebug(
1293 name,
1294 `Broken pipe - server closed connection unexpectedly`,
1295 )
1296 } else if (error.message.includes('EHOSTUNREACH')) {
1297 logMCPDebug(name, `Host unreachable - network connectivity issue`)
1298 } else if (error.message.includes('ESRCH')) {
1299 logMCPDebug(
1300 name,
1301 `Process not found - stdio server process terminated`,
1302 )
1303 } else if (error.message.includes('spawn')) {
1304 logMCPDebug(
1305 name,
1306 `Failed to spawn process - check command and permissions`,
1307 )
1308 } else {
1309 logMCPDebug(name, `Connection error: ${error.message}`)
1310 }
1311 }
1312
1313 // For HTTP transports, detect session expiry (404 + JSON-RPC -32001)
1314 // and close the transport so pending tool calls reject and the next
1315 // call reconnects with a fresh session ID.
1316 if (
1317 (transportType === 'http' || transportType === 'claudeai-proxy') &&
1318 isMcpSessionExpiredError(error)
1319 ) {
1320 logMCPDebug(
1321 name,
1322 `MCP session expired (server returned 404 with session-not-found), triggering reconnection`,
1323 )
1324 closeTransportAndRejectPending('session expired')
1325 if (originalOnerror) {
1326 originalOnerror(error)
1327 }
1328 return
1329 }
1330
1331 // For remote transports (SSE/HTTP), track terminal connection errors
1332 // and trigger reconnection via close if we see repeated failures.
1333 if (
1334 transportType === 'sse' ||
1335 transportType === 'http' ||
1336 transportType === 'claudeai-proxy'
1337 ) {
1338 // The SDK's StreamableHTTP transport fires this after exhausting its
1339 // own SSE reconnect attempts (default maxRetries: 2) — but it never
1340 // calls onclose, so pending callTool() promises hang indefinitely.
1341 // This is the definitive "transport gave up" signal.
1342 if (error.message.includes('Maximum reconnection attempts')) {
1343 closeTransportAndRejectPending('SSE reconnection exhausted')
1344 if (originalOnerror) {
1345 originalOnerror(error)
1346 }
1347 return
1348 }
1349
1350 if (isTerminalConnectionError(error.message)) {
1351 consecutiveConnectionErrors++
1352 logMCPDebug(
1353 name,
1354 `Terminal connection error ${consecutiveConnectionErrors}/${MAX_ERRORS_BEFORE_RECONNECT}`,
1355 )
1356
1357 if (consecutiveConnectionErrors >= MAX_ERRORS_BEFORE_RECONNECT) {
1358 consecutiveConnectionErrors = 0
1359 closeTransportAndRejectPending('max consecutive terminal errors')
1360 }
1361 } else {
1362 // Non-terminal error (e.g., transient issue), reset counter
1363 consecutiveConnectionErrors = 0
1364 }
1365 }
1366
1367 // Call original handler
1368 if (originalOnerror) {
1369 originalOnerror(error)
1370 }
1371 }
1372
1373 // Enhanced close handler with connection drop context
1374 client.onclose = () => {
1375 const uptime = Date.now() - connectionStartTime
1376 const transportType = serverRef.type ?? 'unknown'
1377
1378 logMCPDebug(
1379 name,
1380 `${transportType.toUpperCase()} connection closed after ${Math.floor(uptime / 1000)}s (${hasErrorOccurred ? 'with errors' : 'cleanly'})`,
1381 )
1382
1383 // Clear the memoization cache so next operation reconnects
1384 const key = getServerCacheKey(name, serverRef)
1385
1386 // Also clear fetch caches (keyed by server name). Reconnection
1387 // creates a new connection object; without clearing, the next
1388 // fetch would return stale tools/resources from the old connection.
1389 fetchToolsForClient.cache.delete(name)
1390 fetchResourcesForClient.cache.delete(name)
1391 fetchCommandsForClient.cache.delete(name)
1392 if (feature('MCP_SKILLS')) {
1393 fetchMcpSkillsForClient!.cache.delete(name)
1394 }
1395
1396 connectToServer.cache.delete(key)
1397 logMCPDebug(name, `Cleared connection cache for reconnection`)
1398
1399 if (originalOnclose) {
1400 originalOnclose()
1401 }
1402 }
1403
1404 const cleanup = async () => {
1405 // In-process servers (e.g. Chrome MCP) don't have child processes or stderr
1406 if (inProcessServer) {
1407 try {
1408 await inProcessServer.close()
1409 } catch (error) {
1410 logMCPDebug(name, `Error closing in-process server: ${error}`)
1411 }
1412 try {
1413 await client.close()
1414 } catch (error) {
1415 logMCPDebug(name, `Error closing client: ${error}`)
1416 }
1417 return
1418 }
1419
1420 // Remove stderr event listener to prevent memory leaks
1421 if (stderrHandler && (serverRef.type === 'stdio' || !serverRef.type)) {
1422 const stdioTransport = transport as StdioClientTransport
1423 stdioTransport.stderr?.off('data', stderrHandler)
1424 }
1425
1426 // For stdio transports, explicitly terminate the child process with proper signals
1427 // NOTE: StdioClientTransport.close() only sends an abort signal, but many MCP servers
1428 // (especially Docker containers) need explicit SIGINT/SIGTERM signals to trigger graceful shutdown
1429 if (serverRef.type === 'stdio') {
1430 try {
1431 const stdioTransport = transport as StdioClientTransport
1432 const childPid = stdioTransport.pid
1433
1434 if (childPid) {
1435 logMCPDebug(name, 'Sending SIGINT to MCP server process')
1436
1437 // First try SIGINT (like Ctrl+C)
1438 try {
1439 process.kill(childPid, 'SIGINT')
1440 } catch (error) {
1441 logMCPDebug(name, `Error sending SIGINT: ${error}`)
1442 return
1443 }
1444
1445 // Wait for graceful shutdown with rapid escalation (total 500ms to keep CLI responsive)
1446 await new Promise<void>(async resolve => {
1447 let resolved = false
1448
1449 // Set up a timer to check if process still exists
1450 const checkInterval = setInterval(() => {
1451 try {
1452 // process.kill(pid, 0) checks if process exists without killing it
1453 process.kill(childPid, 0)
1454 } catch {
1455 // Process no longer exists
1456 if (!resolved) {
1457 resolved = true
1458 clearInterval(checkInterval)
1459 clearTimeout(failsafeTimeout)
1460 logMCPDebug(name, 'MCP server process exited cleanly')
1461 resolve()
1462 }
1463 }
1464 }, 50)
1465
1466 // Absolute failsafe: clear interval after 600ms no matter what
1467 const failsafeTimeout = setTimeout(() => {
1468 if (!resolved) {
1469 resolved = true
1470 clearInterval(checkInterval)
1471 logMCPDebug(
1472 name,
1473 'Cleanup timeout reached, stopping process monitoring',
1474 )
1475 resolve()
1476 }
1477 }, 600)
1478
1479 try {
1480 // Wait 100ms for SIGINT to work (usually much faster)
1481 await sleep(100)
1482
1483 if (!resolved) {
1484 // Check if process still exists
1485 try {
1486 process.kill(childPid, 0)
1487 // Process still exists, SIGINT failed, try SIGTERM
1488 logMCPDebug(
1489 name,
1490 'SIGINT failed, sending SIGTERM to MCP server process',
1491 )
1492 try {
1493 process.kill(childPid, 'SIGTERM')
1494 } catch (termError) {
1495 logMCPDebug(name, `Error sending SIGTERM: ${termError}`)
1496 resolved = true
1497 clearInterval(checkInterval)
1498 clearTimeout(failsafeTimeout)
1499 resolve()
1500 return
1501 }
1502 } catch {
1503 // Process already exited
1504 resolved = true
1505 clearInterval(checkInterval)
1506 clearTimeout(failsafeTimeout)
1507 resolve()
1508 return
1509 }
1510
1511 // Wait 400ms for SIGTERM to work (slower than SIGINT, often used for cleanup)
1512 await sleep(400)
1513
1514 if (!resolved) {
1515 // Check if process still exists
1516 try {
1517 process.kill(childPid, 0)
1518 // Process still exists, SIGTERM failed, force kill with SIGKILL
1519 logMCPDebug(
1520 name,
1521 'SIGTERM failed, sending SIGKILL to MCP server process',
1522 )
1523 try {
1524 process.kill(childPid, 'SIGKILL')
1525 } catch (killError) {
1526 logMCPDebug(
1527 name,
1528 `Error sending SIGKILL: ${killError}`,
1529 )
1530 }
1531 } catch {
1532 // Process already exited
1533 resolved = true
1534 clearInterval(checkInterval)
1535 clearTimeout(failsafeTimeout)
1536 resolve()
1537 }
1538 }
1539 }
1540
1541 // Final timeout - always resolve after 500ms max (total cleanup time)
1542 if (!resolved) {
1543 resolved = true
1544 clearInterval(checkInterval)
1545 clearTimeout(failsafeTimeout)
1546 resolve()
1547 }
1548 } catch {
1549 // Handle any errors in the escalation sequence
1550 if (!resolved) {
1551 resolved = true
1552 clearInterval(checkInterval)
1553 clearTimeout(failsafeTimeout)
1554 resolve()
1555 }
1556 }
1557 })
1558 }
1559 } catch (processError) {
1560 logMCPDebug(name, `Error terminating process: ${processError}`)
1561 }
1562 }
1563
1564 // Close the client connection (which also closes the transport)
1565 try {
1566 await client.close()
1567 } catch (error) {
1568 logMCPDebug(name, `Error closing client: ${error}`)
1569 }
1570 }
1571
1572 // Register cleanup for all transport types - even network transports might need cleanup
1573 // This ensures all MCP servers get properly terminated, not just stdio ones
1574 const cleanupUnregister = registerCleanup(cleanup)
1575
1576 // Create the wrapped cleanup that includes unregistering
1577 const wrappedCleanup = async () => {
1578 cleanupUnregister?.()
1579 await cleanup()
1580 }
1581
1582 const connectionDurationMs = Date.now() - connectStartTime
1583 logEvent('tengu_mcp_server_connection_succeeded', {
1584 connectionDurationMs,
1585 transportType: (serverRef.type ??
1586 'stdio') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1587 totalServers: serverStats?.totalServers,
1588 stdioCount: serverStats?.stdioCount,
1589 sseCount: serverStats?.sseCount,
1590 httpCount: serverStats?.httpCount,
1591 sseIdeCount: serverStats?.sseIdeCount,
1592 wsIdeCount: serverStats?.wsIdeCount,
1593 ...mcpBaseUrlAnalytics(serverRef),
1594 })
1595 return {
1596 name,
1597 client,
1598 type: 'connected' as const,
1599 capabilities: capabilities ?? {},
1600 serverInfo: serverVersion,
1601 instructions,
1602 config: serverRef,
1603 cleanup: wrappedCleanup,
1604 }
1605 } catch (error) {
1606 const connectionDurationMs = Date.now() - connectStartTime
1607 logEvent('tengu_mcp_server_connection_failed', {
1608 connectionDurationMs,
1609 totalServers: serverStats?.totalServers || 1,
1610 stdioCount:
1611 serverStats?.stdioCount || (serverRef.type === 'stdio' ? 1 : 0),
1612 sseCount: serverStats?.sseCount || (serverRef.type === 'sse' ? 1 : 0),
1613 httpCount:
1614 serverStats?.httpCount || (serverRef.type === 'http' ? 1 : 0),
1615 sseIdeCount:
1616 serverStats?.sseIdeCount || (serverRef.type === 'sse-ide' ? 1 : 0),
1617 wsIdeCount:
1618 serverStats?.wsIdeCount || (serverRef.type === 'ws-ide' ? 1 : 0),
1619 transportType: (serverRef.type ??
1620 'stdio') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1621 ...mcpBaseUrlAnalytics(serverRef),
1622 })
1623 logMCPDebug(
1624 name,
1625 `Connection failed after ${connectionDurationMs}ms: ${errorMessage(error)}`,
1626 )
1627 logMCPError(name, `Connection failed: ${errorMessage(error)}`)
1628
1629 if (inProcessServer) {
1630 inProcessServer.close().catch(() => {})
1631 }
1632 return {
1633 name,
1634 type: 'failed' as const,
1635 config: serverRef,
1636 error: errorMessage(error),
1637 }
1638 }
1639 },
1640 getServerCacheKey,
1641)
1642
/**
 * Tears down a server's connection and evicts every memoized entry for it.
 *
 * The connection is obtained through the memoized `connectToServer`; when it
 * is in the `connected` state its `cleanup()` is awaited. Any failure while
 * obtaining or cleaning up the connection is deliberately ignored, so the
 * cache eviction below always runs.
 *
 * @param name Server name (key of the fetch* caches)
 * @param serverRef Server configuration (part of the connection cache key)
 */
export async function clearServerCache(
  name: string,
  serverRef: ScopedMcpServerConfig,
): Promise<void> {
  const cacheKey = getServerCacheKey(name, serverRef)

  try {
    const connection = await connectToServer(name, serverRef)
    if (connection.type === 'connected') {
      await connection.cleanup()
    }
  } catch {
    // Best effort only: the server may never have connected successfully.
  }

  // Drop the memoized connection first, then the per-server fetch results,
  // so a later reconnect re-fetches tools/resources/commands instead of
  // serving stale data from the old connection.
  connectToServer.cache.delete(cacheKey)
  for (const memoizedFetch of [
    fetchToolsForClient,
    fetchResourcesForClient,
    fetchCommandsForClient,
  ]) {
    memoizedFetch.cache.delete(name)
  }
  if (feature('MCP_SKILLS')) {
    fetchMcpSkillsForClient!.cache.delete(name)
  }
}
1674
/**
 * Returns a usable connected client for the given MCP server.
 *
 * Non-SDK servers are resolved through `connectToServer`, which is memoized:
 * a live cache entry is reused, and a cleared entry (e.g. after the client's
 * onclose handler ran) leads to a new connection attempt. Tool/resource calls
 * should go through this function so they never use a dead connection.
 *
 * SDK MCP servers run in-process and are managed by setupSdkMcpClients, so
 * they are handed back unchanged without touching `connectToServer`.
 *
 * @param client The connected MCP server client
 * @returns Connected MCP server client (same or reconnected)
 * @throws Error if server cannot be connected
 */
export async function ensureConnectedClient(
  client: ConnectedMCPServer,
): Promise<ConnectedMCPServer> {
  const { name, config } = client

  // In-process SDK servers bypass the connection cache entirely.
  if (config.type === 'sdk') {
    return client
  }

  const current = await connectToServer(name, config)
  if (current.type === 'connected') {
    return current
  }

  throw new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
    `MCP server "${name}" is not connected`,
    'MCP server not connected',
  )
}
1705
/**
 * Reports whether two MCP server configurations describe the same connection.
 * Callers use this to decide if a server must be reconnected after its
 * configuration changed.
 *
 * `scope` is ignored because it is metadata rather than connection config.
 * Everything else is compared via its serialized form, which covers all
 * config variants without per-type field lists.
 */
export function areMcpConfigsEqual(
  a: ScopedMcpServerConfig,
  b: ScopedMcpServerConfig,
): boolean {
  // Cheap rejection before serializing anything.
  if (a.type !== b.type) {
    return false
  }

  // Strip the scope metadata and keep only the connection-relevant fields.
  const withoutScope = ({
    scope: _scope,
    ...connectionConfig
  }: ScopedMcpServerConfig) => connectionConfig

  return jsonStringify(withoutScope(a)) === jsonStringify(withoutScope(b))
}
1723
// Upper bound on the number of entries kept by each fetch* cache. Those
// caches are keyed by server name, which stays the same across reconnects,
// so this limit only guards against unbounded growth when many MCP servers
// are configured.
const MCP_FETCH_CACHE_SIZE = 20
1727
/**
 * Renders a single MCP tool argument value as text for the classifier.
 *
 * Primitives (and null/undefined) keep their `String()` form. Objects and
 * arrays are serialized as JSON so nested arguments stay visible instead of
 * collapsing to "[object Object]". If JSON serialization fails (e.g. a
 * circular reference or a BigInt inside the value) or yields nothing, the
 * `String()` form is used as a fallback so encoding never throws.
 */
function stringifyAutoClassifierValue(value: unknown): string {
  if (typeof value !== 'object' || value === null) {
    return String(value)
  }
  try {
    return JSON.stringify(value) ?? String(value)
  } catch {
    return String(value)
  }
}

/**
 * Encode MCP tool input for the auto-mode security classifier.
 * Exported so the auto-mode eval scripts can mirror production encoding
 * for `mcp__*` tool stubs without duplicating this logic.
 *
 * The result is a space-separated list of `key=value` pairs in the input's
 * own key order. Object and array values are JSON-encoded so the classifier
 * can inspect nested arguments; primitive values are written as-is.
 *
 * @param input Arguments the model passed to the MCP tool
 * @param toolName Tool name, used as the encoding when `input` has no keys
 * @returns The encoded classifier input
 */
export function mcpToolInputToAutoClassifierInput(
  input: Record<string, unknown>,
  toolName: string,
): string {
  const entries = Object.entries(input)
  if (entries.length === 0) {
    return toolName
  }
  return entries
    .map(([key, value]) => `${key}=${stringifyAutoClassifierValue(value)}`)
    .join(' ')
}
1742
1743export const fetchToolsForClient = memoizeWithLRU(
1744 async (client: MCPServerConnection): Promise<Tool[]> => {
1745 if (client.type !== 'connected') return []
1746
1747 try {
1748 if (!client.capabilities?.tools) {
1749 return []
1750 }
1751
1752 const result = (await client.client.request(
1753 { method: 'tools/list' },
1754 ListToolsResultSchema,
1755 )) as ListToolsResult
1756
1757 // Sanitize tool data from MCP server
1758 const toolsToProcess = recursivelySanitizeUnicode(result.tools)
1759
1760 // Check if we should skip the mcp__ prefix for SDK MCP servers
1761 const skipPrefix =
1762 client.config.type === 'sdk' &&
1763 isEnvTruthy(process.env.CLAUDE_AGENT_SDK_MCP_NO_PREFIX)
1764
1765 // Convert MCP tools to our Tool format
1766 return toolsToProcess
1767 .map((tool): Tool => {
1768 const fullyQualifiedName = buildMcpToolName(client.name, tool.name)
1769 return {
1770 ...MCPTool,
1771 // In skip-prefix mode, use the original name for model invocation so MCP tools
1772 // can override builtins by name. mcpInfo is used for permission checking.
1773 name: skipPrefix ? tool.name : fullyQualifiedName,
1774 mcpInfo: { serverName: client.name, toolName: tool.name },
1775 isMcp: true,
1776 // Collapse whitespace: _meta is open to external MCP servers, and
1777 // a newline here would inject orphan lines into the deferred-tool
1778 // list (formatDeferredToolLine joins on '\n').
1779 searchHint:
1780 typeof tool._meta?.['anthropic/searchHint'] === 'string'
1781 ? tool._meta['anthropic/searchHint']
1782 .replace(/\s+/g, ' ')
1783 .trim() || undefined
1784 : undefined,
1785 alwaysLoad: tool._meta?.['anthropic/alwaysLoad'] === true,
1786 async description() {
1787 return tool.description ?? ''
1788 },
1789 async prompt() {
1790 const desc = tool.description ?? ''
1791 return desc.length > MAX_MCP_DESCRIPTION_LENGTH
1792 ? desc.slice(0, MAX_MCP_DESCRIPTION_LENGTH) + '… [truncated]'
1793 : desc
1794 },
1795 isConcurrencySafe() {
1796 return tool.annotations?.readOnlyHint ?? false
1797 },
1798 isReadOnly() {
1799 return tool.annotations?.readOnlyHint ?? false
1800 },
1801 toAutoClassifierInput(input) {
1802 return mcpToolInputToAutoClassifierInput(input, tool.name)
1803 },
1804 isDestructive() {
1805 return tool.annotations?.destructiveHint ?? false
1806 },
1807 isOpenWorld() {
1808 return tool.annotations?.openWorldHint ?? false
1809 },
1810 isSearchOrReadCommand() {
1811 return classifyMcpToolForCollapse(client.name, tool.name)
1812 },
1813 inputJSONSchema: tool.inputSchema as Tool['inputJSONSchema'],
1814 async checkPermissions() {
1815 return {
1816 behavior: 'passthrough' as const,
1817 message: 'MCPTool requires permission.',
1818 suggestions: [
1819 {
1820 type: 'addRules' as const,
1821 rules: [
1822 {
1823 toolName: fullyQualifiedName,
1824 ruleContent: undefined,
1825 },
1826 ],
1827 behavior: 'allow' as const,
1828 destination: 'localSettings' as const,
1829 },
1830 ],
1831 }
1832 },
1833 async call(
1834 args: Record<string, unknown>,
1835 context,
1836 _canUseTool,
1837 parentMessage,
1838 onProgress?: ToolCallProgress<MCPProgress>,
1839 ) {
1840 const toolUseId = extractToolUseId(parentMessage)
1841 const meta = toolUseId
1842 ? { 'claudecode/toolUseId': toolUseId }
1843 : {}
1844
1845 // Emit progress when tool starts
1846 if (onProgress && toolUseId) {
1847 onProgress({
1848 toolUseID: toolUseId,
1849 data: {
1850 type: 'mcp_progress',
1851 status: 'started',
1852 serverName: client.name,
1853 toolName: tool.name,
1854 },
1855 })
1856 }
1857
1858 const startTime = Date.now()
1859 const MAX_SESSION_RETRIES = 1
1860 for (let attempt = 0; ; attempt++) {
1861 try {
1862 const connectedClient = await ensureConnectedClient(client)
1863 const mcpResult = await callMCPToolWithUrlElicitationRetry({
1864 client: connectedClient,
1865 clientConnection: client,
1866 tool: tool.name,
1867 args,
1868 meta,
1869 signal: context.abortController.signal,
1870 setAppState: context.setAppState,
1871 onProgress:
1872 onProgress && toolUseId
1873 ? progressData => {
1874 onProgress({
1875 toolUseID: toolUseId,
1876 data: progressData,
1877 })
1878 }
1879 : undefined,
1880 handleElicitation: context.handleElicitation,
1881 })
1882
1883 // Emit progress when tool completes successfully
1884 if (onProgress && toolUseId) {
1885 onProgress({
1886 toolUseID: toolUseId,
1887 data: {
1888 type: 'mcp_progress',
1889 status: 'completed',
1890 serverName: client.name,
1891 toolName: tool.name,
1892 elapsedTimeMs: Date.now() - startTime,
1893 },
1894 })
1895 }
1896
1897 return {
1898 data: mcpResult.content,
1899 ...((mcpResult._meta || mcpResult.structuredContent) && {
1900 mcpMeta: {
1901 ...(mcpResult._meta && {
1902 _meta: mcpResult._meta,
1903 }),
1904 ...(mcpResult.structuredContent && {
1905 structuredContent: mcpResult.structuredContent,
1906 }),
1907 },
1908 }),
1909 }
1910 } catch (error) {
1911 // Session expired — the connection cache has been
1912 // cleared, so retry with a fresh client.
1913 if (
1914 error instanceof McpSessionExpiredError &&
1915 attempt < MAX_SESSION_RETRIES
1916 ) {
1917 logMCPDebug(
1918 client.name,
1919 `Retrying tool '${tool.name}' after session recovery`,
1920 )
1921 continue
1922 }
1923
1924 // Emit progress when tool fails
1925 if (onProgress && toolUseId) {
1926 onProgress({
1927 toolUseID: toolUseId,
1928 data: {
1929 type: 'mcp_progress',
1930 status: 'failed',
1931 serverName: client.name,
1932 toolName: tool.name,
1933 elapsedTimeMs: Date.now() - startTime,
1934 },
1935 })
1936 }
1937 // Wrap MCP SDK errors so telemetry gets useful context
1938 // instead of just "Error" or "McpError" (the constructor
1939 // name). MCP SDK errors are protocol-level messages and
1940 // don't contain user file paths or code.
1941 if (
1942 error instanceof Error &&
1943 !(
1944 error instanceof
1945 TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
1946 )
1947 ) {
1948 const name = error.constructor.name
1949 if (name === 'Error') {
1950 throw new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
1951 error.message,
1952 error.message.slice(0, 200),
1953 )
1954 }
1955 // McpError has a numeric `code` with the JSON-RPC error
1956 // code (e.g. -32000 ConnectionClosed, -32001 RequestTimeout)
1957 if (
1958 name === 'McpError' &&
1959 'code' in error &&
1960 typeof error.code === 'number'
1961 ) {
1962 throw new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
1963 error.message,
1964 `McpError ${error.code}`,
1965 )
1966 }
1967 }
1968 throw error
1969 }
1970 }
1971 },
1972 userFacingName() {
1973 // Prefer title annotation if available, otherwise use tool name
1974 const displayName = tool.annotations?.title || tool.name
1975 return `${client.name} - ${displayName} (MCP)`
1976 },
1977 ...(isClaudeInChromeMCPServer(client.name) &&
1978 (client.config.type === 'stdio' || !client.config.type)
1979 ? claudeInChromeToolRendering().getClaudeInChromeMCPToolOverrides(
1980 tool.name,
1981 )
1982 : {}),
1983 ...(feature('CHICAGO_MCP') &&
1984 (client.config.type === 'stdio' || !client.config.type) &&
1985 isComputerUseMCPServer!(client.name)
1986 ? computerUseWrapper!().getComputerUseMCPToolOverrides(tool.name)
1987 : {}),
1988 }
1989 })
1990 .filter(isIncludedMcpTool)
1991 } catch (error) {
1992 logMCPError(client.name, `Failed to fetch tools: ${errorMessage(error)}`)
1993 return []
1994 }
1995 },
1996 (client: MCPServerConnection) => client.name,
1997 MCP_FETCH_CACHE_SIZE,
1998)
1999
/**
 * Lists the resources exposed by an MCP server and tags each one with the
 * name of the server it came from.
 *
 * Resolves to an empty list when the connection is not in the `connected`
 * state, when the server did not advertise the `resources` capability, when
 * the response carries no `resources` field, or when the request throws (the
 * failure is logged, never rethrown).
 *
 * Wrapped in `memoizeWithLRU`, keyed by server name, with
 * `MCP_FETCH_CACHE_SIZE` as the size argument.
 */
export const fetchResourcesForClient = memoizeWithLRU(
  async (client: MCPServerConnection): Promise<ServerResource[]> => {
    if (client.type !== 'connected') return []

    try {
      if (!client.capabilities?.resources) return []

      const { resources } = await client.client.request(
        { method: 'resources/list' },
        ListResourcesResultSchema,
      )
      if (!resources) return []

      // Stamp every resource with its originating server.
      return resources.map(entry => ({ ...entry, server: client.name }))
    } catch (error) {
      const reason = errorMessage(error)
      logMCPError(client.name, `Failed to fetch resources: ${reason}`)
      return []
    }
  },
  (client: MCPServerConnection) => client.name,
  MCP_FETCH_CACHE_SIZE,
)
2032
/**
 * Lists the prompts exposed by an MCP server and converts each one into a
 * slash-command style `Command`.
 *
 * Resolves to an empty list when the connection is not `connected`, when the
 * server did not advertise the `prompts` capability, when the response has no
 * `prompts` field, or when listing fails (the failure is logged).
 *
 * Each command is named `mcp__<normalized server name>__<prompt name>`. When
 * run, the raw argument string is split on single spaces and zipped
 * positionally onto the prompt's declared argument names.
 *
 * Wrapped in `memoizeWithLRU`, keyed by server name, with
 * `MCP_FETCH_CACHE_SIZE` as the size argument.
 */
export const fetchCommandsForClient = memoizeWithLRU(
  async (client: MCPServerConnection): Promise<Command[]> => {
    if (client.type !== 'connected') return []

    try {
      if (!client.capabilities?.prompts) return []

      // Ask the server for its prompt catalogue.
      const listing = (await client.client.request(
        { method: 'prompts/list' },
        ListPromptsResultSchema,
      )) as ListPromptsResult
      if (!listing.prompts) return []

      // Prompt metadata is server-controlled: sanitize it before use.
      const sanitizedPrompts = recursivelySanitizeUnicode(listing.prompts)

      return sanitizedPrompts.map(prompt => {
        const argNames = Object.values(prompt.arguments ?? {}).map(
          argument => argument.name,
        )
        return {
          type: 'prompt' as const,
          name: `mcp__${normalizeNameForMCP(client.name)}__${prompt.name}`,
          description: prompt.description ?? '',
          hasUserSpecifiedDescription: !!prompt.description,
          contentLength: 0, // Dynamic MCP content
          isEnabled: () => true,
          isHidden: false,
          isMcp: true,
          progressMessage: 'running',
          userFacingName() {
            // prompt.name is the programmatic identifier; prompt.title is a
            // display name that may contain spaces, which would break slash
            // command parsing.
            return `${client.name}:${prompt.name} (MCP)`
          },
          argNames,
          source: 'mcp',
          async getPromptForCommand(args: string) {
            const positionalValues = args.split(' ')
            try {
              const liveClient = await ensureConnectedClient(client)
              const { messages } = await liveClient.client.getPrompt({
                name: prompt.name,
                arguments: zipObject(argNames, positionalValues),
              })
              const blocksPerMessage = await Promise.all(
                messages.map(message =>
                  transformResultContent(message.content, liveClient.name),
                ),
              )
              return blocksPerMessage.flat()
            } catch (error) {
              const reason = errorMessage(error)
              logMCPError(
                client.name,
                `Error running command '${prompt.name}': ${reason}`,
              )
              throw error
            }
          },
        }
      })
    } catch (error) {
      const reason = errorMessage(error)
      logMCPError(client.name, `Failed to fetch commands: ${reason}`)
      return []
    }
  },
  (client: MCPServerConnection) => client.name,
  MCP_FETCH_CACHE_SIZE,
)
2108
/**
 * Invokes a tool on an IDE MCP server directly, as a plain RPC.
 *
 * The call is given the signal of a freshly created abort controller that is
 * not handed to anyone else, so callers cannot cancel it through this API.
 *
 * @param toolName The name of the tool to call
 * @param args The arguments to pass to the tool
 * @param client The connected IDE client to use for the RPC call
 * @returns The `content` of the tool call result
 */
export async function callIdeRpc(
  toolName: string,
  args: Record<string, unknown>,
  client: ConnectedMCPServer,
): Promise<string | ContentBlockParam[] | undefined> {
  const { signal } = createAbortController()
  const { content } = await callMCPTool({
    client,
    tool: toolName,
    args,
    signal,
  })
  return content
}
2129
/**
 * Drops any cached state for one MCP server, reconnects to it, and re-fetches
 * its tools, commands (prompts plus skills) and resources.
 *
 * Note: UI components should not call this directly; they should use the
 * reconnectMcpServer function from useManageMcpConnections.
 *
 * Any error thrown along the way is logged and reported as a `failed` client
 * with no tools or commands — this function does not reject.
 *
 * @param name Server name
 * @param config Server configuration
 * @returns The client connection together with its tools, commands and
 *   (when non-empty) resources
 */
export async function reconnectMcpServerImpl(
  name: string,
  config: ScopedMcpServerConfig,
): Promise<{
  client: MCPServerConnection
  tools: Tool[]
  commands: Command[]
  resources?: ServerResource[]
}> {
  try {
    // Another process (e.g. the VS Code extension host) may have changed the
    // stored tokens — cleared auth, saved new OAuth tokens — before asking
    // this CLI subprocess to reconnect. Invalidate the keychain cache so
    // fresh credentials are read from disk; otherwise stale cached data
    // would hide the fact that tokens were removed.
    clearKeychainCache()

    await clearServerCache(name, config)
    const client = await connectToServer(name, config)

    if (client.type !== 'connected') {
      return { client, tools: [], commands: [] }
    }

    if (config.type === 'claudeai-proxy') {
      markClaudeAiMcpConnected(name)
    }

    const supportsResources = Boolean(client.capabilities?.resources)

    const [tools, mcpCommands, mcpSkills, resources] = await Promise.all([
      fetchToolsForClient(client),
      fetchCommandsForClient(client),
      feature('MCP_SKILLS') && supportsResources
        ? fetchMcpSkillsForClient!(client)
        : Promise.resolve([]),
      supportsResources ? fetchResourcesForClient(client) : Promise.resolve([]),
    ])

    // The built-in resource tools are appended only for servers that support
    // resources, and only when this server's own tool list does not already
    // contain a tool matching either of their names.
    const builtinResourceTools: Tool[] = [
      ListMcpResourcesTool,
      ReadMcpResourceTool,
    ]
    const needsResourceTools =
      supportsResources &&
      !builtinResourceTools.some(builtin =>
        tools.some(existing => toolMatchesName(existing, builtin.name)),
      )
    const resourceTools: Tool[] = needsResourceTools ? builtinResourceTools : []

    return {
      client,
      tools: [...tools, ...resourceTools],
      commands: [...mcpCommands, ...mcpSkills],
      resources: resources.length > 0 ? resources : undefined,
    }
  } catch (error) {
    // The connection may have closed while fetching; degrade to a failed
    // client instead of propagating.
    logMCPError(name, `Error during reconnection: ${errorMessage(error)}`)
    return {
      client: { name, type: 'failed' as const, config },
      tools: [],
      commands: [],
    }
  }
}
2211
/**
 * Runs `processor` over every item with at most `concurrency` invocations in
 * flight, delegating the scheduling to pMap.
 *
 * History (replaced 2026-03): the previous implementation ran fixed-size
 * sequential batches — batch 1 was awaited in full before batch 2 started —
 * so one slow server in batch N held up ALL servers in batch N+1 even while
 * the other slots sat idle. pMap frees a slot as soon as its item completes,
 * so a slow server only occupies a single slot instead of blocking a whole
 * batch boundary. Same concurrency ceiling, same results, better scheduling.
 */
async function processBatched<T>(
  items: T[],
  concurrency: number,
  processor: (item: T) => Promise<void>,
): Promise<void> {
  const options = { concurrency }
  await pMap(items, processor, options)
}
2225
/**
 * Connects to every configured MCP server and reports each outcome through
 * `onConnectionAttempt`, once per server.
 *
 * Disabled servers are reported immediately as `disabled` and never reach the
 * connection pipeline. The remaining servers are split into local and remote
 * groups that are processed concurrently, each under its own concurrency
 * limit. A server whose processing throws is reported as `failed` with no
 * tools or commands.
 *
 * @param onConnectionAttempt Callback receiving the client state plus the
 *   tools, commands and resources gathered for that server
 * @param mcpConfigs Server configs keyed by name; when omitted, the configs
 *   returned by getAllMcpConfigs() are used
 */
export async function getMcpToolsCommandsAndResources(
  onConnectionAttempt: (params: {
    client: MCPServerConnection
    tools: Tool[]
    commands: Command[]
    resources?: ServerResource[]
  }) => void,
  mcpConfigs?: Record<string, ScopedMcpServerConfig>,
): Promise<void> {
  // The built-in resource tools are handed out once, to the first server
  // that supports resources.
  let resourceToolsAdded = false

  const configsByName = mcpConfigs ?? (await getAllMcpConfigs()).servers

  // Disabled servers should never generate HTTP connections or flow through
  // batch processing: report them right away and keep only the active ones.
  const configEntries: Array<[string, ScopedMcpServerConfig]> = []
  for (const [name, config] of Object.entries(configsByName)) {
    if (!isMcpServerDisabled(name)) {
      configEntries.push([name, config])
      continue
    }
    onConnectionAttempt({
      client: { name, type: 'disabled', config },
      tools: [],
      commands: [],
    })
  }

  // Per-transport counts, passed along to connectToServer for logging.
  const countTransport = (transport: string): number =>
    count(configEntries, ([, config]) => config.type === transport)
  const serverStats = {
    totalServers: configEntries.length,
    stdioCount: countTransport('stdio'),
    sseCount: countTransport('sse'),
    httpCount: countTransport('http'),
    sseIdeCount: countTransport('sse-ide'),
    wsIdeCount: countTransport('ws-ide'),
  }

  // Local servers (stdio/sdk) spawn processes and get a lower concurrency
  // limit; remote servers can connect with a higher one.
  const localServers: typeof configEntries = []
  const remoteServers: typeof configEntries = []
  for (const entry of configEntries) {
    const bucket = isLocalMcpServer(entry[1]) ? localServers : remoteServers
    bucket.push(entry)
  }

  const processServer = async ([name, config]: [
    string,
    ScopedMcpServerConfig,
  ]): Promise<void> => {
    try {
      // Re-check the disabled flag: a disabled server is only recorded in
      // state, never connected.
      if (isMcpServerDisabled(name)) {
        onConnectionAttempt({
          client: { name, type: 'disabled', config },
          tools: [],
          commands: [],
        })
        return
      }

      // Skip connection for servers that recently returned 401 (15min TTL),
      // or that we have probed before but hold no token for. The second
      // check closes the gap the TTL leaves open: without it, every 15min
      // we re-probe servers that cannot succeed until the user runs /mcp.
      // Each probe is a network round-trip for connect-401 plus OAuth
      // discovery, and print mode awaits the whole batch (main.tsx:3503).
      const usesRemoteAuth =
        config.type === 'claudeai-proxy' ||
        config.type === 'http' ||
        config.type === 'sse'
      if (
        usesRemoteAuth &&
        ((await isMcpAuthCached(name)) ||
          ((config.type === 'http' || config.type === 'sse') &&
            hasMcpDiscoveryButNoToken(name, config)))
      ) {
        logMCPDebug(name, `Skipping connection (cached needs-auth)`)
        onConnectionAttempt({
          client: { name, type: 'needs-auth' as const, config },
          tools: [createMcpAuthTool(name, config)],
          commands: [],
        })
        return
      }

      const client = await connectToServer(name, config, serverStats)

      if (client.type !== 'connected') {
        // A server that needs auth still exposes the auth tool.
        const authTools =
          client.type === 'needs-auth' ? [createMcpAuthTool(name, config)] : []
        onConnectionAttempt({ client, tools: authTools, commands: [] })
        return
      }

      if (config.type === 'claudeai-proxy') {
        markClaudeAiMcpConnected(name)
      }

      const supportsResources = Boolean(client.capabilities?.resources)

      const [tools, mcpCommands, mcpSkills, resources] = await Promise.all([
        fetchToolsForClient(client),
        fetchCommandsForClient(client),
        // Skills are discovered from skill:// resources
        feature('MCP_SKILLS') && supportsResources
          ? fetchMcpSkillsForClient!(client)
          : Promise.resolve([]),
        // Resources are only fetched when the server supports them
        supportsResources
          ? fetchResourcesForClient(client)
          : Promise.resolve([]),
      ])

      // If this server supports resources and the resource tools have not
      // been added yet, ship them alongside this client's tools.
      const resourceTools: Tool[] = []
      if (supportsResources && !resourceToolsAdded) {
        resourceToolsAdded = true
        resourceTools.push(ListMcpResourcesTool, ReadMcpResourceTool)
      }

      onConnectionAttempt({
        client,
        tools: [...tools, ...resourceTools],
        commands: [...mcpCommands, ...mcpSkills],
        resources: resources.length > 0 ? resources : undefined,
      })
    } catch (error) {
      // The connection may have closed during the fetch: log it and still
      // report the client, just without tools or commands.
      const reason = errorMessage(error)
      logMCPError(name, `Error fetching tools/commands/resources: ${reason}`)
      onConnectionAttempt({
        client: { name, type: 'failed' as const, config },
        tools: [],
        commands: [],
      })
    }
  }

  // Process both groups concurrently, each with its own concurrency limit:
  // - Local servers (stdio/sdk): lower, to avoid process-spawning contention
  // - Remote servers: higher, since they are just network connections
  const localBatch = processBatched(
    localServers,
    getMcpServerConnectionBatchSize(),
    processServer,
  )
  const remoteBatch = processBatched(
    remoteServers,
    getRemoteMcpServerConnectionBatchSize(),
    processServer,
  )
  await Promise.all([localBatch, remoteBatch])
}
2404
/**
 * Connects to every server in `mcpConfigs` and gathers the resulting clients,
 * tools and commands into a single result.
 *
 * Not memoized: called only 2-3 times at startup/reconfig. The inner work
 * (connectToServer, fetch*ForClient) is already cached. Memoizing here by
 * mcpConfigs object ref leaked — main.tsx creates fresh config objects each
 * call.
 *
 * The result is produced once getMcpToolsCommandsAndResources itself settles,
 * rather than by counting callback invocations against the number of config
 * keys. Counting would resolve early with partial data if a server were ever
 * reported twice, and would never resolve if one were never reported.
 *
 * @param mcpConfigs Server configs keyed by server name
 * @returns All clients, tools and commands reported for the given configs.
 *   Empty lists when there are no configs or when the underlying call rejects
 *   (the rejection is logged, not propagated).
 */
export async function prefetchAllMcpResources(
  mcpConfigs: Record<string, ScopedMcpServerConfig>,
): Promise<{
  clients: MCPServerConnection[]
  tools: Tool[]
  commands: Command[]
}> {
  const clients: MCPServerConnection[] = []
  const tools: Tool[] = []
  const commands: Command[] = []

  // Nothing configured: skip the connection pipeline (and the telemetry
  // event) entirely.
  if (Object.keys(mcpConfigs).length === 0) {
    return { clients, tools, commands }
  }

  try {
    await getMcpToolsCommandsAndResources(result => {
      clients.push(result.client)
      tools.push(...result.tools)
      commands.push(...result.commands)
    }, mcpConfigs)
  } catch (error) {
    logMCPError(
      'prefetchAllMcpResources',
      `Failed to get MCP resources: ${errorMessage(error)}`,
    )
    // Still resolve, but with empty results — partial data is discarded.
    return { clients: [], tools: [], commands: [] }
  }

  // Total size of the command metadata (name + description + argument hint).
  const commandsMetadataLength = commands.reduce(
    (sum, command) =>
      sum +
      command.name.length +
      (command.description ?? '').length +
      (command.argumentHint ?? '').length,
    0,
  )
  logEvent('tengu_mcp_tools_commands_loaded', {
    tools_count: tools.length,
    commands_count: commands.length,
    commands_metadata_length: commandsMetadataLength,
  })

  return { clients, tools, commands }
}
2474
/**
 * Transforms one content item from an MCP tool result or MCP prompt message
 * into message content blocks.
 *
 * - `text` becomes a single text block.
 * - `audio` is handed to persistBlobToTextBlock, which yields a text block.
 * - `image` is passed through maybeResizeAndDownsampleImageBuffer and
 *   re-encoded as a base64 image block.
 * - `resource` with text becomes a prefixed text block; with a blob it becomes
 *   either a prefix text block followed by an image block (when the MIME type
 *   is in IMAGE_MIME_TYPES) or the output of persistBlobToTextBlock.
 * - `resource_link` becomes a text block holding the link name, URI and
 *   optional description.
 * - Any other content type yields no blocks.
 *
 * @param resultContent The MCP content item to convert
 * @param serverName Name of the originating server, used in prefixes
 */
export async function transformResultContent(
  resultContent: PromptMessage['content'],
  serverName: string,
): Promise<Array<ContentBlockParam>> {
  // Decodes base64 image data, resizes/compresses it to respect the API
  // dimension limits, and wraps the outcome in an image block.
  const toImageBlock = async (
    base64Data: string,
    mimeType: string | undefined,
  ): Promise<ContentBlockParam> => {
    const decoded = Buffer.from(base64Data, 'base64')
    const extension = mimeType?.split('/')[1] || 'png'
    const resized = await maybeResizeAndDownsampleImageBuffer(
      decoded,
      decoded.length,
      extension,
    )
    return {
      type: 'image',
      source: {
        data: resized.buffer.toString('base64'),
        media_type:
          `image/${resized.mediaType}` as Base64ImageSource['media_type'],
        type: 'base64',
      },
    }
  }

  switch (resultContent.type) {
    case 'text':
      return [{ type: 'text', text: resultContent.text }]

    case 'audio': {
      const audio = resultContent as {
        type: 'audio'
        data: string
        mimeType?: string
      }
      const audioBytes = Buffer.from(audio.data, 'base64')
      return await persistBlobToTextBlock(
        audioBytes,
        audio.mimeType,
        serverName,
        `[Audio from ${serverName}] `,
      )
    }

    case 'image':
      return [
        await toImageBlock(String(resultContent.data), resultContent.mimeType),
      ]

    case 'resource': {
      const { resource } = resultContent
      const prefix = `[Resource from ${serverName} at ${resource.uri}] `

      if ('text' in resource) {
        return [{ type: 'text', text: `${prefix}${resource.text}` }]
      }
      if (!('blob' in resource)) {
        return []
      }

      if (!IMAGE_MIME_TYPES.has(resource.mimeType ?? '')) {
        // Non-image binary payload: persist it instead of inlining it.
        return await persistBlobToTextBlock(
          Buffer.from(resource.blob, 'base64'),
          resource.mimeType,
          serverName,
          prefix,
        )
      }

      const imageBlock = await toImageBlock(resource.blob, resource.mimeType)
      return [{ type: 'text', text: prefix }, imageBlock]
    }

    case 'resource_link': {
      const link = resultContent as ResourceLink
      const description = link.description ? ` (${link.description})` : ''
      return [
        {
          type: 'text',
          text: `[Resource link: ${link.name}] ${link.uri}${description}`,
        },
      ]
    }

    default:
      return []
  }
}
2592
/**
 * Hands binary content to persistBinaryContent and returns a single small
 * text block describing the outcome: the saved-file message on success, or a
 * note that the content could not be saved. Replaces the old behavior of
 * dumping raw base64 into the context.
 *
 * @param bytes Decoded binary payload
 * @param mimeType MIME type reported for the payload, if any
 * @param serverName Originating server; its normalized form is part of the
 *   persist ID
 * @param sourceDescription Prefix describing where the content came from
 */
async function persistBlobToTextBlock(
  bytes: Buffer,
  mimeType: string | undefined,
  serverName: string,
  sourceDescription: string,
): Promise<Array<ContentBlockParam>> {
  // Timestamp plus a short random suffix keeps IDs distinct per blob.
  const idParts = [
    'mcp',
    normalizeNameForMCP(serverName),
    'blob',
    Date.now(),
    Math.random().toString(36).slice(2, 8),
  ]
  const result = await persistBinaryContent(bytes, mimeType, idParts.join('-'))

  const text =
    'error' in result
      ? `${sourceDescription}Binary content (${mimeType || 'unknown type'}, ${bytes.length} bytes) could not be saved to disk: ${result.error}`
      : getBinaryBlobSavedMessage(
          result.filepath,
          mimeType,
          result.size,
          sourceDescription,
        )

  return [{ type: 'text', text }]
}
2628
/**
 * Identifies which shape of MCP tool response a normalized result was built
 * from.
 */
export type MCPResultType =
  | 'toolResult'
  | 'structuredContent'
  | 'contentArray'

/**
 * An MCP tool result processed into a normalized format.
 */
export type TransformedMCPResult = {
  /** The normalized content. */
  content: MCPToolResult
  /** The response shape the content was derived from. */
  type: MCPResultType
  /** Optional compact type signature of the content. */
  schema?: string
}
2639
/**
 * Generates a compact, jq-friendly type signature for a value.
 *
 * - `null` is rendered as `null`; other primitives as their `typeof` name.
 * - Arrays are described by their first element only (`[]` when empty).
 * - Objects list at most their first 10 properties, followed by `, ...` when
 *   there are more; once `depth` is used up they are rendered as `{...}`.
 *
 * Every array or object level consumes one unit of `depth`, so with the
 * default depth `{title: "a", items: [{id: 1}]}` yields
 * `{title: string, items: [{...}]}`; pass `depth = 3` to get
 * `{title: string, items: [{id: number}]}`.
 *
 * @param value The value to describe
 * @param depth How many nesting levels to expand (default 2)
 */
export function inferCompactSchema(value: unknown, depth = 2): string {
  if (value === null) return 'null'
  if (typeof value !== 'object') return typeof value

  if (Array.isArray(value)) {
    return value.length === 0
      ? '[]'
      : `[${inferCompactSchema(value[0], depth - 1)}]`
  }

  if (depth <= 0) return '{...}'

  const maxShownProps = 10
  const allEntries = Object.entries(value)
  const parts = allEntries
    .slice(0, maxShownProps)
    .map(([key, child]) => `${key}: ${inferCompactSchema(child, depth - 1)}`)
  if (allEntries.length > maxShownProps) {
    parts.push('...')
  }
  return `{${parts.join(', ')}}`
}
2661
/**
 * Normalizes a raw MCP tool response into content plus a tag describing which
 * response shape it came from. Shapes are checked in this order:
 *
 * 1. `toolResult` — strings and other primitives are stringified as-is;
 *    object and array payloads are JSON-serialized so their data is kept
 *    rather than collapsing to "[object Object]".
 * 2. `structuredContent` (when not `undefined`) — JSON-serialized, with a
 *    compact schema of the structured value.
 * 3. `content` array — each item goes through transformResultContent and the
 *    resulting blocks are flattened, with a compact schema of those blocks.
 *
 * @param result The raw response from the MCP server
 * @param tool Tool name for validation (e.g., "search")
 * @param name Server name for transformation (e.g., "slack")
 * @throws TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS when
 *   the response matches none of the shapes above (also logged)
 */
export async function transformMCPResult(
  result: unknown,
  tool: string, // Tool name for validation (e.g., "search")
  name: string, // Server name for transformation (e.g., "slack")
): Promise<TransformedMCPResult> {
  if (result && typeof result === 'object') {
    if ('toolResult' in result) {
      const { toolResult } = result
      // String() on an object yields "[object Object]" and drops the data,
      // so only primitives take the String() path.
      const isStructured = toolResult !== null && typeof toolResult === 'object'
      return {
        content: isStructured ? jsonStringify(toolResult) : String(toolResult),
        type: 'toolResult',
      }
    }

    if (
      'structuredContent' in result &&
      result.structuredContent !== undefined
    ) {
      return {
        content: jsonStringify(result.structuredContent),
        type: 'structuredContent',
        schema: inferCompactSchema(result.structuredContent),
      }
    }

    if ('content' in result && Array.isArray(result.content)) {
      const transformedContent = (
        await Promise.all(
          result.content.map(item => transformResultContent(item, name)),
        )
      ).flat()
      return {
        content: transformedContent,
        type: 'contentArray',
        schema: inferCompactSchema(transformedContent),
      }
    }
  }

  // Named `failureMessage` so it does not shadow the errorMessage() helper
  // used elsewhere in this file.
  const failureMessage = `MCP server "${name}" tool "${tool}": unexpected response format`
  logMCPError(name, failureMessage)
  throw new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
    failureMessage,
    'MCP tool unexpected response format',
  )
}
2707
/**
 * Reports whether MCP content includes at least one image block.
 *
 * Used to decide whether to persist to file: content with images should be
 * truncated instead, to preserve image compression and viewability. String
 * content and empty/absent content never contain images.
 */
function contentContainsImages(content: MCPToolResult): boolean {
  if (typeof content === 'string' || !content) {
    return false
  }
  const isImageBlock = (block: { type: string }): boolean =>
    block.type === 'image'
  return content.some(isImageBlock)
}
2719
/**
 * Normalizes an MCP tool response and keeps oversized output out of the
 * model context.
 *
 * After transformMCPResult, the content is returned unchanged when the server
 * is `ide` (IDE tool output does not go to the model directly) or when
 * mcpContentNeedsTruncation says it fits. Otherwise:
 *
 * - if ENABLE_MCP_LARGE_OUTPUT_FILES is defined and falsy, the content is
 *   truncated (old behavior);
 * - if the content contains images, it is truncated — persisting images as
 *   JSON defeats the image compression logic and makes them non-viewable;
 * - otherwise the content is saved to a file and instructions for reading it
 *   are returned; if saving fails, an error string is returned instead.
 *
 * Each large-output path logs a `tengu_mcp_large_result_handled` event.
 *
 * @param result The raw response from the MCP server
 * @param tool Tool name for validation (e.g., "search")
 * @param name Server name for IDE check and transformation (e.g., "slack")
 */
export async function processMCPResult(
  result: unknown,
  tool: string, // Tool name for validation (e.g., "search")
  name: string, // Server name for IDE check and transformation (e.g., "slack")
): Promise<MCPToolResult> {
  const { content, type, schema } = await transformMCPResult(result, tool, name)

  // IDE tools are not going to the model directly, so we don't need to
  // handle large output.
  if (name === 'ide') {
    return content
  }

  // Check if content needs truncation (i.e., is too large)
  if (!(await mcpContentNeedsTruncation(content))) {
    return content
  }

  const sizeEstimateTokens = getContentSizeEstimate(content)

  // If large output files feature is disabled, fall back to old truncation behavior
  if (isEnvDefinedFalsy(process.env.ENABLE_MCP_LARGE_OUTPUT_FILES)) {
    logEvent('tengu_mcp_large_result_handled', {
      outcome: 'truncated',
      reason: 'env_disabled',
      sizeEstimateTokens,
    } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
    return await truncateMcpContentIfNeeded(content)
  }

  // Content is expected to exist at this point (it needed truncation); this
  // guard mainly narrows the type for the code below.
  if (!content) {
    return content
  }

  // If content contains images, fall back to truncation - persisting images as JSON
  // defeats the image compression logic and makes them non-viewable
  if (contentContainsImages(content)) {
    logEvent('tengu_mcp_large_result_handled', {
      outcome: 'truncated',
      reason: 'contains_images',
      sizeEstimateTokens,
    } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
    return await truncateMcpContentIfNeeded(content)
  }

  // Generate a unique ID for the persisted file
  // (server-tool-timestamp-random). The timestamp alone is not unique: two
  // large results from the same tool can land in the same millisecond, so a
  // short random suffix is appended, as persistBlobToTextBlock already does.
  const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
  const persistId = `mcp-${normalizeNameForMCP(name)}-${normalizeNameForMCP(tool)}-${uniqueSuffix}`
  // Convert to string for persistence (persistToolResult expects string or specific block types)
  const contentStr =
    typeof content === 'string' ? content : jsonStringify(content, null, 2)
  const persistResult = await persistToolResult(contentStr, persistId)

  if (isPersistError(persistResult)) {
    // If file save failed, fall back to returning truncated content info
    const contentLength = contentStr.length
    logEvent('tengu_mcp_large_result_handled', {
      outcome: 'truncated',
      reason: 'persist_failed',
      sizeEstimateTokens,
    } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
    return `Error: result (${contentLength.toLocaleString()} characters) exceeds maximum allowed tokens. Failed to save output to file: ${persistResult.error}. If this MCP server provides pagination or filtering tools, use them to retrieve specific portions of the data.`
  }

  logEvent('tengu_mcp_large_result_handled', {
    outcome: 'persisted',
    reason: 'file_saved',
    sizeEstimateTokens,
    persistedSizeChars: persistResult.originalSize,
  } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)

  const formatDescription = getFormatDescription(type, schema)
  return getLargeOutputInstructions(
    persistResult.filepath,
    persistResult.originalSize,
    formatDescription,
  )
}
2800
/**
 * Result of an MCP tool call, as produced by the tool-call function used by
 * callMCPToolWithUrlElicitationRetry. That function calls an MCP tool and
 * handles UrlElicitationRequiredError (-32042) by displaying the URL
 * elicitation to the user, waiting for the completion notification, and
 * retrying the tool call.
 */
type MCPToolCallResult = {
  /** The tool's content. */
  content: MCPToolResult
  /** Optional `_meta` object accompanying the result. */
  _meta?: Record<string, unknown>
  /** Optional structured content accompanying the result. */
  structuredContent?: Record<string, unknown>
}
2811
2812/** @internal Exported for testing. */
2813export async function callMCPToolWithUrlElicitationRetry({
2814 client: connectedClient,
2815 clientConnection,
2816 tool,
2817 args,
2818 meta,
2819 signal,
2820 setAppState,
2821 onProgress,
2822 callToolFn = callMCPTool,
2823 handleElicitation,
2824}: {
2825 client: ConnectedMCPServer
2826 clientConnection: MCPServerConnection
2827 tool: string
2828 args: Record<string, unknown>
2829 meta?: Record<string, unknown>
2830 signal: AbortSignal
2831 setAppState: (f: (prev: AppState) => AppState) => void
2832 onProgress?: (data: MCPProgress) => void
2833 /** Injectable for testing. Defaults to callMCPTool. */
2834 callToolFn?: (opts: {
2835 client: ConnectedMCPServer
2836 tool: string
2837 args: Record<string, unknown>
2838 meta?: Record<string, unknown>
2839 signal: AbortSignal
2840 onProgress?: (data: MCPProgress) => void
2841 }) => Promise<MCPToolCallResult>
2842 /** Handler for URL elicitations when no hook handles them.
2843 * In print/SDK mode, delegates to structuredIO. In REPL, falls back to queue. */
2844 handleElicitation?: (
2845 serverName: string,
2846 params: ElicitRequestURLParams,
2847 signal: AbortSignal,
2848 ) => Promise<ElicitResult>
2849}): Promise<MCPToolCallResult> {
2850 const MAX_URL_ELICITATION_RETRIES = 3
2851 for (let attempt = 0; ; attempt++) {
2852 try {
2853 return await callToolFn({
2854 client: connectedClient,
2855 tool,
2856 args,
2857 meta,
2858 signal,
2859 onProgress,
2860 })
2861 } catch (error) {
2862 // The MCP SDK's Protocol creates plain McpError (not UrlElicitationRequiredError)
2863 // for error responses, so we check the error code instead of instanceof.
2864 if (
2865 !(error instanceof McpError) ||
2866 error.code !== ErrorCode.UrlElicitationRequired
2867 ) {
2868 throw error
2869 }
2870
2871 // Limit the number of URL elicitation retries
2872 if (attempt >= MAX_URL_ELICITATION_RETRIES) {
2873 throw error
2874 }
2875
2876 const errorData = error.data
2877 const rawElicitations =
2878 errorData != null &&
2879 typeof errorData === 'object' &&
2880 'elicitations' in errorData &&
2881 Array.isArray(errorData.elicitations)
2882 ? (errorData.elicitations as unknown[])
2883 : []
2884
2885 // Validate each element has the required fields for ElicitRequestURLParams
2886 const elicitations = rawElicitations.filter(
2887 (e): e is ElicitRequestURLParams => {
2888 if (e == null || typeof e !== 'object') return false
2889 const obj = e as Record<string, unknown>
2890 return (
2891 obj.mode === 'url' &&
2892 typeof obj.url === 'string' &&
2893 typeof obj.elicitationId === 'string' &&
2894 typeof obj.message === 'string'
2895 )
2896 },
2897 )
2898
2899 const serverName =
2900 clientConnection.type === 'connected'
2901 ? clientConnection.name
2902 : 'unknown'
2903
2904 if (elicitations.length === 0) {
2905 logMCPDebug(
2906 serverName,
2907 `Tool '${tool}' returned -32042 but no valid elicitations in error data`,
2908 )
2909 throw error
2910 }
2911
2912 logMCPDebug(
2913 serverName,
2914 `Tool '${tool}' requires URL elicitation (error -32042, attempt ${attempt + 1}), processing ${elicitations.length} elicitation(s)`,
2915 )
2916
2917 // Process each URL elicitation from the error.
2918 // The completion notification handler (in registerElicitationHandler) sets
2919 // `completed: true` on the matching queue event; the dialog reacts to this flag.
2920 for (const elicitation of elicitations) {
2921 const { elicitationId } = elicitation
2922
2923 // Run elicitation hooks — they can resolve URL elicitations programmatically
2924 const hookResponse = await runElicitationHooks(
2925 serverName,
2926 elicitation,
2927 signal,
2928 )
2929 if (hookResponse) {
2930 logMCPDebug(
2931 serverName,
2932 `URL elicitation ${elicitationId} resolved by hook: ${jsonStringify(hookResponse)}`,
2933 )
2934 if (hookResponse.action !== 'accept') {
2935 return {
2936 content: `URL elicitation was ${hookResponse.action === 'decline' ? 'declined' : hookResponse.action + 'ed'} by a hook. The tool "${tool}" could not complete because it requires the user to open a URL.`,
2937 }
2938 }
2939 // Hook accepted — skip the UI and proceed to retry
2940 continue
2941 }
2942
2943 // Resolve the URL elicitation via callback (print/SDK mode) or queue (REPL mode).
2944 let userResult: ElicitResult
2945 if (handleElicitation) {
2946 // Print/SDK mode: delegate to structuredIO which sends a control request
2947 userResult = await handleElicitation(serverName, elicitation, signal)
2948 } else {
2949 // REPL mode: queue for ElicitationDialog with two-phase consent/waiting flow
2950 const waitingState: ElicitationWaitingState = {
2951 actionLabel: 'Retry now',
2952 showCancel: true,
2953 }
2954 userResult = await new Promise<ElicitResult>(resolve => {
2955 const onAbort = () => {
2956 void resolve({ action: 'cancel' })
2957 }
2958 if (signal.aborted) {
2959 onAbort()
2960 return
2961 }
2962 signal.addEventListener('abort', onAbort, { once: true })
2963
2964 setAppState(prev => ({
2965 ...prev,
2966 elicitation: {
2967 queue: [
2968 ...prev.elicitation.queue,
2969 {
2970 serverName,
2971 requestId: `error-elicit-${elicitationId}`,
2972 params: elicitation,
2973 signal,
2974 waitingState,
2975 respond: result => {
2976 // Phase 1 consent: accept is a no-op (doesn't resolve retry Promise)
2977 if (result.action === 'accept') {
2978 return
2979 }
2980 // Decline or cancel: resolve the retry Promise
2981 signal.removeEventListener('abort', onAbort)
2982 void resolve(result)
2983 },
2984 onWaitingDismiss: action => {
2985 signal.removeEventListener('abort', onAbort)
2986 if (action === 'retry') {
2987 void resolve({ action: 'accept' })
2988 } else {
2989 void resolve({ action: 'cancel' })
2990 }
2991 },
2992 },
2993 ],
2994 },
2995 }))
2996 })
2997 }
2998
2999 // Run ElicitationResult hooks — they can modify or block the response
3000 const finalResult = await runElicitationResultHooks(
3001 serverName,
3002 userResult,
3003 signal,
3004 'url',
3005 elicitationId,
3006 )
3007
3008 if (finalResult.action !== 'accept') {
3009 logMCPDebug(
3010 serverName,
3011 `User ${finalResult.action === 'decline' ? 'declined' : finalResult.action + 'ed'} URL elicitation ${elicitationId}`,
3012 )
3013 return {
3014 content: `URL elicitation was ${finalResult.action === 'decline' ? 'declined' : finalResult.action + 'ed'} by the user. The tool "${tool}" could not complete because it requires the user to open a URL.`,
3015 }
3016 }
3017
3018 logMCPDebug(
3019 serverName,
3020 `Elicitation ${elicitationId} completed, retrying tool call`,
3021 )
3022 }
3023
3024 // Loop back to retry the tool call
3025 }
3026 }
3027}
3028
/**
 * Invokes `tool` on a connected MCP server and post-processes the result.
 *
 * The SDK call is raced against a local timer of getMcpToolTimeoutMs()
 * milliseconds, and a debug line is logged every 30 seconds while the call is
 * pending.
 *
 * @returns The processed content together with the result's `_meta` and
 *   `structuredContent`. When the call fails with an AbortError the function
 *   resolves with `{ content: undefined }` instead of throwing.
 * @throws McpToolCallError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS when the
 *   server returns a result flagged `isError`.
 * @throws TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS when the
 *   local timeout fires first.
 * @throws McpAuthError on a 401 / UnauthorizedError.
 * @throws McpSessionExpiredError when the session expired (the server's
 *   connection cache is cleared first).
 */
async function callMCPTool({
  client: { client, name, config },
  tool,
  args,
  meta,
  signal,
  onProgress,
}: {
  client: ConnectedMCPServer
  tool: string
  args: Record<string, unknown>
  meta?: Record<string, unknown>
  signal: AbortSignal
  onProgress?: (data: MCPProgress) => void
}): Promise<{
  content: MCPToolResult
  _meta?: Record<string, unknown>
  structuredContent?: Record<string, unknown>
}> {
  const toolStartTime = Date.now()
  let progressInterval: NodeJS.Timeout | undefined

  try {
    logMCPDebug(name, `Calling MCP tool: ${tool}`)

    // Set up progress logging for long-running tools (every 30 seconds)
    progressInterval = setInterval(
      (startTime, name, tool) => {
        const elapsed = Date.now() - startTime
        const elapsedSeconds = Math.floor(elapsed / 1000)
        const duration = `${elapsedSeconds}s`
        logMCPDebug(name, `Tool '${tool}' still running (${duration} elapsed)`)
      },
      30000, // Log every 30 seconds
      toolStartTime,
      name,
      tool,
    )

    // Use Promise.race with our own timeout to handle cases where SDK's
    // internal timeout doesn't work (e.g., SSE stream breaks mid-request)
    const timeoutMs = getMcpToolTimeoutMs()
    let timeoutId: NodeJS.Timeout | undefined

    const timeoutPromise = new Promise<never>((_, reject) => {
      timeoutId = setTimeout(
        (reject, name, tool, timeoutMs) => {
          reject(
            new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
              `MCP server "${name}" tool "${tool}" timed out after ${Math.floor(timeoutMs / 1000)}s`,
              'MCP tool timeout',
            ),
          )
        },
        timeoutMs,
        reject,
        name,
        tool,
        timeoutMs,
      )
    })

    const result = await Promise.race([
      client.callTool(
        {
          name: tool,
          arguments: args,
          _meta: meta,
        },
        CallToolResultSchema,
        {
          signal,
          timeout: timeoutMs,
          onprogress: onProgress
            ? sdkProgress => {
                onProgress({
                  type: 'mcp_progress',
                  status: 'progress',
                  serverName: name,
                  toolName: tool,
                  progress: sdkProgress.progress,
                  total: sdkProgress.total,
                  progressMessage: sdkProgress.message,
                })
              }
            : undefined,
        },
      ),
      timeoutPromise,
    ]).finally(() => {
      // Whichever side won the race, the local timer is no longer needed.
      if (timeoutId) {
        clearTimeout(timeoutId)
      }
    })

    if ('isError' in result && result.isError) {
      // Use the first content block that carries text. Looking only at
      // content[0] would lose the message whenever a non-text block
      // (e.g. an image) happens to come first.
      let errorText: string | undefined
      if ('content' in result && Array.isArray(result.content)) {
        for (const block of result.content) {
          if (
            block &&
            typeof block === 'object' &&
            'text' in block &&
            typeof block.text === 'string'
          ) {
            errorText = block.text
            break
          }
        }
      }
      if (errorText === undefined && 'error' in result) {
        // Fallback for legacy error format
        errorText = String(result.error)
      }
      const errorDetails = errorText ?? 'Unknown error'

      logMCPError(name, errorDetails)
      throw new McpToolCallError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
        errorDetails,
        'MCP tool returned error',
        '_meta' in result && result._meta ? { _meta: result._meta } : undefined,
      )
    }
    const elapsed = Date.now() - toolStartTime
    const duration =
      elapsed < 1000
        ? `${elapsed}ms`
        : elapsed < 60000
          ? `${Math.floor(elapsed / 1000)}s`
          : `${Math.floor(elapsed / 60000)}m ${Math.floor((elapsed % 60000) / 1000)}s`

    logMCPDebug(name, `Tool '${tool}' completed successfully in ${duration}`)

    // Log code indexing tool usage
    const codeIndexingTool = detectCodeIndexingFromMcpServerName(name)
    if (codeIndexingTool) {
      logEvent('tengu_code_indexing_tool_used', {
        tool: codeIndexingTool as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        source:
          'mcp' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        success: true,
      })
    }

    const content = await processMCPResult(result, tool, name)
    return {
      content,
      _meta: result._meta as Record<string, unknown> | undefined,
      structuredContent: result.structuredContent as
        | Record<string, unknown>
        | undefined,
    }
  } catch (e) {
    // Stop the progress log right away: the handling below may await
    // clearServerCache before the finally block gets to run.
    if (progressInterval !== undefined) {
      clearInterval(progressInterval)
    }

    const elapsed = Date.now() - toolStartTime

    if (e instanceof Error && e.name !== 'AbortError') {
      logMCPDebug(
        name,
        `Tool '${tool}' failed after ${Math.floor(elapsed / 1000)}s: ${e.message}`,
      )
    }

    // Check for 401 errors indicating expired/invalid OAuth tokens
    // The MCP SDK's StreamableHTTPError has a `code` property with the HTTP status
    if (e instanceof Error) {
      const errorCode = 'code' in e ? (e.code as number | undefined) : undefined
      if (errorCode === 401 || e instanceof UnauthorizedError) {
        logMCPDebug(
          name,
          `Tool call returned 401 Unauthorized - token may have expired`,
        )
        logEvent('tengu_mcp_tool_call_auth_error', {})
        throw new McpAuthError(
          name,
          `MCP server "${name}" requires re-authorization (token expired)`,
        )
      }

      // Check for session expiry — two error shapes can surface here:
      // 1. Direct 404 + JSON-RPC -32001 from the server (StreamableHTTPError)
      // 2. -32000 "Connection closed" (McpError) — the SDK closes the transport
      //    after the onerror handler fires, so the pending callTool() rejects
      //    with this derived error instead of the original 404.
      // In both cases, clear the connection cache so the next tool call
      // creates a fresh session.
      const isSessionExpired = isMcpSessionExpiredError(e)
      const isConnectionClosedOnHttp =
        'code' in e &&
        (e as Error & { code?: number }).code === -32000 &&
        e.message.includes('Connection closed') &&
        (config.type === 'http' || config.type === 'claudeai-proxy')
      if (isSessionExpired || isConnectionClosedOnHttp) {
        logMCPDebug(
          name,
          `MCP session expired during tool call (${isSessionExpired ? '404/-32001' : 'connection closed'}), clearing connection cache for re-initialization`,
        )
        logEvent('tengu_mcp_session_expired', {})
        await clearServerCache(name, config)
        throw new McpSessionExpiredError(name)
      }
    }

    // When the user hits esc, avoid logspew: an AbortError resolves quietly
    // with no content, everything else is rethrown.
    if (!(e instanceof Error) || e.name !== 'AbortError') {
      throw e
    }
    return { content: undefined }
  } finally {
    // Always clear intervals
    if (progressInterval !== undefined) {
      clearInterval(progressInterval)
    }
  }
}
3246
/**
 * Returns the id of the message's first content block when that block is a
 * `tool_use`, and `undefined` otherwise (including when there is no content).
 * Only the first block is inspected.
 */
function extractToolUseId(message: AssistantMessage): string | undefined {
  const leadingBlock = message.message.content[0]
  return leadingBlock?.type === 'tool_use' ? leadingBlock.id : undefined
}
3253
/**
 * Sets up SDK MCP clients by creating transports and connecting them.
 * This is used for SDK MCP servers that run in the same process as the SDK.
 *
 * All servers are connected in parallel. A server whose connection or tool
 * listing fails is logged and reported as a `failed` connection rather than
 * aborting the others. Both connected and failed entries carry
 * `scope: 'dynamic'`.
 *
 * @param sdkMcpConfigs - The SDK MCP server configurations
 * @param sendMcpMessage - Callback to send MCP messages through the control channel
 * @returns One connection entry per server (connected or failed) and the
 *   tools fetched from the connected servers that advertise tools
 */
export async function setupSdkMcpClients(
  sdkMcpConfigs: Record<string, McpSdkServerConfig>,
  sendMcpMessage: (
    serverName: string,
    message: JSONRPCMessage,
  ) => Promise<JSONRPCMessage>,
): Promise<{
  clients: MCPServerConnection[]
  tools: Tool[]
}> {
  const clients: MCPServerConnection[] = []
  const tools: Tool[] = []
  const serverEntries = Object.entries(sdkMcpConfigs)

  // Connect to all servers in parallel
  const results = await Promise.allSettled(
    serverEntries.map(async ([name, config]) => {
      const transport = new SdkControlClientTransport(name, sendMcpMessage)

      const client = new Client(
        {
          name: 'claude-code',
          title: 'Claude Code',
          version: MACRO.VERSION ?? 'unknown',
          description: "Anthropic's agentic coding tool",
          websiteUrl: PRODUCT_URL,
        },
        {
          capabilities: {},
        },
      )

      // The same scope is used whether the connection succeeds or fails, so a
      // failing SDK server is not reclassified under a different scope.
      const scopedConfig = { ...config, scope: 'dynamic' as const }

      try {
        // Connect the client
        await client.connect(transport)

        // Get capabilities from the server
        const capabilities = client.getServerCapabilities()

        // Create the connected client object
        const connectedClient: MCPServerConnection = {
          type: 'connected',
          name,
          capabilities: capabilities || {},
          client,
          config: scopedConfig,
          cleanup: async () => {
            await client.close()
          },
        }

        // Fetch tools if the server has them
        const serverTools: Tool[] = []
        if (capabilities?.tools) {
          const sdkTools = await fetchToolsForClient(connectedClient)
          serverTools.push(...sdkTools)
        }

        return {
          client: connectedClient,
          tools: serverTools,
        }
      } catch (error) {
        // If connection fails, return failed server
        logMCPError(name, `Failed to connect SDK MCP server: ${error}`)

        // The failure may have happened after the connection was established
        // (e.g. while listing tools). Close best-effort so a server reported
        // as failed does not keep a live client behind it.
        try {
          await client.close()
        } catch {
          // Ignored on purpose: the failure logged above is the one that matters.
        }

        return {
          client: {
            type: 'failed' as const,
            name,
            config: scopedConfig,
          },
          tools: [],
        }
      }
    }),
  )

  // Process results and collect clients and tools
  for (const [index, result] of results.entries()) {
    if (result.status === 'fulfilled') {
      clients.push(result.value.client)
      tools.push(...result.value.tools)
      continue
    }
    // A rejection means something threw outside the try/catch above (e.g.
    // while constructing the transport or client), so nothing was logged yet.
    logMCPError(
      serverEntries[index]?.[0] ?? 'unknown',
      `Unexpected error setting up SDK MCP server: ${result.reason}`,
    )
  }

  return { clients, tools }
}
3349