services/mcp/client.ts

116 KB3349 lines
src/services/mcp/client.ts
1import { feature } from 'bun:bundle'
2import type {
3  Base64ImageSource,
4  ContentBlockParam,
5  MessageParam,
6} from '@anthropic-ai/sdk/resources/index.mjs'
7import { Client } from '@modelcontextprotocol/sdk/client/index.js'
8import {
9  SSEClientTransport,
10  type SSEClientTransportOptions,
11} from '@modelcontextprotocol/sdk/client/sse.js'
12import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js'
13import {
14  StreamableHTTPClientTransport,
15  type StreamableHTTPClientTransportOptions,
16} from '@modelcontextprotocol/sdk/client/streamableHttp.js'
17import {
18  createFetchWithInit,
19  type FetchLike,
20  type Transport,
21} from '@modelcontextprotocol/sdk/shared/transport.js'
22import {
23  CallToolResultSchema,
24  ElicitRequestSchema,
25  type ElicitRequestURLParams,
26  type ElicitResult,
27  ErrorCode,
28  type JSONRPCMessage,
29  type ListPromptsResult,
30  ListPromptsResultSchema,
31  ListResourcesResultSchema,
32  ListRootsRequestSchema,
33  type ListToolsResult,
34  ListToolsResultSchema,
35  McpError,
36  type PromptMessage,
37  type ResourceLink,
38} from '@modelcontextprotocol/sdk/types.js'
39import mapValues from 'lodash-es/mapValues.js'
40import memoize from 'lodash-es/memoize.js'
41import zipObject from 'lodash-es/zipObject.js'
42import pMap from 'p-map'
43import { getOriginalCwd, getSessionId } from '../../bootstrap/state.js'
44import type { Command } from '../../commands.js'
45import { getOauthConfig } from '../../constants/oauth.js'
46import { PRODUCT_URL } from '../../constants/product.js'
47import type { AppState } from '../../state/AppState.js'
48import {
49  type Tool,
50  type ToolCallProgress,
51  toolMatchesName,
52} from '../../Tool.js'
53import { ListMcpResourcesTool } from '../../tools/ListMcpResourcesTool/ListMcpResourcesTool.js'
54import { type MCPProgress, MCPTool } from '../../tools/MCPTool/MCPTool.js'
55import { createMcpAuthTool } from '../../tools/McpAuthTool/McpAuthTool.js'
56import { ReadMcpResourceTool } from '../../tools/ReadMcpResourceTool/ReadMcpResourceTool.js'
57import { createAbortController } from '../../utils/abortController.js'
58import { count } from '../../utils/array.js'
59import {
60  checkAndRefreshOAuthTokenIfNeeded,
61  getClaudeAIOAuthTokens,
62  handleOAuth401Error,
63} from '../../utils/auth.js'
64import { registerCleanup } from '../../utils/cleanupRegistry.js'
65import { detectCodeIndexingFromMcpServerName } from '../../utils/codeIndexing.js'
66import { logForDebugging } from '../../utils/debug.js'
67import { isEnvDefinedFalsy, isEnvTruthy } from '../../utils/envUtils.js'
68import {
69  errorMessage,
70  TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
71} from '../../utils/errors.js'
72import { getMCPUserAgent } from '../../utils/http.js'
73import { maybeNotifyIDEConnected } from '../../utils/ide.js'
74import { maybeResizeAndDownsampleImageBuffer } from '../../utils/imageResizer.js'
75import { logMCPDebug, logMCPError } from '../../utils/log.js'
76import {
77  getBinaryBlobSavedMessage,
78  getFormatDescription,
79  getLargeOutputInstructions,
80  persistBinaryContent,
81} from '../../utils/mcpOutputStorage.js'
82import {
83  getContentSizeEstimate,
84  type MCPToolResult,
85  mcpContentNeedsTruncation,
86  truncateMcpContentIfNeeded,
87} from '../../utils/mcpValidation.js'
88import { WebSocketTransport } from '../../utils/mcpWebSocketTransport.js'
89import { memoizeWithLRU } from '../../utils/memoize.js'
90import { getWebSocketTLSOptions } from '../../utils/mtls.js'
91import {
92  getProxyFetchOptions,
93  getWebSocketProxyAgent,
94  getWebSocketProxyUrl,
95} from '../../utils/proxy.js'
96import { recursivelySanitizeUnicode } from '../../utils/sanitization.js'
97import { getSessionIngressAuthToken } from '../../utils/sessionIngressAuth.js'
98import { subprocessEnv } from '../../utils/subprocessEnv.js'
99import {
100  isPersistError,
101  persistToolResult,
102} from '../../utils/toolResultStorage.js'
103import {
104  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
105  logEvent,
106} from '../analytics/index.js'
107import {
108  type ElicitationWaitingState,
109  runElicitationHooks,
110  runElicitationResultHooks,
111} from './elicitationHandler.js'
112import { buildMcpToolName } from './mcpStringUtils.js'
113import { normalizeNameForMCP } from './normalization.js'
114import { getLoggingSafeMcpBaseUrl } from './utils.js'
115
116/* eslint-disable @typescript-eslint/no-require-imports */
// Resolved once at module load: when the 'MCP_SKILLS' bundle feature is on,
// this is the fetchMcpSkillsForClient export of skills/mcpSkills.js (loaded
// with a synchronous require); otherwise it is null, so callers must
// null-check before use.
// NOTE(review): presumably require() is used instead of a static import so
// the bundler can drop the module when the feature is off — confirm.
const fetchMcpSkillsForClient = feature('MCP_SKILLS')
  ? (
      require('../../skills/mcpSkills.js') as typeof import('../../skills/mcpSkills.js')
    ).fetchMcpSkillsForClient
  : null
122
123import { UnauthorizedError } from '@modelcontextprotocol/sdk/client/auth.js'
124import type { AssistantMessage } from 'src/types/message.js'
125/* eslint-enable @typescript-eslint/no-require-imports */
126import { classifyMcpToolForCollapse } from '../../tools/MCPTool/classifyForCollapse.js'
127import { clearKeychainCache } from '../../utils/secureStorage/macOsKeychainHelpers.js'
128import { sleep } from '../../utils/sleep.js'
129import {
130  ClaudeAuthProvider,
131  hasMcpDiscoveryButNoToken,
132  wrapFetchWithStepUpDetection,
133} from './auth.js'
134import { markClaudeAiMcpConnected } from './claudeai.js'
135import { getAllMcpConfigs, isMcpServerDisabled } from './config.js'
136import { getMcpServerHeaders } from './headersHelper.js'
137import { SdkControlClientTransport } from './SdkControlTransport.js'
138import type {
139  ConnectedMCPServer,
140  MCPServerConnection,
141  McpSdkServerConfig,
142  ScopedMcpServerConfig,
143  ServerResource,
144} from './types.js'
145
/**
 * Error signalling that an MCP tool call was rejected for authentication
 * reasons (for example an expired OAuth token answered with HTTP 401).
 *
 * The tool execution layer is expected to catch this and flip the affected
 * client's status to 'needs-auth'; `serverName` identifies which server the
 * failure belongs to.
 */
export class McpAuthError extends Error {
  /** Name of the MCP server whose call failed authentication. */
  serverName: string

  /**
   * @param serverName - Name of the MCP server that rejected the call
   * @param message - Human-readable failure description
   */
  constructor(serverName: string, message: string) {
    super(message)
    // Set explicitly so the error reports its own class name rather than
    // the inherited "Error".
    this.name = 'McpAuthError'
    this.serverName = serverName
  }
}
160
/**
 * Raised once an MCP session is known to have expired and the connection
 * cache has been cleared. Callers are expected to obtain a fresh client via
 * ensureConnectedClient and retry the operation.
 */
class McpSessionExpiredError extends Error {
  /**
   * @param serverName - Name of the MCP server whose session expired; it is
   *   embedded in the error message.
   */
  constructor(serverName: string) {
    super(`MCP server "${serverName}" session expired`)
    this.name = 'McpSessionExpiredError'
  }
}
171
/**
 * Error raised for an MCP tool result flagged `isError: true`.
 *
 * The result's `_meta` is carried along on `mcpMeta` so SDK consumers still
 * receive it: per the MCP spec `_meta` lives on the base Result type and is
 * therefore valid on error results too.
 */
export class McpToolCallError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS extends TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS {
  /** Optional wrapper holding the `_meta` of the failed tool result. */
  readonly mcpMeta?: { _meta?: Record<string, unknown> }

  /**
   * @param message - Full error message
   * @param telemetryMessage - Message forwarded to the telemetry-safe base class
   * @param mcpMeta - Optional `_meta` carrier from the tool result
   */
  constructor(
    message: string,
    telemetryMessage: string,
    mcpMeta?: { _meta?: Record<string, unknown> },
  ) {
    super(message, telemetryMessage)
    this.mcpMeta = mcpMeta
    this.name = 'McpToolCallError'
  }
}
187
/**
 * Tells whether `error` is the MCP "Session not found" failure, i.e. an HTTP
 * 404 whose body carries JSON-RPC error code -32001.
 *
 * Per the MCP spec a server answers 404 once a session ID is no longer valid.
 * Both signals are required so that ordinary 404s (wrong URL, server gone,
 * etc.) are not misclassified as expired sessions.
 *
 * @param error - Error to inspect; its numeric `code` (if any) is read as the
 *   HTTP status and its message is searched for the JSON-RPC code.
 * @returns true only when the status is 404 and the message contains the
 *   -32001 code.
 */
export function isMcpSessionExpiredError(error: Error): boolean {
  // Reading a missing property yields undefined, which fails the 404 check.
  const { code: httpStatus } = error as Error & { code?: number }

  // The SDK embeds the response body text in the error message, and servers
  // send {"error":{"code":-32001,"message":"Session not found"},...}.
  // Both compact and space-after-colon serializations are accepted.
  const sessionNotFoundMarkers = ['"code":-32001', '"code": -32001']

  return (
    httpStatus === 404 &&
    sessionNotFoundMarkers.some(marker => error.message.includes(marker))
  )
}
207
/**
 * Default timeout for MCP tool calls (effectively infinite - ~27.8 hours).
 */
const DEFAULT_MCP_TOOL_TIMEOUT_MS = 100_000_000

/**
 * Cap on MCP tool descriptions and server instructions sent to the model.
 * OpenAPI-generated MCP servers have been observed dumping 15-60KB of endpoint
 * docs into tool.description; this caps the p95 tail without losing the intent.
 */
const MAX_MCP_DESCRIPTION_LENGTH = 2048

/**
 * Gets the timeout for MCP tool calls in milliseconds.
 *
 * Reads the MCP_TOOL_TIMEOUT environment variable and parses it as a base-10
 * integer. The default (~27.8 hours) is used when the variable is unset,
 * unparseable, zero, or negative — a non-positive timeout is never returned.
 *
 * @returns A strictly positive timeout in milliseconds.
 */
function getMcpToolTimeoutMs(): number {
  const configured = parseInt(process.env.MCP_TOOL_TIMEOUT || '', 10)
  // NaN fails the comparison too, so a single check covers unset, garbage,
  // zero and negative values.
  return configured > 0 ? configured : DEFAULT_MCP_TOOL_TIMEOUT_MS
}
230
231import { isClaudeInChromeMCPServer } from '../../utils/claudeInChrome/common.js'
232
// Lazy: toolRendering.tsx pulls React/ink; only needed when Claude-in-Chrome MCP server is connected
// The thunk defers the require() until it is first called.
/* eslint-disable @typescript-eslint/no-require-imports */
const claudeInChromeToolRendering =
  (): typeof import('../../utils/claudeInChrome/toolRendering.js') =>
    require('../../utils/claudeInChrome/toolRendering.js')
// Lazy: wrapper.tsx → hostAdapter.ts → executor.ts pulls both native modules
// (@ant/computer-use-input + @ant/computer-use-swift). Runtime-gated by
// GrowthBook tengu_malort_pedway (see gates.ts).
// Undefined when the 'CHICAGO_MCP' bundle feature is off; otherwise a thunk
// that requires the wrapper module on each call.
const computerUseWrapper = feature('CHICAGO_MCP')
  ? (): typeof import('../../utils/computerUse/wrapper.js') =>
      require('../../utils/computerUse/wrapper.js')
  : undefined
// Unlike the thunks above, this require() runs at module load when the
// 'CHICAGO_MCP' feature is on; the binding is the isComputerUseMCPServer
// export of common.js, or undefined when the feature is off.
const isComputerUseMCPServer = feature('CHICAGO_MCP')
  ? (
      require('../../utils/computerUse/common.js') as typeof import('../../utils/computerUse/common.js')
    ).isComputerUseMCPServer
  : undefined
250
251import { mkdir, readFile, unlink, writeFile } from 'fs/promises'
252import { dirname, join } from 'path'
253import { getClaudeConfigHomeDir } from '../../utils/envUtils.js'
254/* eslint-enable @typescript-eslint/no-require-imports */
255import { jsonParse, jsonStringify } from '../../utils/slowOperations.js'
256
// How long a needs-auth cache entry stays valid: 15 minutes, in milliseconds.
const MCP_AUTH_CACHE_TTL_MS = 15 * 60 * 1000

// On-disk cache shape: one record per server id, holding the epoch-ms time
// at which the entry was written.
type McpAuthCacheData = Record<string, { timestamp: number }>

/**
 * Builds the path of the needs-auth cache file, which lives directly inside
 * the Claude config home directory.
 */
function getMcpAuthCachePath(): string {
  const configHome = getClaudeConfigHomeDir()
  return join(configHome, 'mcp-needs-auth-cache.json')
}
264
// Memoized so N concurrent isMcpAuthCached() calls during batched connection
// share a single file read instead of N reads of the same file. Invalidated
// on write (setMcpAuthCacheEntry) and clear (clearMcpAuthCache). Not using
// lodash memoize because we need to null out the cache, not delete by key.
let authCachePromise: Promise<McpAuthCacheData> | null = null

/**
 * Returns the needs-auth cache contents, reading the cache file at most once
 * until the memoized promise is invalidated.
 *
 * Never rejects: a missing, unreadable, or malformed file yields an empty
 * cache. Valid JSON that is not a plain object (e.g. `null`, an array, a
 * number) is also treated as empty, so callers can always index and assign
 * into the result safely.
 */
function getMcpAuthCache(): Promise<McpAuthCacheData> {
  if (!authCachePromise) {
    authCachePromise = readFile(getMcpAuthCachePath(), 'utf-8')
      .then((data): McpAuthCacheData => {
        const parsed: unknown = jsonParse(data)
        // A corrupted file containing e.g. "null" would otherwise make
        // cache[serverId] throw in readers, and the write path would keep
        // failing silently so the file would never be repaired.
        const isPlainObject =
          parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)
        return isPlainObject ? (parsed as McpAuthCacheData) : {}
      })
      .catch(() => ({}))
  }
  return authCachePromise
}
279
/**
 * Reports whether `serverId` has a needs-auth cache entry that is still
 * within the cache TTL.
 *
 * @param serverId - Key to look up in the needs-auth cache
 * @returns false when there is no entry or the entry is older than the TTL
 */
async function isMcpAuthCached(serverId: string): Promise<boolean> {
  const entry = (await getMcpAuthCache())[serverId]
  if (!entry) {
    return false
  }
  const ageMs = Date.now() - entry.timestamp
  return ageMs < MCP_AUTH_CACHE_TTL_MS
}
288
// Cache writes are funnelled through one promise chain so that concurrent
// read-modify-write cycles cannot interleave when several servers return 401
// in the same batch.
let writeChain = Promise.resolve()

/**
 * Records a needs-auth entry for `serverId`, stamped with the current time,
 * and persists the whole cache to disk. Fire-and-forget: the write is queued
 * behind earlier writes and any failure is swallowed.
 *
 * @param serverId - Key under which the entry is stored
 */
function setMcpAuthCacheEntry(serverId: string): void {
  const persistEntry = async (): Promise<void> => {
    const cache = await getMcpAuthCache()
    cache[serverId] = { timestamp: Date.now() }

    const cachePath = getMcpAuthCachePath()
    await mkdir(dirname(cachePath), { recursive: true })
    await writeFile(cachePath, jsonStringify(cache))

    // Drop the memoized read so later reads pick up the new entry from disk.
    // Safe because writeChain serializes writes: the next queued write will
    // re-read a file that already contains this entry.
    authCachePromise = null
  }

  writeChain = writeChain.then(persistEntry).catch(() => {
    // Best-effort cache write
  })
}
310
/**
 * Discards the needs-auth cache: drops the memoized in-memory read and
 * deletes the cache file. The deletion is not awaited and its failure is
 * ignored.
 */
export function clearMcpAuthCache(): void {
  authCachePromise = null
  const removal = unlink(getMcpAuthCachePath())
  void removal.catch(() => {
    // Cache file may not exist
  })
}
317
/**
 * Produces a spread-ready analytics fragment holding the server's base URL.
 *
 * getLoggingSafeMcpBaseUrl is invoked exactly once (the inline ternary this
 * replaced called it twice). The value is typed as AnalyticsMetadata because
 * the URL is query-stripped and safe to log.
 *
 * @param serverRef - Server configuration to derive the URL from
 * @returns `{ mcpServerBaseUrl }` when a URL is available, otherwise `{}`
 */
function mcpBaseUrlAnalytics(serverRef: ScopedMcpServerConfig): {
  mcpServerBaseUrl?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
} {
  const safeBaseUrl = getLoggingSafeMcpBaseUrl(serverRef)
  if (!safeBaseUrl) {
    return {}
  }
  return {
    mcpServerBaseUrl:
      safeBaseUrl as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  }
}
334
/**
 * Common handling for an authentication failure while connecting to an
 * sse/http/claudeai-proxy server. In order, it:
 *  1. emits the tengu_mcp_server_needs_auth analytics event,
 *  2. writes a debug log line naming the transport,
 *  3. records the server in the needs-auth cache,
 *  4. returns the needs-auth connection result.
 *
 * @param name - Server name (also used as the needs-auth cache key)
 * @param serverRef - Scoped configuration of the server
 * @param transportType - Which remote transport hit the auth failure
 * @returns A connection object of type 'needs-auth'
 */
function handleRemoteAuthFailure(
  name: string,
  serverRef: ScopedMcpServerConfig,
  transportType: 'sse' | 'http' | 'claudeai-proxy',
): MCPServerConnection {
  // Display names used in the debug log message.
  const transportLabels: Record<typeof transportType, string> = {
    sse: 'SSE',
    http: 'HTTP',
    'claudeai-proxy': 'claude.ai proxy',
  }

  logEvent('tengu_mcp_server_needs_auth', {
    transportType:
      transportType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    ...mcpBaseUrlAnalytics(serverRef),
  })
  logMCPDebug(
    name,
    `Authentication required for ${transportLabels[transportType]} server`,
  )
  setMcpAuthCacheEntry(name)

  const needsAuth: MCPServerConnection = {
    name,
    type: 'needs-auth',
    config: serverRef,
  }
  return needsAuth
}
362
/**
 * Builds the fetch used for claude.ai proxy connections. Every request gets
 * the current OAuth bearer token attached, and a 401 is retried at most once
 * after handleOAuth401Error (force-refresh) when the token actually changed.
 *
 * The Anthropic API path already retries like this (withRetry.ts, grove.ts)
 * to cope with memoize-cache staleness and clock drift. Without it here, one
 * stale token would 401 every claude.ai connector at once and park them all
 * in the 15-min needs-auth cache.
 *
 * @param innerFetch - Underlying fetch that performs the actual request
 * @returns A fetch that adds the Authorization header and retries once on 401
 * @throws Error when no claude.ai OAuth token is available for the first attempt
 */
export function createClaudeAiProxyFetch(innerFetch: FetchLike): FetchLike {
  return async (url, init) => {
    // One attempt: refresh the token if needed, attach it, send the request.
    const sendWithBearer = async () => {
      await checkAndRefreshOAuthTokenIfNeeded()
      const tokens = getClaudeAIOAuthTokens()
      if (!tokens) {
        throw new Error('No claude.ai OAuth token available')
      }
      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
      const authedHeaders = new Headers(init?.headers)
      authedHeaders.set('Authorization', `Bearer ${tokens.accessToken}`)
      const response = await innerFetch(url, { ...init, headers: authedHeaders })
      // Report the token that was actually sent. Re-reading
      // getClaudeAIOAuthTokens() after the request is wrong under concurrent
      // 401s: another connector's handleOAuth401Error clears the memoize
      // cache, so we would read the NEW token from keychain, hand it to
      // handleOAuth401Error, which sees same-as-keychain → returns false →
      // no retry. Same pattern as bridgeApi.ts withOAuthRetry.
      return { response, sentToken: tokens.accessToken }
    }

    const firstAttempt = await sendWithBearer()
    const firstResponse = firstAttempt.response
    if (firstResponse.status !== 401) {
      return firstResponse
    }

    // handleOAuth401Error resolves true only when the token really changed
    // (keychain had a newer one, or force-refresh succeeded). Retrying is
    // gated on that; otherwise every connector whose downstream service
    // genuinely needs auth would pay a second round trip (the common case:
    // 30+ servers with "MCP server requires authentication but no OAuth
    // token configured").
    const tokenChanged = await handleOAuth401Error(firstAttempt.sentToken).catch(
      () => false,
    )
    logEvent('tengu_mcp_claudeai_proxy_401', {
      tokenChanged:
        tokenChanged as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })

    if (!tokenChanged) {
      // ELOCKED contention: another connector may have won the lockfile and
      // refreshed, so check whether the token changed underneath us.
      const latestToken = getClaudeAIOAuthTokens()?.accessToken
      const refreshedElsewhere =
        Boolean(latestToken) && latestToken !== firstAttempt.sentToken
      if (!refreshedElsewhere) {
        return firstResponse
      }
    }

    try {
      const retryAttempt = await sendWithBearer()
      return retryAttempt.response
    } catch {
      // The retry itself failed (network error). Hand back the original 401
      // so the outer handler can classify it.
      return firstResponse
    }
  }
}
423
// Smallest WebSocket surface that mcpWebSocketTransport needs from the
// instances handed to it.
type WsClientLike = {
  readonly readyState: number
  close(): void
  send(data: string): void
}

/**
 * Opens a WebSocket through the `ws` package, requesting the 'mcp'
 * subprotocol.
 *
 * Bun's ws shim typings omit the 3-argument constructor
 * (url, protocols, options) that the real ws package accepts, hence the cast
 * of the default export below.
 *
 * @param url - WebSocket endpoint to connect to
 * @param options - Options object passed through as the constructor's third argument
 * @returns The newly constructed client
 */
async function createNodeWsClient(
  url: string,
  options: Record<string, unknown>,
): Promise<WsClientLike> {
  type ThreeArgWsConstructor = new (
    url: string,
    protocols: string[],
    options: Record<string, unknown>,
  ) => WsClientLike

  const { default: wsDefaultExport } = await import('ws')
  const NodeWebSocket = wsDefaultExport as unknown as ThreeArgWsConstructor
  return new NodeWebSocket(url, ['mcp'], options)
}
448
// Image MIME types recognized by this module: JPEG, PNG, GIF and WebP.
// NOTE(review): the consumers are not visible in this part of the file —
// presumably used to decide which MCP content is handled as an image; confirm.
const IMAGE_MIME_TYPES = new Set([
  'image/jpeg',
  'image/png',
  'image/gif',
  'image/webp',
])
455
/**
 * Gets the connection timeout in milliseconds.
 *
 * Reads the MCP_TIMEOUT environment variable and parses it as a base-10
 * integer. Falls back to 30000 ms when the variable is unset, unparseable,
 * zero, or negative, so a non-positive timeout is never returned.
 *
 * @returns A strictly positive timeout in milliseconds.
 */
function getConnectionTimeoutMs(): number {
  const configured = parseInt(process.env.MCP_TIMEOUT || '', 10)
  // NaN fails the comparison as well, so this also covers unset/garbage input.
  return configured > 0 ? configured : 30000
}
459
/**
 * Timeout applied to each individual MCP request (auth, tool calls, etc.),
 * in milliseconds.
 */
const MCP_REQUEST_TIMEOUT_MS = 60000

/**
 * Accept header value for MCP Streamable HTTP. The spec requires clients to
 * advertise acceptance of both JSON and SSE on every POST, and servers that
 * enforce this strictly answer HTTP 406 when it is missing.
 * https://modelcontextprotocol.io/specification/2025-03-26/basic/transports#sending-messages-to-the-server
 */
const MCP_STREAMABLE_HTTP_ACCEPT = 'application/json, text/event-stream'

/**
 * Decorates a fetch function so that every non-GET request runs under its own
 * fresh 60-second timeout.
 *
 * A per-request timer avoids the bug where one AbortSignal.timeout() created
 * at connection time goes stale after 60 seconds and makes every later
 * request fail immediately with "The operation timed out."
 *
 * The wrapper also guarantees the Accept header required by the MCP
 * Streamable HTTP spec on those requests. The MCP SDK sets it inside
 * StreamableHTTPClientTransport.send(), but on a Headers instance that then
 * goes through an object spread, and some runtimes/agents have been observed
 * dropping it before it reaches the wire
 * (https://github.com/anthropics/claude-agent-sdk-typescript/issues/202).
 * Normalizing in the last wrapper before fetch() guarantees it is sent. An
 * Accept value already present is left untouched.
 *
 * GET requests bypass the wrapper entirely (no timeout, no header changes):
 * for MCP transports they are long-lived SSE streams meant to stay open
 * indefinitely. Auth-related GETs use a separate fetch wrapper with its own
 * timeout in auth.ts.
 *
 * @param baseFetch - The fetch function to wrap
 * @returns A fetch with per-request timeout and Accept normalization
 */
export function wrapFetchWithTimeout(baseFetch: FetchLike): FetchLike {
  return async (url: string | URL, init?: RequestInit) => {
    // A missing method means GET, matching fetch's own default.
    const isGet = (init?.method ?? 'GET').toUpperCase() === 'GET'
    if (isGet) {
      return baseFetch(url, init)
    }

    // new Headers() copies from plain objects, tuple arrays and existing
    // Headers instances alike, so whatever shape the SDK passed in, the
    // Accept value survives the spread further down.
    // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
    const requestHeaders = new Headers(init?.headers)
    if (!requestHeaders.has('accept')) {
      requestHeaders.set('accept', MCP_STREAMABLE_HTTP_ACCEPT)
    }

    // setTimeout rather than AbortSignal.timeout() so the timer can be
    // cleared on completion. AbortSignal.timeout's internal timer is only
    // released once the signal is GC'd, which is lazy in Bun — ~2.4KB of
    // native memory per request would linger for the full 60s even when the
    // request finishes in milliseconds.
    const timeoutController = new AbortController()
    const timeoutHandle = setTimeout(
      target =>
        target.abort(
          new DOMException('The operation timed out.', 'TimeoutError'),
        ),
      MCP_REQUEST_TIMEOUT_MS,
      timeoutController,
    )
    // Where supported, keep a pending timeout from holding the process open.
    timeoutHandle.unref?.()

    // Mirror the caller's signal onto ours, including the already-aborted case.
    const callerSignal = init?.signal
    const forwardAbort = () => timeoutController.abort(callerSignal?.reason)
    callerSignal?.addEventListener('abort', forwardAbort)
    if (callerSignal?.aborted) {
      timeoutController.abort(callerSignal.reason)
    }

    try {
      return await baseFetch(url, {
        ...init,
        headers: requestHeaders,
        signal: timeoutController.signal,
      })
    } finally {
      // Runs on success and failure alike: release the timer and the listener.
      clearTimeout(timeoutHandle)
      callerSignal?.removeEventListener('abort', forwardAbort)
    }
  }
}
551
/**
 * Gets the batch size used when connecting to MCP servers.
 *
 * Reads MCP_SERVER_CONNECTION_BATCH_SIZE and parses it as a base-10 integer.
 * Falls back to 3 when the variable is unset, unparseable, zero, or negative,
 * so the result is always a positive integer.
 *
 * @returns A strictly positive batch size.
 */
export function getMcpServerConnectionBatchSize(): number {
  const configured = parseInt(
    process.env.MCP_SERVER_CONNECTION_BATCH_SIZE || '',
    10,
  )
  // NaN fails the comparison too; a negative batch size is never meaningful.
  return configured > 0 ? configured : 3
}
555
/**
 * Gets the batch size used when connecting to remote MCP servers.
 *
 * Reads MCP_REMOTE_SERVER_CONNECTION_BATCH_SIZE and parses it as a base-10
 * integer. Falls back to 20 when the variable is unset, unparseable, zero, or
 * negative, so the result is always a positive integer.
 *
 * @returns A strictly positive batch size.
 */
function getRemoteMcpServerConnectionBatchSize(): number {
  const configured = parseInt(
    process.env.MCP_REMOTE_SERVER_CONNECTION_BATCH_SIZE || '',
    10,
  )
  // NaN fails the comparison too; a negative batch size is never meaningful.
  return configured > 0 ? configured : 20
}
562
/**
 * Classifies a server config as local. A config counts as local when its
 * `type` is absent/falsy, 'stdio', or 'sdk'; every other type is not local.
 */
function isLocalMcpServer(config: ScopedMcpServerConfig): boolean {
  const transportType = config.type
  if (!transportType) {
    return true
  }
  return transportType === 'stdio' || transportType === 'sdk'
}
566
// Only these tools from the IDE MCP servers are exposed.
const ALLOWED_IDE_TOOLS = ['mcp__ide__executeCode', 'mcp__ide__getDiagnostics']

/**
 * Filter predicate for MCP tools: a tool whose name starts with 'mcp__ide__'
 * is kept only when it is on the allow-list; every other tool is kept.
 */
function isIncludedMcpTool(tool: Tool): boolean {
  const { name } = tool
  if (name.startsWith('mcp__ide__')) {
    return ALLOWED_IDE_TOOLS.includes(name)
  }
  return true
}
574
/**
 * Builds the cache key identifying a server connection: the server name and
 * the JSON-serialized configuration, joined by a hyphen.
 * @param name Server name
 * @param serverRef Server configuration
 * @returns Cache key string
 */
export function getServerCacheKey(
  name: string,
  serverRef: ScopedMcpServerConfig,
): string {
  const serializedConfig = jsonStringify(serverRef)
  return `${name}-${serializedConfig}`
}
587
588/**
589 * TODO (ollie): The memoization here increases complexity by a lot, and im not sure it really improves performance
590 * Attempts to connect to a single MCP server
591 * @param name Server name
592 * @param serverRef Scoped server configuration
593 * @returns A wrapped client (either connected or failed)
594 */
595export const connectToServer = memoize(
596  async (
597    name: string,
598    serverRef: ScopedMcpServerConfig,
599    serverStats?: {
600      totalServers: number
601      stdioCount: number
602      sseCount: number
603      httpCount: number
604      sseIdeCount: number
605      wsIdeCount: number
606    },
607  ): Promise<MCPServerConnection> => {
608    const connectStartTime = Date.now()
609    let inProcessServer:
610      | { connect(t: Transport): Promise<void>; close(): Promise<void> }
611      | undefined
612    try {
613      let transport
614
615      // If we have the session ingress JWT, we will connect via the session ingress rather than
616      // to remote MCP's directly.
617      const sessionIngressToken = getSessionIngressAuthToken()
618
619      if (serverRef.type === 'sse') {
620        // Create an auth provider for this server
621        const authProvider = new ClaudeAuthProvider(name, serverRef)
622
623        // Get combined headers (static + dynamic)
624        const combinedHeaders = await getMcpServerHeaders(name, serverRef)
625
626        // Use the auth provider with SSEClientTransport
627        const transportOptions: SSEClientTransportOptions = {
628          authProvider,
629          // Use fresh timeout per request to avoid stale AbortSignal bug.
630          // Step-up detection wraps innermost so the 403 is seen before the
631          // SDK's handler calls auth() → tokens().
632          fetch: wrapFetchWithTimeout(
633            wrapFetchWithStepUpDetection(createFetchWithInit(), authProvider),
634          ),
635          requestInit: {
636            headers: {
637              'User-Agent': getMCPUserAgent(),
638              ...combinedHeaders,
639            },
640          },
641        }
642
643        // IMPORTANT: Always set eventSourceInit with a fetch that does NOT use the
644        // timeout wrapper. The EventSource connection is long-lived (stays open indefinitely
645        // to receive server-sent events), so applying a 60-second timeout would kill it.
646        // The timeout is only meant for individual API requests (POST, auth refresh), not
647        // the persistent SSE stream.
648        transportOptions.eventSourceInit = {
649          fetch: async (url: string | URL, init?: RequestInit) => {
650            // Get auth headers from the auth provider
651            const authHeaders: Record<string, string> = {}
652            const tokens = await authProvider.tokens()
653            if (tokens) {
654              authHeaders.Authorization = `Bearer ${tokens.access_token}`
655            }
656
657            const proxyOptions = getProxyFetchOptions()
658            // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
659            return fetch(url, {
660              ...init,
661              ...proxyOptions,
662              headers: {
663                'User-Agent': getMCPUserAgent(),
664                ...authHeaders,
665                ...init?.headers,
666                ...combinedHeaders,
667                Accept: 'text/event-stream',
668              },
669            })
670          },
671        }
672
673        transport = new SSEClientTransport(
674          new URL(serverRef.url),
675          transportOptions,
676        )
677        logMCPDebug(name, `SSE transport initialized, awaiting connection`)
678      } else if (serverRef.type === 'sse-ide') {
679        logMCPDebug(name, `Setting up SSE-IDE transport to ${serverRef.url}`)
680        // IDE servers don't need authentication
681        // TODO: Use the auth token provided in the lockfile
682        const proxyOptions = getProxyFetchOptions()
683        const transportOptions: SSEClientTransportOptions =
684          proxyOptions.dispatcher
685            ? {
686                eventSourceInit: {
687                  fetch: async (url: string | URL, init?: RequestInit) => {
688                    // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
689                    return fetch(url, {
690                      ...init,
691                      ...proxyOptions,
692                      headers: {
693                        'User-Agent': getMCPUserAgent(),
694                        ...init?.headers,
695                      },
696                    })
697                  },
698                },
699              }
700            : {}
701
702        transport = new SSEClientTransport(
703          new URL(serverRef.url),
704          Object.keys(transportOptions).length > 0
705            ? transportOptions
706            : undefined,
707        )
708      } else if (serverRef.type === 'ws-ide') {
709        const tlsOptions = getWebSocketTLSOptions()
710        const wsHeaders = {
711          'User-Agent': getMCPUserAgent(),
712          ...(serverRef.authToken && {
713            'X-Claude-Code-Ide-Authorization': serverRef.authToken,
714          }),
715        }
716
717        let wsClient: WsClientLike
718        if (typeof Bun !== 'undefined') {
719          // Bun's WebSocket supports headers/proxy/tls options but the DOM typings don't
720          // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
721          wsClient = new globalThis.WebSocket(serverRef.url, {
722            protocols: ['mcp'],
723            headers: wsHeaders,
724            proxy: getWebSocketProxyUrl(serverRef.url),
725            tls: tlsOptions || undefined,
726          } as unknown as string[])
727        } else {
728          wsClient = await createNodeWsClient(serverRef.url, {
729            headers: wsHeaders,
730            agent: getWebSocketProxyAgent(serverRef.url),
731            ...(tlsOptions || {}),
732          })
733        }
734        transport = new WebSocketTransport(wsClient)
735      } else if (serverRef.type === 'ws') {
736        logMCPDebug(
737          name,
738          `Initializing WebSocket transport to ${serverRef.url}`,
739        )
740
741        const combinedHeaders = await getMcpServerHeaders(name, serverRef)
742
743        const tlsOptions = getWebSocketTLSOptions()
744        const wsHeaders = {
745          'User-Agent': getMCPUserAgent(),
746          ...(sessionIngressToken && {
747            Authorization: `Bearer ${sessionIngressToken}`,
748          }),
749          ...combinedHeaders,
750        }
751
752        // Redact sensitive headers before logging
753        const wsHeadersForLogging = mapValues(wsHeaders, (value, key) =>
754          key.toLowerCase() === 'authorization' ? '[REDACTED]' : value,
755        )
756
757        logMCPDebug(
758          name,
759          `WebSocket transport options: ${jsonStringify({
760            url: serverRef.url,
761            headers: wsHeadersForLogging,
762            hasSessionAuth: !!sessionIngressToken,
763          })}`,
764        )
765
766        let wsClient: WsClientLike
767        if (typeof Bun !== 'undefined') {
768          // Bun's WebSocket supports headers/proxy/tls options but the DOM typings don't
769          // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
770          wsClient = new globalThis.WebSocket(serverRef.url, {
771            protocols: ['mcp'],
772            headers: wsHeaders,
773            proxy: getWebSocketProxyUrl(serverRef.url),
774            tls: tlsOptions || undefined,
775          } as unknown as string[])
776        } else {
777          wsClient = await createNodeWsClient(serverRef.url, {
778            headers: wsHeaders,
779            agent: getWebSocketProxyAgent(serverRef.url),
780            ...(tlsOptions || {}),
781          })
782        }
783        transport = new WebSocketTransport(wsClient)
784      } else if (serverRef.type === 'http') {
785        logMCPDebug(name, `Initializing HTTP transport to ${serverRef.url}`)
786        logMCPDebug(
787          name,
788          `Node version: ${process.version}, Platform: ${process.platform}`,
789        )
790        logMCPDebug(
791          name,
792          `Environment: ${jsonStringify({
793            NODE_OPTIONS: process.env.NODE_OPTIONS || 'not set',
794            UV_THREADPOOL_SIZE: process.env.UV_THREADPOOL_SIZE || 'default',
795            HTTP_PROXY: process.env.HTTP_PROXY || 'not set',
796            HTTPS_PROXY: process.env.HTTPS_PROXY || 'not set',
797            NO_PROXY: process.env.NO_PROXY || 'not set',
798          })}`,
799        )
800
801        // Create an auth provider for this server
802        const authProvider = new ClaudeAuthProvider(name, serverRef)
803
804        // Get combined headers (static + dynamic)
805        const combinedHeaders = await getMcpServerHeaders(name, serverRef)
806
807        // Check if this server has stored OAuth tokens. If so, the SDK's
808        // authProvider will set Authorization — don't override with the
809        // session ingress token (SDK merges requestInit AFTER authProvider).
810        // CCR proxy URLs (ccr_shttp_mcp) have no stored OAuth, so they still
811        // get the ingress token. See PR #24454 discussion.
812        const hasOAuthTokens = !!(await authProvider.tokens())
813
814        // Use the auth provider with StreamableHTTPClientTransport
815        const proxyOptions = getProxyFetchOptions()
816        logMCPDebug(
817          name,
818          `Proxy options: ${proxyOptions.dispatcher ? 'custom dispatcher' : 'default'}`,
819        )
820
821        const transportOptions: StreamableHTTPClientTransportOptions = {
822          authProvider,
823          // Use fresh timeout per request to avoid stale AbortSignal bug.
824          // Step-up detection wraps innermost so the 403 is seen before the
825          // SDK's handler calls auth() → tokens().
826          fetch: wrapFetchWithTimeout(
827            wrapFetchWithStepUpDetection(createFetchWithInit(), authProvider),
828          ),
829          requestInit: {
830            ...proxyOptions,
831            headers: {
832              'User-Agent': getMCPUserAgent(),
833              ...(sessionIngressToken &&
834                !hasOAuthTokens && {
835                  Authorization: `Bearer ${sessionIngressToken}`,
836                }),
837              ...combinedHeaders,
838            },
839          },
840        }
841
842        // Redact sensitive headers before logging
843        const headersForLogging = transportOptions.requestInit?.headers
844          ? mapValues(
845              transportOptions.requestInit.headers as Record<string, string>,
846              (value, key) =>
847                key.toLowerCase() === 'authorization' ? '[REDACTED]' : value,
848            )
849          : undefined
850
851        logMCPDebug(
852          name,
853          `HTTP transport options: ${jsonStringify({
854            url: serverRef.url,
855            headers: headersForLogging,
856            hasAuthProvider: !!authProvider,
857            timeoutMs: MCP_REQUEST_TIMEOUT_MS,
858          })}`,
859        )
860
861        transport = new StreamableHTTPClientTransport(
862          new URL(serverRef.url),
863          transportOptions,
864        )
865        logMCPDebug(name, `HTTP transport created successfully`)
866      } else if (serverRef.type === 'sdk') {
867        throw new Error('SDK servers should be handled in print.ts')
868      } else if (serverRef.type === 'claudeai-proxy') {
869        logMCPDebug(
870          name,
871          `Initializing claude.ai proxy transport for server ${serverRef.id}`,
872        )
873
874        const tokens = getClaudeAIOAuthTokens()
875        if (!tokens) {
876          throw new Error('No claude.ai OAuth token found')
877        }
878
879        const oauthConfig = getOauthConfig()
880        const proxyUrl = `${oauthConfig.MCP_PROXY_URL}${oauthConfig.MCP_PROXY_PATH.replace('{server_id}', serverRef.id)}`
881
882        logMCPDebug(name, `Using claude.ai proxy at ${proxyUrl}`)
883
884        // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
885        const fetchWithAuth = createClaudeAiProxyFetch(globalThis.fetch)
886
887        const proxyOptions = getProxyFetchOptions()
888        const transportOptions: StreamableHTTPClientTransportOptions = {
889          // Wrap fetchWithAuth with fresh timeout per request
890          fetch: wrapFetchWithTimeout(fetchWithAuth),
891          requestInit: {
892            ...proxyOptions,
893            headers: {
894              'User-Agent': getMCPUserAgent(),
895              'X-Mcp-Client-Session-Id': getSessionId(),
896            },
897          },
898        }
899
900        transport = new StreamableHTTPClientTransport(
901          new URL(proxyUrl),
902          transportOptions,
903        )
904        logMCPDebug(name, `claude.ai proxy transport created successfully`)
905      } else if (
906        (serverRef.type === 'stdio' || !serverRef.type) &&
907        isClaudeInChromeMCPServer(name)
908      ) {
909        // Run the Chrome MCP server in-process to avoid spawning a ~325 MB subprocess
910        const { createChromeContext } = await import(
911          '../../utils/claudeInChrome/mcpServer.js'
912        )
913        const { createClaudeForChromeMcpServer } = await import(
914          '@ant/claude-for-chrome-mcp'
915        )
916        const { createLinkedTransportPair } = await import(
917          './InProcessTransport.js'
918        )
919        const context = createChromeContext(serverRef.env)
920        inProcessServer = createClaudeForChromeMcpServer(context)
921        const [clientTransport, serverTransport] = createLinkedTransportPair()
922        await inProcessServer.connect(serverTransport)
923        transport = clientTransport
924        logMCPDebug(name, `In-process Chrome MCP server started`)
925      } else if (
926        feature('CHICAGO_MCP') &&
927        (serverRef.type === 'stdio' || !serverRef.type) &&
928        isComputerUseMCPServer!(name)
929      ) {
930        // Run the Computer Use MCP server in-process — same rationale as
931        // Chrome above. The package's CallTool handler is a stub; real
932        // dispatch goes through wrapper.tsx's .call() override.
933        const { createComputerUseMcpServerForCli } = await import(
934          '../../utils/computerUse/mcpServer.js'
935        )
936        const { createLinkedTransportPair } = await import(
937          './InProcessTransport.js'
938        )
939        inProcessServer = await createComputerUseMcpServerForCli()
940        const [clientTransport, serverTransport] = createLinkedTransportPair()
941        await inProcessServer.connect(serverTransport)
942        transport = clientTransport
943        logMCPDebug(name, `In-process Computer Use MCP server started`)
944      } else if (serverRef.type === 'stdio' || !serverRef.type) {
945        const finalCommand =
946          process.env.CLAUDE_CODE_SHELL_PREFIX || serverRef.command
947        const finalArgs = process.env.CLAUDE_CODE_SHELL_PREFIX
948          ? [[serverRef.command, ...serverRef.args].join(' ')]
949          : serverRef.args
950        transport = new StdioClientTransport({
951          command: finalCommand,
952          args: finalArgs,
953          env: {
954            ...subprocessEnv(),
955            ...serverRef.env,
956          } as Record<string, string>,
957          stderr: 'pipe', // prevents error output from the MCP server from printing to the UI
958        })
959      } else {
960        throw new Error(`Unsupported server type: ${serverRef.type}`)
961      }
962
963      // Set up stderr logging for stdio transport before connecting in case there are any stderr
964      // outputs emitted during the connection start (this can be useful for debugging failed connections).
965      // Store handler reference for cleanup to prevent memory leaks
966      let stderrHandler: ((data: Buffer) => void) | undefined
967      let stderrOutput = ''
968      if (serverRef.type === 'stdio' || !serverRef.type) {
969        const stdioTransport = transport as StdioClientTransport
970        if (stdioTransport.stderr) {
971          stderrHandler = (data: Buffer) => {
972            // Cap stderr accumulation to prevent unbounded memory growth
973            if (stderrOutput.length < 64 * 1024 * 1024) {
974              try {
975                stderrOutput += data.toString()
976              } catch {
977                // Ignore errors from exceeding max string length
978              }
979            }
980          }
981          stdioTransport.stderr.on('data', stderrHandler)
982        }
983      }
984
985      const client = new Client(
986        {
987          name: 'claude-code',
988          title: 'Claude Code',
989          version: MACRO.VERSION ?? 'unknown',
990          description: "Anthropic's agentic coding tool",
991          websiteUrl: PRODUCT_URL,
992        },
993        {
994          capabilities: {
995            roots: {},
996            // Empty object declares the capability. Sending {form:{},url:{}}
997            // breaks Java MCP SDK servers (Spring AI) whose Elicitation class
998            // has zero fields and fails on unknown properties.
999            elicitation: {},
1000          },
1001        },
1002      )
1003
1004      // Add debug logging for client events if available
1005      if (serverRef.type === 'http') {
1006        logMCPDebug(name, `Client created, setting up request handler`)
1007      }
1008
1009      client.setRequestHandler(ListRootsRequestSchema, async () => {
1010        logMCPDebug(name, `Received ListRoots request from server`)
1011        return {
1012          roots: [
1013            {
1014              uri: `file://${getOriginalCwd()}`,
1015            },
1016          ],
1017        }
1018      })
1019
1020      // Add a timeout to connection attempts to prevent tests from hanging indefinitely
1021      logMCPDebug(
1022        name,
1023        `Starting connection with timeout of ${getConnectionTimeoutMs()}ms`,
1024      )
1025
1026      // For HTTP transport, try a basic connectivity test first
1027      if (serverRef.type === 'http') {
1028        logMCPDebug(name, `Testing basic HTTP connectivity to ${serverRef.url}`)
1029        try {
1030          const testUrl = new URL(serverRef.url)
1031          logMCPDebug(
1032            name,
1033            `Parsed URL: host=${testUrl.hostname}, port=${testUrl.port || 'default'}, protocol=${testUrl.protocol}`,
1034          )
1035
1036          // Log DNS resolution attempt
1037          if (
1038            testUrl.hostname === '127.0.0.1' ||
1039            testUrl.hostname === 'localhost'
1040          ) {
1041            logMCPDebug(name, `Using loopback address: ${testUrl.hostname}`)
1042          }
1043        } catch (urlError) {
1044          logMCPDebug(name, `Failed to parse URL: ${urlError}`)
1045        }
1046      }
1047
1048      const connectPromise = client.connect(transport)
1049      const timeoutPromise = new Promise<never>((_, reject) => {
1050        const timeoutId = setTimeout(() => {
1051          const elapsed = Date.now() - connectStartTime
1052          logMCPDebug(
1053            name,
1054            `Connection timeout triggered after ${elapsed}ms (limit: ${getConnectionTimeoutMs()}ms)`,
1055          )
1056          if (inProcessServer) {
1057            inProcessServer.close().catch(() => {})
1058          }
1059          transport.close().catch(() => {})
1060          reject(
1061            new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
1062              `MCP server "${name}" connection timed out after ${getConnectionTimeoutMs()}ms`,
1063              'MCP connection timeout',
1064            ),
1065          )
1066        }, getConnectionTimeoutMs())
1067
1068        // Clean up timeout if connect resolves or rejects
1069        connectPromise.then(
1070          () => {
1071            clearTimeout(timeoutId)
1072          },
1073          _error => {
1074            clearTimeout(timeoutId)
1075          },
1076        )
1077      })
1078
1079      try {
1080        await Promise.race([connectPromise, timeoutPromise])
1081        if (stderrOutput) {
1082          logMCPError(name, `Server stderr: ${stderrOutput}`)
1083          stderrOutput = '' // Release accumulated string to prevent memory growth
1084        }
1085        const elapsed = Date.now() - connectStartTime
1086        logMCPDebug(
1087          name,
1088          `Successfully connected (transport: ${serverRef.type || 'stdio'}) in ${elapsed}ms`,
1089        )
1090      } catch (error) {
1091        const elapsed = Date.now() - connectStartTime
1092        // SSE-specific error logging
1093        if (serverRef.type === 'sse' && error instanceof Error) {
1094          logMCPDebug(
1095            name,
1096            `SSE Connection failed after ${elapsed}ms: ${jsonStringify({
1097              url: serverRef.url,
1098              error: error.message,
1099              errorType: error.constructor.name,
1100              stack: error.stack,
1101            })}`,
1102          )
1103          logMCPError(name, error)
1104
1105          if (error instanceof UnauthorizedError) {
1106            return handleRemoteAuthFailure(name, serverRef, 'sse')
1107          }
1108        } else if (serverRef.type === 'http' && error instanceof Error) {
1109          const errorObj = error as Error & {
1110            cause?: unknown
1111            code?: string
1112            errno?: string | number
1113            syscall?: string
1114          }
1115          logMCPDebug(
1116            name,
1117            `HTTP Connection failed after ${elapsed}ms: ${error.message} (code: ${errorObj.code || 'none'}, errno: ${errorObj.errno || 'none'})`,
1118          )
1119          logMCPError(name, error)
1120
1121          if (error instanceof UnauthorizedError) {
1122            return handleRemoteAuthFailure(name, serverRef, 'http')
1123          }
1124        } else if (
1125          serverRef.type === 'claudeai-proxy' &&
1126          error instanceof Error
1127        ) {
1128          logMCPDebug(
1129            name,
1130            `claude.ai proxy connection failed after ${elapsed}ms: ${error.message}`,
1131          )
1132          logMCPError(name, error)
1133
1134          // StreamableHTTPError has a `code` property with the HTTP status
1135          const errorCode = (error as Error & { code?: number }).code
1136          if (errorCode === 401) {
1137            return handleRemoteAuthFailure(name, serverRef, 'claudeai-proxy')
1138          }
1139        } else if (
1140          serverRef.type === 'sse-ide' ||
1141          serverRef.type === 'ws-ide'
1142        ) {
1143          logEvent('tengu_mcp_ide_server_connection_failed', {
1144            connectionDurationMs: elapsed,
1145          })
1146        }
1147        if (inProcessServer) {
1148          inProcessServer.close().catch(() => {})
1149        }
1150        transport.close().catch(() => {})
1151        if (stderrOutput) {
1152          logMCPError(name, `Server stderr: ${stderrOutput}`)
1153        }
1154        throw error
1155      }
1156
1157      const capabilities = client.getServerCapabilities()
1158      const serverVersion = client.getServerVersion()
1159      const rawInstructions = client.getInstructions()
1160      let instructions = rawInstructions
1161      if (
1162        rawInstructions &&
1163        rawInstructions.length > MAX_MCP_DESCRIPTION_LENGTH
1164      ) {
1165        instructions =
1166          rawInstructions.slice(0, MAX_MCP_DESCRIPTION_LENGTH) + '… [truncated]'
1167        logMCPDebug(
1168          name,
1169          `Server instructions truncated from ${rawInstructions.length} to ${MAX_MCP_DESCRIPTION_LENGTH} chars`,
1170        )
1171      }
1172
1173      // Log successful connection details
1174      logMCPDebug(
1175        name,
1176        `Connection established with capabilities: ${jsonStringify({
1177          hasTools: !!capabilities?.tools,
1178          hasPrompts: !!capabilities?.prompts,
1179          hasResources: !!capabilities?.resources,
1180          hasResourceSubscribe: !!capabilities?.resources?.subscribe,
1181          serverVersion: serverVersion || 'unknown',
1182        })}`,
1183      )
1184      logForDebugging(
1185        `[MCP] Server "${name}" connected with subscribe=${!!capabilities?.resources?.subscribe}`,
1186      )
1187
1188      // Register default elicitation handler that returns cancel during the
1189      // window before registerElicitationHandler overwrites it in
1190      // onConnectionAttempt (useManageMCPConnections).
1191      client.setRequestHandler(ElicitRequestSchema, async request => {
1192        logMCPDebug(
1193          name,
1194          `Elicitation request received during initialization: ${jsonStringify(request)}`,
1195        )
1196        return { action: 'cancel' as const }
1197      })
1198
1199      if (serverRef.type === 'sse-ide' || serverRef.type === 'ws-ide') {
1200        const ideConnectionDurationMs = Date.now() - connectStartTime
1201        logEvent('tengu_mcp_ide_server_connection_succeeded', {
1202          connectionDurationMs: ideConnectionDurationMs,
1203          serverVersion:
1204            serverVersion as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1205        })
1206        try {
1207          void maybeNotifyIDEConnected(client)
1208        } catch (error) {
1209          logMCPError(
1210            name,
1211            `Failed to send ide_connected notification: ${error}`,
1212          )
1213        }
1214      }
1215
1216      // Enhanced connection drop detection and logging for all transport types
1217      const connectionStartTime = Date.now()
1218      let hasErrorOccurred = false
1219
1220      // Store original handlers
1221      const originalOnerror = client.onerror
1222      const originalOnclose = client.onclose
1223
1224      // The SDK's transport calls onerror on connection failures but doesn't call onclose,
1225      // which CC uses to trigger reconnection. We bridge this gap by tracking consecutive
1226      // terminal errors and manually closing after MAX_ERRORS_BEFORE_RECONNECT failures.
1227      let consecutiveConnectionErrors = 0
1228      const MAX_ERRORS_BEFORE_RECONNECT = 3
1229
1230      // Guard against re-entry: close() aborts in-flight streams which may fire
1231      // onerror again before the close chain completes.
1232      let hasTriggeredClose = false
1233
1234      // client.close() → transport.close() → transport.onclose → SDK's _onclose():
1235      // rejects all pending request handlers (so hung callTool() promises fail with
1236      // McpError -32000 "Connection closed") and then invokes our client.onclose
1237      // handler below (which clears the memo cache so the next call reconnects).
1238      // Calling client.onclose?.() directly would only clear the cache — pending
1239      // tool calls would stay hung.
1240      const closeTransportAndRejectPending = (reason: string) => {
1241        if (hasTriggeredClose) return
1242        hasTriggeredClose = true
1243        logMCPDebug(name, `Closing transport (${reason})`)
1244        void client.close().catch(e => {
1245          logMCPDebug(name, `Error during close: ${errorMessage(e)}`)
1246        })
1247      }
1248
1249      const isTerminalConnectionError = (msg: string): boolean => {
1250        return (
1251          msg.includes('ECONNRESET') ||
1252          msg.includes('ETIMEDOUT') ||
1253          msg.includes('EPIPE') ||
1254          msg.includes('EHOSTUNREACH') ||
1255          msg.includes('ECONNREFUSED') ||
1256          msg.includes('Body Timeout Error') ||
1257          msg.includes('terminated') ||
1258          // SDK SSE reconnection intermediate errors — may be wrapped around the
1259          // actual network error, so the substrings above won't match
1260          msg.includes('SSE stream disconnected') ||
1261          msg.includes('Failed to reconnect SSE stream')
1262        )
1263      }
1264
1265      // Enhanced error handler with detailed logging
1266      client.onerror = (error: Error) => {
1267        const uptime = Date.now() - connectionStartTime
1268        hasErrorOccurred = true
1269        const transportType = serverRef.type || 'stdio'
1270
1271        // Log the connection drop with context
1272        logMCPDebug(
1273          name,
1274          `${transportType.toUpperCase()} connection dropped after ${Math.floor(uptime / 1000)}s uptime`,
1275        )
1276
1277        // Log specific error details for debugging
1278        if (error.message) {
1279          if (error.message.includes('ECONNRESET')) {
1280            logMCPDebug(
1281              name,
1282              `Connection reset - server may have crashed or restarted`,
1283            )
1284          } else if (error.message.includes('ETIMEDOUT')) {
1285            logMCPDebug(
1286              name,
1287              `Connection timeout - network issue or server unresponsive`,
1288            )
1289          } else if (error.message.includes('ECONNREFUSED')) {
1290            logMCPDebug(name, `Connection refused - server may be down`)
1291          } else if (error.message.includes('EPIPE')) {
1292            logMCPDebug(
1293              name,
1294              `Broken pipe - server closed connection unexpectedly`,
1295            )
1296          } else if (error.message.includes('EHOSTUNREACH')) {
1297            logMCPDebug(name, `Host unreachable - network connectivity issue`)
1298          } else if (error.message.includes('ESRCH')) {
1299            logMCPDebug(
1300              name,
1301              `Process not found - stdio server process terminated`,
1302            )
1303          } else if (error.message.includes('spawn')) {
1304            logMCPDebug(
1305              name,
1306              `Failed to spawn process - check command and permissions`,
1307            )
1308          } else {
1309            logMCPDebug(name, `Connection error: ${error.message}`)
1310          }
1311        }
1312
1313        // For HTTP transports, detect session expiry (404 + JSON-RPC -32001)
1314        // and close the transport so pending tool calls reject and the next
1315        // call reconnects with a fresh session ID.
1316        if (
1317          (transportType === 'http' || transportType === 'claudeai-proxy') &&
1318          isMcpSessionExpiredError(error)
1319        ) {
1320          logMCPDebug(
1321            name,
1322            `MCP session expired (server returned 404 with session-not-found), triggering reconnection`,
1323          )
1324          closeTransportAndRejectPending('session expired')
1325          if (originalOnerror) {
1326            originalOnerror(error)
1327          }
1328          return
1329        }
1330
1331        // For remote transports (SSE/HTTP), track terminal connection errors
1332        // and trigger reconnection via close if we see repeated failures.
1333        if (
1334          transportType === 'sse' ||
1335          transportType === 'http' ||
1336          transportType === 'claudeai-proxy'
1337        ) {
1338          // The SDK's StreamableHTTP transport fires this after exhausting its
1339          // own SSE reconnect attempts (default maxRetries: 2) — but it never
1340          // calls onclose, so pending callTool() promises hang indefinitely.
1341          // This is the definitive "transport gave up" signal.
1342          if (error.message.includes('Maximum reconnection attempts')) {
1343            closeTransportAndRejectPending('SSE reconnection exhausted')
1344            if (originalOnerror) {
1345              originalOnerror(error)
1346            }
1347            return
1348          }
1349
1350          if (isTerminalConnectionError(error.message)) {
1351            consecutiveConnectionErrors++
1352            logMCPDebug(
1353              name,
1354              `Terminal connection error ${consecutiveConnectionErrors}/${MAX_ERRORS_BEFORE_RECONNECT}`,
1355            )
1356
1357            if (consecutiveConnectionErrors >= MAX_ERRORS_BEFORE_RECONNECT) {
1358              consecutiveConnectionErrors = 0
1359              closeTransportAndRejectPending('max consecutive terminal errors')
1360            }
1361          } else {
1362            // Non-terminal error (e.g., transient issue), reset counter
1363            consecutiveConnectionErrors = 0
1364          }
1365        }
1366
1367        // Call original handler
1368        if (originalOnerror) {
1369          originalOnerror(error)
1370        }
1371      }
1372
1373      // Enhanced close handler with connection drop context
1374      client.onclose = () => {
1375        const uptime = Date.now() - connectionStartTime
1376        const transportType = serverRef.type ?? 'unknown'
1377
1378        logMCPDebug(
1379          name,
1380          `${transportType.toUpperCase()} connection closed after ${Math.floor(uptime / 1000)}s (${hasErrorOccurred ? 'with errors' : 'cleanly'})`,
1381        )
1382
1383        // Clear the memoization cache so next operation reconnects
1384        const key = getServerCacheKey(name, serverRef)
1385
1386        // Also clear fetch caches (keyed by server name). Reconnection
1387        // creates a new connection object; without clearing, the next
1388        // fetch would return stale tools/resources from the old connection.
1389        fetchToolsForClient.cache.delete(name)
1390        fetchResourcesForClient.cache.delete(name)
1391        fetchCommandsForClient.cache.delete(name)
1392        if (feature('MCP_SKILLS')) {
1393          fetchMcpSkillsForClient!.cache.delete(name)
1394        }
1395
1396        connectToServer.cache.delete(key)
1397        logMCPDebug(name, `Cleared connection cache for reconnection`)
1398
1399        if (originalOnclose) {
1400          originalOnclose()
1401        }
1402      }
1403
1404      const cleanup = async () => {
1405        // In-process servers (e.g. Chrome MCP) don't have child processes or stderr
1406        if (inProcessServer) {
1407          try {
1408            await inProcessServer.close()
1409          } catch (error) {
1410            logMCPDebug(name, `Error closing in-process server: ${error}`)
1411          }
1412          try {
1413            await client.close()
1414          } catch (error) {
1415            logMCPDebug(name, `Error closing client: ${error}`)
1416          }
1417          return
1418        }
1419
1420        // Remove stderr event listener to prevent memory leaks
1421        if (stderrHandler && (serverRef.type === 'stdio' || !serverRef.type)) {
1422          const stdioTransport = transport as StdioClientTransport
1423          stdioTransport.stderr?.off('data', stderrHandler)
1424        }
1425
1426        // For stdio transports, explicitly terminate the child process with proper signals
1427        // NOTE: StdioClientTransport.close() only sends an abort signal, but many MCP servers
1428        // (especially Docker containers) need explicit SIGINT/SIGTERM signals to trigger graceful shutdown
1429        if (serverRef.type === 'stdio') {
1430          try {
1431            const stdioTransport = transport as StdioClientTransport
1432            const childPid = stdioTransport.pid
1433
1434            if (childPid) {
1435              logMCPDebug(name, 'Sending SIGINT to MCP server process')
1436
1437              // First try SIGINT (like Ctrl+C)
1438              try {
1439                process.kill(childPid, 'SIGINT')
1440              } catch (error) {
1441                logMCPDebug(name, `Error sending SIGINT: ${error}`)
1442                return
1443              }
1444
1445              // Wait for graceful shutdown with rapid escalation (total 500ms to keep CLI responsive)
1446              await new Promise<void>(async resolve => {
1447                let resolved = false
1448
1449                // Set up a timer to check if process still exists
1450                const checkInterval = setInterval(() => {
1451                  try {
1452                    // process.kill(pid, 0) checks if process exists without killing it
1453                    process.kill(childPid, 0)
1454                  } catch {
1455                    // Process no longer exists
1456                    if (!resolved) {
1457                      resolved = true
1458                      clearInterval(checkInterval)
1459                      clearTimeout(failsafeTimeout)
1460                      logMCPDebug(name, 'MCP server process exited cleanly')
1461                      resolve()
1462                    }
1463                  }
1464                }, 50)
1465
1466                // Absolute failsafe: clear interval after 600ms no matter what
1467                const failsafeTimeout = setTimeout(() => {
1468                  if (!resolved) {
1469                    resolved = true
1470                    clearInterval(checkInterval)
1471                    logMCPDebug(
1472                      name,
1473                      'Cleanup timeout reached, stopping process monitoring',
1474                    )
1475                    resolve()
1476                  }
1477                }, 600)
1478
1479                try {
1480                  // Wait 100ms for SIGINT to work (usually much faster)
1481                  await sleep(100)
1482
1483                  if (!resolved) {
1484                    // Check if process still exists
1485                    try {
1486                      process.kill(childPid, 0)
1487                      // Process still exists, SIGINT failed, try SIGTERM
1488                      logMCPDebug(
1489                        name,
1490                        'SIGINT failed, sending SIGTERM to MCP server process',
1491                      )
1492                      try {
1493                        process.kill(childPid, 'SIGTERM')
1494                      } catch (termError) {
1495                        logMCPDebug(name, `Error sending SIGTERM: ${termError}`)
1496                        resolved = true
1497                        clearInterval(checkInterval)
1498                        clearTimeout(failsafeTimeout)
1499                        resolve()
1500                        return
1501                      }
1502                    } catch {
1503                      // Process already exited
1504                      resolved = true
1505                      clearInterval(checkInterval)
1506                      clearTimeout(failsafeTimeout)
1507                      resolve()
1508                      return
1509                    }
1510
1511                    // Wait 400ms for SIGTERM to work (slower than SIGINT, often used for cleanup)
1512                    await sleep(400)
1513
1514                    if (!resolved) {
1515                      // Check if process still exists
1516                      try {
1517                        process.kill(childPid, 0)
1518                        // Process still exists, SIGTERM failed, force kill with SIGKILL
1519                        logMCPDebug(
1520                          name,
1521                          'SIGTERM failed, sending SIGKILL to MCP server process',
1522                        )
1523                        try {
1524                          process.kill(childPid, 'SIGKILL')
1525                        } catch (killError) {
1526                          logMCPDebug(
1527                            name,
1528                            `Error sending SIGKILL: ${killError}`,
1529                          )
1530                        }
1531                      } catch {
1532                        // Process already exited
1533                        resolved = true
1534                        clearInterval(checkInterval)
1535                        clearTimeout(failsafeTimeout)
1536                        resolve()
1537                      }
1538                    }
1539                  }
1540
1541                  // Final timeout - always resolve after 500ms max (total cleanup time)
1542                  if (!resolved) {
1543                    resolved = true
1544                    clearInterval(checkInterval)
1545                    clearTimeout(failsafeTimeout)
1546                    resolve()
1547                  }
1548                } catch {
1549                  // Handle any errors in the escalation sequence
1550                  if (!resolved) {
1551                    resolved = true
1552                    clearInterval(checkInterval)
1553                    clearTimeout(failsafeTimeout)
1554                    resolve()
1555                  }
1556                }
1557              })
1558            }
1559          } catch (processError) {
1560            logMCPDebug(name, `Error terminating process: ${processError}`)
1561          }
1562        }
1563
1564        // Close the client connection (which also closes the transport)
1565        try {
1566          await client.close()
1567        } catch (error) {
1568          logMCPDebug(name, `Error closing client: ${error}`)
1569        }
1570      }
1571
1572      // Register cleanup for all transport types - even network transports might need cleanup
1573      // This ensures all MCP servers get properly terminated, not just stdio ones
1574      const cleanupUnregister = registerCleanup(cleanup)
1575
1576      // Create the wrapped cleanup that includes unregistering
1577      const wrappedCleanup = async () => {
1578        cleanupUnregister?.()
1579        await cleanup()
1580      }
1581
1582      const connectionDurationMs = Date.now() - connectStartTime
1583      logEvent('tengu_mcp_server_connection_succeeded', {
1584        connectionDurationMs,
1585        transportType: (serverRef.type ??
1586          'stdio') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1587        totalServers: serverStats?.totalServers,
1588        stdioCount: serverStats?.stdioCount,
1589        sseCount: serverStats?.sseCount,
1590        httpCount: serverStats?.httpCount,
1591        sseIdeCount: serverStats?.sseIdeCount,
1592        wsIdeCount: serverStats?.wsIdeCount,
1593        ...mcpBaseUrlAnalytics(serverRef),
1594      })
1595      return {
1596        name,
1597        client,
1598        type: 'connected' as const,
1599        capabilities: capabilities ?? {},
1600        serverInfo: serverVersion,
1601        instructions,
1602        config: serverRef,
1603        cleanup: wrappedCleanup,
1604      }
1605    } catch (error) {
1606      const connectionDurationMs = Date.now() - connectStartTime
1607      logEvent('tengu_mcp_server_connection_failed', {
1608        connectionDurationMs,
1609        totalServers: serverStats?.totalServers || 1,
1610        stdioCount:
1611          serverStats?.stdioCount || (serverRef.type === 'stdio' ? 1 : 0),
1612        sseCount: serverStats?.sseCount || (serverRef.type === 'sse' ? 1 : 0),
1613        httpCount:
1614          serverStats?.httpCount || (serverRef.type === 'http' ? 1 : 0),
1615        sseIdeCount:
1616          serverStats?.sseIdeCount || (serverRef.type === 'sse-ide' ? 1 : 0),
1617        wsIdeCount:
1618          serverStats?.wsIdeCount || (serverRef.type === 'ws-ide' ? 1 : 0),
1619        transportType: (serverRef.type ??
1620          'stdio') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1621        ...mcpBaseUrlAnalytics(serverRef),
1622      })
1623      logMCPDebug(
1624        name,
1625        `Connection failed after ${connectionDurationMs}ms: ${errorMessage(error)}`,
1626      )
1627      logMCPError(name, `Connection failed: ${errorMessage(error)}`)
1628
1629      if (inProcessServer) {
1630        inProcessServer.close().catch(() => {})
1631      }
1632      return {
1633        name,
1634        type: 'failed' as const,
1635        config: serverRef,
1636        error: errorMessage(error),
1637      }
1638    }
1639  },
1640  getServerCacheKey,
1641)
1642
/**
 * Tears down one MCP server connection and forgets everything cached for it.
 *
 * The connection is looked up through the memoized `connectToServer`; if it
 * resolves to a connected client, that client's `cleanup()` is awaited. Any
 * error during this step is swallowed. Afterwards the connection cache entry
 * and the per-server-name tools / resources / commands caches (plus the
 * skills cache when the `MCP_SKILLS` feature is on) are evicted.
 *
 * NOTE(review): when nothing is cached for this key, the `connectToServer`
 * call below presumably opens a connection only for it to be cleaned up
 * right away — confirm this is intended.
 *
 * @param name Server name
 * @param serverRef Server configuration
 */
export async function clearServerCache(
  name: string,
  serverRef: ScopedMcpServerConfig,
): Promise<void> {
  const cacheKey = getServerCacheKey(name, serverRef)

  try {
    const connection = await connectToServer(name, serverRef)
    if (connection.type === 'connected') {
      await connection.cleanup()
    }
  } catch {
    // Best effort only: the server may never have connected successfully.
  }

  // Evict the connection itself and every per-server fetch cache, so that a
  // reconnect re-reads tools/resources/commands rather than serving stale
  // entries.
  connectToServer.cache.delete(cacheKey)
  fetchToolsForClient.cache.delete(name)
  fetchResourcesForClient.cache.delete(name)
  fetchCommandsForClient.cache.delete(name)
  if (feature('MCP_SKILLS')) {
    fetchMcpSkillsForClient!.cache.delete(name)
  }
}
1674
/**
 * Returns a usable connected client for the given MCP server.
 *
 * For every server type except `sdk`, the client is resolved through the
 * memoized `connectToServer`, so a cached connection is reused when present
 * and a new one is established when the cache entry was cleared (e.g. after
 * onclose). Tool and resource calls should go through here so they always
 * talk to a live connection.
 *
 * SDK MCP servers run in-process and are managed separately via
 * setupSdkMcpClients, so they are handed back unchanged.
 *
 * @param client The connected MCP server client
 * @returns Connected MCP server client (same or reconnected)
 * @throws Error if server cannot be connected
 */
export async function ensureConnectedClient(
  client: ConnectedMCPServer,
): Promise<ConnectedMCPServer> {
  // In-process SDK servers never go through connectToServer.
  if (client.config.type === 'sdk') {
    return client
  }

  const resolved = await connectToServer(client.name, client.config)
  if (resolved.type === 'connected') {
    return resolved
  }

  throw new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
    `MCP server "${client.name}" is not connected`,
    'MCP server not connected',
  )
}
1705
/**
 * Tells whether two MCP server configurations describe the same connection.
 * Used to decide whether a server has to be reconnected after a config change.
 *
 * `scope` is left out of the comparison because it is metadata rather than
 * connection configuration. The remaining fields are compared through their
 * serialized form.
 */
export function areMcpConfigsEqual(
  a: ScopedMcpServerConfig,
  b: ScopedMcpServerConfig,
): boolean {
  // Only serialize when the cheap transport-type check already matches.
  if (a.type === b.type) {
    const { scope: _ignoredScopeA, ...connectionA } = a
    const { scope: _ignoredScopeB, ...connectionB } = b
    return jsonStringify(connectionA) === jsonStringify(connectionB)
  }

  return false
}
1723
// Max cache size for the fetch* caches (passed as the size argument to each
// memoizeWithLRU call in this file). Those caches are keyed by server name,
// which is stable across reconnects; the bound prevents unbounded growth
// when many MCP servers are configured.
const MCP_FETCH_CACHE_SIZE = 20
1727
/**
 * Encode MCP tool input for the auto-mode security classifier.
 * Exported so the auto-mode eval scripts can mirror production encoding
 * for `mcp__*` tool stubs without duplicating this logic.
 *
 * Every top-level key becomes a `key=value` pair and the pairs are joined
 * with a single space. Primitive values are rendered with `String(value)`.
 * Object and array values are rendered as JSON so that nested arguments stay
 * visible to the classifier instead of collapsing to `[object Object]`.
 *
 * @param input Tool arguments as received for the call
 * @param toolName Tool name; returned verbatim when `input` has no keys
 * @returns The encoded classifier input
 */
export function mcpToolInputToAutoClassifierInput(
  input: Record<string, unknown>,
  toolName: string,
): string {
  const keys = Object.keys(input)
  if (keys.length === 0) {
    return toolName
  }
  return keys
    .map(key => `${key}=${stringifyAutoClassifierValue(input[key])}`)
    .join(' ')
}

/** Renders one argument value for the classifier (JSON for objects/arrays). */
function stringifyAutoClassifierValue(value: unknown): string {
  if (value === null || typeof value !== 'object') {
    return String(value)
  }
  try {
    // JSON.stringify can yield undefined (e.g. a toJSON returning undefined).
    return JSON.stringify(value) ?? String(value)
  } catch {
    // Circular structures or BigInt members cannot be serialized; keep the
    // previous String() rendering rather than failing the tool call.
    return String(value)
  }
}
1742
/**
 * Lists the tools exposed by an MCP server and adapts each one to the
 * internal `Tool` shape, using `MCPTool` as the base object.
 *
 * Results are memoized per `client.name`, bounded by `MCP_FETCH_CACHE_SIZE`.
 * An empty list is returned when the client is not connected, when the
 * server does not advertise the `tools` capability, or when the `tools/list`
 * request fails (the failure is logged, not thrown).
 */
export const fetchToolsForClient = memoizeWithLRU(
  async (client: MCPServerConnection): Promise<Tool[]> => {
    if (client.type !== 'connected') {
      return []
    }

    try {
      if (!client.capabilities?.tools) return []

      const listing = (await client.client.request(
        { method: 'tools/list' },
        ListToolsResultSchema,
      )) as ListToolsResult

      // Tool metadata comes from the MCP server, so sanitize it before use.
      const sanitizedTools = recursivelySanitizeUnicode(listing.tools)

      // SDK MCP servers can opt out of the mcp__ prefix via an env variable.
      const skipPrefix =
        client.config.type === 'sdk' &&
        isEnvTruthy(process.env.CLAUDE_AGENT_SDK_MCP_NO_PREFIX)

      // Adapt every MCP tool to our Tool format, then drop excluded ones.
      return sanitizedTools
        .map((tool): Tool => {
          const fullyQualifiedName = buildMcpToolName(client.name, tool.name)
          const hints = tool.annotations
          const isStdioTransport =
            client.config.type === 'stdio' || !client.config.type

          // Collapse whitespace: _meta is open to external MCP servers, and
          // a newline here would inject orphan lines into the deferred-tool
          // list (formatDeferredToolLine joins on '\n').
          const rawSearchHint = tool._meta?.['anthropic/searchHint']
          const searchHint =
            typeof rawSearchHint === 'string'
              ? rawSearchHint.replace(/\s+/g, ' ').trim() || undefined
              : undefined

          return {
            ...MCPTool,
            // In skip-prefix mode the model invokes the tool by its original
            // name so MCP tools can override builtins by name. Permission
            // checks rely on mcpInfo instead.
            name: skipPrefix ? tool.name : fullyQualifiedName,
            mcpInfo: { serverName: client.name, toolName: tool.name },
            isMcp: true,
            searchHint,
            alwaysLoad: tool._meta?.['anthropic/alwaysLoad'] === true,
            async description() {
              return tool.description ?? ''
            },
            async prompt() {
              const text = tool.description ?? ''
              if (text.length > MAX_MCP_DESCRIPTION_LENGTH) {
                return (
                  text.slice(0, MAX_MCP_DESCRIPTION_LENGTH) + '… [truncated]'
                )
              }
              return text
            },
            isConcurrencySafe() {
              return hints?.readOnlyHint ?? false
            },
            isReadOnly() {
              return hints?.readOnlyHint ?? false
            },
            toAutoClassifierInput(input) {
              return mcpToolInputToAutoClassifierInput(input, tool.name)
            },
            isDestructive() {
              return hints?.destructiveHint ?? false
            },
            isOpenWorld() {
              return hints?.openWorldHint ?? false
            },
            isSearchOrReadCommand() {
              return classifyMcpToolForCollapse(client.name, tool.name)
            },
            inputJSONSchema: tool.inputSchema as Tool['inputJSONSchema'],
            async checkPermissions() {
              // Always defer to the permission system, suggesting an allow
              // rule for the fully qualified tool name.
              return {
                behavior: 'passthrough' as const,
                message: 'MCPTool requires permission.',
                suggestions: [
                  {
                    type: 'addRules' as const,
                    rules: [
                      {
                        toolName: fullyQualifiedName,
                        ruleContent: undefined,
                      },
                    ],
                    behavior: 'allow' as const,
                    destination: 'localSettings' as const,
                  },
                ],
              }
            },
            async call(
              args: Record<string, unknown>,
              context,
              _canUseTool,
              parentMessage,
              onProgress?: ToolCallProgress<MCPProgress>,
            ) {
              const toolUseId = extractToolUseId(parentMessage)
              const meta = toolUseId
                ? { 'claudecode/toolUseId': toolUseId }
                : {}

              // Progress is reported only when there is both a listener and
              // a tool-use id to attribute it to.
              const reportProgress =
                onProgress && toolUseId
                  ? (data: MCPProgress) => {
                      onProgress({ toolUseID: toolUseId, data })
                    }
                  : undefined

              reportProgress?.({
                type: 'mcp_progress',
                status: 'started',
                serverName: client.name,
                toolName: tool.name,
              })

              const startTime = Date.now()
              const MAX_SESSION_RETRIES = 1
              for (let attempt = 0; ; attempt++) {
                try {
                  const connectedClient = await ensureConnectedClient(client)
                  const mcpResult = await callMCPToolWithUrlElicitationRetry({
                    client: connectedClient,
                    clientConnection: client,
                    tool: tool.name,
                    args,
                    meta,
                    signal: context.abortController.signal,
                    setAppState: context.setAppState,
                    onProgress: reportProgress,
                    handleElicitation: context.handleElicitation,
                  })

                  reportProgress?.({
                    type: 'mcp_progress',
                    status: 'completed',
                    serverName: client.name,
                    toolName: tool.name,
                    elapsedTimeMs: Date.now() - startTime,
                  })

                  // mcpMeta is attached only when the server returned _meta
                  // and/or structuredContent.
                  const { content, _meta, structuredContent } = mcpResult
                  if (!_meta && !structuredContent) {
                    return { data: content }
                  }
                  return {
                    data: content,
                    mcpMeta: {
                      ...(_meta && { _meta }),
                      ...(structuredContent && { structuredContent }),
                    },
                  }
                } catch (error) {
                  // Session expired — the connection cache has been
                  // cleared, so go around once more with a fresh client.
                  const shouldRetry =
                    error instanceof McpSessionExpiredError &&
                    attempt < MAX_SESSION_RETRIES
                  if (shouldRetry) {
                    logMCPDebug(
                      client.name,
                      `Retrying tool '${tool.name}' after session recovery`,
                    )
                    continue
                  }

                  reportProgress?.({
                    type: 'mcp_progress',
                    status: 'failed',
                    serverName: client.name,
                    toolName: tool.name,
                    elapsedTimeMs: Date.now() - startTime,
                  })

                  // Non-Error throwables and already telemetry-safe errors
                  // propagate untouched.
                  if (
                    !(error instanceof Error) ||
                    error instanceof
                      TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
                  ) {
                    throw error
                  }

                  // Wrap MCP SDK errors so telemetry carries useful context
                  // rather than just the constructor name ("Error" or
                  // "McpError"). MCP SDK errors are protocol-level messages
                  // and don't contain user file paths or code.
                  const errorClass = error.constructor.name
                  if (errorClass === 'Error') {
                    throw new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
                      error.message,
                      error.message.slice(0, 200),
                    )
                  }
                  // McpError carries the numeric JSON-RPC error code in
                  // `code` (e.g. -32000 ConnectionClosed, -32001
                  // RequestTimeout).
                  if (
                    errorClass === 'McpError' &&
                    'code' in error &&
                    typeof error.code === 'number'
                  ) {
                    throw new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
                      error.message,
                      `McpError ${error.code}`,
                    )
                  }
                  throw error
                }
              }
            },
            userFacingName() {
              // The title annotation wins over the raw tool name when set.
              const label = hints?.title || tool.name
              return `${client.name} - ${label} (MCP)`
            },
            ...(isClaudeInChromeMCPServer(client.name) && isStdioTransport
              ? claudeInChromeToolRendering().getClaudeInChromeMCPToolOverrides(
                  tool.name,
                )
              : {}),
            ...(feature('CHICAGO_MCP') &&
            isStdioTransport &&
            isComputerUseMCPServer!(client.name)
              ? computerUseWrapper!().getComputerUseMCPToolOverrides(tool.name)
              : {}),
          }
        })
        .filter(isIncludedMcpTool)
    } catch (error) {
      logMCPError(client.name, `Failed to fetch tools: ${errorMessage(error)}`)
      return []
    }
  },
  (client: MCPServerConnection) => client.name,
  MCP_FETCH_CACHE_SIZE,
)
1999
/**
 * Lists the resources exposed by an MCP server, tagging each one with the
 * name of the server it came from.
 *
 * Results are memoized per `client.name`, bounded by `MCP_FETCH_CACHE_SIZE`.
 * An empty list is returned when the client is not connected, when the
 * server does not advertise the `resources` capability, when the response
 * has no resources, or when the request fails (logged, not thrown).
 */
export const fetchResourcesForClient = memoizeWithLRU(
  async (client: MCPServerConnection): Promise<ServerResource[]> => {
    if (client.type !== 'connected') {
      return []
    }

    try {
      if (!client.capabilities?.resources) return []

      const { resources } = await client.client.request(
        { method: 'resources/list' },
        ListResourcesResultSchema,
      )
      if (!resources) {
        return []
      }

      // Record which server each resource belongs to.
      return resources.map(resource => ({ ...resource, server: client.name }))
    } catch (error) {
      logMCPError(
        client.name,
        `Failed to fetch resources: ${errorMessage(error)}`,
      )
      return []
    }
  },
  (client: MCPServerConnection) => client.name,
  MCP_FETCH_CACHE_SIZE,
)
2032
/**
 * Lists the prompts exposed by an MCP server and adapts each one to a
 * `prompt` Command named `mcp__<normalized server>__<prompt name>`.
 *
 * Results are memoized per `client.name`, bounded by `MCP_FETCH_CACHE_SIZE`.
 * An empty list is returned when the client is not connected, when the
 * server does not advertise the `prompts` capability, when the response has
 * no prompts, or when the listing request fails (logged, not thrown).
 */
export const fetchCommandsForClient = memoizeWithLRU(
  async (client: MCPServerConnection): Promise<Command[]> => {
    if (client.type !== 'connected') {
      return []
    }

    try {
      if (!client.capabilities?.prompts) return []

      const listing = (await client.client.request(
        { method: 'prompts/list' },
        ListPromptsResultSchema,
      )) as ListPromptsResult
      if (!listing.prompts) {
        return []
      }

      // Prompt metadata comes from the MCP server, so sanitize it first.
      const sanitizedPrompts = recursivelySanitizeUnicode(listing.prompts)

      return sanitizedPrompts.map(prompt => {
        const argNames = Object.values(prompt.arguments ?? {}).map(
          arg => arg.name,
        )
        const commandName = `mcp__${normalizeNameForMCP(client.name)}__${prompt.name}`

        return {
          type: 'prompt' as const,
          name: commandName,
          description: prompt.description ?? '',
          hasUserSpecifiedDescription: Boolean(prompt.description),
          contentLength: 0, // Dynamic MCP content
          isEnabled: () => true,
          isHidden: false,
          isMcp: true,
          progressMessage: 'running',
          userFacingName() {
            // prompt.name is the programmatic identifier; prompt.title is a
            // display name whose spaces would break slash command parsing.
            return `${client.name}:${prompt.name} (MCP)`
          },
          argNames,
          source: 'mcp',
          async getPromptForCommand(args: string) {
            // Space-separated values are matched to argNames by position.
            const argValues = args.split(' ')
            try {
              const connectedClient = await ensureConnectedClient(client)
              const promptResult = await connectedClient.client.getPrompt({
                name: prompt.name,
                arguments: zipObject(argNames, argValues),
              })
              const perMessageContent = await Promise.all(
                promptResult.messages.map(message =>
                  transformResultContent(message.content, connectedClient.name),
                ),
              )
              return perMessageContent.flat()
            } catch (error) {
              logMCPError(
                client.name,
                `Error running command '${prompt.name}': ${errorMessage(error)}`,
              )
              throw error
            }
          },
        }
      })
    } catch (error) {
      logMCPError(
        client.name,
        `Failed to fetch commands: ${errorMessage(error)}`,
      )
      return []
    }
  },
  (client: MCPServerConnection) => client.name,
  MCP_FETCH_CACHE_SIZE,
)
2108
/**
 * Invokes an IDE tool directly as an RPC and hands back only the content of
 * the result. The call is made with a newly created abort signal.
 *
 * @param toolName The name of the tool to call
 * @param args The arguments to pass to the tool
 * @param client The IDE client to use for the RPC call
 * @returns The content of the tool call result
 */
export async function callIdeRpc(
  toolName: string,
  args: Record<string, unknown>,
  client: ConnectedMCPServer,
): Promise<string | ContentBlockParam[] | undefined> {
  const { content } = await callMCPTool({
    client,
    tool: toolName,
    args,
    signal: createAbortController().signal,
  })
  return content
}
2129
/**
 * Drops whatever is cached for a server, connects to it again, and re-fetches
 * its tools, commands and resources.
 *
 * Note: UI components should not call this directly; they should use the
 * reconnectMcpServer function from useManageMcpConnections.
 *
 * Errors raised while reconnecting are logged and reported as a `failed`
 * client with empty tool and command lists.
 *
 * @param name Server name
 * @param config Server configuration
 * @returns Object containing the client connection and its resources
 */
export async function reconnectMcpServerImpl(
  name: string,
  config: ScopedMcpServerConfig,
): Promise<{
  client: MCPServerConnection
  tools: Tool[]
  commands: Command[]
  resources?: ServerResource[]
}> {
  try {
    // Drop the keychain cache so credentials are re-read from disk. Another
    // process (e.g. the VS Code extension host) may have changed the stored
    // tokens (cleared auth, saved new OAuth tokens) before asking this CLI
    // subprocess to reconnect; with a stale cache the subprocess would never
    // notice the tokens were removed.
    clearKeychainCache()

    await clearServerCache(name, config)
    const client = await connectToServer(name, config)
    if (client.type !== 'connected') {
      return { client, tools: [], commands: [] }
    }

    if (config.type === 'claudeai-proxy') {
      markClaudeAiMcpConnected(name)
    }

    const supportsResources = !!client.capabilities?.resources

    const [tools, mcpCommands, mcpSkills, resources] = await Promise.all([
      fetchToolsForClient(client),
      fetchCommandsForClient(client),
      feature('MCP_SKILLS') && supportsResources
        ? fetchMcpSkillsForClient!(client)
        : Promise.resolve([]),
      supportsResources ? fetchResourcesForClient(client) : Promise.resolve([]),
    ])

    // The generic resource tools are appended when this server supports
    // resources and none of its own fetched tools already matches one of
    // them by name.
    const builtinResourceTools = [ListMcpResourcesTool, ReadMcpResourceTool]
    const needsResourceTools =
      supportsResources &&
      !builtinResourceTools.some(builtin =>
        tools.some(fetched => toolMatchesName(fetched, builtin.name)),
      )
    const resourceTools: Tool[] = needsResourceTools
      ? builtinResourceTools
      : []

    return {
      client,
      tools: [...tools, ...resourceTools],
      commands: [...mcpCommands, ...mcpSkills],
      resources: resources.length > 0 ? resources : undefined,
    }
  } catch (error) {
    // The connection may have closed mid-fetch; degrade to a failed status.
    logMCPError(name, `Error during reconnection: ${errorMessage(error)}`)

    return {
      client: { name, type: 'failed' as const, config },
      tools: [],
      commands: [],
    }
  }
}
2211
// Runs `processor` over `items` with at most `concurrency` in flight.
//
// History (replaced 2026-03): the earlier implementation used fixed-size
// sequential batches — batch 1 was awaited in full before batch 2 started.
// One slow server in batch N therefore delayed EVERY server in batch N+1,
// even while the other 19 slots sat idle. With pMap a slot is released the
// moment its server finishes, so a slow server ties up a single slot rather
// than a whole batch boundary. The concurrency ceiling and the results are
// unchanged; only the scheduling is better.
async function processBatched<T>(
  items: T[],
  concurrency: number,
  processor: (item: T) => Promise<void>,
): Promise<void> {
  const limits = { concurrency }
  await pMap(items, processor, limits)
}
2225
/**
 * Connects to every configured MCP server and reports each outcome through
 * `onConnectionAttempt` as soon as it is known.
 *
 * Disabled servers are reported right away without connecting. The remaining
 * servers are split into local and remote groups that are processed
 * concurrently, each group with its own concurrency limit.
 *
 * @param onConnectionAttempt Called once per server with its client state
 *   and whatever tools, commands and resources were obtained for it
 * @param mcpConfigs Server configs keyed by name; when omitted, the servers
 *   from `getAllMcpConfigs()` are used
 */
export async function getMcpToolsCommandsAndResources(
  onConnectionAttempt: (params: {
    client: MCPServerConnection
    tools: Tool[]
    commands: Command[]
    resources?: ServerResource[]
  }) => void,
  mcpConfigs?: Record<string, ScopedMcpServerConfig>,
): Promise<void> {
  // Shared by all servers: the generic resource tools are handed out once,
  // to the first resource-capable server that finishes.
  let resourceToolsAdded = false

  const allConfigEntries = Object.entries(
    mcpConfigs ?? (await getAllMcpConfigs()).servers,
  )

  // Separate disabled servers from active ones — disabled servers must never
  // open HTTP connections or pass through batch processing.
  const configEntries: typeof allConfigEntries = []
  for (const entry of allConfigEntries) {
    const [serverName, serverConfig] = entry
    if (!isMcpServerDisabled(serverName)) {
      configEntries.push(entry)
      continue
    }
    onConnectionAttempt({
      client: { name: serverName, type: 'disabled', config: serverConfig },
      tools: [],
      commands: [],
    })
  }

  // Per-transport counts, used for logging.
  const countByTransport = (transport: string) =>
    count(configEntries, ([, cfg]) => cfg.type === transport)
  const serverStats = {
    totalServers: configEntries.length,
    stdioCount: countByTransport('stdio'),
    sseCount: countByTransport('sse'),
    httpCount: countByTransport('http'),
    sseIdeCount: countByTransport('sse-ide'),
    wsIdeCount: countByTransport('ws-ide'),
  }

  // Local servers (stdio/sdk) spawn processes and need lower concurrency;
  // remote servers can be connected with higher concurrency.
  const isLocalEntry = ([, cfg]: [string, ScopedMcpServerConfig]) =>
    isLocalMcpServer(cfg)
  const localServers = configEntries.filter(isLocalEntry)
  const remoteServers = configEntries.filter(entry => !isLocalEntry(entry))

  const processServer = async ([name, config]: [
    string,
    ScopedMcpServerConfig,
  ]): Promise<void> => {
    try {
      // A disabled server is only recorded in state, never connected.
      if (isMcpServerDisabled(name)) {
        onConnectionAttempt({
          client: { name, type: 'disabled', config },
          tools: [],
          commands: [],
        })
        return
      }

      // Skip connection for servers that recently returned 401 (15min TTL),
      // or that we have probed before but hold no token for. The second
      // check closes the gap the TTL leaves open: without it, every 15min
      // we re-probe servers that cannot succeed until the user runs /mcp.
      // Each probe is a network round-trip for connect-401 plus OAuth
      // discovery, and print mode awaits the whole batch (main.tsx:3503).
      if (
        config.type === 'claudeai-proxy' ||
        config.type === 'http' ||
        config.type === 'sse'
      ) {
        const knownToNeedAuth =
          (await isMcpAuthCached(name)) ||
          ((config.type === 'http' || config.type === 'sse') &&
            hasMcpDiscoveryButNoToken(name, config))
        if (knownToNeedAuth) {
          logMCPDebug(name, `Skipping connection (cached needs-auth)`)
          onConnectionAttempt({
            client: { name, type: 'needs-auth' as const, config },
            tools: [createMcpAuthTool(name, config)],
            commands: [],
          })
          return
        }
      }

      const client = await connectToServer(name, config, serverStats)

      if (client.type !== 'connected') {
        // A server that needs auth still exposes the auth tool.
        const fallbackTools =
          client.type === 'needs-auth' ? [createMcpAuthTool(name, config)] : []
        onConnectionAttempt({
          client,
          tools: fallbackTools,
          commands: [],
        })
        return
      }

      if (config.type === 'claudeai-proxy') {
        markClaudeAiMcpConnected(name)
      }

      const supportsResources = !!client.capabilities?.resources

      const [tools, mcpCommands, mcpSkills, resources] = await Promise.all([
        fetchToolsForClient(client),
        fetchCommandsForClient(client),
        // Skills are discovered from skill:// resources.
        feature('MCP_SKILLS') && supportsResources
          ? fetchMcpSkillsForClient!(client)
          : Promise.resolve([]),
        // Resources are fetched only when the server supports them.
        supportsResources
          ? fetchResourcesForClient(client)
          : Promise.resolve([]),
      ])

      // If this server supports resources and the resource tools have not
      // been handed out yet, include them with this client's tools.
      const resourceTools: Tool[] = []
      if (supportsResources && !resourceToolsAdded) {
        resourceToolsAdded = true
        resourceTools.push(ListMcpResourcesTool, ReadMcpResourceTool)
      }

      onConnectionAttempt({
        client,
        tools: [...tools, ...resourceTools],
        commands: [...mcpCommands, ...mcpSkills],
        resources: resources.length > 0 ? resources : undefined,
      })
    } catch (error) {
      // The connection may have closed mid-fetch; report the server as
      // failed with no tools/commands.
      logMCPError(
        name,
        `Error fetching tools/commands/resources: ${errorMessage(error)}`,
      )

      onConnectionAttempt({
        client: { name, type: 'failed' as const, config },
        tools: [],
        commands: [],
      })
    }
  }

  // Both groups run concurrently, each under its own concurrency limit:
  // - Local servers (stdio/sdk): lower, to avoid process-spawning contention
  // - Remote servers: higher, since they are only network connections
  const localRun = processBatched(
    localServers,
    getMcpServerConnectionBatchSize(),
    processServer,
  )
  const remoteRun = processBatched(
    remoteServers,
    getRemoteMcpServerConnectionBatchSize(),
    processServer,
  )
  await Promise.all([localRun, remoteRun])
}
2404
// Not memoized: called only 2-3 times at startup/reconfig. The inner work
// (connectToServer, fetch*ForClient) is already cached. Memoizing here, keyed
// by the mcpConfigs object reference, leaked: main.tsx creates fresh config
// objects on each call.
/**
 * Runs a connection attempt for every configured MCP server and aggregates
 * the clients, tools and commands reported by each attempt.
 *
 * The returned promise never rejects:
 * - with no configs it resolves immediately with empty lists;
 * - otherwise it resolves once as many attempts have been reported as there
 *   are config entries, or — as a safety net — once
 *   getMcpToolsCommandsAndResources itself has finished, so a short callback
 *   count can no longer leave the caller waiting forever;
 * - if getMcpToolsCommandsAndResources rejects, the error is logged and the
 *   promise resolves with empty lists.
 *
 * @param mcpConfigs - Server configurations keyed by server name.
 * @returns The aggregated clients, tools and commands.
 */
export function prefetchAllMcpResources(
  mcpConfigs: Record<string, ScopedMcpServerConfig>,
): Promise<{
  clients: MCPServerConnection[]
  tools: Tool[]
  commands: Command[]
}> {
  return new Promise(resolve => {
    const pendingCount = Object.keys(mcpConfigs).length

    if (pendingCount === 0) {
      resolve({
        clients: [],
        tools: [],
        commands: [],
      })
      return
    }

    const clients: MCPServerConnection[] = []
    const tools: Tool[] = []
    const commands: Command[] = []
    let completedCount = 0
    let finished = false

    // Emits the telemetry event and resolves with the aggregate exactly once,
    // no matter whether the callback count or the outer promise gets here first.
    const finish = (): void => {
      if (finished) {
        return
      }
      finished = true

      const commandsMetadataLength = commands.reduce((sum, command) => {
        const commandMetadataLength =
          command.name.length +
          (command.description ?? '').length +
          (command.argumentHint ?? '').length
        return sum + commandMetadataLength
      }, 0)
      logEvent('tengu_mcp_tools_commands_loaded', {
        tools_count: tools.length,
        commands_count: commands.length,
        commands_metadata_length: commandsMetadataLength,
      })

      resolve({
        clients,
        tools,
        commands,
      })
    }

    getMcpToolsCommandsAndResources(result => {
      clients.push(result.client)
      tools.push(...result.tools)
      commands.push(...result.commands)

      completedCount++
      if (completedCount >= pendingCount) {
        finish()
      }
    }, mcpConfigs)
      // Safety net: all attempts are over once this promise settles, so
      // resolve with whatever was collected even if the count fell short.
      .then(finish)
      .catch(error => {
        logMCPError(
          'prefetchAllMcpResources',
          `Failed to get MCP resources: ${errorMessage(error)}`,
        )
        // Still resolve with empty results (a no-op if already resolved)
        resolve({
          clients: [],
          tools: [],
          commands: [],
        })
      })
  })
}
2474
/**
 * Resize and compress an image buffer (enforcing API dimension limits) and
 * wrap the outcome in a base64 image content block.
 */
async function mcpImageBufferToBlock(
  imageBuffer: Buffer,
  mimeType: string | undefined,
): Promise<ContentBlockParam> {
  const ext = mimeType?.split('/')[1] || 'png'
  const resized = await maybeResizeAndDownsampleImageBuffer(
    imageBuffer,
    imageBuffer.length,
    ext,
  )
  return {
    type: 'image',
    source: {
      data: resized.buffer.toString('base64'),
      media_type:
        `image/${resized.mediaType}` as Base64ImageSource['media_type'],
      type: 'base64',
    },
  }
}

/**
 * Transform a single content item from an MCP tool result or MCP prompt into
 * message blocks.
 *
 * - `text` is passed through as a text block.
 * - `audio` and non-image resource blobs are persisted via
 *   persistBlobToTextBlock, which returns a text block describing the outcome.
 * - `image` and image resource blobs are resized/compressed and returned as
 *   base64 image blocks (resource images are preceded by a text prefix block).
 * - `resource` with text becomes a prefixed text block.
 * - `resource_link` becomes a text block with name, URI and optional description.
 * - Anything else yields an empty array.
 *
 * @param resultContent - The content item to convert.
 * @param serverName - Name of the originating MCP server, used in prefixes.
 */
export async function transformResultContent(
  resultContent: PromptMessage['content'],
  serverName: string,
): Promise<Array<ContentBlockParam>> {
  if (resultContent.type === 'text') {
    return [{ type: 'text', text: resultContent.text }]
  }

  if (resultContent.type === 'audio') {
    const audioData = resultContent as {
      type: 'audio'
      data: string
      mimeType?: string
    }
    return await persistBlobToTextBlock(
      Buffer.from(audioData.data, 'base64'),
      audioData.mimeType,
      serverName,
      `[Audio from ${serverName}] `,
    )
  }

  if (resultContent.type === 'image') {
    const imageBlock = await mcpImageBufferToBlock(
      Buffer.from(String(resultContent.data), 'base64'),
      resultContent.mimeType,
    )
    return [imageBlock]
  }

  if (resultContent.type === 'resource') {
    const { resource } = resultContent
    const prefix = `[Resource from ${serverName} at ${resource.uri}] `

    if ('text' in resource) {
      return [{ type: 'text', text: `${prefix}${resource.text}` }]
    }
    if (!('blob' in resource)) {
      return []
    }

    if (!IMAGE_MIME_TYPES.has(resource.mimeType ?? '')) {
      // Non-image binary: persist it and return a text block about the outcome
      return await persistBlobToTextBlock(
        Buffer.from(resource.blob, 'base64'),
        resource.mimeType,
        serverName,
        prefix,
      )
    }

    // Image blob: the prefix (always non-empty) goes first, then the image
    const imageBlock = await mcpImageBufferToBlock(
      Buffer.from(resource.blob, 'base64'),
      resource.mimeType,
    )
    return [{ type: 'text', text: prefix }, imageBlock]
  }

  if (resultContent.type === 'resource_link') {
    const resourceLink = resultContent as ResourceLink
    const description = resourceLink.description
      ? ` (${resourceLink.description})`
      : ''
    return [
      {
        type: 'text',
        text: `[Resource link: ${resourceLink.name}] ${resourceLink.uri}${description}`,
      },
    ]
  }

  return []
}
2592
/**
 * Hand decoded binary content to persistBinaryContent and return a single
 * small text block in place of the raw data: either the saved-file message
 * from getBinaryBlobSavedMessage, or a note that saving failed. This replaces
 * the old behavior of dumping raw base64 into the context.
 *
 * @param bytes - Decoded binary payload.
 * @param mimeType - MIME type reported by the server, if any.
 * @param serverName - Originating MCP server; part of the persisted ID.
 * @param sourceDescription - Prefix describing where the content came from.
 */
async function persistBlobToTextBlock(
  bytes: Buffer,
  mimeType: string | undefined,
  serverName: string,
  sourceDescription: string,
): Promise<Array<ContentBlockParam>> {
  // Timestamp plus a short random suffix keeps IDs distinct across calls
  const normalizedServer = normalizeNameForMCP(serverName)
  const stamp = Date.now()
  const randomSuffix = Math.random().toString(36).slice(2, 8)
  const persistId = `mcp-${normalizedServer}-blob-${stamp}-${randomSuffix}`

  const saved = await persistBinaryContent(bytes, mimeType, persistId)

  const text =
    'error' in saved
      ? `${sourceDescription}Binary content (${mimeType || 'unknown type'}, ${bytes.length} bytes) could not be saved to disk: ${saved.error}`
      : getBinaryBlobSavedMessage(
          saved.filepath,
          mimeType,
          saved.size,
          sourceDescription,
        )

  return [{ type: 'text', text }]
}
2628
/**
 * Identifies which shape of MCP tool response a normalized result was derived
 * from: a `toolResult` field, a `structuredContent` field, or a `content` array.
 */
export type MCPResultType = 'toolResult' | 'structuredContent' | 'contentArray'

/**
 * MCP tool result normalized by transformMCPResult.
 */
export type TransformedMCPResult = {
  // Normalized payload
  content: MCPToolResult
  // Which response shape the payload came from
  type: MCPResultType
  // Compact type signature from inferCompactSchema; transformMCPResult sets it
  // for 'structuredContent' and 'contentArray' results only
  schema?: string
}
2639
/**
 * Generates a compact, jq-friendly type signature for a value.
 *
 * - `null` → "null"; primitives → their `typeof` name.
 * - Arrays are described by their first element only ("[]" when empty).
 * - Objects list at most their first 10 keys, followed by ", ..." when more
 *   exist; an object reached with no depth budget left collapses to "{...}".
 *
 * Each level of nesting (array or object) consumes one unit of `depth`, so
 * with the default depth of 2:
 *   {title: "a", items: [{id: 1}]}  →  "{title: string, items: [{...}]}"
 * and with depth 3:
 *   {title: "a", items: [{id: 1}]}  →  "{title: string, items: [{id: number}]}"
 *
 * @param value - The value to describe.
 * @param depth - Remaining nesting budget (default 2).
 */
export function inferCompactSchema(value: unknown, depth = 2): string {
  if (value === null) {
    return 'null'
  }

  if (Array.isArray(value)) {
    return value.length === 0
      ? '[]'
      : `[${inferCompactSchema(value[0], depth - 1)}]`
  }

  if (typeof value !== 'object') {
    return typeof value
  }

  if (depth <= 0) {
    return '{...}'
  }

  const maxKeys = 10
  const allEntries = Object.entries(value)
  const parts: string[] = []
  for (const [key, child] of allEntries.slice(0, maxKeys)) {
    parts.push(`${key}: ${inferCompactSchema(child, depth - 1)}`)
  }
  const ellipsis = allEntries.length > maxKeys ? ', ...' : ''
  return `{${parts.join(', ')}${ellipsis}}`
}
2661
/**
 * Normalizes a raw MCP tool response into a {@link TransformedMCPResult}.
 *
 * The response shapes are checked in this order:
 * 1. `toolResult` — returned as a string. Object payloads are serialized
 *    with jsonStringify (plain `String()` would yield "[object Object]");
 *    primitives are converted with `String()`.
 * 2. `structuredContent` (when not undefined) — serialized with jsonStringify,
 *    with a compact schema attached.
 * 3. `content` array — each item is converted with transformResultContent and
 *    the results are flattened, with a compact schema attached.
 *
 * @param result - The raw response from the MCP server.
 * @param tool - Tool name, used in the error message (e.g., "search").
 * @param name - Server name, used for transformation and logging (e.g., "slack").
 * @throws TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS when the
 *   response matches none of the shapes above.
 */
export async function transformMCPResult(
  result: unknown,
  tool: string, // Tool name for validation (e.g., "search")
  name: string, // Server name for transformation (e.g., "slack")
): Promise<TransformedMCPResult> {
  if (result && typeof result === 'object') {
    if ('toolResult' in result) {
      const { toolResult } = result
      return {
        // Serialize objects/arrays so their contents are not lost to
        // "[object Object]"; primitives keep their String() form.
        content:
          typeof toolResult === 'object' && toolResult !== null
            ? jsonStringify(toolResult)
            : String(toolResult),
        type: 'toolResult',
      }
    }

    if (
      'structuredContent' in result &&
      result.structuredContent !== undefined
    ) {
      return {
        content: jsonStringify(result.structuredContent),
        type: 'structuredContent',
        schema: inferCompactSchema(result.structuredContent),
      }
    }

    if ('content' in result && Array.isArray(result.content)) {
      const transformedContent = (
        await Promise.all(
          result.content.map(item => transformResultContent(item, name)),
        )
      ).flat()
      return {
        content: transformedContent,
        type: 'contentArray',
        schema: inferCompactSchema(transformedContent),
      }
    }
  }

  // Named `message` so it does not shadow the `errorMessage` helper used
  // elsewhere in this file.
  const message = `MCP server "${name}" tool "${tool}": unexpected response format`
  logMCPError(name, message)
  throw new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
    message,
    'MCP tool unexpected response format',
  )
}
2707
/**
 * Reports whether MCP content includes at least one image block.
 * Empty or string content never does. Callers use this to choose truncation
 * over persisting to a file, since persisting would defeat image compression
 * and make the images non-viewable.
 */
function contentContainsImages(content: MCPToolResult): boolean {
  return (
    !!content &&
    typeof content !== 'string' &&
    content.some(block => block.type === 'image')
  )
}
2719
/**
 * Normalizes a raw MCP tool response and applies large-output handling.
 *
 * Content is returned unchanged when the server is the IDE server or when it
 * does not need truncation. Oversized content is truncated when the
 * large-output-files feature is disabled via ENABLE_MCP_LARGE_OUTPUT_FILES or
 * when it contains images; otherwise it is persisted via persistToolResult
 * and instructions for reading the saved output are returned. If persisting
 * fails, an error string is returned instead.
 *
 * @param result - The raw response from the MCP server.
 * @param tool - Tool name (e.g., "search").
 * @param name - Server name (e.g., "slack").
 */
export async function processMCPResult(
  result: unknown,
  tool: string, // Tool name for validation (e.g., "search")
  name: string, // Server name for IDE check and transformation (e.g., "slack")
): Promise<MCPToolResult> {
  const transformed = await transformMCPResult(result, tool, name)
  const { content } = transformed

  // IDE tool output does not go to the model directly, so size is irrelevant
  if (name === 'ide') {
    return content
  }

  // Anything that fits is passed through untouched
  const needsTruncation = await mcpContentNeedsTruncation(content)
  if (!needsTruncation) {
    return content
  }

  const sizeEstimateTokens = getContentSizeEstimate(content)

  // Telemetry shared by every path that ends without a persisted file
  const logTruncated = (reason: string): void => {
    logEvent('tengu_mcp_large_result_handled', {
      outcome: 'truncated',
      reason,
      sizeEstimateTokens,
    } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
  }

  // Feature switched off: use the old truncation behavior
  if (isEnvDefinedFalsy(process.env.ENABLE_MCP_LARGE_OUTPUT_FILES)) {
    logTruncated('env_disabled')
    return await truncateMcpContentIfNeeded(content)
  }

  // Content should exist here (it needed truncation); this guard narrows the type
  if (!content) {
    return content
  }

  // Persisting images as JSON would defeat image compression and make them
  // non-viewable, so truncate instead
  if (contentContainsImages(content)) {
    logTruncated('contains_images')
    return await truncateMcpContentIfNeeded(content)
  }

  // Persisted file ID: mcp-<server>-<tool>-<timestamp>
  const stamp = Date.now()
  const persistId = `mcp-${normalizeNameForMCP(name)}-${normalizeNameForMCP(tool)}-${stamp}`
  // persistToolResult expects a string (or specific block types)
  const serialized =
    typeof content === 'string' ? content : jsonStringify(content, null, 2)
  const persisted = await persistToolResult(serialized, persistId)

  if (isPersistError(persisted)) {
    // Saving failed: report the size and the failure instead of the content
    logTruncated('persist_failed')
    return `Error: result (${serialized.length.toLocaleString()} characters) exceeds maximum allowed tokens. Failed to save output to file: ${persisted.error}. If this MCP server provides pagination or filtering tools, use them to retrieve specific portions of the data.`
  }

  logEvent('tengu_mcp_large_result_handled', {
    outcome: 'persisted',
    reason: 'file_saved',
    sizeEstimateTokens,
    persistedSizeChars: persisted.originalSize,
  } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)

  return getLargeOutputInstructions(
    persisted.filepath,
    persisted.originalSize,
    getFormatDescription(transformed.type, transformed.schema),
  )
}
2800
/**
 * Result of an MCP tool call, as resolved by callMCPToolWithUrlElicitationRetry
 * (which calls an MCP tool, handling UrlElicitationRequiredError (-32042) by
 * displaying the URL elicitation to the user, waiting for the completion
 * notification, and retrying the tool call) and by its injectable `callToolFn`.
 */
type MCPToolCallResult = {
  // Tool output to return to the caller
  content: MCPToolResult
  // Optional `_meta` value accompanying the result
  _meta?: Record<string, unknown>
  // Optional `structuredContent` value accompanying the result
  structuredContent?: Record<string, unknown>
}
2811
/** Past-tense wording for an elicitation action: "declined", otherwise `<action>ed`. */
function describeUrlElicitationAction(action: string): string {
  return action === 'decline' ? 'declined' : action + 'ed'
}

/**
 * Pull the well-formed URL elicitations out of an McpError's `data` payload.
 * Entries lacking any of the required ElicitRequestURLParams fields are dropped.
 */
function parseUrlElicitationsFromErrorData(
  errorData: unknown,
): ElicitRequestURLParams[] {
  const candidates: unknown[] =
    errorData != null &&
    typeof errorData === 'object' &&
    'elicitations' in errorData &&
    Array.isArray(errorData.elicitations)
      ? (errorData.elicitations as unknown[])
      : []

  return candidates.filter((entry): entry is ElicitRequestURLParams => {
    if (entry == null || typeof entry !== 'object') return false
    const fields = entry as Record<string, unknown>
    return (
      fields.mode === 'url' &&
      typeof fields.url === 'string' &&
      typeof fields.elicitationId === 'string' &&
      typeof fields.message === 'string'
    )
  })
}

/**
 * REPL mode: queue a URL elicitation for ElicitationDialog (two-phase
 * consent/waiting flow) and resolve with the outcome. Resolves with `cancel`
 * if the signal is, or becomes, aborted.
 */
function requestUrlElicitationViaQueue(
  serverName: string,
  elicitation: ElicitRequestURLParams,
  signal: AbortSignal,
  setAppState: (f: (prev: AppState) => AppState) => void,
): Promise<ElicitResult> {
  const waitingState: ElicitationWaitingState = {
    actionLabel: 'Retry now',
    showCancel: true,
  }

  return new Promise<ElicitResult>(resolve => {
    const onAbort = () => {
      resolve({ action: 'cancel' })
    }
    if (signal.aborted) {
      onAbort()
      return
    }
    signal.addEventListener('abort', onAbort, { once: true })

    setAppState(prev => ({
      ...prev,
      elicitation: {
        queue: [
          ...prev.elicitation.queue,
          {
            serverName,
            requestId: `error-elicit-${elicitation.elicitationId}`,
            params: elicitation,
            signal,
            waitingState,
            respond: result => {
              // Phase 1 consent: accept is a no-op (doesn't resolve retry Promise)
              if (result.action === 'accept') {
                return
              }
              // Decline or cancel: resolve the retry Promise
              signal.removeEventListener('abort', onAbort)
              resolve(result)
            },
            onWaitingDismiss: action => {
              signal.removeEventListener('abort', onAbort)
              resolve({ action: action === 'retry' ? 'accept' : 'cancel' })
            },
          },
        ],
      },
    }))
  })
}

/**
 * Calls an MCP tool and, when the call fails with the URL-elicitation-required
 * error code (-32042), resolves each elicitation listed in the error data and
 * then retries the call (at most 3 retries after the initial attempt).
 *
 * Each elicitation is resolved by, in order: elicitation hooks; the
 * `handleElicitation` callback when provided (print/SDK mode); otherwise the
 * REPL elicitation queue. The outcome is then passed through the elicitation
 * result hooks. Any outcome other than `accept` ends the call early with an
 * explanatory text result rather than an error.
 *
 * Errors that are not URL elicitation errors, that carry no valid
 * elicitations, or that occur after the retry limit are rethrown unchanged.
 *
 * @internal Exported for testing.
 */
export async function callMCPToolWithUrlElicitationRetry({
  client: connectedClient,
  clientConnection,
  tool,
  args,
  meta,
  signal,
  setAppState,
  onProgress,
  callToolFn = callMCPTool,
  handleElicitation,
}: {
  client: ConnectedMCPServer
  clientConnection: MCPServerConnection
  tool: string
  args: Record<string, unknown>
  meta?: Record<string, unknown>
  signal: AbortSignal
  setAppState: (f: (prev: AppState) => AppState) => void
  onProgress?: (data: MCPProgress) => void
  /** Injectable for testing. Defaults to callMCPTool. */
  callToolFn?: (opts: {
    client: ConnectedMCPServer
    tool: string
    args: Record<string, unknown>
    meta?: Record<string, unknown>
    signal: AbortSignal
    onProgress?: (data: MCPProgress) => void
  }) => Promise<MCPToolCallResult>
  /** Handler for URL elicitations when no hook handles them.
   * In print/SDK mode, delegates to structuredIO. In REPL, falls back to queue. */
  handleElicitation?: (
    serverName: string,
    params: ElicitRequestURLParams,
    signal: AbortSignal,
  ) => Promise<ElicitResult>
}): Promise<MCPToolCallResult> {
  const MAX_URL_ELICITATION_RETRIES = 3

  for (let attempt = 0; ; attempt++) {
    try {
      return await callToolFn({
        client: connectedClient,
        tool,
        args,
        meta,
        signal,
        onProgress,
      })
    } catch (error) {
      // The MCP SDK's Protocol creates plain McpError (not UrlElicitationRequiredError)
      // for error responses, so the error code is checked instead of instanceof.
      // Anything else, or a failure past the retry limit, propagates unchanged.
      if (
        !(error instanceof McpError) ||
        error.code !== ErrorCode.UrlElicitationRequired ||
        attempt >= MAX_URL_ELICITATION_RETRIES
      ) {
        throw error
      }

      const elicitations = parseUrlElicitationsFromErrorData(error.data)
      const serverName =
        clientConnection.type === 'connected'
          ? clientConnection.name
          : 'unknown'

      if (elicitations.length === 0) {
        logMCPDebug(
          serverName,
          `Tool '${tool}' returned -32042 but no valid elicitations in error data`,
        )
        throw error
      }

      logMCPDebug(
        serverName,
        `Tool '${tool}' requires URL elicitation (error -32042, attempt ${attempt + 1}), processing ${elicitations.length} elicitation(s)`,
      )

      // Process each URL elicitation from the error.
      // The completion notification handler (in registerElicitationHandler) sets
      // `completed: true` on the matching queue event; the dialog reacts to this flag.
      for (const elicitation of elicitations) {
        const { elicitationId } = elicitation

        // Hooks get the first chance to resolve the elicitation programmatically
        const hookResponse = await runElicitationHooks(
          serverName,
          elicitation,
          signal,
        )
        if (hookResponse) {
          logMCPDebug(
            serverName,
            `URL elicitation ${elicitationId} resolved by hook: ${jsonStringify(hookResponse)}`,
          )
          if (hookResponse.action === 'accept') {
            // Hook accepted — skip the UI and move on to the next elicitation
            continue
          }
          return {
            content: `URL elicitation was ${describeUrlElicitationAction(hookResponse.action)} by a hook. The tool "${tool}" could not complete because it requires the user to open a URL.`,
          }
        }

        // No hook answered: use the callback (print/SDK mode, which sends a
        // control request through structuredIO) or the REPL dialog queue.
        const userResult: ElicitResult = handleElicitation
          ? await handleElicitation(serverName, elicitation, signal)
          : await requestUrlElicitationViaQueue(
              serverName,
              elicitation,
              signal,
              setAppState,
            )

        // ElicitationResult hooks may modify or block the response
        const finalResult = await runElicitationResultHooks(
          serverName,
          userResult,
          signal,
          'url',
          elicitationId,
        )

        if (finalResult.action !== 'accept') {
          logMCPDebug(
            serverName,
            `User ${describeUrlElicitationAction(finalResult.action)} URL elicitation ${elicitationId}`,
          )
          return {
            content: `URL elicitation was ${describeUrlElicitationAction(finalResult.action)} by the user. The tool "${tool}" could not complete because it requires the user to open a URL.`,
          }
        }

        logMCPDebug(
          serverName,
          `Elicitation ${elicitationId} completed, retrying tool call`,
        )
      }

      // All elicitations accepted: loop back to retry the tool call
    }
  }
}
3028
/**
 * Invokes a tool on a connected MCP server and returns its processed result.
 *
 * While the call is in flight a debug line is logged every 30 seconds, and
 * the SDK call is raced against a local timeout of getMcpToolTimeoutMs().
 *
 * On success the raw result is passed through processMCPResult and returned
 * together with the result's `_meta` and `structuredContent`.
 *
 * Failure handling:
 * - A result flagged `isError` throws
 *   McpToolCallError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, using the text
 *   of the first content item (or the legacy `error` field) as details.
 * - An error with code 401, or an UnauthorizedError, throws McpAuthError.
 * - A session-expired error, or a -32000 "Connection closed" error on an
 *   `http` / `claudeai-proxy` server, clears the server cache and throws
 *   McpSessionExpiredError.
 * - An AbortError resolves with `{ content: undefined }` instead of throwing.
 * - Any other error is rethrown unchanged.
 */
async function callMCPTool({
  client: { client, name, config },
  tool,
  args,
  meta,
  signal,
  onProgress,
}: {
  client: ConnectedMCPServer
  tool: string
  args: Record<string, unknown>
  meta?: Record<string, unknown>
  signal: AbortSignal
  onProgress?: (data: MCPProgress) => void
}): Promise<{
  content: MCPToolResult
  _meta?: Record<string, unknown>
  structuredContent?: Record<string, unknown>
}> {
  const toolStartTime = Date.now()
  let progressInterval: NodeJS.Timeout | undefined

  try {
    logMCPDebug(name, `Calling MCP tool: ${tool}`)

    // Set up progress logging for long-running tools (every 30 seconds).
    // Values are handed to the callback as timer arguments rather than
    // captured from the enclosing scope.
    progressInterval = setInterval(
      (startTime, name, tool) => {
        const elapsed = Date.now() - startTime
        const elapsedSeconds = Math.floor(elapsed / 1000)
        const duration = `${elapsedSeconds}s`
        logMCPDebug(name, `Tool '${tool}' still running (${duration} elapsed)`)
      },
      30000, // Log every 30 seconds
      toolStartTime,
      name,
      tool,
    )

    // Use Promise.race with our own timeout to handle cases where SDK's
    // internal timeout doesn't work (e.g., SSE stream breaks mid-request)
    const timeoutMs = getMcpToolTimeoutMs()
    let timeoutId: NodeJS.Timeout | undefined

    // Never resolves; only rejects once the timer fires
    const timeoutPromise = new Promise<never>((_, reject) => {
      timeoutId = setTimeout(
        (reject, name, tool, timeoutMs) => {
          reject(
            new TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
              `MCP server "${name}" tool "${tool}" timed out after ${Math.floor(timeoutMs / 1000)}s`,
              'MCP tool timeout',
            ),
          )
        },
        timeoutMs,
        reject,
        name,
        tool,
        timeoutMs,
      )
    })

    const result = await Promise.race([
      client.callTool(
        {
          name: tool,
          arguments: args,
          _meta: meta,
        },
        CallToolResultSchema,
        {
          signal,
          timeout: timeoutMs,
          // Translate SDK progress notifications into MCPProgress events
          onprogress: onProgress
            ? sdkProgress => {
                onProgress({
                  type: 'mcp_progress',
                  status: 'progress',
                  serverName: name,
                  toolName: tool,
                  progress: sdkProgress.progress,
                  total: sdkProgress.total,
                  progressMessage: sdkProgress.message,
                })
              }
            : undefined,
        },
      ),
      timeoutPromise,
    ]).finally(() => {
      // Whichever side of the race settled, the local timer is no longer needed
      if (timeoutId) {
        clearTimeout(timeoutId)
      }
    })

    // The server reported a tool-level error inside an otherwise valid response
    if ('isError' in result && result.isError) {
      let errorDetails = 'Unknown error'
      if (
        'content' in result &&
        Array.isArray(result.content) &&
        result.content.length > 0
      ) {
        // Only the first content item is inspected for an error message
        const firstContent = result.content[0]
        if (
          firstContent &&
          typeof firstContent === 'object' &&
          'text' in firstContent
        ) {
          errorDetails = firstContent.text
        }
      } else if ('error' in result) {
        // Fallback for legacy error format
        errorDetails = String(result.error)
      }
      logMCPError(name, errorDetails)
      throw new McpToolCallError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS(
        errorDetails,
        'MCP tool returned error',
        '_meta' in result && result._meta ? { _meta: result._meta } : undefined,
      )
    }
    // Human-readable duration: ms below 1s, seconds below 1min, else "Xm Ys"
    const elapsed = Date.now() - toolStartTime
    const duration =
      elapsed < 1000
        ? `${elapsed}ms`
        : elapsed < 60000
          ? `${Math.floor(elapsed / 1000)}s`
          : `${Math.floor(elapsed / 60000)}m ${Math.floor((elapsed % 60000) / 1000)}s`

    logMCPDebug(name, `Tool '${tool}' completed successfully in ${duration}`)

    // Log code indexing tool usage
    const codeIndexingTool = detectCodeIndexingFromMcpServerName(name)
    if (codeIndexingTool) {
      logEvent('tengu_code_indexing_tool_used', {
        tool: codeIndexingTool as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        source:
          'mcp' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        success: true,
      })
    }

    const content = await processMCPResult(result, tool, name)
    return {
      content,
      _meta: result._meta as Record<string, unknown> | undefined,
      structuredContent: result.structuredContent as
        | Record<string, unknown>
        | undefined,
    }
  } catch (e) {
    // Clear the interval right away: the `finally` below only runs after this
    // handler completes, and the handler may still await clearServerCache.
    if (progressInterval !== undefined) {
      clearInterval(progressInterval)
    }

    const elapsed = Date.now() - toolStartTime

    if (e instanceof Error && e.name !== 'AbortError') {
      logMCPDebug(
        name,
        `Tool '${tool}' failed after ${Math.floor(elapsed / 1000)}s: ${e.message}`,
      )
    }

    // Check for 401 errors indicating expired/invalid OAuth tokens
    // The MCP SDK's StreamableHTTPError has a `code` property with the HTTP status
    if (e instanceof Error) {
      const errorCode = 'code' in e ? (e.code as number | undefined) : undefined
      if (errorCode === 401 || e instanceof UnauthorizedError) {
        logMCPDebug(
          name,
          `Tool call returned 401 Unauthorized - token may have expired`,
        )
        logEvent('tengu_mcp_tool_call_auth_error', {})
        throw new McpAuthError(
          name,
          `MCP server "${name}" requires re-authorization (token expired)`,
        )
      }

      // Check for session expiry — two error shapes can surface here:
      // 1. Direct 404 + JSON-RPC -32001 from the server (StreamableHTTPError)
      // 2. -32000 "Connection closed" (McpError) — the SDK closes the transport
      //    after the onerror handler fires, so the pending callTool() rejects
      //    with this derived error instead of the original 404.
      // In both cases, clear the connection cache so the next tool call
      // creates a fresh session.
      const isSessionExpired = isMcpSessionExpiredError(e)
      const isConnectionClosedOnHttp =
        'code' in e &&
        (e as Error & { code?: number }).code === -32000 &&
        e.message.includes('Connection closed') &&
        (config.type === 'http' || config.type === 'claudeai-proxy')
      if (isSessionExpired || isConnectionClosedOnHttp) {
        logMCPDebug(
          name,
          `MCP session expired during tool call (${isSessionExpired ? '404/-32001' : 'connection closed'}), clearing connection cache for re-initialization`,
        )
        logEvent('tengu_mcp_session_expired', {})
        await clearServerCache(name, config)
        throw new McpSessionExpiredError(name)
      }
    }

    // When the user hits esc, avoid logspew: only non-abort errors are rethrown
    if (!(e instanceof Error) || e.name !== 'AbortError') {
      throw e
    }
    // Aborted: resolve with no content instead of raising
    return { content: undefined }
  } finally {
    // Always clear intervals
    if (progressInterval !== undefined) {
      clearInterval(progressInterval)
    }
  }
}
3246
/**
 * Returns the ID of the tool_use block that leads an assistant message, or
 * undefined when the first content block is missing or is not a tool_use.
 */
function extractToolUseId(message: AssistantMessage): string | undefined {
  const firstBlock = message.message.content[0]
  return firstBlock?.type === 'tool_use' ? firstBlock.id : undefined
}
3253
/**
 * Sets up SDK MCP clients by creating transports and connecting them.
 * This is used for SDK MCP servers that run in the same process as the SDK.
 *
 * All servers are connected in parallel. A server that fails to connect (or
 * whose tool listing fails) does not reject the returned promise; it is
 * reported as a `failed` entry in `clients` and contributes no tools.
 *
 * @param sdkMcpConfigs - The SDK MCP server configurations, keyed by server name
 * @param sendMcpMessage - Callback to send MCP messages through the control channel
 * @returns The per-server connections (connected or failed) and the combined
 *   list of tools fetched from the connected servers
 */
export async function setupSdkMcpClients(
  sdkMcpConfigs: Record<string, McpSdkServerConfig>,
  sendMcpMessage: (
    serverName: string,
    message: JSONRPCMessage,
  ) => Promise<JSONRPCMessage>,
): Promise<{
  clients: MCPServerConnection[]
  tools: Tool[]
}> {
  const clients: MCPServerConnection[] = []
  const tools: Tool[] = []
  const entries = Object.entries(sdkMcpConfigs)

  // Connect to all servers in parallel
  const results = await Promise.allSettled(
    entries.map(async ([name, config]) => {
      // The same scope is used for both the connected and the failed entry so
      // a server does not change scope depending on whether it came up.
      const scopedConfig = { ...config, scope: 'dynamic' as const }

      const transport = new SdkControlClientTransport(name, sendMcpMessage)

      const client = new Client(
        {
          name: 'claude-code',
          title: 'Claude Code',
          version: MACRO.VERSION ?? 'unknown',
          description: "Anthropic's agentic coding tool",
          websiteUrl: PRODUCT_URL,
        },
        {
          capabilities: {},
        },
      )

      try {
        await client.connect(transport)

        // Capabilities advertised by the server during initialization
        const capabilities = client.getServerCapabilities()

        const connectedClient: MCPServerConnection = {
          type: 'connected',
          name,
          capabilities: capabilities ?? {},
          client,
          config: scopedConfig,
          cleanup: async () => {
            await client.close()
          },
        }

        // Only ask for tools when the server advertises the capability
        const serverTools: Tool[] = []
        if (capabilities?.tools) {
          serverTools.push(...(await fetchToolsForClient(connectedClient)))
        }

        return {
          client: connectedClient,
          tools: serverTools,
        }
      } catch (error) {
        logMCPError(name, `Failed to connect SDK MCP server: ${error}`)

        // The failure may have happened after connect() succeeded (e.g. while
        // listing tools). The failed entry carries no cleanup handle, so close
        // the client here to avoid leaking the connection.
        try {
          await client.close()
        } catch {
          // Best effort: the client may never have connected or may already
          // be closed; the original failure has been logged above.
        }

        return {
          client: {
            type: 'failed' as const,
            name,
            config: scopedConfig,
          },
          tools: [],
        }
      }
    }),
  )

  // Process results and collect clients and tools
  results.forEach((result, index) => {
    if (result.status === 'fulfilled') {
      clients.push(result.value.client)
      tools.push(...result.value.tools)
      return
    }
    // A rejection means something threw outside the try/catch above (e.g.
    // while constructing the transport or client), so nothing has been
    // logged for it yet.
    const serverName = entries[index]?.[0] ?? 'unknown'
    logMCPError(
      serverName,
      `Unexpected error setting up SDK MCP server: ${result.reason}`,
    )
  })

  return { clients, tools }
}
3349