claude-code-best 3 тижнів тому
коміт
f90eee85d8
100 змінених файлів з 34869 додано та 0 видалено
  1. 1 0
      .gitignore
  2. 108 0
      bun.lock
  3. 89 0
      package.json
  4. 1295 0
      src/QueryEngine.ts
  5. 125 0
      src/Task.ts
  6. 792 0
      src/Tool.ts
  7. 87 0
      src/assistant/sessionHistory.ts
  8. 1758 0
      src/bootstrap/state.ts
  9. 539 0
      src/bridge/bridgeApi.ts
  10. 48 0
      src/bridge/bridgeConfig.ts
  11. 135 0
      src/bridge/bridgeDebug.ts
  12. 202 0
      src/bridge/bridgeEnabled.ts
  13. 2999 0
      src/bridge/bridgeMain.ts
  14. 461 0
      src/bridge/bridgeMessaging.ts
  15. 43 0
      src/bridge/bridgePermissionCallbacks.ts
  16. 210 0
      src/bridge/bridgePointer.ts
  17. 163 0
      src/bridge/bridgeStatusUtil.ts
  18. 530 0
      src/bridge/bridgeUI.ts
  19. 56 0
      src/bridge/capacityWake.ts
  20. 168 0
      src/bridge/codeSessionApi.ts
  21. 384 0
      src/bridge/createSession.ts
  22. 141 0
      src/bridge/debugUtils.ts
  23. 165 0
      src/bridge/envLessBridgeConfig.ts
  24. 71 0
      src/bridge/flushGate.ts
  25. 175 0
      src/bridge/inboundAttachments.ts
  26. 80 0
      src/bridge/inboundMessages.ts
  27. 569 0
      src/bridge/initReplBridge.ts
  28. 256 0
      src/bridge/jwtUtils.ts
  29. 110 0
      src/bridge/pollConfig.ts
  30. 82 0
      src/bridge/pollConfigDefaults.ts
  31. 1008 0
      src/bridge/remoteBridgeCore.ts
  32. 2406 0
      src/bridge/replBridge.ts
  33. 36 0
      src/bridge/replBridgeHandle.ts
  34. 370 0
      src/bridge/replBridgeTransport.ts
  35. 57 0
      src/bridge/sessionIdCompat.ts
  36. 550 0
      src/bridge/sessionRunner.ts
  37. 210 0
      src/bridge/trustedDevice.ts
  38. 262 0
      src/bridge/types.ts
  39. 127 0
      src/bridge/workSecret.ts
  40. 370 0
      src/buddy/CompanionSprite.tsx
  41. 133 0
      src/buddy/companion.ts
  42. 36 0
      src/buddy/prompt.ts
  43. 514 0
      src/buddy/sprites.ts
  44. 148 0
      src/buddy/types.ts
  45. 97 0
      src/buddy/useBuddyNotification.tsx
  46. 31 0
      src/cli/exit.ts
  47. 70 0
      src/cli/handlers/agents.ts
  48. 330 0
      src/cli/handlers/auth.ts
  49. 170 0
      src/cli/handlers/autoMode.ts
  50. 361 0
      src/cli/handlers/mcp.tsx
  51. 878 0
      src/cli/handlers/plugins.ts
  52. 109 0
      src/cli/handlers/util.tsx
  53. 32 0
      src/cli/ndjsonSafeStringify.ts
  54. 5594 0
      src/cli/print.ts
  55. 255 0
      src/cli/remoteIO.ts
  56. 859 0
      src/cli/structuredIO.ts
  57. 282 0
      src/cli/transports/HybridTransport.ts
  58. 711 0
      src/cli/transports/SSETransport.ts
  59. 275 0
      src/cli/transports/SerialBatchEventUploader.ts
  60. 800 0
      src/cli/transports/WebSocketTransport.ts
  61. 131 0
      src/cli/transports/WorkerStateUploader.ts
  62. 998 0
      src/cli/transports/ccrClient.ts
  63. 45 0
      src/cli/transports/transportUtils.ts
  64. 422 0
      src/cli/update.ts
  65. 754 0
      src/commands.ts
  66. 125 0
      src/commands/add-dir/add-dir.tsx
  67. 11 0
      src/commands/add-dir/index.ts
  68. 110 0
      src/commands/add-dir/validation.ts
  69. 109 0
      src/commands/advisor.ts
  70. 12 0
      src/commands/agents/agents.tsx
  71. 10 0
      src/commands/agents/index.ts
  72. 1 0
      src/commands/ant-trace/index.js
  73. 1 0
      src/commands/autofix-pr/index.js
  74. 1 0
      src/commands/backfill-sessions/index.js
  75. 296 0
      src/commands/branch/branch.ts
  76. 14 0
      src/commands/branch/index.ts
  77. 1 0
      src/commands/break-cache/index.js
  78. 200 0
      src/commands/bridge-kick.ts
  79. 508 0
      src/commands/bridge/bridge.tsx
  80. 26 0
      src/commands/bridge/index.ts
  81. 130 0
      src/commands/brief.ts
  82. 242 0
      src/commands/btw/btw.tsx
  83. 13 0
      src/commands/btw/index.ts
  84. 1 0
      src/commands/bughunter/index.js
  85. 284 0
      src/commands/chrome/chrome.tsx
  86. 13 0
      src/commands/chrome/index.ts
  87. 144 0
      src/commands/clear/caches.ts
  88. 7 0
      src/commands/clear/clear.ts
  89. 251 0
      src/commands/clear/conversation.ts
  90. 19 0
      src/commands/clear/index.ts
  91. 93 0
      src/commands/color/color.ts
  92. 16 0
      src/commands/color/index.ts
  93. 158 0
      src/commands/commit-push-pr.ts
  94. 92 0
      src/commands/commit.ts
  95. 287 0
      src/commands/compact/compact.ts
  96. 15 0
      src/commands/compact/index.ts
  97. 7 0
      src/commands/config/config.tsx
  98. 11 0
      src/commands/config/index.ts
  99. 325 0
      src/commands/context/context-noninteractive.ts
  100. 63 0
      src/commands/context/context.tsx

+ 1 - 0
.gitignore

@@ -0,0 +1 @@
+node_modules

Різницю між файлами не показано, бо вона завелика
+ 108 - 0
bun.lock


+ 89 - 0
package.json

@@ -0,0 +1,89 @@
+{
+  "name": "claude-code",
+  "version": "1.0.0",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "build": "bun build src/entrypoints/cli.ts --outdir dist --target bun",
+    "dev": "bun run --watch src/entrypoints/cli.ts"
+  },
+  "dependencies": {
+    "@anthropic-ai/claude-agent-sdk": "latest",
+    "@anthropic-ai/mcpb": "latest",
+    "@anthropic-ai/sandbox-runtime": "latest",
+    "@anthropic-ai/sdk": "latest",
+    "@aws-sdk/client-bedrock": "latest",
+    "@aws-sdk/client-bedrock-runtime": "latest",
+    "@aws-sdk/credential-providers": "latest",
+    "@commander-js/extra-typings": "latest",
+    "@growthbook/growthbook": "latest",
+    "@modelcontextprotocol/sdk": "latest",
+    "@opentelemetry/api": "latest",
+    "@opentelemetry/api-logs": "latest",
+    "@opentelemetry/core": "latest",
+    "@opentelemetry/exporter-logs-otlp-grpc": "latest",
+    "@opentelemetry/exporter-logs-otlp-http": "latest",
+    "@opentelemetry/exporter-metrics-otlp-grpc": "latest",
+    "@opentelemetry/exporter-metrics-otlp-http": "latest",
+    "@opentelemetry/exporter-prometheus": "latest",
+    "@opentelemetry/exporter-trace-otlp-grpc": "latest",
+    "@opentelemetry/exporter-trace-otlp-http": "latest",
+    "@opentelemetry/resources": "latest",
+    "@opentelemetry/sdk-logs": "latest",
+    "@opentelemetry/sdk-metrics": "latest",
+    "@opentelemetry/sdk-trace-base": "latest",
+    "@smithy/core": "latest",
+    "@smithy/node-http-handler": "latest",
+    "ajv": "latest",
+    "asciichart": "latest",
+    "auto-bind": "latest",
+    "axios": "latest",
+    "bidi-js": "latest",
+    "chalk": "latest",
+    "chokidar": "latest",
+    "cli-boxes": "latest",
+    "code-excerpt": "latest",
+    "diff": "latest",
+    "emoji-regex": "latest",
+    "env-paths": "latest",
+    "execa": "latest",
+    "figures": "latest",
+    "fuse.js": "latest",
+    "get-east-asian-width": "latest",
+    "google-auth-library": "latest",
+    "highlight.js": "latest",
+    "https-proxy-agent": "latest",
+    "ignore": "latest",
+    "indent-string": "latest",
+    "lodash-es": "latest",
+    "lru-cache": "latest",
+    "marked": "latest",
+    "p-map": "latest",
+    "picomatch": "latest",
+    "proper-lockfile": "latest",
+    "qrcode": "latest",
+    "react": "latest",
+    "react-reconciler": "latest",
+    "semver": "latest",
+    "shell-quote": "latest",
+    "signal-exit": "latest",
+    "stack-utils": "latest",
+    "strip-ansi": "latest",
+    "supports-hyperlinks": "latest",
+    "tree-kill": "latest",
+    "type-fest": "latest",
+    "undici": "latest",
+    "usehooks-ts": "latest",
+    "vscode-languageserver-protocol": "latest",
+    "vscode-languageserver-types": "latest",
+    "wrap-ansi": "latest",
+    "ws": "latest",
+    "xss": "latest",
+    "zod": "latest"
+  },
+  "devDependencies": {
+    "@types/react": "latest",
+    "@types/react-reconciler": "latest",
+    "typescript": "latest"
+  }
+}

+ 1295 - 0
src/QueryEngine.ts

@@ -0,0 +1,1295 @@
+import { feature } from 'bun:bundle'
+import type { ContentBlockParam } from '@anthropic-ai/sdk/resources/messages.mjs'
+import { randomUUID } from 'crypto'
+import last from 'lodash-es/last.js'
+import {
+  getSessionId,
+  isSessionPersistenceDisabled,
+} from 'src/bootstrap/state.js'
+import type {
+  PermissionMode,
+  SDKCompactBoundaryMessage,
+  SDKMessage,
+  SDKPermissionDenial,
+  SDKStatus,
+  SDKUserMessageReplay,
+} from 'src/entrypoints/agentSdkTypes.js'
+import { accumulateUsage, updateUsage } from 'src/services/api/claude.js'
+import type { NonNullableUsage } from 'src/services/api/logging.js'
+import { EMPTY_USAGE } from 'src/services/api/logging.js'
+import stripAnsi from 'strip-ansi'
+import type { Command } from './commands.js'
+import { getSlashCommandToolSkills } from './commands.js'
+import {
+  LOCAL_COMMAND_STDERR_TAG,
+  LOCAL_COMMAND_STDOUT_TAG,
+} from './constants/xml.js'
+import {
+  getModelUsage,
+  getTotalAPIDuration,
+  getTotalCost,
+} from './cost-tracker.js'
+import type { CanUseToolFn } from './hooks/useCanUseTool.js'
+import { loadMemoryPrompt } from './memdir/memdir.js'
+import { hasAutoMemPathOverride } from './memdir/paths.js'
+import { query } from './query.js'
+import { categorizeRetryableAPIError } from './services/api/errors.js'
+import type { MCPServerConnection } from './services/mcp/types.js'
+import type { AppState } from './state/AppState.js'
+import { type Tools, type ToolUseContext, toolMatchesName } from './Tool.js'
+import type { AgentDefinition } from './tools/AgentTool/loadAgentsDir.js'
+import { SYNTHETIC_OUTPUT_TOOL_NAME } from './tools/SyntheticOutputTool/SyntheticOutputTool.js'
+import type { Message } from './types/message.js'
+import type { OrphanedPermission } from './types/textInputTypes.js'
+import { createAbortController } from './utils/abortController.js'
+import type { AttributionState } from './utils/commitAttribution.js'
+import { getGlobalConfig } from './utils/config.js'
+import { getCwd } from './utils/cwd.js'
+import { isBareMode, isEnvTruthy } from './utils/envUtils.js'
+import { getFastModeState } from './utils/fastMode.js'
+import {
+  type FileHistoryState,
+  fileHistoryEnabled,
+  fileHistoryMakeSnapshot,
+} from './utils/fileHistory.js'
+import {
+  cloneFileStateCache,
+  type FileStateCache,
+} from './utils/fileStateCache.js'
+import { headlessProfilerCheckpoint } from './utils/headlessProfiler.js'
+import { registerStructuredOutputEnforcement } from './utils/hooks/hookHelpers.js'
+import { getInMemoryErrors } from './utils/log.js'
+import { countToolCalls, SYNTHETIC_MESSAGES } from './utils/messages.js'
+import {
+  getMainLoopModel,
+  parseUserSpecifiedModel,
+} from './utils/model/model.js'
+import { loadAllPluginsCacheOnly } from './utils/plugins/pluginLoader.js'
+import {
+  type ProcessUserInputContext,
+  processUserInput,
+} from './utils/processUserInput/processUserInput.js'
+import { fetchSystemPromptParts } from './utils/queryContext.js'
+import { setCwd } from './utils/Shell.js'
+import {
+  flushSessionStorage,
+  recordTranscript,
+} from './utils/sessionStorage.js'
+import { asSystemPrompt } from './utils/systemPromptType.js'
+import { resolveThemeSetting } from './utils/systemTheme.js'
+import {
+  shouldEnableThinkingByDefault,
+  type ThinkingConfig,
+} from './utils/thinking.js'
+
+// Lazy: MessageSelector.tsx pulls React/ink; only needed for message filtering at query time
+/* eslint-disable @typescript-eslint/no-require-imports */
+const messageSelector =
+  (): typeof import('src/components/MessageSelector.js') => {
+    // Deferred require: loading MessageSelector eagerly would pull in
+    // React/ink, which is only needed once message filtering actually runs.
+    return require('src/components/MessageSelector.js')
+  }
+
+import {
+  localCommandOutputToSDKAssistantMessage,
+  toSDKCompactMetadata,
+} from './utils/messages/mappers.js'
+import {
+  buildSystemInitMessage,
+  sdkCompatToolName,
+} from './utils/messages/systemInit.js'
+import {
+  getScratchpadDir,
+  isScratchpadEnabled,
+} from './utils/permissions/filesystem.js'
+/* eslint-enable @typescript-eslint/no-require-imports */
+import {
+  handleOrphanedPermission,
+  isResultSuccessful,
+  normalizeMessage,
+} from './utils/queryHelpers.js'
+
+// Dead code elimination: conditional import for coordinator mode
+/* eslint-disable @typescript-eslint/no-require-imports */
+// Resolved once at module load; the empty-object stub keeps every call site
+// unconditional when the feature is compiled out.
+const getCoordinatorUserContext: (
+  mcpClients: ReadonlyArray<{ name: string }>,
+  scratchpadDir?: string,
+) => { [k: string]: string } = feature('COORDINATOR_MODE')
+  ? require('./coordinator/coordinatorMode.js').getCoordinatorUserContext
+  : () => ({})
+/* eslint-enable @typescript-eslint/no-require-imports */
+
+// Dead code elimination: conditional import for snip compaction
+/* eslint-disable @typescript-eslint/no-require-imports */
+// Both resolve to null when the HISTORY_SNIP feature flag is compiled out;
+// NOTE(review): presumably all call sites null-check these — confirm.
+const snipModule = feature('HISTORY_SNIP')
+  ? (require('./services/compact/snipCompact.js') as typeof import('./services/compact/snipCompact.js'))
+  : null
+const snipProjection = feature('HISTORY_SNIP')
+  ? (require('./services/compact/snipProjection.js') as typeof import('./services/compact/snipProjection.js'))
+  : null
+/* eslint-enable @typescript-eslint/no-require-imports */
+
+export type QueryEngineConfig = {
+  /** Working directory; applied via setCwd() at the start of each turn. */
+  cwd: string
+  tools: Tools
+  commands: Command[]
+  mcpClients: MCPServerConnection[]
+  agents: AgentDefinition[]
+  /** Permission gate; the engine wraps it to record denials for SDK reporting. */
+  canUseTool: CanUseToolFn
+  getAppState: () => AppState
+  setAppState: (f: (prev: AppState) => AppState) => void
+  /** Seeds the conversation history; defaults to an empty transcript. */
+  initialMessages?: Message[]
+  /** Seeds the engine's per-conversation file-state cache. */
+  readFileCache: FileStateCache
+  /** When set, replaces the default system prompt entirely. */
+  customSystemPrompt?: string
+  /** Appended after the (default or custom) system prompt. */
+  appendSystemPrompt?: string
+  /** Overrides the main-loop model; parsed with parseUserSpecifiedModel(). */
+  userSpecifiedModel?: string
+  fallbackModel?: string
+  /** Defaults to adaptive/disabled per shouldEnableThinkingByDefault(). */
+  thinkingConfig?: ThinkingConfig
+  maxTurns?: number
+  maxBudgetUsd?: number
+  /** Passed through to query(); shared budget for the query loop. */
+  taskBudget?: { total: number }
+  /** Enables structured-output enforcement when the synthetic output tool is present. */
+  jsonSchema?: Record<string, unknown>
+  verbose?: boolean
+  /** When true, user-authored input messages are selected for replay/ack to the caller. */
+  replayUserMessages?: boolean
+  /** Handler for URL elicitations triggered by MCP tool -32042 errors. */
+  handleElicitation?: ToolUseContext['handleElicitation']
+  includePartialMessages?: boolean
+  setSDKStatus?: (status: SDKStatus) => void
+  /** Optional; the engine creates its own AbortController when omitted. */
+  abortController?: AbortController
+  /** Handled at most once per engine lifetime, on the first submitMessage. */
+  orphanedPermission?: OrphanedPermission
+  /**
+   * Snip-boundary handler: receives each yielded system message plus the
+   * current mutableMessages store. Returns undefined if the message is not a
+   * snip boundary; otherwise returns the replayed snip result. Injected by
+   * ask() when HISTORY_SNIP is enabled so feature-gated strings stay inside
+   * the gated module (keeps QueryEngine free of excluded strings and testable
+   * despite feature() returning false under bun test). SDK-only: the REPL
+   * keeps full history for UI scrollback and projects on demand via
+   * projectSnippedView; QueryEngine truncates here to bound memory in long
+   * headless sessions (no UI to preserve).
+   */
+  snipReplay?: (
+    yieldedSystemMsg: Message,
+    store: Message[],
+  ) => { messages: Message[]; executed: boolean } | undefined
+}
+
+/**
+ * QueryEngine owns the query lifecycle and session state for a conversation.
+ * It extracts the core logic from ask() into a standalone class that can be
+ * used by both the headless/SDK path and (in a future phase) the REPL.
+ *
+ * One QueryEngine per conversation. Each submitMessage() call starts a new
+ * turn within the same conversation. State (messages, file cache, usage, etc.)
+ * persists across turns.
+ */
+export class QueryEngine {
+  private config: QueryEngineConfig
+  // Conversation history; persists across turns and is mutated in place
+  // (pushed to after processUserInput; replaced via slash-command setMessages).
+  private mutableMessages: Message[]
+  private abortController: AbortController
+  // Denials recorded by the wrapped canUseTool; reported on result messages.
+  private permissionDenials: SDKPermissionDenial[]
+  // Accumulated usage across turns; starts at EMPTY_USAGE.
+  private totalUsage: NonNullableUsage
+  // Orphaned-permission handling runs at most once per engine lifetime.
+  private hasHandledOrphanedPermission = false
+  // File-state cache seeded from config.readFileCache; shared across turns.
+  private readFileState: FileStateCache
+  // Turn-scoped skill discovery tracking (feeds was_discovered on
+  // tengu_skill_tool_invocation). Must persist across the two
+  // processUserInputContext rebuilds inside submitMessage, but is cleared
+  // at the start of each submitMessage to avoid unbounded growth across
+  // many turns in SDK mode.
+  private discoveredSkillNames = new Set<string>()
+  private loadedNestedMemoryPaths = new Set<string>()
+
+  constructor(config: QueryEngineConfig) {
+    // Seed per-conversation state from the config; counters start at their
+    // zero values and accumulate across turns.
+    this.config = config
+    this.mutableMessages = config.initialMessages ?? []
+    this.readFileState = config.readFileCache
+    this.abortController = config.abortController ?? createAbortController()
+    this.permissionDenials = []
+    this.totalUsage = EMPTY_USAGE
+  }
+
+  async *submitMessage(
+    prompt: string | ContentBlockParam[],
+    options?: { uuid?: string; isMeta?: boolean },
+  ): AsyncGenerator<SDKMessage, void, unknown> {
+    const {
+      cwd,
+      commands,
+      tools,
+      mcpClients,
+      verbose = false,
+      thinkingConfig,
+      maxTurns,
+      maxBudgetUsd,
+      taskBudget,
+      canUseTool,
+      customSystemPrompt,
+      appendSystemPrompt,
+      userSpecifiedModel,
+      fallbackModel,
+      jsonSchema,
+      getAppState,
+      setAppState,
+      replayUserMessages = false,
+      includePartialMessages = false,
+      agents = [],
+      setSDKStatus,
+      orphanedPermission,
+    } = this.config
+
+    this.discoveredSkillNames.clear()
+    setCwd(cwd)
+    const persistSession = !isSessionPersistenceDisabled()
+    const startTime = Date.now()
+
+    // Wrap canUseTool to track permission denials
+    const wrappedCanUseTool: CanUseToolFn = async (
+      tool,
+      input,
+      toolUseContext,
+      assistantMessage,
+      toolUseID,
+      forceDecision,
+    ) => {
+      const result = await canUseTool(
+        tool,
+        input,
+        toolUseContext,
+        assistantMessage,
+        toolUseID,
+        forceDecision,
+      )
+
+      // Track denials for SDK reporting
+      if (result.behavior !== 'allow') {
+        this.permissionDenials.push({
+          tool_name: sdkCompatToolName(tool.name),
+          tool_use_id: toolUseID,
+          tool_input: input,
+        })
+      }
+
+      return result
+    }
+
+    const initialAppState = getAppState()
+    const initialMainLoopModel = userSpecifiedModel
+      ? parseUserSpecifiedModel(userSpecifiedModel)
+      : getMainLoopModel()
+
+    const initialThinkingConfig: ThinkingConfig = thinkingConfig
+      ? thinkingConfig
+      : shouldEnableThinkingByDefault() !== false
+        ? { type: 'adaptive' }
+        : { type: 'disabled' }
+
+    headlessProfilerCheckpoint('before_getSystemPrompt')
+    // Narrow once so TS tracks the type through the conditionals below.
+    const customPrompt =
+      typeof customSystemPrompt === 'string' ? customSystemPrompt : undefined
+    const {
+      defaultSystemPrompt,
+      userContext: baseUserContext,
+      systemContext,
+    } = await fetchSystemPromptParts({
+      tools,
+      mainLoopModel: initialMainLoopModel,
+      additionalWorkingDirectories: Array.from(
+        initialAppState.toolPermissionContext.additionalWorkingDirectories.keys(),
+      ),
+      mcpClients,
+      customSystemPrompt: customPrompt,
+    })
+    headlessProfilerCheckpoint('after_getSystemPrompt')
+    const userContext = {
+      ...baseUserContext,
+      ...getCoordinatorUserContext(
+        mcpClients,
+        isScratchpadEnabled() ? getScratchpadDir() : undefined,
+      ),
+    }
+
+    // When an SDK caller provides a custom system prompt AND has set
+    // CLAUDE_COWORK_MEMORY_PATH_OVERRIDE, inject the memory-mechanics prompt.
+    // The env var is an explicit opt-in signal — the caller has wired up
+    // a memory directory and needs Claude to know how to use it (which
+    // Write/Edit tools to call, MEMORY.md filename, loading semantics).
+    // The caller can layer their own policy text via appendSystemPrompt.
+    const memoryMechanicsPrompt =
+      customPrompt !== undefined && hasAutoMemPathOverride()
+        ? await loadMemoryPrompt()
+        : null
+
+    const systemPrompt = asSystemPrompt([
+      ...(customPrompt !== undefined ? [customPrompt] : defaultSystemPrompt),
+      ...(memoryMechanicsPrompt ? [memoryMechanicsPrompt] : []),
+      ...(appendSystemPrompt ? [appendSystemPrompt] : []),
+    ])
+
+    // Register function hook for structured output enforcement
+    const hasStructuredOutputTool = tools.some(t =>
+      toolMatchesName(t, SYNTHETIC_OUTPUT_TOOL_NAME),
+    )
+    if (jsonSchema && hasStructuredOutputTool) {
+      registerStructuredOutputEnforcement(setAppState, getSessionId())
+    }
+
+    let processUserInputContext: ProcessUserInputContext = {
+      messages: this.mutableMessages,
+      // Slash commands that mutate the message array (e.g. /force-snip)
+      // call setMessages(fn).  In interactive mode this writes back to
+      // AppState; in print mode we write back to mutableMessages so the
+      // rest of the query loop (push at :389, snapshot at :392) sees
+      // the result.  The second processUserInputContext below (after
+      // slash-command processing) keeps the no-op — nothing else calls
+      // setMessages past that point.
+      setMessages: fn => {
+        this.mutableMessages = fn(this.mutableMessages)
+      },
+      onChangeAPIKey: () => {},
+      handleElicitation: this.config.handleElicitation,
+      options: {
+        commands,
+        debug: false, // we use stdout, so don't want to clobber it
+        tools,
+        verbose,
+        mainLoopModel: initialMainLoopModel,
+        thinkingConfig: initialThinkingConfig,
+        mcpClients,
+        mcpResources: {},
+        ideInstallationStatus: null,
+        isNonInteractiveSession: true,
+        customSystemPrompt,
+        appendSystemPrompt,
+        agentDefinitions: { activeAgents: agents, allAgents: [] },
+        theme: resolveThemeSetting(getGlobalConfig().theme),
+        maxBudgetUsd,
+      },
+      getAppState,
+      setAppState,
+      abortController: this.abortController,
+      readFileState: this.readFileState,
+      nestedMemoryAttachmentTriggers: new Set<string>(),
+      loadedNestedMemoryPaths: this.loadedNestedMemoryPaths,
+      dynamicSkillDirTriggers: new Set<string>(),
+      discoveredSkillNames: this.discoveredSkillNames,
+      setInProgressToolUseIDs: () => {},
+      setResponseLength: () => {},
+      updateFileHistoryState: (
+        updater: (prev: FileHistoryState) => FileHistoryState,
+      ) => {
+        setAppState(prev => {
+          const updated = updater(prev.fileHistory)
+          if (updated === prev.fileHistory) return prev
+          return { ...prev, fileHistory: updated }
+        })
+      },
+      updateAttributionState: (
+        updater: (prev: AttributionState) => AttributionState,
+      ) => {
+        setAppState(prev => {
+          const updated = updater(prev.attribution)
+          if (updated === prev.attribution) return prev
+          return { ...prev, attribution: updated }
+        })
+      },
+      setSDKStatus,
+    }
+
+    // Handle orphaned permission (only once per engine lifetime)
+    if (orphanedPermission && !this.hasHandledOrphanedPermission) {
+      this.hasHandledOrphanedPermission = true
+      for await (const message of handleOrphanedPermission(
+        orphanedPermission,
+        tools,
+        this.mutableMessages,
+        processUserInputContext,
+      )) {
+        yield message
+      }
+    }
+
+    const {
+      messages: messagesFromUserInput,
+      shouldQuery,
+      allowedTools,
+      model: modelFromUserInput,
+      resultText,
+    } = await processUserInput({
+      input: prompt,
+      mode: 'prompt',
+      setToolJSX: () => {},
+      context: {
+        ...processUserInputContext,
+        messages: this.mutableMessages,
+      },
+      messages: this.mutableMessages,
+      uuid: options?.uuid,
+      isMeta: options?.isMeta,
+      querySource: 'sdk',
+    })
+
+    // Push new messages, including user input and any attachments
+    this.mutableMessages.push(...messagesFromUserInput)
+
+    // Update params to reflect updates from processing /slash commands
+    const messages = [...this.mutableMessages]
+
+    // Persist the user's message(s) to transcript BEFORE entering the query
+    // loop. The for-await below only calls recordTranscript when ask() yields
+    // an assistant/user/compact_boundary message — which doesn't happen until
+    // the API responds. If the process is killed before that (e.g. user clicks
+    // Stop in cowork seconds after send), the transcript is left with only
+    // queue-operation entries; getLastSessionLog filters those out, returns
+    // null, and --resume fails with "No conversation found". Writing now makes
+    // the transcript resumable from the point the user message was accepted,
+    // even if no API response ever arrives.
+    //
+    // --bare / SIMPLE: fire-and-forget. Scripted calls don't --resume after
+    // kill-mid-request. The await is ~4ms on SSD, ~30ms under disk contention
+    // — the single largest controllable critical-path cost after module eval.
+    // Transcript is still written (for post-hoc debugging); just not blocking.
+    if (persistSession && messagesFromUserInput.length > 0) {
+      const transcriptPromise = recordTranscript(messages)
+      if (isBareMode()) {
+        void transcriptPromise
+      } else {
+        await transcriptPromise
+        if (
+          isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
+          isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
+        ) {
+          await flushSessionStorage()
+        }
+      }
+    }
+
+    // Filter messages that should be acknowledged after transcript
+    const replayableMessages = messagesFromUserInput.filter(
+      msg =>
+        (msg.type === 'user' &&
+          !msg.isMeta && // Skip synthetic caveat messages
+          !msg.toolUseResult && // Skip tool results (they'll be acked from query)
+          messageSelector().selectableUserMessagesFilter(msg)) || // Skip non-user-authored messages (task notifications, etc.)
+        (msg.type === 'system' && msg.subtype === 'compact_boundary'), // Always ack compact boundaries
+    )
+    const messagesToAck = replayUserMessages ? replayableMessages : []
+
+    // Update the ToolPermissionContext based on user input processing (as necessary)
+    setAppState(prev => ({
+      ...prev,
+      toolPermissionContext: {
+        ...prev.toolPermissionContext,
+        alwaysAllowRules: {
+          ...prev.toolPermissionContext.alwaysAllowRules,
+          command: allowedTools,
+        },
+      },
+    }))
+
+    const mainLoopModel = modelFromUserInput ?? initialMainLoopModel
+
+    // Recreate after processing the prompt to pick up updated messages and
+    // model (from slash commands).
+    processUserInputContext = {
+      messages,
+      setMessages: () => {},
+      onChangeAPIKey: () => {},
+      handleElicitation: this.config.handleElicitation,
+      options: {
+        commands,
+        debug: false,
+        tools,
+        verbose,
+        mainLoopModel,
+        thinkingConfig: initialThinkingConfig,
+        mcpClients,
+        mcpResources: {},
+        ideInstallationStatus: null,
+        isNonInteractiveSession: true,
+        customSystemPrompt,
+        appendSystemPrompt,
+        theme: resolveThemeSetting(getGlobalConfig().theme),
+        agentDefinitions: { activeAgents: agents, allAgents: [] },
+        maxBudgetUsd,
+      },
+      getAppState,
+      setAppState,
+      abortController: this.abortController,
+      readFileState: this.readFileState,
+      nestedMemoryAttachmentTriggers: new Set<string>(),
+      loadedNestedMemoryPaths: this.loadedNestedMemoryPaths,
+      dynamicSkillDirTriggers: new Set<string>(),
+      discoveredSkillNames: this.discoveredSkillNames,
+      setInProgressToolUseIDs: () => {},
+      setResponseLength: () => {},
+      updateFileHistoryState: processUserInputContext.updateFileHistoryState,
+      updateAttributionState: processUserInputContext.updateAttributionState,
+      setSDKStatus,
+    }
+
+    headlessProfilerCheckpoint('before_skills_plugins')
+    // Cache-only: headless/SDK/CCR startup must not block on network for
+    // ref-tracked plugins. CCR populates the cache via CLAUDE_CODE_SYNC_PLUGIN_INSTALL
+    // (headlessPluginInstall) or CLAUDE_CODE_PLUGIN_SEED_DIR before this runs;
+    // SDK callers that need fresh source can call /reload-plugins.
+    const [skills, { enabled: enabledPlugins }] = await Promise.all([
+      getSlashCommandToolSkills(getCwd()),
+      loadAllPluginsCacheOnly(),
+    ])
+    headlessProfilerCheckpoint('after_skills_plugins')
+
+    yield buildSystemInitMessage({
+      tools,
+      mcpClients,
+      model: mainLoopModel,
+      permissionMode: initialAppState.toolPermissionContext
+        .mode as PermissionMode, // TODO: avoid the cast
+      commands,
+      agents,
+      skills,
+      plugins: enabledPlugins,
+      fastMode: initialAppState.fastMode,
+    })
+
+    // Record when system message is yielded for headless latency tracking
+    headlessProfilerCheckpoint('system_message_yielded')
+
+    if (!shouldQuery) {
+      // Return the results of local slash commands.
+      // Use messagesFromUserInput (not replayableMessages) for command output
+      // because selectableUserMessagesFilter excludes local-command-stdout tags.
+      for (const msg of messagesFromUserInput) {
+        if (
+          msg.type === 'user' &&
+          typeof msg.message.content === 'string' &&
+          (msg.message.content.includes(`<${LOCAL_COMMAND_STDOUT_TAG}>`) ||
+            msg.message.content.includes(`<${LOCAL_COMMAND_STDERR_TAG}>`) ||
+            msg.isCompactSummary)
+        ) {
+          yield {
+            type: 'user',
+            message: {
+              ...msg.message,
+              content: stripAnsi(msg.message.content),
+            },
+            session_id: getSessionId(),
+            parent_tool_use_id: null,
+            uuid: msg.uuid,
+            timestamp: msg.timestamp,
+            isReplay: !msg.isCompactSummary,
+            isSynthetic: msg.isMeta || msg.isVisibleInTranscriptOnly,
+          } as SDKUserMessageReplay
+        }
+
+        // Local command output — yield as a synthetic assistant message so
+        // RC renders it as assistant-style text rather than a user bubble.
+        // Emitted as assistant (not the dedicated SDKLocalCommandOutputMessage
+        // system subtype) so mobile clients + session-ingress can parse it.
+        if (
+          msg.type === 'system' &&
+          msg.subtype === 'local_command' &&
+          typeof msg.content === 'string' &&
+          (msg.content.includes(`<${LOCAL_COMMAND_STDOUT_TAG}>`) ||
+            msg.content.includes(`<${LOCAL_COMMAND_STDERR_TAG}>`))
+        ) {
+          yield localCommandOutputToSDKAssistantMessage(msg.content, msg.uuid)
+        }
+
+        if (msg.type === 'system' && msg.subtype === 'compact_boundary') {
+          yield {
+            type: 'system',
+            subtype: 'compact_boundary' as const,
+            session_id: getSessionId(),
+            uuid: msg.uuid,
+            compact_metadata: toSDKCompactMetadata(msg.compactMetadata),
+          } as SDKCompactBoundaryMessage
+        }
+      }
+
+      if (persistSession) {
+        await recordTranscript(messages)
+        if (
+          isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
+          isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
+        ) {
+          await flushSessionStorage()
+        }
+      }
+
+      yield {
+        type: 'result',
+        subtype: 'success',
+        is_error: false,
+        duration_ms: Date.now() - startTime,
+        duration_api_ms: getTotalAPIDuration(),
+        num_turns: messages.length - 1,
+        result: resultText ?? '',
+        stop_reason: null,
+        session_id: getSessionId(),
+        total_cost_usd: getTotalCost(),
+        usage: this.totalUsage,
+        modelUsage: getModelUsage(),
+        permission_denials: this.permissionDenials,
+        fast_mode_state: getFastModeState(
+          mainLoopModel,
+          initialAppState.fastMode,
+        ),
+        uuid: randomUUID(),
+      }
+      return
+    }
+
+    if (fileHistoryEnabled() && persistSession) {
+      messagesFromUserInput
+        .filter(messageSelector().selectableUserMessagesFilter)
+        .forEach(message => {
+          void fileHistoryMakeSnapshot(
+            (updater: (prev: FileHistoryState) => FileHistoryState) => {
+              setAppState(prev => ({
+                ...prev,
+                fileHistory: updater(prev.fileHistory),
+              }))
+            },
+            message.uuid,
+          )
+        })
+    }
+
+    // Track current message usage (reset on each message_start)
+    let currentMessageUsage: NonNullableUsage = EMPTY_USAGE
+    let turnCount = 1
+    let hasAcknowledgedInitialMessages = false
+    // Track structured output from StructuredOutput tool calls
+    let structuredOutputFromTool: unknown
+    // Track the last stop_reason from assistant messages
+    let lastStopReason: string | null = null
+    // Reference-based watermark so error_during_execution's errors[] is
+    // turn-scoped. A length-based index breaks when the 100-entry ring buffer
+    // shift()s during the turn — the index slides. If this entry is rotated
+    // out, lastIndexOf returns -1 and we include everything (safe fallback).
+    const errorLogWatermark = getInMemoryErrors().at(-1)
+    // Snapshot count before this query for delta-based retry limiting
+    const initialStructuredOutputCalls = jsonSchema
+      ? countToolCalls(this.mutableMessages, SYNTHETIC_OUTPUT_TOOL_NAME)
+      : 0
+
+    for await (const message of query({
+      messages,
+      systemPrompt,
+      userContext,
+      systemContext,
+      canUseTool: wrappedCanUseTool,
+      toolUseContext: processUserInputContext,
+      fallbackModel,
+      querySource: 'sdk',
+      maxTurns,
+      taskBudget,
+    })) {
+      // Record assistant, user, and compact boundary messages
+      if (
+        message.type === 'assistant' ||
+        message.type === 'user' ||
+        (message.type === 'system' && message.subtype === 'compact_boundary')
+      ) {
+        // Before writing a compact boundary, flush any in-memory-only
+        // messages up through the preservedSegment tail. Attachments and
+        // progress are now recorded inline (their switch cases below), but
+        // this flush still matters for the preservedSegment tail walk.
+        // If the SDK subprocess restarts before then (claude-desktop kills
+        // between turns), tailUuid points to a never-written message →
+        // applyPreservedSegmentRelinks fails its tail→head walk → returns
+        // without pruning → resume loads full pre-compact history.
+        if (
+          persistSession &&
+          message.type === 'system' &&
+          message.subtype === 'compact_boundary'
+        ) {
+          const tailUuid = message.compactMetadata?.preservedSegment?.tailUuid
+          if (tailUuid) {
+            const tailIdx = this.mutableMessages.findLastIndex(
+              m => m.uuid === tailUuid,
+            )
+            if (tailIdx !== -1) {
+              await recordTranscript(this.mutableMessages.slice(0, tailIdx + 1))
+            }
+          }
+        }
+        messages.push(message)
+        if (persistSession) {
+          // Fire-and-forget for assistant messages. claude.ts yields one
+          // assistant message per content block, then mutates the last
+          // one's message.usage/stop_reason on message_delta — relying on
+          // the write queue's 100ms lazy jsonStringify. Awaiting here
+          // blocks ask()'s generator, so message_delta can't run until
+          // every block is consumed; the drain timer (started at block 1)
+          // elapses first. Interactive CC doesn't hit this because
+          // useLogMessages.ts fire-and-forgets. enqueueWrite is
+          // order-preserving so fire-and-forget here is safe.
+          if (message.type === 'assistant') {
+            void recordTranscript(messages)
+          } else {
+            await recordTranscript(messages)
+          }
+        }
+
+        // Acknowledge initial user messages after first transcript recording
+        if (!hasAcknowledgedInitialMessages && messagesToAck.length > 0) {
+          hasAcknowledgedInitialMessages = true
+          for (const msgToAck of messagesToAck) {
+            if (msgToAck.type === 'user') {
+              yield {
+                type: 'user',
+                message: msgToAck.message,
+                session_id: getSessionId(),
+                parent_tool_use_id: null,
+                uuid: msgToAck.uuid,
+                timestamp: msgToAck.timestamp,
+                isReplay: true,
+              } as SDKUserMessageReplay
+            }
+          }
+        }
+      }
+
+      if (message.type === 'user') {
+        turnCount++
+      }
+
+      switch (message.type) {
+        case 'tombstone':
+          // Tombstone messages are control signals for removing messages, skip them
+          break
+        case 'assistant':
+          // Capture stop_reason if already set (synthetic messages). For
+          // streamed responses, this is null at content_block_stop time;
+          // the real value arrives via message_delta (handled below).
+          if (message.message.stop_reason != null) {
+            lastStopReason = message.message.stop_reason
+          }
+          this.mutableMessages.push(message)
+          yield* normalizeMessage(message)
+          break
+        case 'progress':
+          this.mutableMessages.push(message)
+          // Record inline so the dedup loop in the next ask() call sees it
+          // as already-recorded. Without this, deferred progress interleaves
+          // with already-recorded tool_results in mutableMessages, and the
+          // dedup walk freezes startingParentUuid at the wrong message —
+          // forking the chain and orphaning the conversation on resume.
+          if (persistSession) {
+            messages.push(message)
+            void recordTranscript(messages)
+          }
+          yield* normalizeMessage(message)
+          break
+        case 'user':
+          this.mutableMessages.push(message)
+          yield* normalizeMessage(message)
+          break
+        case 'stream_event':
+          if (message.event.type === 'message_start') {
+            // Reset current message usage for new message
+            currentMessageUsage = EMPTY_USAGE
+            currentMessageUsage = updateUsage(
+              currentMessageUsage,
+              message.event.message.usage,
+            )
+          }
+          if (message.event.type === 'message_delta') {
+            currentMessageUsage = updateUsage(
+              currentMessageUsage,
+              message.event.usage,
+            )
+            // Capture stop_reason from message_delta. The assistant message
+            // is yielded at content_block_stop with stop_reason=null; the
+            // real value only arrives here (see claude.ts message_delta
+            // handler). Without this, result.stop_reason is always null.
+            if (message.event.delta.stop_reason != null) {
+              lastStopReason = message.event.delta.stop_reason
+            }
+          }
+          if (message.event.type === 'message_stop') {
+            // Accumulate current message usage into total
+            this.totalUsage = accumulateUsage(
+              this.totalUsage,
+              currentMessageUsage,
+            )
+          }
+
+          if (includePartialMessages) {
+            yield {
+              type: 'stream_event' as const,
+              event: message.event,
+              session_id: getSessionId(),
+              parent_tool_use_id: null,
+              uuid: randomUUID(),
+            }
+          }
+
+          break
+        case 'attachment':
+          this.mutableMessages.push(message)
+          // Record inline (same reason as progress above).
+          if (persistSession) {
+            messages.push(message)
+            void recordTranscript(messages)
+          }
+
+          // Extract structured output from StructuredOutput tool calls
+          if (message.attachment.type === 'structured_output') {
+            structuredOutputFromTool = message.attachment.data
+          }
+          // Handle max turns reached signal from query.ts
+          else if (message.attachment.type === 'max_turns_reached') {
+            if (persistSession) {
+              if (
+                isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
+                isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
+              ) {
+                await flushSessionStorage()
+              }
+            }
+            yield {
+              type: 'result',
+              subtype: 'error_max_turns',
+              duration_ms: Date.now() - startTime,
+              duration_api_ms: getTotalAPIDuration(),
+              is_error: true,
+              num_turns: message.attachment.turnCount,
+              stop_reason: lastStopReason,
+              session_id: getSessionId(),
+              total_cost_usd: getTotalCost(),
+              usage: this.totalUsage,
+              modelUsage: getModelUsage(),
+              permission_denials: this.permissionDenials,
+              fast_mode_state: getFastModeState(
+                mainLoopModel,
+                initialAppState.fastMode,
+              ),
+              uuid: randomUUID(),
+              errors: [
+                `Reached maximum number of turns (${message.attachment.maxTurns})`,
+              ],
+            }
+            return
+          }
+          // Yield queued_command attachments as SDK user message replays
+          else if (
+            replayUserMessages &&
+            message.attachment.type === 'queued_command'
+          ) {
+            yield {
+              type: 'user',
+              message: {
+                role: 'user' as const,
+                content: message.attachment.prompt,
+              },
+              session_id: getSessionId(),
+              parent_tool_use_id: null,
+              uuid: message.attachment.source_uuid || message.uuid,
+              timestamp: message.timestamp,
+              isReplay: true,
+            } as SDKUserMessageReplay
+          }
+          break
+        case 'stream_request_start':
+          // Don't yield stream request start messages
+          break
+        case 'system': {
+          // Snip boundary: replay on our store to remove zombie messages and
+          // stale markers. The yielded boundary is a signal, not data to push —
+          // the replay produces its own equivalent boundary. Without this,
+          // markers persist and re-trigger on every turn, and mutableMessages
+          // never shrinks (memory leak in long SDK sessions). The subtype
+          // check lives inside the injected callback so feature-gated strings
+          // stay out of this file (excluded-strings check).
+          const snipResult = this.config.snipReplay?.(
+            message,
+            this.mutableMessages,
+          )
+          if (snipResult !== undefined) {
+            if (snipResult.executed) {
+              this.mutableMessages.length = 0
+              this.mutableMessages.push(...snipResult.messages)
+            }
+            break
+          }
+          this.mutableMessages.push(message)
+          // Yield compact boundary messages to SDK
+          if (
+            message.subtype === 'compact_boundary' &&
+            message.compactMetadata
+          ) {
+            // Release pre-compaction messages for GC. The boundary was just
+            // pushed so it's the last element. query.ts already uses
+            // getMessagesAfterCompactBoundary() internally, so only
+            // post-boundary messages are needed going forward.
+            const mutableBoundaryIdx = this.mutableMessages.length - 1
+            if (mutableBoundaryIdx > 0) {
+              this.mutableMessages.splice(0, mutableBoundaryIdx)
+            }
+            const localBoundaryIdx = messages.length - 1
+            if (localBoundaryIdx > 0) {
+              messages.splice(0, localBoundaryIdx)
+            }
+
+            yield {
+              type: 'system',
+              subtype: 'compact_boundary' as const,
+              session_id: getSessionId(),
+              uuid: message.uuid,
+              compact_metadata: toSDKCompactMetadata(message.compactMetadata),
+            }
+          }
+          if (message.subtype === 'api_error') {
+            yield {
+              type: 'system',
+              subtype: 'api_retry' as const,
+              attempt: message.retryAttempt,
+              max_retries: message.maxRetries,
+              retry_delay_ms: message.retryInMs,
+              error_status: message.error.status ?? null,
+              error: categorizeRetryableAPIError(message.error),
+              session_id: getSessionId(),
+              uuid: message.uuid,
+            }
+          }
+          // Don't yield other system messages in headless mode
+          break
+        }
+        case 'tool_use_summary':
+          // Yield tool use summary messages to SDK
+          yield {
+            type: 'tool_use_summary' as const,
+            summary: message.summary,
+            preceding_tool_use_ids: message.precedingToolUseIds,
+            session_id: getSessionId(),
+            uuid: message.uuid,
+          }
+          break
+      }
+
+      // Check if USD budget has been exceeded
+      if (maxBudgetUsd !== undefined && getTotalCost() >= maxBudgetUsd) {
+        if (persistSession) {
+          if (
+            isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
+            isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
+          ) {
+            await flushSessionStorage()
+          }
+        }
+        yield {
+          type: 'result',
+          subtype: 'error_max_budget_usd',
+          duration_ms: Date.now() - startTime,
+          duration_api_ms: getTotalAPIDuration(),
+          is_error: true,
+          num_turns: turnCount,
+          stop_reason: lastStopReason,
+          session_id: getSessionId(),
+          total_cost_usd: getTotalCost(),
+          usage: this.totalUsage,
+          modelUsage: getModelUsage(),
+          permission_denials: this.permissionDenials,
+          fast_mode_state: getFastModeState(
+            mainLoopModel,
+            initialAppState.fastMode,
+          ),
+          uuid: randomUUID(),
+          errors: [`Reached maximum budget ($${maxBudgetUsd})`],
+        }
+        return
+      }
+
+      // Check if structured output retry limit exceeded (only on user messages)
+      if (message.type === 'user' && jsonSchema) {
+        const currentCalls = countToolCalls(
+          this.mutableMessages,
+          SYNTHETIC_OUTPUT_TOOL_NAME,
+        )
+        const callsThisQuery = currentCalls - initialStructuredOutputCalls
+        const maxRetries = parseInt(
+          process.env.MAX_STRUCTURED_OUTPUT_RETRIES || '5',
+          10,
+        )
+        if (callsThisQuery >= maxRetries) {
+          if (persistSession) {
+            if (
+              isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
+              isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
+            ) {
+              await flushSessionStorage()
+            }
+          }
+          yield {
+            type: 'result',
+            subtype: 'error_max_structured_output_retries',
+            duration_ms: Date.now() - startTime,
+            duration_api_ms: getTotalAPIDuration(),
+            is_error: true,
+            num_turns: turnCount,
+            stop_reason: lastStopReason,
+            session_id: getSessionId(),
+            total_cost_usd: getTotalCost(),
+            usage: this.totalUsage,
+            modelUsage: getModelUsage(),
+            permission_denials: this.permissionDenials,
+            fast_mode_state: getFastModeState(
+              mainLoopModel,
+              initialAppState.fastMode,
+            ),
+            uuid: randomUUID(),
+            errors: [
+              `Failed to provide valid structured output after ${maxRetries} attempts`,
+            ],
+          }
+          return
+        }
+      }
+    }
+
+    // Stop hooks yield progress/attachment messages AFTER the assistant
+    // response (via yield* handleStopHooks in query.ts). Since #23537 pushes
+    // those to `messages` inline, last(messages) can be a progress/attachment
+    // instead of the assistant — which makes textResult extraction below
+    // return '' and -p mode emit a blank line. Allowlist to assistant|user:
+    // isResultSuccessful handles both (user with all tool_result blocks is a
+    // valid successful terminal state).
+    const result = messages.findLast(
+      m => m.type === 'assistant' || m.type === 'user',
+    )
+    // Capture for the error_during_execution diagnostic — isResultSuccessful
+    // is a type predicate (message is Message), so inside the false branch
+    // `result` narrows to never and these accesses don't typecheck.
+    const edeResultType = result?.type ?? 'undefined'
+    const edeLastContentType =
+      result?.type === 'assistant'
+        ? (last(result.message.content)?.type ?? 'none')
+        : 'n/a'
+
+    // Flush buffered transcript writes before yielding result.
+    // The desktop app kills the CLI process immediately after receiving the
+    // result message, so any unflushed writes would be lost.
+    if (persistSession) {
+      if (
+        isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
+        isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
+      ) {
+        await flushSessionStorage()
+      }
+    }
+
+    if (!isResultSuccessful(result, lastStopReason)) {
+      yield {
+        type: 'result',
+        subtype: 'error_during_execution',
+        duration_ms: Date.now() - startTime,
+        duration_api_ms: getTotalAPIDuration(),
+        is_error: true,
+        num_turns: turnCount,
+        stop_reason: lastStopReason,
+        session_id: getSessionId(),
+        total_cost_usd: getTotalCost(),
+        usage: this.totalUsage,
+        modelUsage: getModelUsage(),
+        permission_denials: this.permissionDenials,
+        fast_mode_state: getFastModeState(
+          mainLoopModel,
+          initialAppState.fastMode,
+        ),
+        uuid: randomUUID(),
+        // Diagnostic prefix: these are what isResultSuccessful() checks — if
+        // the result type isn't assistant-with-text/thinking or user-with-
+        // tool_result, and stop_reason isn't end_turn, that's why this fired.
+        // errors[] is turn-scoped via the watermark; previously it dumped the
+        // entire process's logError buffer (ripgrep timeouts, ENOENT, etc).
+        errors: (() => {
+          const all = getInMemoryErrors()
+          const start = errorLogWatermark
+            ? all.lastIndexOf(errorLogWatermark) + 1
+            : 0
+          return [
+            `[ede_diagnostic] result_type=${edeResultType} last_content_type=${edeLastContentType} stop_reason=${lastStopReason}`,
+            ...all.slice(start).map(_ => _.error),
+          ]
+        })(),
+      }
+      return
+    }
+
+    // Extract the text result based on message type
+    let textResult = ''
+    let isApiError = false
+
+    if (result.type === 'assistant') {
+      const lastContent = last(result.message.content)
+      if (
+        lastContent?.type === 'text' &&
+        !SYNTHETIC_MESSAGES.has(lastContent.text)
+      ) {
+        textResult = lastContent.text
+      }
+      isApiError = Boolean(result.isApiErrorMessage)
+    }
+
+    yield {
+      type: 'result',
+      subtype: 'success',
+      is_error: isApiError,
+      duration_ms: Date.now() - startTime,
+      duration_api_ms: getTotalAPIDuration(),
+      num_turns: turnCount,
+      result: textResult,
+      stop_reason: lastStopReason,
+      session_id: getSessionId(),
+      total_cost_usd: getTotalCost(),
+      usage: this.totalUsage,
+      modelUsage: getModelUsage(),
+      permission_denials: this.permissionDenials,
+      structured_output: structuredOutputFromTool,
+      fast_mode_state: getFastModeState(
+        mainLoopModel,
+        initialAppState.fastMode,
+      ),
+      uuid: randomUUID(),
+    }
+  }
+
  /**
   * Abort the in-flight query by signalling the engine's AbortController.
   * Consumers of the controller's signal (not visible in this chunk) are
   * expected to observe it and wind down.
   */
  interrupt(): void {
    this.abortController.abort()
  }
+
  /**
   * Read-only view of the engine's accumulated messages. Returns the live
   * backing array (typed readonly, not a copy), so callers observe later
   * mutations made by the engine.
   */
  getMessages(): readonly Message[] {
    return this.mutableMessages
  }
+
  /**
   * The engine's file-read cache. Returns the live reference (not a clone);
   * ask() hands this back to the caller via setReadFileCache on completion.
   */
  getReadFileState(): FileStateCache {
    return this.readFileState
  }
+
  /**
   * Current session id. Delegates to the module-level getSessionId()
   * helper — the method name shadows it only as an instance member, so the
   * unqualified call here resolves to the imported function.
   */
  getSessionId(): string {
    return getSessionId()
  }
+
  /**
   * Override the model for subsequent queries by mutating the engine
   * config's userSpecifiedModel in place.
   */
  setModel(model: string): void {
    this.config.userSpecifiedModel = model
  }
+}
+
+/**
+ * Sends a single prompt to the Claude API and returns the response.
+ * Assumes that claude is being used non-interactively -- will not
+ * ask the user for permissions or further input.
+ *
+ * Convenience wrapper around QueryEngine for one-shot usage.
+ */
+export async function* ask({
+  commands,
+  prompt,
+  promptUuid,
+  isMeta,
+  cwd,
+  tools,
+  mcpClients,
+  verbose = false,
+  thinkingConfig,
+  maxTurns,
+  maxBudgetUsd,
+  taskBudget,
+  canUseTool,
+  mutableMessages = [],
+  getReadFileCache,
+  setReadFileCache,
+  customSystemPrompt,
+  appendSystemPrompt,
+  userSpecifiedModel,
+  fallbackModel,
+  jsonSchema,
+  getAppState,
+  setAppState,
+  abortController,
+  replayUserMessages = false,
+  includePartialMessages = false,
+  handleElicitation,
+  agents = [],
+  setSDKStatus,
+  orphanedPermission,
+}: {
+  commands: Command[]
+  prompt: string | Array<ContentBlockParam>
+  promptUuid?: string
+  isMeta?: boolean
+  cwd: string
+  tools: Tools
+  verbose?: boolean
+  mcpClients: MCPServerConnection[]
+  thinkingConfig?: ThinkingConfig
+  maxTurns?: number
+  maxBudgetUsd?: number
+  taskBudget?: { total: number }
+  canUseTool: CanUseToolFn
+  mutableMessages?: Message[]
+  customSystemPrompt?: string
+  appendSystemPrompt?: string
+  userSpecifiedModel?: string
+  fallbackModel?: string
+  jsonSchema?: Record<string, unknown>
+  getAppState: () => AppState
+  setAppState: (f: (prev: AppState) => AppState) => void
+  getReadFileCache: () => FileStateCache
+  setReadFileCache: (cache: FileStateCache) => void
+  abortController?: AbortController
+  replayUserMessages?: boolean
+  includePartialMessages?: boolean
+  handleElicitation?: ToolUseContext['handleElicitation']
+  agents?: AgentDefinition[]
+  setSDKStatus?: (status: SDKStatus) => void
+  orphanedPermission?: OrphanedPermission
+}): AsyncGenerator<SDKMessage, void, unknown> {
+  const engine = new QueryEngine({
+    cwd,
+    tools,
+    commands,
+    mcpClients,
+    agents,
+    canUseTool,
+    getAppState,
+    setAppState,
+    initialMessages: mutableMessages,
+    readFileCache: cloneFileStateCache(getReadFileCache()),
+    customSystemPrompt,
+    appendSystemPrompt,
+    userSpecifiedModel,
+    fallbackModel,
+    thinkingConfig,
+    maxTurns,
+    maxBudgetUsd,
+    taskBudget,
+    jsonSchema,
+    verbose,
+    handleElicitation,
+    replayUserMessages,
+    includePartialMessages,
+    setSDKStatus,
+    abortController,
+    orphanedPermission,
+    ...(feature('HISTORY_SNIP')
+      ? {
+          snipReplay: (yielded: Message, store: Message[]) => {
+            if (!snipProjection!.isSnipBoundaryMessage(yielded))
+              return undefined
+            return snipModule!.snipCompactIfNeeded(store, { force: true })
+          },
+        }
+      : {}),
+  })
+
+  try {
+    yield* engine.submitMessage(prompt, {
+      uuid: promptUuid,
+      isMeta,
+    })
+  } finally {
+    setReadFileCache(engine.getReadFileState())
+  }
+}

+ 125 - 0
src/Task.ts

@@ -0,0 +1,125 @@
+import { randomBytes } from 'crypto'
+import type { AppState } from './state/AppState.js'
+import type { AgentId } from './types/ids.js'
+import { getTaskOutputPath } from './utils/task/diskOutput.js'
+
/**
 * Discriminator for every task flavor tracked by the app. Each member has
 * a single-character id prefix (see TASK_ID_PREFIXES below), so a task's
 * flavor is recoverable from its id — renaming or removing a member is a
 * compatibility-sensitive change.
 */
export type TaskType =
  | 'local_bash'
  | 'local_agent'
  | 'remote_agent'
  | 'in_process_teammate'
  | 'local_workflow'
  | 'monitor_mcp'
  | 'dream'
+
+export type TaskStatus =
+  | 'pending'
+  | 'running'
+  | 'completed'
+  | 'failed'
+  | 'killed'
+
+/**
+ * True when a task is in a terminal state and will not transition further.
+ * Used to guard against injecting messages into dead teammates, evicting
+ * finished tasks from AppState, and orphan-cleanup paths.
+ */
+export function isTerminalTaskStatus(status: TaskStatus): boolean {
+  return status === 'completed' || status === 'failed' || status === 'killed'
+}
+
/**
 * Handle for a spawned task: its generated id plus an optional teardown
 * callback.
 */
export type TaskHandle = {
  taskId: string
  // NOTE(review): the invocation site for cleanup is not in this file —
  // confirm who is responsible for calling it before relying on it.
  cleanup?: () => void
}

/** Functional (updater-style) setter over the shared AppState. */
export type SetAppState = (f: (prev: AppState) => AppState) => void

/**
 * Ambient services handed to a running task: cancellation plus read and
 * write access to the shared AppState.
 */
export type TaskContext = {
  abortController: AbortController
  getAppState: () => AppState
  setAppState: SetAppState
}
+
// Base fields shared by all task states
export type TaskStateBase = {
  id: string // from generateTaskId(); first character encodes the TaskType
  type: TaskType
  status: TaskStatus // starts as 'pending' (see createTaskStateBase)
  description: string
  toolUseId?: string // presumably the tool_use id that spawned the task — confirm at spawn sites
  startTime: number // epoch ms (Date.now() at creation)
  endTime?: number // epoch ms; unset while the task is still live — confirm at transition sites
  totalPausedMs?: number
  outputFile: string // on-disk output path, derived from id via getTaskOutputPath
  outputOffset: number // starts at 0; presumably a read cursor into outputFile — confirm
  notified: boolean // starts false (see createTaskStateBase)
}
+
/** Input for spawning a local shell task. */
export type LocalShellSpawnInput = {
  command: string // shell command line to execute
  description: string // human-readable label for the task
  timeout?: number // units not shown here — presumably ms; confirm against the spawn implementation
  toolUseId?: string
  agentId?: AgentId
  /** UI display variant: description-as-label, dialog title, status bar pill. */
  kind?: 'bash' | 'monitor'
}
+
// What getTaskByType dispatches for: kill. spawn/render were never
// called polymorphically (removed in #22546). All six kill implementations
// use only setAppState — getAppState/abortController were dead weight.
export type Task = {
  name: string
  type: TaskType
  /** Terminate the task with the given id, recording the change via setAppState. */
  kill(taskId: string, setAppState: SetAppState): Promise<void>
}
+
// Task ID prefixes
// One single-character prefix per TaskType; generateTaskId() puts it first
// so a task's flavor is recoverable from its id alone.
// NOTE(review): typed Record<string, string> rather than
// Record<TaskType, string> (or `satisfies`), so the compiler will not
// catch a missing or misspelled key when TaskType grows — the 'x' fallback
// below silently masks that. Consider tightening the type.
const TASK_ID_PREFIXES: Record<string, string> = {
  local_bash: 'b', // Keep as 'b' for backward compatibility
  local_agent: 'a',
  remote_agent: 'r',
  in_process_teammate: 't',
  local_workflow: 'w',
  monitor_mcp: 'm',
  dream: 'd',
}

// Get task ID prefix
// Falls back to 'x' for any type absent from the table above.
function getTaskIdPrefix(type: TaskType): string {
  return TASK_ID_PREFIXES[type] ?? 'x'
}
+
+// Case-insensitive-safe alphabet (digits + lowercase) for task IDs.
+// 36^8 ≈ 2.8 trillion combinations, sufficient to resist brute-force symlink attacks.
+const TASK_ID_ALPHABET = '0123456789abcdefghijklmnopqrstuvwxyz'
+
+export function generateTaskId(type: TaskType): string {
+  const prefix = getTaskIdPrefix(type)
+  const bytes = randomBytes(8)
+  let id = prefix
+  for (let i = 0; i < 8; i++) {
+    id += TASK_ID_ALPHABET[bytes[i]! % TASK_ID_ALPHABET.length]
+  }
+  return id
+}
+
/**
 * Construct the shared base state for a newly created task.
 *
 * Status starts at 'pending'; startTime is captured now; the output file
 * path is derived from the id via getTaskOutputPath, with outputOffset at
 * 0 and the notified flag cleared. endTime/totalPausedMs are left unset.
 */
export function createTaskStateBase(
  id: string,
  type: TaskType,
  description: string,
  toolUseId?: string,
): TaskStateBase {
  return {
    id,
    type,
    status: 'pending',
    description,
    toolUseId,
    startTime: Date.now(),
    outputFile: getTaskOutputPath(id),
    outputOffset: 0,
    notified: false,
  }
}

+ 792 - 0
src/Tool.ts

@@ -0,0 +1,792 @@
+import type {
+  ToolResultBlockParam,
+  ToolUseBlockParam,
+} from '@anthropic-ai/sdk/resources/index.mjs'
+import type {
+  ElicitRequestURLParams,
+  ElicitResult,
+} from '@modelcontextprotocol/sdk/types.js'
+import type { UUID } from 'crypto'
+import type { z } from 'zod/v4'
+import type { Command } from './commands.js'
+import type { CanUseToolFn } from './hooks/useCanUseTool.js'
+import type { ThinkingConfig } from './utils/thinking.js'
+
/**
 * JSON Schema shape for a tool's input: always an object schema
 * (`type: 'object'`), with arbitrary additional JSON Schema keywords allowed.
 */
export type ToolInputJSONSchema = {
  [x: string]: unknown
  type: 'object'
  properties?: {
    [x: string]: unknown
  }
}
+
+import type { Notification } from './context/notifications.js'
+import type {
+  MCPServerConnection,
+  ServerResource,
+} from './services/mcp/types.js'
+import type {
+  AgentDefinition,
+  AgentDefinitionsResult,
+} from './tools/AgentTool/loadAgentsDir.js'
+import type {
+  AssistantMessage,
+  AttachmentMessage,
+  Message,
+  ProgressMessage,
+  SystemLocalCommandMessage,
+  SystemMessage,
+  UserMessage,
+} from './types/message.js'
+// Import permission types from centralized location to break import cycles
+// Import PermissionResult from centralized location to break import cycles
+import type {
+  AdditionalWorkingDirectory,
+  PermissionMode,
+  PermissionResult,
+} from './types/permissions.js'
+// Import tool progress types from centralized location to break import cycles
+import type {
+  AgentToolProgress,
+  BashProgress,
+  MCPProgress,
+  REPLToolProgress,
+  SkillToolProgress,
+  TaskOutputProgress,
+  ToolProgressData,
+  WebSearchProgress,
+} from './types/tools.js'
+import type { FileStateCache } from './utils/fileStateCache.js'
+import type { DenialTrackingState } from './utils/permissions/denialTracking.js'
+import type { SystemPrompt } from './utils/systemPromptType.js'
+import type { ContentReplacementState } from './utils/toolResultStorage.js'
+
+// Re-export progress types for backwards compatibility
+export type {
+  AgentToolProgress,
+  BashProgress,
+  MCPProgress,
+  REPLToolProgress,
+  SkillToolProgress,
+  TaskOutputProgress,
+  WebSearchProgress,
+}
+
+import type { SpinnerMode } from './components/Spinner.js'
+import type { QuerySource } from './constants/querySource.js'
+import type { SDKStatus } from './entrypoints/agentSdkTypes.js'
+import type { AppState } from './state/AppState.js'
+import type {
+  HookProgress,
+  PromptRequest,
+  PromptResponse,
+} from './types/hooks.js'
+import type { AgentId } from './types/ids.js'
+import type { DeepImmutable } from './types/utils.js'
+import type { AttributionState } from './utils/commitAttribution.js'
+import type { FileHistoryState } from './utils/fileHistory.js'
+import type { Theme, ThemeName } from './utils/theme.js'
+
/** Identity and nesting info for a chain of related queries. */
export type QueryChainTracking = {
  /** ID shared by every query in the same chain. */
  chainId: string
  /** Nesting depth within the chain (base value not visible here — confirm at the producer). */
  depth: number
}
+
/**
 * Outcome of Tool.validateInput: either the input passes, or it fails with a
 * message (surfaced to the model — see validateInput's contract) and a code.
 */
export type ValidationResult =
  | { result: true }
  | {
      result: false
      /** Explanation of why validation failed. */
      message: string
      /** Numeric error code (semantics defined by each tool — confirm per tool). */
      errorCode: number
    }
+
/**
 * Sets (or clears, when called with null) tool-owned JSX rendered while a
 * tool runs — e.g. dialogs a tool needs to show.
 */
export type SetToolJSXFn = (
  args: {
    jsx: React.ReactNode | null
    /** Hide the prompt input while this JSX is visible. */
    shouldHidePromptInput: boolean
    shouldContinueAnimation?: true
    showSpinner?: boolean
    isLocalJSXCommand?: boolean
    isImmediate?: boolean
    /** Set to true to clear a local JSX command (e.g., from its onDone callback) */
    clearLocalJSX?: boolean
  } | null,
) => void
+
+// Import tool permission types from centralized location to break import cycles
+import type { ToolPermissionRulesBySource } from './types/permissions.js'
+
+// Re-export for backwards compatibility
+export type { ToolPermissionRulesBySource }
+
// Apply DeepImmutable to the imported type
/**
 * Immutable snapshot of the session's permission configuration: active mode,
 * extra working directories, and allow/deny/ask rules (grouped by settings
 * source, per ToolPermissionRulesBySource).
 */
export type ToolPermissionContext = DeepImmutable<{
  mode: PermissionMode
  /** Directories beyond the primary cwd that the session may operate in. */
  additionalWorkingDirectories: Map<string, AdditionalWorkingDirectory>
  alwaysAllowRules: ToolPermissionRulesBySource
  alwaysDenyRules: ToolPermissionRulesBySource
  alwaysAskRules: ToolPermissionRulesBySource
  isBypassPermissionsModeAvailable: boolean
  isAutoModeAvailable?: boolean
  /** Rules that were stripped as dangerous (assumed — confirm where these are populated). */
  strippedDangerousRules?: ToolPermissionRulesBySource
  /** When true, permission prompts are auto-denied (e.g., background agents that can't show UI) */
  shouldAvoidPermissionPrompts?: boolean
  /** When true, automated checks (classifier, hooks) are awaited before showing the permission dialog (coordinator workers) */
  awaitAutomatedChecksBeforeDialog?: boolean
  /** Stores the permission mode before model-initiated plan mode entry, so it can be restored on exit */
  prePlanMode?: PermissionMode
}>
+
+export const getEmptyToolPermissionContext: () => ToolPermissionContext =
+  () => ({
+    mode: 'default',
+    additionalWorkingDirectories: new Map(),
+    alwaysAllowRules: {},
+    alwaysDenyRules: {},
+    alwaysAskRules: {},
+    isBypassPermissionsModeAvailable: false,
+  })
+
/** Progress events emitted while compacting conversation history (consumed via ToolUseContext.onCompactProgress). */
export type CompactProgressEvent =
  | {
      type: 'hooks_start'
      hookType: 'pre_compact' | 'post_compact' | 'session_start'
    }
  | { type: 'compact_start' }
  | { type: 'compact_end' }
+
/**
 * Everything a tool invocation can see and touch: static per-query options,
 * app-state accessors, and optional host/UI integration callbacks. Threaded
 * into every Tool.call.
 */
export type ToolUseContext = {
  options: {
    commands: Command[]
    debug: boolean
    mainLoopModel: string
    tools: Tools
    verbose: boolean
    thinkingConfig: ThinkingConfig
    mcpClients: MCPServerConnection[]
    mcpResources: Record<string, ServerResource[]>
    isNonInteractiveSession: boolean
    agentDefinitions: AgentDefinitionsResult
    maxBudgetUsd?: number
    /** Custom system prompt that replaces the default system prompt */
    customSystemPrompt?: string
    /** Additional system prompt appended after the main system prompt */
    appendSystemPrompt?: string
    /** Override querySource for analytics tracking */
    querySource?: QuerySource
    /** Optional callback to get the latest tools (e.g., after MCP servers connect mid-query) */
    refreshTools?: () => Tools
  }
  /** Abort controller for the in-flight query (assumed — confirm scope at call sites). */
  abortController: AbortController
  readFileState: FileStateCache
  getAppState(): AppState
  /** Update app state. Note: a no-op for async agents — see setAppStateForTasks below. */
  setAppState(f: (prev: AppState) => AppState): void
  /**
   * Always-shared setAppState for session-scoped infrastructure (background
   * tasks, session hooks). Unlike setAppState, which is no-op for async agents
   * (see createSubagentContext), this always reaches the root store so agents
   * at any nesting depth can register/clean up infrastructure that outlives
   * a single turn. Only set by createSubagentContext; main-thread contexts
   * fall back to setAppState.
   */
  setAppStateForTasks?: (f: (prev: AppState) => AppState) => void
  /**
   * Optional handler for URL elicitations triggered by tool call errors (-32042).
   * In print/SDK mode, this delegates to structuredIO.handleElicitation.
   * In REPL mode, this is undefined and the queue-based UI path is used.
   */
  handleElicitation?: (
    serverName: string,
    params: ElicitRequestURLParams,
    signal: AbortSignal,
  ) => Promise<ElicitResult>
  setToolJSX?: SetToolJSXFn
  addNotification?: (notif: Notification) => void
  /** Append a UI-only system message to the REPL message list. Stripped at the
   *  normalizeMessagesForAPI boundary — the Exclude<> makes that type-enforced. */
  appendSystemMessage?: (
    msg: Exclude<SystemMessage, SystemLocalCommandMessage>,
  ) => void
  /** Send an OS-level notification (iTerm2, Kitty, Ghostty, bell, etc.) */
  sendOSNotification?: (opts: {
    message: string
    notificationType: string
  }) => void
  nestedMemoryAttachmentTriggers?: Set<string>
  /**
   * CLAUDE.md paths already injected as nested_memory attachments this
   * session. Dedup for memoryFilesToAttachments — readFileState is an LRU
   * that evicts entries in busy sessions, so its .has() check alone can
   * re-inject the same CLAUDE.md dozens of times.
   */
  loadedNestedMemoryPaths?: Set<string>
  dynamicSkillDirTriggers?: Set<string>
  /** Skill names surfaced via skill_discovery this session. Telemetry only (feeds was_discovered). */
  discoveredSkillNames?: Set<string>
  userModified?: boolean
  setInProgressToolUseIDs: (f: (prev: Set<string>) => Set<string>) => void
  /** Only wired in interactive (REPL) contexts; SDK/QueryEngine don't set this. */
  setHasInterruptibleToolInProgress?: (v: boolean) => void
  setResponseLength: (f: (prev: number) => number) => void
  /** Ant-only: push a new API metrics entry for OTPS tracking.
   *  Called by subagent streaming when a new API request starts. */
  pushApiMetricsEntry?: (ttftMs: number) => void
  setStreamMode?: (mode: SpinnerMode) => void
  onCompactProgress?: (event: CompactProgressEvent) => void
  setSDKStatus?: (status: SDKStatus) => void
  openMessageSelector?: () => void
  updateFileHistoryState: (
    updater: (prev: FileHistoryState) => FileHistoryState,
  ) => void
  updateAttributionState: (
    updater: (prev: AttributionState) => AttributionState,
  ) => void
  setConversationId?: (id: UUID) => void
  agentId?: AgentId // Only set for subagents; use getSessionId() for session ID. Hooks use this to distinguish subagent calls.
  agentType?: string // Subagent type name. For the main thread's --agent type, hooks fall back to getMainThreadAgentType().
  /** When true, canUseTool must always be called even when hooks auto-approve.
   *  Used by speculation for overlay file path rewriting. */
  requireCanUseTool?: boolean
  /** Conversation messages visible to this tool call (snapshot vs live view — confirm in query loop). */
  messages: Message[]
  fileReadingLimits?: {
    maxTokens?: number
    maxSizeBytes?: number
  }
  globLimits?: {
    maxResults?: number
  }
  toolDecisions?: Map<
    string,
    {
      source: string
      decision: 'accept' | 'reject'
      timestamp: number
    }
  >
  queryTracking?: QueryChainTracking
  /** Callback factory for requesting interactive prompts from the user.
   * Returns a prompt callback bound to the given source name.
   * Only available in interactive (REPL) contexts. */
  requestPrompt?: (
    sourceName: string,
    toolInputSummary?: string | null,
  ) => (request: PromptRequest) => Promise<PromptResponse>
  /** ID of the current tool_use block, when known. */
  toolUseId?: string
  criticalSystemReminder_EXPERIMENTAL?: string
  /** When true, preserve toolUseResult on messages even for subagents.
   * Used by in-process teammates whose transcripts are viewable by the user. */
  preserveToolUseResults?: boolean
  /** Local denial tracking state for async subagents whose setAppState is a
   *  no-op. Without this, the denial counter never accumulates and the
   *  fallback-to-prompting threshold is never reached. Mutable — the
   *  permissions code updates it in place. */
  localDenialTracking?: DenialTrackingState
  /**
   * Per-conversation-thread content replacement state for the tool result
   * budget. When present, query.ts applies the aggregate tool result budget.
   * Main thread: REPL provisions once (never resets — stale UUID keys
   * are inert). Subagents: createSubagentContext clones the parent's state
   * by default (cache-sharing forks need identical decisions), or
   * resumeAgentBackground threads one reconstructed from sidechain records.
   */
  contentReplacementState?: ContentReplacementState
  /**
   * Parent's rendered system prompt bytes, frozen at turn start.
   * Used by fork subagents to share the parent's prompt cache — re-calling
   * getSystemPrompt() at fork-spawn time can diverge (GrowthBook cold→warm)
   * and bust the cache. See forkSubagent.ts.
   */
  renderedSystemPrompt?: SystemPrompt
}
+
+// Re-export ToolProgressData from centralized location
+export type { ToolProgressData }
+
/** Any progress payload: tool-emitted progress or hook progress. */
export type Progress = ToolProgressData | HookProgress

/** A progress payload tagged with the tool_use ID it belongs to. */
export type ToolProgress<P extends ToolProgressData> = {
  toolUseID: string
  data: P
}
+
+export function filterToolProgressMessages(
+  progressMessagesForMessage: ProgressMessage[],
+): ProgressMessage<ToolProgressData>[] {
+  return progressMessagesForMessage.filter(
+    (msg): msg is ProgressMessage<ToolProgressData> =>
+      msg.data?.type !== 'hook_progress',
+  )
+}
+
/** Result of a successful Tool.call. */
export type ToolResult<T> = {
  /** Tool-specific output, later serialized via mapToolResultToToolResultBlockParam. */
  data: T
  /** Additional messages the tool produced (presumably appended to the conversation — confirm in the query loop). */
  newMessages?: (
    | UserMessage
    | AssistantMessage
    | AttachmentMessage
    | SystemMessage
  )[]
  // contextModifier is only honored for tools that aren't concurrency safe.
  contextModifier?: (context: ToolUseContext) => ToolUseContext
  /** MCP protocol metadata (structuredContent, _meta) to pass through to SDK consumers */
  mcpMeta?: {
    _meta?: Record<string, unknown>
    structuredContent?: Record<string, unknown>
  }
}
+
/** Callback a tool invokes to report incremental progress during call(). */
export type ToolCallProgress<P extends ToolProgressData = ToolProgressData> = (
  progress: ToolProgress<P>,
) => void

// Type for any schema that outputs an object with string keys
export type AnyObject = z.ZodType<{ [key: string]: unknown }>
+
+/**
+ * Checks if a tool matches the given name (primary name or alias).
+ */
+export function toolMatchesName(
+  tool: { name: string; aliases?: string[] },
+  name: string,
+): boolean {
+  return tool.name === name || (tool.aliases?.includes(name) ?? false)
+}
+
+/**
+ * Finds a tool by name or alias from a list of tools.
+ */
+export function findToolByName(tools: Tools, name: string): Tool | undefined {
+  return tools.find(t => toolMatchesName(t, name))
+}
+
/**
 * A tool the model can invoke. Bundles the executable behavior (call),
 * schema and permission hooks, and the UI renderers for each stage of the
 * tool's lifecycle (use → progress → result / rejection / error).
 */
export type Tool<
  Input extends AnyObject = AnyObject,
  Output = unknown,
  P extends ToolProgressData = ToolProgressData,
> = {
  /**
   * Optional aliases for backwards compatibility when a tool is renamed.
   * The tool can be looked up by any of these names in addition to its primary name.
   */
  aliases?: string[]
  /**
   * One-line capability phrase used by ToolSearch for keyword matching.
   * Helps the model find this tool via keyword search when it's deferred.
   * 3–10 words, no trailing period.
   * Prefer terms not already in the tool name (e.g. 'jupyter' for NotebookEdit).
   */
  searchHint?: string
  /**
   * Execute the tool. Progress is reported through onProgress; the result is
   * later serialized for the API via mapToolResultToToolResultBlockParam.
   */
  call(
    args: z.infer<Input>,
    context: ToolUseContext,
    canUseTool: CanUseToolFn,
    parentMessage: AssistantMessage,
    onProgress?: ToolCallProgress<P>,
  ): Promise<ToolResult<Output>>
  /** Description of this tool use for the given input (presumably model/UI-facing — confirm at call sites). */
  description(
    input: z.infer<Input>,
    options: {
      isNonInteractiveSession: boolean
      toolPermissionContext: ToolPermissionContext
      tools: Tools
    },
  ): Promise<string>
  /** Zod schema the tool's input is validated against. */
  readonly inputSchema: Input
  // Type for MCP tools that can specify their input schema directly in JSON Schema format
  // rather than converting from Zod schema
  readonly inputJSONSchema?: ToolInputJSONSchema
  // Optional because TungstenTool doesn't define this. TODO: Make it required.
  // When we do that, we can also go through and make this a bit more type-safe.
  outputSchema?: z.ZodType<unknown>
  /** Optional equality over two inputs (assumed used to dedupe equivalent calls — confirm at call sites). */
  inputsEquivalent?(a: z.infer<Input>, b: z.infer<Input>): boolean
  /** True when a call with this input may run in parallel with other tool calls. */
  isConcurrencySafe(input: z.infer<Input>): boolean
  /** Whether the tool is available at all in this session/build. */
  isEnabled(): boolean
  /** True when the call with this input performs no writes or side effects. */
  isReadOnly(input: z.infer<Input>): boolean
  /** Defaults to false. Only set when the tool performs irreversible operations (delete, overwrite, send). */
  isDestructive?(input: z.infer<Input>): boolean
  /**
   * What should happen when the user submits a new message while this tool
   * is running.
   *
   * - `'cancel'` — stop the tool and discard its result
   * - `'block'`  — keep running; the new message waits
   *
   * Defaults to `'block'` when not implemented.
   */
  interruptBehavior?(): 'cancel' | 'block'
  /**
   * Returns information about whether this tool use is a search or read operation
   * that should be collapsed into a condensed display in the UI. Examples include
   * file searching (Grep, Glob), file reading (Read), and bash commands like find,
   * grep, wc, etc.
   *
   * Returns an object indicating whether the operation is a search or read operation:
   * - `isSearch: true` for search operations (grep, find, glob patterns)
   * - `isRead: true` for read operations (cat, head, tail, file read)
   * - `isList: true` for directory-listing operations (ls, tree, du)
   * - All can be false if the operation shouldn't be collapsed
   */
  isSearchOrReadCommand?(input: z.infer<Input>): {
    isSearch: boolean
    isRead: boolean
    isList?: boolean
  }
  /** Presumably: whether this input reaches beyond the local workspace (network etc.) — confirm at call sites. */
  isOpenWorld?(input: z.infer<Input>): boolean
  /** Presumably: tool cannot complete without user interaction — confirm at call sites. */
  requiresUserInteraction?(): boolean
  /** True for tools bridged from MCP servers (see mcpInfo). */
  isMcp?: boolean
  /** True for LSP-backed tools (assumed from the name — confirm). */
  isLsp?: boolean
  /**
   * When true, this tool is deferred (sent with defer_loading: true) and requires
   * ToolSearch to be used before it can be called.
   */
  readonly shouldDefer?: boolean
  /**
   * When true, this tool is never deferred — its full schema appears in the
   * initial prompt even when ToolSearch is enabled. For MCP tools, set via
   * `_meta['anthropic/alwaysLoad']`. Use for tools the model must see on
   * turn 1 without a ToolSearch round-trip.
   */
  readonly alwaysLoad?: boolean
  /**
   * For MCP tools: the server and tool names as received from the MCP server (unnormalized).
   * Present on all MCP tools regardless of whether `name` is prefixed (mcp__server__tool)
   * or unprefixed (CLAUDE_AGENT_SDK_MCP_NO_PREFIX mode).
   */
  mcpInfo?: { serverName: string; toolName: string }
  /** Primary tool name; see `aliases` for alternates. */
  readonly name: string
  /**
   * Maximum size in characters for tool result before it gets persisted to disk.
   * When exceeded, the result is saved to a file and Claude receives a preview
   * with the file path instead of the full content.
   *
   * Set to Infinity for tools whose output must never be persisted (e.g. Read,
   * where persisting creates a circular Read→file→Read loop and the tool
   * already self-bounds via its own limits).
   */
  maxResultSizeChars: number
  /**
   * When true, enables strict mode for this tool, which causes the API to
   * more strictly adhere to tool instructions and parameter schemas.
   * Only applied when the tengu_tool_pear is enabled.
   */
  readonly strict?: boolean

  /**
   * Called on copies of tool_use input before observers see it (SDK stream,
   * transcript, canUseTool, PreToolUse/PostToolUse hooks). Mutate in place
   * to add legacy/derived fields. Must be idempotent. The original API-bound
   * input is never mutated (preserves prompt cache). Not re-applied when a
   * hook/permission returns a fresh updatedInput — those own their shape.
   */
  backfillObservableInput?(input: Record<string, unknown>): void

  /**
   * Determines if this tool is allowed to run with this input in the current context.
   * It informs the model of why the tool use failed, and does not directly display any UI.
   * @param input
   * @param context
   */
  validateInput?(
    input: z.infer<Input>,
    context: ToolUseContext,
  ): Promise<ValidationResult>

  /**
   * Determines if the user is asked for permission. Only called after validateInput() passes.
   * General permission logic is in permissions.ts. This method contains tool-specific logic.
   * @param input
   * @param context
   */
  checkPermissions(
    input: z.infer<Input>,
    context: ToolUseContext,
  ): Promise<PermissionResult>

  // Optional method for tools that operate on a file path
  getPath?(input: z.infer<Input>): string

  /**
   * Prepare a matcher for hook `if` conditions (permission-rule patterns like
   * "git *" from "Bash(git *)"). Called once per hook-input pair; any
   * expensive parsing happens here. Returns a closure that is called per
   * hook pattern. If not implemented, only tool-name-level matching works.
   */
  preparePermissionMatcher?(
    input: z.infer<Input>,
  ): Promise<(pattern: string) => boolean>

  /** Returns the tool's prompt text (presumably system-prompt instructions for the model — confirm). */
  prompt(options: {
    getToolPermissionContext: () => Promise<ToolPermissionContext>
    tools: Tools
    agents: AgentDefinition[]
    allowedAgentTypes?: string[]
  }): Promise<string>
  /** Display name shown in the UI for the given (possibly partial) input. */
  userFacingName(input: Partial<z.infer<Input>> | undefined): string
  /** Optional theme color key for the user-facing name's background. */
  userFacingNameBackgroundColor?(
    input: Partial<z.infer<Input>> | undefined,
  ): keyof Theme | undefined
  /**
   * Transparent wrappers (e.g. REPL) delegate all rendering to their progress
   * handler, which emits native-looking blocks for each inner tool call.
   * The wrapper itself shows nothing.
   */
  isTransparentWrapper?(): boolean
  /**
   * Returns a short string summary of this tool use for display in compact views.
   * @param input The tool input
   * @returns A short string summary, or null to not display
   */
  getToolUseSummary?(input: Partial<z.infer<Input>> | undefined): string | null
  /**
   * Returns a human-readable present-tense activity description for spinner display.
   * Example: "Reading src/foo.ts", "Running bun test", "Searching for pattern"
   * @param input The tool input
   * @returns Activity description string, or null to fall back to tool name
   */
  getActivityDescription?(
    input: Partial<z.infer<Input>> | undefined,
  ): string | null
  /**
   * Returns a compact representation of this tool use for the auto-mode
   * security classifier. Examples: `ls -la` for Bash, `/tmp/x: new content`
   * for Edit. Return '' to skip this tool in the classifier transcript
   * (e.g. tools with no security relevance). May return an object to avoid
   * double-encoding when the caller JSON-wraps the value.
   */
  toAutoClassifierInput(input: z.infer<Input>): unknown
  /** Serialize the tool output into the API-facing tool_result block. */
  mapToolResultToToolResultBlockParam(
    content: Output,
    toolUseID: string,
  ): ToolResultBlockParam
  /**
   * Optional. When omitted, the tool result renders nothing (same as returning
   * null). Omit for tools whose results are surfaced elsewhere (e.g., TodoWrite
   * updates the todo panel, not the transcript).
   */
  renderToolResultMessage?(
    content: Output,
    progressMessagesForMessage: ProgressMessage<P>[],
    options: {
      style?: 'condensed'
      theme: ThemeName
      tools: Tools
      verbose: boolean
      isTranscriptMode?: boolean
      isBriefOnly?: boolean
      /** Original tool_use input, when available. Useful for compact result
       * summaries that reference what was requested (e.g. "Sent to #foo"). */
      input?: unknown
    },
  ): React.ReactNode
  /**
   * Flattened text of what renderToolResultMessage shows IN TRANSCRIPT
   * MODE (verbose=true, isTranscriptMode=true). For transcript search
   * indexing: the index counts occurrences in this string, the highlight
   * overlay scans the actual screen buffer. For count ≡ highlight, this
   * must return the text that ends up visible — not the model-facing
   * serialization from mapToolResultToToolResultBlockParam (which adds
   * system-reminders, persisted-output wrappers).
   *
   * Chrome can be skipped (under-count is fine). "Found 3 files in 12ms"
   * isn't worth indexing. Phantoms are not fine — text that's claimed
   * here but doesn't render is a count≠highlight bug.
   *
   * Optional: omitted → field-name heuristic in transcriptSearch.ts.
   * Drift caught by test/utils/transcriptSearch.renderFidelity.test.tsx
   * which renders sample outputs and flags text that's indexed-but-not-
   * rendered (phantom) or rendered-but-not-indexed (under-count warning).
   */
  extractSearchText?(out: Output): string
  /**
   * Render the tool use message. Note that `input` is partial because we render
   * the message as soon as possible, possibly before tool parameters have fully
   * streamed in.
   */
  renderToolUseMessage(
    input: Partial<z.infer<Input>>,
    options: { theme: ThemeName; verbose: boolean; commands?: Command[] },
  ): React.ReactNode
  /**
   * Returns true when the non-verbose rendering of this output is truncated
   * (i.e., clicking to expand would reveal more content). Gates
   * click-to-expand in fullscreen — only messages where verbose actually
   * shows more get a hover/click affordance. Unset means never truncated.
   */
  isResultTruncated?(output: Output): boolean
  /**
   * Renders an optional tag to display after the tool use message.
   * Used for additional metadata like timeout, model, resume ID, etc.
   * Returns null to not display anything.
   */
  renderToolUseTag?(input: Partial<z.infer<Input>>): React.ReactNode
  /**
   * Optional. When omitted, no progress UI is shown while the tool runs.
   */
  renderToolUseProgressMessage?(
    progressMessagesForMessage: ProgressMessage<P>[],
    options: {
      tools: Tools
      verbose: boolean
      terminalSize?: { columns: number; rows: number }
      inProgressToolCallCount?: number
      isTranscriptMode?: boolean
    },
  ): React.ReactNode
  /** Optional UI shown while this tool use waits in the queue (assumed from the name — confirm). */
  renderToolUseQueuedMessage?(): React.ReactNode
  /**
   * Optional. When omitted, falls back to <FallbackToolUseRejectedMessage />.
   * Only define this for tools that need custom rejection UI (e.g., file edits
   * that show the rejected diff).
   */
  renderToolUseRejectedMessage?(
    input: z.infer<Input>,
    options: {
      columns: number
      messages: Message[]
      style?: 'condensed'
      theme: ThemeName
      tools: Tools
      verbose: boolean
      progressMessagesForMessage: ProgressMessage<P>[]
      isTranscriptMode?: boolean
    },
  ): React.ReactNode
  /**
   * Optional. When omitted, falls back to <FallbackToolUseErrorMessage />.
   * Only define this for tools that need custom error UI (e.g., search tools
   * that show "File not found" instead of the raw error).
   */
  renderToolUseErrorMessage?(
    result: ToolResultBlockParam['content'],
    options: {
      progressMessagesForMessage: ProgressMessage<P>[]
      tools: Tools
      verbose: boolean
      isTranscriptMode?: boolean
    },
  ): React.ReactNode

  /**
   * Renders multiple parallel uses of this tool as one group (non-verbose
   * mode only; in verbose mode individual tool uses render at their original
   * positions).
   * @returns React node to render, or null to fall back to individual rendering
   */
  renderGroupedToolUse?(
    toolUses: Array<{
      param: ToolUseBlockParam
      isResolved: boolean
      isError: boolean
      isInProgress: boolean
      progressMessages: ProgressMessage<P>[]
      result?: {
        param: ToolResultBlockParam
        output: unknown
      }
    }>,
    options: {
      shouldAnimate: boolean
      tools: Tools
    },
  ): React.ReactNode | null
}
+
/**
 * A collection of tools. Use this type instead of `Tool[]` to make it easier
 * to track where tool sets are assembled, passed, and filtered across the codebase.
 * Readonly so assembled sets can't be mutated in place.
 */
export type Tools = readonly Tool[]
+
/**
 * Methods that `buildTool` supplies a default for. A `ToolDef` may omit these;
 * the resulting `Tool` always has them. Keep this list in sync with the keys
 * of TOOL_DEFAULTS below.
 */
type DefaultableToolKeys =
  | 'isEnabled'
  | 'isConcurrencySafe'
  | 'isReadOnly'
  | 'isDestructive'
  | 'checkPermissions'
  | 'toAutoClassifierInput'
  | 'userFacingName'
+
/**
 * Tool definition accepted by `buildTool`. Same shape as `Tool` but with the
 * defaultable methods optional — `buildTool` fills them in so callers always
 * see a complete `Tool`.
 */
export type ToolDef<
  Input extends AnyObject = AnyObject,
  Output = unknown,
  P extends ToolProgressData = ToolProgressData,
> = Omit<Tool<Input, Output, P>, DefaultableToolKeys> &
  Partial<Pick<Tool<Input, Output, P>, DefaultableToolKeys>>
+
/**
 * Type-level spread mirroring `{ ...TOOL_DEFAULTS, ...def }`. For each
 * defaultable key: if D provides it (required), D's type wins; if D omits
 * it or has it optional (inherited from Partial<> in the constraint), the
 * default fills in. All other keys come from D verbatim — preserving arity,
 * optional presence, and literal types exactly as `satisfies Tool` did.
 */
type BuiltTool<D> = Omit<D, DefaultableToolKeys> & {
  // `-?` strips optionality: every defaultable key is required on the result.
  [K in DefaultableToolKeys]-?: K extends keyof D
    ? undefined extends D[K]
      ? ToolDefaults[K]
      : D[K]
    : ToolDefaults[K]
}
+
+/**
+ * Build a complete `Tool` from a partial definition, filling in safe defaults
+ * for the commonly-stubbed methods. All tool exports should go through this so
+ * that defaults live in one place and callers never need `?.() ?? default`.
+ *
+ * Defaults (fail-closed where it matters):
+ * - `isEnabled` → `true`
+ * - `isConcurrencySafe` → `false` (assume not safe)
+ * - `isReadOnly` → `false` (assume writes)
+ * - `isDestructive` → `false`
+ * - `checkPermissions` → `{ behavior: 'allow', updatedInput }` (defer to general permission system)
+ * - `toAutoClassifierInput` → `''` (skip classifier — security-relevant tools must override)
+ * - `userFacingName` → `name`
+ */
+const TOOL_DEFAULTS = {
+  isEnabled: () => true,
+  isConcurrencySafe: (_input?: unknown) => false,
+  isReadOnly: (_input?: unknown) => false,
+  isDestructive: (_input?: unknown) => false,
+  checkPermissions: (
+    input: { [key: string]: unknown },
+    _ctx?: ToolUseContext,
+  ): Promise<PermissionResult> =>
+    Promise.resolve({ behavior: 'allow', updatedInput: input }),
+  toAutoClassifierInput: (_input?: unknown) => '',
+  userFacingName: (_input?: unknown) => '',
+}
+
+// The defaults type is the ACTUAL shape of TOOL_DEFAULTS (optional params so
+// both 0-arg and full-arg call sites type-check — stubs varied in arity and
+// tests relied on that), not the interface's strict signatures.
+type ToolDefaults = typeof TOOL_DEFAULTS
+
+// D infers the concrete object-literal type from the call site. The
+// constraint provides contextual typing for method parameters; `any` in
+// constraint position is structural and never leaks into the return type.
+// BuiltTool<D> mirrors runtime `{...TOOL_DEFAULTS, ...def}` at the type level.
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+type AnyToolDef = ToolDef<any, any, any>
+
+export function buildTool<D extends AnyToolDef>(def: D): BuiltTool<D> {
+  // The runtime spread is straightforward; the `as` bridges the gap between
+  // the structural-any constraint and the precise BuiltTool<D> return. The
+  // type semantics are proven by the 0-error typecheck across all 60+ tools.
+  return {
+    ...TOOL_DEFAULTS,
+    userFacingName: () => def.name,
+    ...def,
+  } as BuiltTool<D>
+}

+ 87 - 0
src/assistant/sessionHistory.ts

@@ -0,0 +1,87 @@
+import axios from 'axios'
+import { getOauthConfig } from '../constants/oauth.js'
+import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
+import { logForDebugging } from '../utils/debug.js'
+import { getOAuthHeaders, prepareApiRequest } from '../utils/teleport/api.js'
+
/** Default number of events fetched per page from the session events API. */
export const HISTORY_PAGE_SIZE = 100
+
/** One page of session history, normalized from the wire envelope. */
export type HistoryPage = {
  /** Chronological order within the page. */
  events: SDKMessage[]
  /** Oldest event ID in this page → before_id cursor for next-older page. */
  firstId: string | null
  /** true = older events exist. */
  hasMore: boolean
}
+
/** Wire shape of the session events endpoint (snake_case list envelope). */
type SessionEventsResponse = {
  data: SDKMessage[]
  has_more: boolean
  /** ID of the oldest event in `data` (maps to HistoryPage.firstId), or null. */
  first_id: string | null
  /** ID of the newest event in `data` (unused here), or null. */
  last_id: string | null
}
+
/** Reusable auth material for history requests; built once, shared across pages. */
export type HistoryAuthCtx = {
  /** Fully-qualified events endpoint URL for one session. */
  baseUrl: string
  /** OAuth + org headers sent with every page request. */
  headers: Record<string, string>
}
+
+/** Prepare auth + headers + base URL once, reuse across pages. */
+export async function createHistoryAuthCtx(
+  sessionId: string,
+): Promise<HistoryAuthCtx> {
+  const { accessToken, orgUUID } = await prepareApiRequest()
+  return {
+    baseUrl: `${getOauthConfig().BASE_API_URL}/v1/sessions/${sessionId}/events`,
+    headers: {
+      ...getOAuthHeaders(accessToken),
+      'anthropic-beta': 'ccr-byoc-2025-07-29',
+      'x-organization-uuid': orgUUID,
+    },
+  }
+}
+
+async function fetchPage(
+  ctx: HistoryAuthCtx,
+  params: Record<string, string | number | boolean>,
+  label: string,
+): Promise<HistoryPage | null> {
+  const resp = await axios
+    .get<SessionEventsResponse>(ctx.baseUrl, {
+      headers: ctx.headers,
+      params,
+      timeout: 15000,
+      validateStatus: () => true,
+    })
+    .catch(() => null)
+  if (!resp || resp.status !== 200) {
+    logForDebugging(`[${label}] HTTP ${resp?.status ?? 'error'}`)
+    return null
+  }
+  return {
+    events: Array.isArray(resp.data.data) ? resp.data.data : [],
+    firstId: resp.data.first_id,
+    hasMore: resp.data.has_more,
+  }
+}
+
+/**
+ * Newest page: last `limit` events, chronological, via anchor_to_latest.
+ * has_more=true means older events exist.
+ */
+export async function fetchLatestEvents(
+  ctx: HistoryAuthCtx,
+  limit = HISTORY_PAGE_SIZE,
+): Promise<HistoryPage | null> {
+  return fetchPage(ctx, { limit, anchor_to_latest: true }, 'fetchLatestEvents')
+}
+
+/** Older page: events immediately before `beforeId` cursor. */
+export async function fetchOlderEvents(
+  ctx: HistoryAuthCtx,
+  beforeId: string,
+  limit = HISTORY_PAGE_SIZE,
+): Promise<HistoryPage | null> {
+  return fetchPage(ctx, { limit, before_id: beforeId }, 'fetchOlderEvents')
+}

+ 1758 - 0
src/bootstrap/state.ts

@@ -0,0 +1,1758 @@
+import type { BetaMessageStreamParams } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
+import type { Attributes, Meter, MetricOptions } from '@opentelemetry/api'
+import type { logs } from '@opentelemetry/api-logs'
+import type { LoggerProvider } from '@opentelemetry/sdk-logs'
+import type { MeterProvider } from '@opentelemetry/sdk-metrics'
+import type { BasicTracerProvider } from '@opentelemetry/sdk-trace-base'
+import { realpathSync } from 'fs'
+import sumBy from 'lodash-es/sumBy.js'
+import { cwd } from 'process'
+import type { HookEvent, ModelUsage } from 'src/entrypoints/agentSdkTypes.js'
+import type { AgentColorName } from 'src/tools/AgentTool/agentColorManager.js'
+import type { HookCallbackMatcher } from 'src/types/hooks.js'
+// Indirection for browser-sdk build (package.json "browser" field swaps
+// crypto.ts for crypto.browser.ts). Pure leaf re-export of node:crypto —
+// zero circular-dep risk. Path-alias import bypasses bootstrap-isolation
+// (rule only checks ./ and / prefixes); explicit disable documents intent.
+// eslint-disable-next-line custom-rules/bootstrap-isolation
+import { randomUUID } from 'src/utils/crypto.js'
+import type { ModelSetting } from 'src/utils/model/model.js'
+import type { ModelStrings } from 'src/utils/model/modelStrings.js'
+import type { SettingSource } from 'src/utils/settings/constants.js'
+import { resetSettingsCache } from 'src/utils/settings/settingsCache.js'
+import type { PluginHookMatcher } from 'src/utils/settings/types.js'
+import { createSignal } from 'src/utils/signal.js'
+
+// Union of everything that can be registered as a hook matcher: SDK-supplied
+// callback matchers or native plugin hook matchers (stored in
+// State.registeredHooks, keyed by HookEvent).
+type RegisteredHookMatcher = HookCallbackMatcher | PluginHookMatcher
+
+import type { SessionId } from 'src/types/ids.js'
+
+// DO NOT ADD MORE STATE HERE - BE JUDICIOUS WITH GLOBAL STATE
+
+// dev: true on entries that came via --dangerously-load-development-channels.
+// The allowlist gate checks this per-entry (not the session-wide
+// hasDevChannels bit) so passing both flags doesn't let the dev dialog's
+// acceptance leak allowlist-bypass to the --channels entries.
+// 'plugin' entries carry a marketplace name for verification; 'server'
+// entries carry only a name (see allowedChannels in State for trust rules).
+export type ChannelEntry =
+  | { kind: 'plugin'; name: string; marketplace: string; dev?: boolean }
+  | { kind: 'server'; name: string; dev?: boolean }
+
+/** Narrow counter facade: add() with an optional per-call Attributes bag.
+ *  Keeps State decoupled from concrete OTel counter implementations. */
+export type AttributedCounter = {
+  add(value: number, additionalAttributes?: Attributes): void
+}
+
+/**
+ * Shape of the single module-level session-state object (STATE below).
+ * Roughly grouped: cwd/project identity, cost & duration accounting,
+ * telemetry handles, session identity, and session-only flags. Everything
+ * here is process-global — add fields sparingly.
+ */
+type State = {
+  originalCwd: string
+  // Stable project root - set once at startup (including by --worktree flag),
+  // never updated by mid-session EnterWorktreeTool.
+  // Use for project identity (history, skills, sessions) not file operations.
+  projectRoot: string
+  // Cumulative session accounting (cost, API/tool wall time, per-turn counters)
+  totalCostUSD: number
+  totalAPIDuration: number
+  totalAPIDurationWithoutRetries: number
+  totalToolDuration: number
+  turnHookDurationMs: number
+  turnToolDurationMs: number
+  turnClassifierDurationMs: number
+  turnToolCount: number
+  turnHookCount: number
+  turnClassifierCount: number
+  startTime: number
+  lastInteractionTime: number
+  totalLinesAdded: number
+  totalLinesRemoved: number
+  hasUnknownModelCost: boolean
+  cwd: string
+  // Per-model usage totals keyed by model name
+  modelUsage: { [modelName: string]: ModelUsage }
+  mainLoopModelOverride: ModelSetting | undefined
+  initialMainLoopModel: ModelSetting
+  modelStrings: ModelStrings | null
+  isInteractive: boolean
+  kairosActive: boolean
+  // When true, ensureToolResultPairing throws on mismatch instead of
+  // repairing with synthetic placeholders. HFI opts in at startup so
+  // trajectories fail fast rather than conditioning the model on fake
+  // tool_results.
+  strictToolResultPairing: boolean
+  sdkAgentProgressSummariesEnabled: boolean
+  userMsgOptIn: boolean
+  clientType: string
+  sessionSource: string | undefined
+  questionPreviewFormat: 'markdown' | 'html' | undefined
+  flagSettingsPath: string | undefined
+  flagSettingsInline: Record<string, unknown> | null
+  allowedSettingSources: SettingSource[]
+  sessionIngressToken: string | null | undefined
+  oauthTokenFromFd: string | null | undefined
+  apiKeyFromFd: string | null | undefined
+  // Telemetry state
+  meter: Meter | null
+  sessionCounter: AttributedCounter | null
+  locCounter: AttributedCounter | null
+  prCounter: AttributedCounter | null
+  commitCounter: AttributedCounter | null
+  costCounter: AttributedCounter | null
+  tokenCounter: AttributedCounter | null
+  codeEditToolDecisionCounter: AttributedCounter | null
+  activeTimeCounter: AttributedCounter | null
+  statsStore: { observe(name: string, value: number): void } | null
+  // Current session identity (see getSessionId / switchSession)
+  sessionId: SessionId
+  // Parent session ID for tracking session lineage (e.g. plan mode -> implementation)
+  parentSessionId: SessionId | undefined
+  // Logger state
+  loggerProvider: LoggerProvider | null
+  eventLogger: ReturnType<typeof logs.getLogger> | null
+  // Meter provider state
+  meterProvider: MeterProvider | null
+  // Tracer provider state
+  tracerProvider: BasicTracerProvider | null
+  // Agent color state
+  agentColorMap: Map<string, AgentColorName>
+  agentColorIndex: number
+  // Last API request for bug reports
+  lastAPIRequest: Omit<BetaMessageStreamParams, 'messages'> | null
+  // Messages from the last API request (ant-only; reference, not clone).
+  // Captures the exact post-compaction, CLAUDE.md-injected message set sent
+  // to the API so /share's serialized_conversation.json reflects reality.
+  lastAPIRequestMessages: BetaMessageStreamParams['messages'] | null
+  // Last auto-mode classifier request(s) for /share transcript
+  lastClassifierRequests: unknown[] | null
+  // CLAUDE.md content cached by context.ts for the auto-mode classifier.
+  // Breaks the yoloClassifier → claudemd → filesystem → permissions cycle.
+  cachedClaudeMdContent: string | null
+  // In-memory error log for recent errors
+  inMemoryErrorLog: Array<{ error: string; timestamp: string }>
+  // Session-only plugins from --plugin-dir flag
+  inlinePlugins: Array<string>
+  // Explicit --chrome / --no-chrome flag value (undefined = not set on CLI)
+  chromeFlagOverride: boolean | undefined
+  // Use cowork_plugins directory instead of plugins (--cowork flag or env var)
+  useCoworkPlugins: boolean
+  // Session-only bypass permissions mode flag (not persisted)
+  sessionBypassPermissionsMode: boolean
+  // Session-only flag gating the .claude/scheduled_tasks.json watcher
+  // (useScheduledTasks). Set by cronScheduler.start() when the JSON has
+  // entries, or by CronCreateTool. Not persisted.
+  scheduledTasksEnabled: boolean
+  // Session-only cron tasks created via CronCreate with durable: false.
+  // Fire on schedule like file-backed tasks but are never written to
+  // .claude/scheduled_tasks.json — they die with the process. Typed via
+  // SessionCronTask below (not importing from cronTasks.ts keeps
+  // bootstrap a leaf of the import DAG).
+  sessionCronTasks: SessionCronTask[]
+  // Teams created this session via TeamCreate. cleanupSessionTeams()
+  // removes these on gracefulShutdown so subagent-created teams don't
+  // persist on disk forever (gh-32730). TeamDelete removes entries to
+  // avoid double-cleanup. Lives here (not teamHelpers.ts) so
+  // resetStateForTests() clears it between tests.
+  sessionCreatedTeams: Set<string>
+  // Session-only trust flag for home directory (not persisted to disk)
+  // When running from home dir, trust dialog is shown but not saved to disk.
+  // This flag allows features requiring trust to work during the session.
+  sessionTrustAccepted: boolean
+  // Session-only flag to disable session persistence to disk
+  sessionPersistenceDisabled: boolean
+  // Track if user has exited plan mode in this session (for re-entry guidance)
+  hasExitedPlanMode: boolean
+  // Track if we need to show the plan mode exit attachment (one-time notification)
+  needsPlanModeExitAttachment: boolean
+  // Track if we need to show the auto mode exit attachment (one-time notification)
+  needsAutoModeExitAttachment: boolean
+  // Track if LSP plugin recommendation has been shown this session (only show once)
+  lspRecommendationShownThisSession: boolean
+  // SDK init event state - jsonSchema for structured output
+  initJsonSchema: Record<string, unknown> | null
+  // Registered hooks - SDK callbacks and plugin native hooks
+  registeredHooks: Partial<Record<HookEvent, RegisteredHookMatcher[]>> | null
+  // Cache for plan slugs: sessionId -> wordSlug
+  planSlugCache: Map<string, string>
+  // Track teleported session for reliability logging
+  teleportedSessionInfo: {
+    isTeleported: boolean
+    hasLoggedFirstMessage: boolean
+    sessionId: string | null
+  } | null
+  // Track invoked skills for preservation across compaction
+  // Keys are composite: `${agentId ?? ''}:${skillName}` to prevent cross-agent overwrites
+  invokedSkills: Map<
+    string,
+    {
+      skillName: string
+      skillPath: string
+      content: string
+      invokedAt: number
+      agentId: string | null
+    }
+  >
+  // Track slow operations for dev bar display (ant-only)
+  slowOperations: Array<{
+    operation: string
+    durationMs: number
+    timestamp: number
+  }>
+  // SDK-provided betas (e.g., context-1m-2025-08-07)
+  sdkBetas: string[] | undefined
+  // Main thread agent type (from --agent flag or settings)
+  mainThreadAgentType: string | undefined
+  // Remote mode (--remote flag)
+  isRemoteMode: boolean
+  // Direct connect server URL (for display in header)
+  directConnectServerUrl: string | undefined
+  // System prompt section cache state
+  systemPromptSectionCache: Map<string, string | null>
+  // Last date emitted to the model (for detecting midnight date changes)
+  lastEmittedDate: string | null
+  // Additional directories from --add-dir flag (for CLAUDE.md loading)
+  additionalDirectoriesForClaudeMd: string[]
+  // Channel server allowlist from --channels flag (servers whose channel
+  // notifications should register this session). Parsed once in main.tsx —
+  // the tag decides trust model: 'plugin' → marketplace verification +
+  // allowlist, 'server' → allowlist always fails (schema is plugin-only).
+  // Either kind needs entry.dev to bypass allowlist.
+  allowedChannels: ChannelEntry[]
+  // True if any entry in allowedChannels came from
+  // --dangerously-load-development-channels (so ChannelsNotice can name the
+  // right flag in policy-blocked messages)
+  hasDevChannels: boolean
+  // Dir containing the session's `.jsonl`; null = derive from originalCwd.
+  sessionProjectDir: string | null
+  // Cached prompt cache 1h TTL allowlist from GrowthBook (session-stable)
+  promptCache1hAllowlist: string[] | null
+  // Cached 1h TTL user eligibility (session-stable). Latched on first
+  // evaluation so mid-session overage flips don't change the cache_control
+  // TTL, which would bust the server-side prompt cache.
+  promptCache1hEligible: boolean | null
+  // Sticky-on latch for AFK_MODE_BETA_HEADER. Once auto mode is first
+  // activated, keep sending the header for the rest of the session so
+  // Shift+Tab toggles don't bust the ~50-70K token prompt cache.
+  afkModeHeaderLatched: boolean | null
+  // Sticky-on latch for FAST_MODE_BETA_HEADER. Once fast mode is first
+  // enabled, keep sending the header so cooldown enter/exit doesn't
+  // double-bust the prompt cache. The `speed` body param stays dynamic.
+  fastModeHeaderLatched: boolean | null
+  // Sticky-on latch for the cache-editing beta header. Once cached
+  // microcompact is first enabled, keep sending the header so mid-session
+  // GrowthBook/settings toggles don't bust the prompt cache.
+  cacheEditingHeaderLatched: boolean | null
+  // Sticky-on latch for clearing thinking from prior tool loops. Triggered
+  // when >1h since last API call (confirmed cache miss — no cache-hit
+  // benefit to keeping thinking). Once latched, stays on so the newly-warmed
+  // thinking-cleared cache isn't busted by flipping back to keep:'all'.
+  thinkingClearLatched: boolean | null
+  // Current prompt ID (UUID) correlating a user prompt with subsequent OTel events
+  promptId: string | null
+  // Last API requestId for the main conversation chain (not subagents).
+  // Updated after each successful API response for main-session queries.
+  // Read at shutdown to send cache eviction hints to inference.
+  lastMainRequestId: string | undefined
+  // Timestamp (Date.now()) of the last successful API call completion.
+  // Used to compute timeSinceLastApiCallMs in tengu_api_success for
+  // correlating cache misses with idle time (cache TTL is ~5min).
+  lastApiCompletionTimestamp: number | null
+  // Set to true after compaction (auto or manual /compact). Consumed by
+  // logAPISuccess to tag the first post-compaction API call so we can
+  // distinguish compaction-induced cache misses from TTL expiry.
+  pendingPostCompaction: boolean
+}
+
+// ALSO HERE - THINK THRICE BEFORE MODIFYING
+/**
+ * Build the process-initial State. Runs once at module load (STATE below)
+ * and again from resetStateForTests(). Side effects are limited to reading
+ * process.cwd()/env and resolving cwd symlinks.
+ */
+function getInitialState(): State {
+  // Resolve symlinks in cwd to match behavior of shell.ts setCwd
+  // This ensures consistency with how paths are sanitized for session storage
+  let resolvedCwd = ''
+  if (
+    typeof process !== 'undefined' &&
+    typeof process.cwd === 'function' &&
+    typeof realpathSync === 'function'
+  ) {
+    const rawCwd = cwd()
+    try {
+      resolvedCwd = realpathSync(rawCwd).normalize('NFC')
+    } catch {
+      // File Provider EPERM on CloudStorage mounts (lstat per path component).
+      resolvedCwd = rawCwd.normalize('NFC')
+    }
+  }
+  const state: State = {
+    originalCwd: resolvedCwd,
+    projectRoot: resolvedCwd,
+    totalCostUSD: 0,
+    totalAPIDuration: 0,
+    totalAPIDurationWithoutRetries: 0,
+    totalToolDuration: 0,
+    turnHookDurationMs: 0,
+    turnToolDurationMs: 0,
+    turnClassifierDurationMs: 0,
+    turnToolCount: 0,
+    turnHookCount: 0,
+    turnClassifierCount: 0,
+    startTime: Date.now(),
+    lastInteractionTime: Date.now(),
+    totalLinesAdded: 0,
+    totalLinesRemoved: 0,
+    hasUnknownModelCost: false,
+    cwd: resolvedCwd,
+    modelUsage: {},
+    mainLoopModelOverride: undefined,
+    // NOTE(review): typed ModelSetting but initialized to null — assumes
+    // ModelSetting permits null; confirm against model.ts.
+    initialMainLoopModel: null,
+    modelStrings: null,
+    isInteractive: false,
+    kairosActive: false,
+    strictToolResultPairing: false,
+    sdkAgentProgressSummariesEnabled: false,
+    userMsgOptIn: false,
+    clientType: 'cli',
+    sessionSource: undefined,
+    questionPreviewFormat: undefined,
+    sessionIngressToken: undefined,
+    oauthTokenFromFd: undefined,
+    apiKeyFromFd: undefined,
+    flagSettingsPath: undefined,
+    flagSettingsInline: null,
+    allowedSettingSources: [
+      'userSettings',
+      'projectSettings',
+      'localSettings',
+      'flagSettings',
+      'policySettings',
+    ],
+    // Telemetry state
+    meter: null,
+    sessionCounter: null,
+    locCounter: null,
+    prCounter: null,
+    commitCounter: null,
+    costCounter: null,
+    tokenCounter: null,
+    codeEditToolDecisionCounter: null,
+    activeTimeCounter: null,
+    statsStore: null,
+    sessionId: randomUUID() as SessionId,
+    parentSessionId: undefined,
+    // Logger state
+    loggerProvider: null,
+    eventLogger: null,
+    // Meter provider state
+    meterProvider: null,
+    tracerProvider: null,
+    // Agent color state
+    agentColorMap: new Map(),
+    agentColorIndex: 0,
+    // Last API request for bug reports
+    lastAPIRequest: null,
+    lastAPIRequestMessages: null,
+    // Last auto-mode classifier request(s) for /share transcript
+    lastClassifierRequests: null,
+    cachedClaudeMdContent: null,
+    // In-memory error log for recent errors
+    inMemoryErrorLog: [],
+    // Session-only plugins from --plugin-dir flag
+    inlinePlugins: [],
+    // Explicit --chrome / --no-chrome flag value (undefined = not set on CLI)
+    chromeFlagOverride: undefined,
+    // Use cowork_plugins directory instead of plugins
+    useCoworkPlugins: false,
+    // Session-only bypass permissions mode flag (not persisted)
+    sessionBypassPermissionsMode: false,
+    // Scheduled tasks disabled until flag or dialog enables them
+    scheduledTasksEnabled: false,
+    sessionCronTasks: [],
+    sessionCreatedTeams: new Set(),
+    // Session-only trust flag (not persisted to disk)
+    sessionTrustAccepted: false,
+    // Session-only flag to disable session persistence to disk
+    sessionPersistenceDisabled: false,
+    // Track if user has exited plan mode in this session
+    hasExitedPlanMode: false,
+    // Track if we need to show the plan mode exit attachment
+    needsPlanModeExitAttachment: false,
+    // Track if we need to show the auto mode exit attachment
+    needsAutoModeExitAttachment: false,
+    // Track if LSP plugin recommendation has been shown this session
+    lspRecommendationShownThisSession: false,
+    // SDK init event state
+    initJsonSchema: null,
+    registeredHooks: null,
+    // Cache for plan slugs
+    planSlugCache: new Map(),
+    // Track teleported session for reliability logging
+    teleportedSessionInfo: null,
+    // Track invoked skills for preservation across compaction
+    invokedSkills: new Map(),
+    // Track slow operations for dev bar display
+    slowOperations: [],
+    // SDK-provided betas
+    sdkBetas: undefined,
+    // Main thread agent type
+    mainThreadAgentType: undefined,
+    // Remote mode
+    isRemoteMode: false,
+    // NOTE(review): replBridgeActive is not declared on the State type — the
+    // conditional spread bypasses excess-property checking, so this compiles
+    // but injects an undeclared property. Presumably readers access it via a
+    // cast; consider declaring it on State (optional) to make it visible.
+    ...(process.env.USER_TYPE === 'ant'
+      ? {
+          replBridgeActive: false,
+        }
+      : {}),
+    // Direct connect server URL
+    directConnectServerUrl: undefined,
+    // System prompt section cache state
+    systemPromptSectionCache: new Map(),
+    // Last date emitted to the model
+    lastEmittedDate: null,
+    // Additional directories from --add-dir flag (for CLAUDE.md loading)
+    additionalDirectoriesForClaudeMd: [],
+    // Channel server allowlist from --channels flag
+    allowedChannels: [],
+    hasDevChannels: false,
+    // Session project dir (null = derive from originalCwd)
+    sessionProjectDir: null,
+    // Prompt cache 1h allowlist (null = not yet fetched from GrowthBook)
+    promptCache1hAllowlist: null,
+    // Prompt cache 1h eligibility (null = not yet evaluated)
+    promptCache1hEligible: null,
+    // Beta header latches (null = not yet triggered)
+    afkModeHeaderLatched: null,
+    fastModeHeaderLatched: null,
+    cacheEditingHeaderLatched: null,
+    thinkingClearLatched: null,
+    // Current prompt ID
+    promptId: null,
+    lastMainRequestId: undefined,
+    lastApiCompletionTimestamp: null,
+    pendingPostCompaction: false,
+  }
+
+  return state
+}
+
+// AND ESPECIALLY HERE
+// The single process-global state object. Mutated only via the exported
+// accessors below; replaced wholesale only by resetStateForTests().
+const STATE: State = getInitialState()
+
+/** Current session id. Rotates via regenerateSessionId / switchSession. */
+export function getSessionId(): SessionId {
+  return STATE.sessionId
+}
+
+export function regenerateSessionId(
+  options: { setCurrentAsParent?: boolean } = {},
+): SessionId {
+  if (options.setCurrentAsParent) {
+    STATE.parentSessionId = STATE.sessionId
+  }
+  // Drop the outgoing session's plan-slug entry so the Map doesn't
+  // accumulate stale keys. Callers that need to carry the slug across
+  // (REPL.tsx clearContext) read it before calling clearConversation.
+  STATE.planSlugCache.delete(STATE.sessionId)
+  // Regenerated sessions live in the current project: reset projectDir to
+  // null so getTranscriptPath() derives from originalCwd.
+  STATE.sessionId = randomUUID() as SessionId
+  STATE.sessionProjectDir = null
+  return STATE.sessionId
+}
+
+/** Parent session id, set when regenerateSessionId ran with
+ *  setCurrentAsParent — tracks session lineage. Undefined otherwise. */
+export function getParentSessionId(): SessionId | undefined {
+  return STATE.parentSessionId
+}
+
+/**
+ * Atomically switch the active session. `sessionId` and `sessionProjectDir`
+ * always change together — there is no separate setter for either, so they
+ * cannot drift out of sync (CC-34).
+ *
+ * @param projectDir — directory containing `<sessionId>.jsonl`. Omit (or
+ *   pass `null`) for sessions in the current project — the path will derive
+ *   from originalCwd at read time. Pass `dirname(transcriptPath)` when the
+ *   session lives in a different project directory (git worktrees,
+ *   cross-project resume). Every call resets the project dir; it never
+ *   carries over from the previous session.
+ */
+export function switchSession(
+  sessionId: SessionId,
+  projectDir: string | null = null,
+): void {
+  // Drop the outgoing session's plan-slug entry so the Map stays bounded
+  // across repeated /resume. Only the current session's slug is ever read
+  // (plans.ts getPlanSlug defaults to getSessionId()).
+  STATE.planSlugCache.delete(STATE.sessionId)
+  STATE.sessionId = sessionId
+  STATE.sessionProjectDir = projectDir
+  sessionSwitched.emit(sessionId)
+}
+
+// Signal emitted by switchSession after the id/projectDir swap completes.
+const sessionSwitched = createSignal<[id: SessionId]>()
+
+/**
+ * Register a callback that fires when switchSession changes the active
+ * sessionId. bootstrap can't import listeners directly (DAG leaf), so
+ * callers register themselves. concurrentSessions.ts uses this to keep the
+ * PID file's sessionId in sync with --resume.
+ */
+export const onSessionSwitch = sessionSwitched.subscribe
+
+/**
+ * Project directory the current session's transcript lives in, or `null` if
+ * the session was created in the current project (common case — derive from
+ * originalCwd). See `switchSession()`.
+ */
+export function getSessionProjectDir(): string | null {
+  return STATE.sessionProjectDir
+}
+
+/** Working directory the process started in (NFC-normalized). Unlike
+ *  projectRoot this has a setter, so it is not guaranteed startup-stable. */
+export function getOriginalCwd(): string {
+  return STATE.originalCwd
+}
+
+/**
+ * Get the stable project root directory.
+ * Unlike getOriginalCwd(), this is never updated by mid-session EnterWorktreeTool
+ * (so skills/history stay stable when entering a throwaway worktree).
+ * It IS set at startup by --worktree, since that worktree is the session's project.
+ * Use for project identity (history, skills, sessions) not file operations.
+ */
+export function getProjectRoot(): string {
+  return STATE.projectRoot
+}
+
+/** Overwrite originalCwd (NFC-normalized on the way in). */
+export function setOriginalCwd(cwd: string): void {
+  STATE.originalCwd = cwd.normalize('NFC')
+}
+
+/**
+ * Only for --worktree startup flag. Mid-session EnterWorktreeTool must NOT
+ * call this — skills/history should stay anchored to where the session started.
+ */
+export function setProjectRoot(cwd: string): void {
+  STATE.projectRoot = cwd.normalize('NFC')
+}
+
+/** Current working directory tracked in global state (NFC-normalized). */
+export function getCwdState(): string {
+  return STATE.cwd
+}
+
+/** Update the tracked cwd; NFC-normalizes to match setCwd in shell.ts. */
+export function setCwdState(cwd: string): void {
+  STATE.cwd = cwd.normalize('NFC')
+}
+
+/** Direct-connect server URL, if any (shown in the header). */
+export function getDirectConnectServerUrl(): string | undefined {
+  return STATE.directConnectServerUrl
+}
+
+export function setDirectConnectServerUrl(url: string): void {
+  STATE.directConnectServerUrl = url
+}
+
+/** Accumulate API wall time, tracked both with and without retries. */
+export function addToTotalDurationState(
+  duration: number,
+  durationWithoutRetries: number,
+): void {
+  STATE.totalAPIDuration += duration
+  STATE.totalAPIDurationWithoutRetries += durationWithoutRetries
+}
+
+export function resetTotalDurationStateAndCost_FOR_TESTS_ONLY(): void {
+  STATE.totalAPIDuration = 0
+  STATE.totalAPIDurationWithoutRetries = 0
+  STATE.totalCostUSD = 0
+}
+
+/** Add an API call's cost and record the model's usage snapshot.
+ *  Note: modelUsage is assigned (latest snapshot wins), not merged. */
+export function addToTotalCostState(
+  cost: number,
+  modelUsage: ModelUsage,
+  model: string,
+): void {
+  STATE.modelUsage[model] = modelUsage
+  STATE.totalCostUSD += cost
+}
+
+export function getTotalCostUSD(): number {
+  return STATE.totalCostUSD
+}
+
+export function getTotalAPIDuration(): number {
+  return STATE.totalAPIDuration
+}
+
+/** Wall-clock ms since session start (STATE.startTime). */
+export function getTotalDuration(): number {
+  return Date.now() - STATE.startTime
+}
+
+export function getTotalAPIDurationWithoutRetries(): number {
+  return STATE.totalAPIDurationWithoutRetries
+}
+
+export function getTotalToolDuration(): number {
+  return STATE.totalToolDuration
+}
+
+/** Add a tool run's duration to both session and per-turn accounting. */
+export function addToToolDuration(duration: number): void {
+  STATE.totalToolDuration += duration
+  STATE.turnToolDurationMs += duration
+  STATE.turnToolCount++
+}
+
+// Per-turn hook/tool/classifier accounting: addTo* accumulates during a
+// turn, reset* zeroes the pair (duration + count) at turn boundaries.
+export function getTurnHookDurationMs(): number {
+  return STATE.turnHookDurationMs
+}
+
+export function addToTurnHookDuration(duration: number): void {
+  STATE.turnHookDurationMs += duration
+  STATE.turnHookCount++
+}
+
+export function resetTurnHookDuration(): void {
+  STATE.turnHookDurationMs = 0
+  STATE.turnHookCount = 0
+}
+
+export function getTurnHookCount(): number {
+  return STATE.turnHookCount
+}
+
+export function getTurnToolDurationMs(): number {
+  return STATE.turnToolDurationMs
+}
+
+export function resetTurnToolDuration(): void {
+  STATE.turnToolDurationMs = 0
+  STATE.turnToolCount = 0
+}
+
+export function getTurnToolCount(): number {
+  return STATE.turnToolCount
+}
+
+export function getTurnClassifierDurationMs(): number {
+  return STATE.turnClassifierDurationMs
+}
+
+export function addToTurnClassifierDuration(duration: number): void {
+  STATE.turnClassifierDurationMs += duration
+  STATE.turnClassifierCount++
+}
+
+export function resetTurnClassifierDuration(): void {
+  STATE.turnClassifierDurationMs = 0
+  STATE.turnClassifierCount = 0
+}
+
+export function getTurnClassifierCount(): number {
+  return STATE.turnClassifierCount
+}
+
+/** Injected stats sink for observing named metrics, or null if unset. */
+export function getStatsStore(): {
+  observe(name: string, value: number): void
+} | null {
+  return STATE.statsStore
+}
+
+export function setStatsStore(
+  store: { observe(name: string, value: number): void } | null,
+): void {
+  STATE.statsStore = store
+}
+
+/**
+ * Marks that an interaction occurred.
+ *
+ * By default the actual Date.now() call is deferred until the next Ink render
+ * frame (via flushInteractionTime()) so we avoid calling Date.now() on every
+ * single keypress.
+ *
+ * Pass `immediate = true` when calling from React useEffect callbacks or
+ * other code that runs *after* the Ink render cycle has already flushed.
+ * Without it the timestamp stays stale until the next render, which may never
+ * come if the user is idle (e.g. permission dialog waiting for input).
+ */
+let interactionTimeDirty = false
+
+export function updateLastInteractionTime(immediate?: boolean): void {
+  if (immediate) {
+    flushInteractionTime_inner()
+  } else {
+    interactionTimeDirty = true
+  }
+}
+
+/**
+ * If an interaction was recorded since the last flush, update the timestamp
+ * now. Called by Ink before each render cycle so we batch many keypresses into
+ * a single Date.now() call.
+ */
+export function flushInteractionTime(): void {
+  if (interactionTimeDirty) {
+    flushInteractionTime_inner()
+  }
+}
+
+// Shared flush path: stamp lastInteractionTime and clear the dirty bit.
+function flushInteractionTime_inner(): void {
+  STATE.lastInteractionTime = Date.now()
+  interactionTimeDirty = false
+}
+
+/** Accumulate diff line counts from an edit into session totals. */
+export function addToTotalLinesChanged(added: number, removed: number): void {
+  STATE.totalLinesAdded += added
+  STATE.totalLinesRemoved += removed
+}
+
+export function getTotalLinesAdded(): number {
+  return STATE.totalLinesAdded
+}
+
+export function getTotalLinesRemoved(): number {
+  return STATE.totalLinesRemoved
+}
+
+// The getTotal*Tokens helpers sum one ModelUsage field across all models.
+export function getTotalInputTokens(): number {
+  return sumBy(Object.values(STATE.modelUsage), 'inputTokens')
+}
+
+export function getTotalOutputTokens(): number {
+  return sumBy(Object.values(STATE.modelUsage), 'outputTokens')
+}
+
+export function getTotalCacheReadInputTokens(): number {
+  return sumBy(Object.values(STATE.modelUsage), 'cacheReadInputTokens')
+}
+
+export function getTotalCacheCreationInputTokens(): number {
+  return sumBy(Object.values(STATE.modelUsage), 'cacheCreationInputTokens')
+}
+
+export function getTotalWebSearchRequests(): number {
+  return sumBy(Object.values(STATE.modelUsage), 'webSearchRequests')
+}
+
+// Per-turn output-token budget tracking. snapshotOutputTokensForTurn() is
+// called at turn start with the turn's budget (or null for unlimited);
+// getTurnOutputTokens() is the output-token delta since that snapshot.
+let outputTokensAtTurnStart = 0
+let currentTurnTokenBudget: number | null = null
+export function getTurnOutputTokens(): number {
+  return getTotalOutputTokens() - outputTokensAtTurnStart
+}
+export function getCurrentTurnTokenBudget(): number | null {
+  return currentTurnTokenBudget
+}
+// How many times the current turn continued past its budget.
+let budgetContinuationCount = 0
+export function snapshotOutputTokensForTurn(budget: number | null): void {
+  outputTokensAtTurnStart = getTotalOutputTokens()
+  currentTurnTokenBudget = budget
+  budgetContinuationCount = 0
+}
+export function getBudgetContinuationCount(): number {
+  return budgetContinuationCount
+}
+export function incrementBudgetContinuationCount(): void {
+  budgetContinuationCount++
+}
+
+/** Latch: a model without known pricing was used. Cleared only by
+ *  resetCostState() / test reset. */
+export function setHasUnknownModelCost(): void {
+  STATE.hasUnknownModelCost = true
+}
+
+export function hasUnknownModelCost(): boolean {
+  return STATE.hasUnknownModelCost
+}
+
+/** Last main-chain API requestId (read at shutdown for cache eviction hints). */
+export function getLastMainRequestId(): string | undefined {
+  return STATE.lastMainRequestId
+}
+
+export function setLastMainRequestId(requestId: string): void {
+  STATE.lastMainRequestId = requestId
+}
+
+export function getLastApiCompletionTimestamp(): number | null {
+  return STATE.lastApiCompletionTimestamp
+}
+
+export function setLastApiCompletionTimestamp(timestamp: number): void {
+  STATE.lastApiCompletionTimestamp = timestamp
+}
+
+/** Mark that a compaction just occurred. The next API success event will
+ *  include isPostCompaction=true, then the flag auto-resets. */
+export function markPostCompaction(): void {
+  STATE.pendingPostCompaction = true
+}
+
+/** Consume the post-compaction flag. Returns true once after compaction,
+ *  then returns false until the next compaction. */
+export function consumePostCompaction(): boolean {
+  const was = STATE.pendingPostCompaction
+  STATE.pendingPostCompaction = false
+  return was
+}
+
+/** Last interaction timestamp. May lag actual input until
+ *  flushInteractionTime() runs (see updateLastInteractionTime). */
+export function getLastInteractionTime(): number {
+  return STATE.lastInteractionTime
+}
+
+// Scroll drain suspension — background intervals check this before doing work
+// so they don't compete with scroll frames for the event loop. Set by
+// ScrollBox scrollBy/scrollTo, cleared SCROLL_DRAIN_IDLE_MS after the last
+// scroll event. Module-scope (not in STATE) — ephemeral hot-path flag, no
+// test-reset needed since the debounce timer self-clears.
+let scrollDraining = false
+let scrollDrainTimer: ReturnType<typeof setTimeout> | undefined
+const SCROLL_DRAIN_IDLE_MS = 150
+
+/** Mark that a scroll event just happened. Background intervals gate on
+ *  getIsScrollDraining() and skip their work until the debounce clears. */
+export function markScrollActivity(): void {
+  scrollDraining = true
+  if (scrollDrainTimer) clearTimeout(scrollDrainTimer)
+  scrollDrainTimer = setTimeout(() => {
+    scrollDraining = false
+    scrollDrainTimer = undefined
+  }, SCROLL_DRAIN_IDLE_MS)
+  // unref (optional-chained for non-Node runtimes): the debounce timer must
+  // not keep the process alive on its own.
+  scrollDrainTimer.unref?.()
+}
+
+/** True while scroll is actively draining (within 150ms of last event).
+ *  Intervals should early-return when this is set — the work picks up next
+ *  tick after scroll settles. */
+export function getIsScrollDraining(): boolean {
+  return scrollDraining
+}
+
+/** Await this before expensive one-shot work (network, subprocess) that could
+ *  coincide with scroll. Resolves immediately if not scrolling; otherwise
+ *  polls at the idle interval until the flag clears. */
+export async function waitForScrollIdle(): Promise<void> {
+  while (scrollDraining) {
+    // Poll timer is unref'd too, so waiting here doesn't keep the process up.
+    // bootstrap-isolation forbids importing sleep() from src/utils/
+    // eslint-disable-next-line no-restricted-syntax
+    await new Promise(r => setTimeout(r, SCROLL_DRAIN_IDLE_MS).unref?.())
+  }
+}
+
+/** Per-model usage map. Live reference to state, not a copy — don't mutate. */
+export function getModelUsage(): { [modelName: string]: ModelUsage } {
+  return STATE.modelUsage
+}
+
+export function getUsageForModel(model: string): ModelUsage | undefined {
+  return STATE.modelUsage[model]
+}
+
+/**
+ * Gets the model override set from the --model CLI flag or after the user
+ * updates their configured model.
+ */
+export function getMainLoopModelOverride(): ModelSetting | undefined {
+  return STATE.mainLoopModelOverride
+}
+
+export function getInitialMainLoopModel(): ModelSetting {
+  return STATE.initialMainLoopModel
+}
+
+export function setMainLoopModelOverride(
+  model: ModelSetting | undefined,
+): void {
+  STATE.mainLoopModelOverride = model
+}
+
+export function setInitialMainLoopModel(model: ModelSetting): void {
+  STATE.initialMainLoopModel = model
+}
+
+/** SDK-provided beta header values (e.g. context-1m-2025-08-07). */
+export function getSdkBetas(): string[] | undefined {
+  return STATE.sdkBetas
+}
+
+export function setSdkBetas(betas: string[] | undefined): void {
+  STATE.sdkBetas = betas
+}
+
+export function resetCostState(): void {
+  STATE.totalCostUSD = 0
+  STATE.totalAPIDuration = 0
+  STATE.totalAPIDurationWithoutRetries = 0
+  STATE.totalToolDuration = 0
+  STATE.startTime = Date.now()
+  STATE.totalLinesAdded = 0
+  STATE.totalLinesRemoved = 0
+  STATE.hasUnknownModelCost = false
+  STATE.modelUsage = {}
+  STATE.promptId = null
+}
+
+/**
+ * Sets cost state values for session restore.
+ * Called by restoreCostStateForSession in cost-tracker.ts.
+ */
+export function setCostStateForRestore({
+  totalCostUSD,
+  totalAPIDuration,
+  totalAPIDurationWithoutRetries,
+  totalToolDuration,
+  totalLinesAdded,
+  totalLinesRemoved,
+  lastDuration,
+  modelUsage,
+}: {
+  totalCostUSD: number
+  totalAPIDuration: number
+  totalAPIDurationWithoutRetries: number
+  totalToolDuration: number
+  totalLinesAdded: number
+  totalLinesRemoved: number
+  lastDuration: number | undefined
+  modelUsage: { [modelName: string]: ModelUsage } | undefined
+}): void {
+  STATE.totalCostUSD = totalCostUSD
+  STATE.totalAPIDuration = totalAPIDuration
+  STATE.totalAPIDurationWithoutRetries = totalAPIDurationWithoutRetries
+  STATE.totalToolDuration = totalToolDuration
+  STATE.totalLinesAdded = totalLinesAdded
+  STATE.totalLinesRemoved = totalLinesRemoved
+
+  // Restore per-model usage breakdown
+  if (modelUsage) {
+    STATE.modelUsage = modelUsage
+  }
+
+  // Adjust startTime to make wall duration accumulate
+  if (lastDuration) {
+    STATE.startTime = Date.now() - lastDuration
+  }
+}
+
+// Only used in tests
+export function resetStateForTests(): void {
+  if (process.env.NODE_ENV !== 'test') {
+    throw new Error('resetStateForTests can only be called in tests')
+  }
+  Object.entries(getInitialState()).forEach(([key, value]) => {
+    STATE[key as keyof State] = value as never
+  })
+  outputTokensAtTurnStart = 0
+  currentTurnTokenBudget = null
+  budgetContinuationCount = 0
+  sessionSwitched.clear()
+}
+
// You shouldn't use this directly. See src/utils/model/modelStrings.ts::getModelStrings()
export function getModelStrings(): ModelStrings | null {
  return STATE.modelStrings
}

// You shouldn't use this directly. See src/utils/model/modelStrings.ts
export function setModelStrings(modelStrings: ModelStrings): void {
  STATE.modelStrings = modelStrings
}

// Test utility function to reset model strings for re-initialization.
// Separate from setModelStrings because we only want to accept 'null' in tests.
// NOTE(review): unlike resetStateForTests this has no NODE_ENV guard — relies
// on callers honoring the name; confirm that's intentional.
export function resetModelStringsForTestingOnly() {
  STATE.modelStrings = null
}
+
+export function setMeter(
+  meter: Meter,
+  createCounter: (name: string, options: MetricOptions) => AttributedCounter,
+): void {
+  STATE.meter = meter
+
+  // Initialize all counters using the provided factory
+  STATE.sessionCounter = createCounter('claude_code.session.count', {
+    description: 'Count of CLI sessions started',
+  })
+  STATE.locCounter = createCounter('claude_code.lines_of_code.count', {
+    description:
+      "Count of lines of code modified, with the 'type' attribute indicating whether lines were added or removed",
+  })
+  STATE.prCounter = createCounter('claude_code.pull_request.count', {
+    description: 'Number of pull requests created',
+  })
+  STATE.commitCounter = createCounter('claude_code.commit.count', {
+    description: 'Number of git commits created',
+  })
+  STATE.costCounter = createCounter('claude_code.cost.usage', {
+    description: 'Cost of the Claude Code session',
+    unit: 'USD',
+  })
+  STATE.tokenCounter = createCounter('claude_code.token.usage', {
+    description: 'Number of tokens used',
+    unit: 'tokens',
+  })
+  STATE.codeEditToolDecisionCounter = createCounter(
+    'claude_code.code_edit_tool.decision',
+    {
+      description:
+        'Count of code editing tool permission decisions (accept/reject) for Edit, Write, and NotebookEdit tools',
+    },
+  )
+  STATE.activeTimeCounter = createCounter('claude_code.active_time.total', {
+    description: 'Total active time in seconds',
+    unit: 's',
+  })
+}
+
// ---- Telemetry accessors. Each returns whatever the matching setter (or
// setMeter) last stored on STATE; null when nothing has been installed. ----

export function getMeter(): Meter | null {
  return STATE.meter
}

export function getSessionCounter(): AttributedCounter | null {
  return STATE.sessionCounter
}

export function getLocCounter(): AttributedCounter | null {
  return STATE.locCounter
}

export function getPrCounter(): AttributedCounter | null {
  return STATE.prCounter
}

export function getCommitCounter(): AttributedCounter | null {
  return STATE.commitCounter
}

export function getCostCounter(): AttributedCounter | null {
  return STATE.costCounter
}

export function getTokenCounter(): AttributedCounter | null {
  return STATE.tokenCounter
}

export function getCodeEditToolDecisionCounter(): AttributedCounter | null {
  return STATE.codeEditToolDecisionCounter
}

export function getActiveTimeCounter(): AttributedCounter | null {
  return STATE.activeTimeCounter
}

export function getLoggerProvider(): LoggerProvider | null {
  return STATE.loggerProvider
}

export function setLoggerProvider(provider: LoggerProvider | null): void {
  STATE.loggerProvider = provider
}

export function getEventLogger(): ReturnType<typeof logs.getLogger> | null {
  return STATE.eventLogger
}

export function setEventLogger(
  logger: ReturnType<typeof logs.getLogger> | null,
): void {
  STATE.eventLogger = logger
}

export function getMeterProvider(): MeterProvider | null {
  return STATE.meterProvider
}

export function setMeterProvider(provider: MeterProvider | null): void {
  STATE.meterProvider = provider
}
export function getTracerProvider(): BasicTracerProvider | null {
  return STATE.tracerProvider
}
export function setTracerProvider(provider: BasicTracerProvider | null): void {
  STATE.tracerProvider = provider
}
+
// ---- Session mode / client identity accessors ----

/** Convenience inverse of getIsInteractive(). */
export function getIsNonInteractiveSession(): boolean {
  return !STATE.isInteractive
}

export function getIsInteractive(): boolean {
  return STATE.isInteractive
}

export function setIsInteractive(value: boolean): void {
  STATE.isInteractive = value
}

export function getClientType(): string {
  return STATE.clientType
}

export function setClientType(type: string): void {
  STATE.clientType = type
}

export function getSdkAgentProgressSummariesEnabled(): boolean {
  return STATE.sdkAgentProgressSummariesEnabled
}

export function setSdkAgentProgressSummariesEnabled(value: boolean): void {
  STATE.sdkAgentProgressSummariesEnabled = value
}

export function getKairosActive(): boolean {
  return STATE.kairosActive
}

export function setKairosActive(value: boolean): void {
  STATE.kairosActive = value
}

export function getStrictToolResultPairing(): boolean {
  return STATE.strictToolResultPairing
}

export function setStrictToolResultPairing(value: boolean): void {
  STATE.strictToolResultPairing = value
}

// Field name 'userMsgOptIn' avoids excluded-string substrings ('BriefTool',
// 'SendUserMessage' — case-insensitive). All callers are inside feature()
// guards so these accessors don't need their own (matches getKairosActive).
export function getUserMsgOptIn(): boolean {
  return STATE.userMsgOptIn
}

export function setUserMsgOptIn(value: boolean): void {
  STATE.userMsgOptIn = value
}

/** Undefined until setSessionSource() has been called. */
export function getSessionSource(): string | undefined {
  return STATE.sessionSource
}

export function setSessionSource(source: string): void {
  STATE.sessionSource = source
}

export function getQuestionPreviewFormat(): 'markdown' | 'html' | undefined {
  return STATE.questionPreviewFormat
}

export function setQuestionPreviewFormat(format: 'markdown' | 'html'): void {
  STATE.questionPreviewFormat = format
}
+
/** Returns the live agent→color Map; mutations are visible globally. */
export function getAgentColorMap(): Map<string, AgentColorName> {
  return STATE.agentColorMap
}

export function getFlagSettingsPath(): string | undefined {
  return STATE.flagSettingsPath
}

export function setFlagSettingsPath(path: string | undefined): void {
  STATE.flagSettingsPath = path
}

export function getFlagSettingsInline(): Record<string, unknown> | null {
  return STATE.flagSettingsInline
}

export function setFlagSettingsInline(
  settings: Record<string, unknown> | null,
): void {
  STATE.flagSettingsInline = settings
}

// The three token/key accessors below distinguish "never set" (undefined)
// from an explicit null in their return types — presumably null means "set
// but absent"; confirm against the code that reads fds at startup.
export function getSessionIngressToken(): string | null | undefined {
  return STATE.sessionIngressToken
}

export function setSessionIngressToken(token: string | null): void {
  STATE.sessionIngressToken = token
}

export function getOauthTokenFromFd(): string | null | undefined {
  return STATE.oauthTokenFromFd
}

export function setOauthTokenFromFd(token: string | null): void {
  STATE.oauthTokenFromFd = token
}

export function getApiKeyFromFd(): string | null | undefined {
  return STATE.apiKeyFromFd
}

export function setApiKeyFromFd(key: string | null): void {
  STATE.apiKeyFromFd = key
}
+
// ---- Debug snapshots of the most recent requests. Params and messages are
// stored separately (messages can be large). ----

export function setLastAPIRequest(
  params: Omit<BetaMessageStreamParams, 'messages'> | null,
): void {
  STATE.lastAPIRequest = params
}

export function getLastAPIRequest(): Omit<
  BetaMessageStreamParams,
  'messages'
> | null {
  return STATE.lastAPIRequest
}

export function setLastAPIRequestMessages(
  messages: BetaMessageStreamParams['messages'] | null,
): void {
  STATE.lastAPIRequestMessages = messages
}

export function getLastAPIRequestMessages():
  | BetaMessageStreamParams['messages']
  | null {
  return STATE.lastAPIRequestMessages
}

export function setLastClassifierRequests(requests: unknown[] | null): void {
  STATE.lastClassifierRequests = requests
}

export function getLastClassifierRequests(): unknown[] | null {
  return STATE.lastClassifierRequests
}

/** Cache of assembled CLAUDE.md content; null when not yet computed. */
export function setCachedClaudeMdContent(content: string | null): void {
  STATE.cachedClaudeMdContent = content
}

export function getCachedClaudeMdContent(): string | null {
  return STATE.cachedClaudeMdContent
}
+
+export function addToInMemoryErrorLog(errorInfo: {
+  error: string
+  timestamp: string
+}): void {
+  const MAX_IN_MEMORY_ERRORS = 100
+  if (STATE.inMemoryErrorLog.length >= MAX_IN_MEMORY_ERRORS) {
+    STATE.inMemoryErrorLog.shift() // Remove oldest error
+  }
+  STATE.inMemoryErrorLog.push(errorInfo)
+}
+
/** Setting sources (user/project/etc.) permitted for this session. */
export function getAllowedSettingSources(): SettingSource[] {
  return STATE.allowedSettingSources
}

export function setAllowedSettingSources(sources: SettingSource[]): void {
  STATE.allowedSettingSources = sources
}
+
+export function preferThirdPartyAuthentication(): boolean {
+  // IDE extension should behave as 1P for authentication reasons.
+  return getIsNonInteractiveSession() && STATE.clientType !== 'claude-vscode'
+}
+
// ---- Plugin / permission / scheduling flags ----

export function setInlinePlugins(plugins: Array<string>): void {
  STATE.inlinePlugins = plugins
}

export function getInlinePlugins(): Array<string> {
  return STATE.inlinePlugins
}

export function setChromeFlagOverride(value: boolean | undefined): void {
  STATE.chromeFlagOverride = value
}

export function getChromeFlagOverride(): boolean | undefined {
  return STATE.chromeFlagOverride
}

// Also drops the settings cache (resetSettingsCache) — presumably the plugin
// selection feeds settings resolution, so stale settings must be recomputed.
export function setUseCoworkPlugins(value: boolean): void {
  STATE.useCoworkPlugins = value
  resetSettingsCache()
}

export function getUseCoworkPlugins(): boolean {
  return STATE.useCoworkPlugins
}

export function setSessionBypassPermissionsMode(enabled: boolean): void {
  STATE.sessionBypassPermissionsMode = enabled
}

export function getSessionBypassPermissionsMode(): boolean {
  return STATE.sessionBypassPermissionsMode
}

export function setScheduledTasksEnabled(enabled: boolean): void {
  STATE.scheduledTasksEnabled = enabled
}

export function getScheduledTasksEnabled(): boolean {
  return STATE.scheduledTasksEnabled
}
+
/** A session-scoped scheduled task; lives only in memory (see agentId note). */
export type SessionCronTask = {
  id: string
  // Cron expression controlling when the task fires.
  cron: string
  // Prompt text delivered when the task fires.
  prompt: string
  createdAt: number
  recurring?: boolean
  /**
   * When set, the task was created by an in-process teammate (not the team lead).
   * The scheduler routes fires to that teammate's pendingUserMessages queue
   * instead of the main REPL command queue. Session-only — never written to disk.
   */
  agentId?: string
}

/** Returns the live task array; callers should treat it as read-only. */
export function getSessionCronTasks(): SessionCronTask[] {
  return STATE.sessionCronTasks
}

// NOTE(review): appends without de-duplicating by id — confirm callers
// guarantee unique ids.
export function addSessionCronTask(task: SessionCronTask): void {
  STATE.sessionCronTasks.push(task)
}
+
+/**
+ * Returns the number of tasks actually removed. Callers use this to skip
+ * downstream work (e.g. the disk read in removeCronTasks) when all ids
+ * were accounted for here.
+ */
+export function removeSessionCronTasks(ids: readonly string[]): number {
+  if (ids.length === 0) return 0
+  const idSet = new Set(ids)
+  const remaining = STATE.sessionCronTasks.filter(t => !idSet.has(t.id))
+  const removed = STATE.sessionCronTasks.length - remaining.length
+  if (removed === 0) return 0
+  STATE.sessionCronTasks = remaining
+  return removed
+}
+
// ---- Session trust / persistence / plan-mode flags ----

export function setSessionTrustAccepted(accepted: boolean): void {
  STATE.sessionTrustAccepted = accepted
}

export function getSessionTrustAccepted(): boolean {
  return STATE.sessionTrustAccepted
}

export function setSessionPersistenceDisabled(disabled: boolean): void {
  STATE.sessionPersistenceDisabled = disabled
}

export function isSessionPersistenceDisabled(): boolean {
  return STATE.sessionPersistenceDisabled
}

export function hasExitedPlanModeInSession(): boolean {
  return STATE.hasExitedPlanMode
}

export function setHasExitedPlanMode(value: boolean): void {
  STATE.hasExitedPlanMode = value
}

/** True when a plan_mode_exit attachment should accompany the next message
 *  (set by handlePlanModeTransition below). */
export function needsPlanModeExitAttachment(): boolean {
  return STATE.needsPlanModeExitAttachment
}

export function setNeedsPlanModeExitAttachment(value: boolean): void {
  STATE.needsPlanModeExitAttachment = value
}
+
+export function handlePlanModeTransition(
+  fromMode: string,
+  toMode: string,
+): void {
+  // If switching TO plan mode, clear any pending exit attachment
+  // This prevents sending both plan_mode and plan_mode_exit when user toggles quickly
+  if (toMode === 'plan' && fromMode !== 'plan') {
+    STATE.needsPlanModeExitAttachment = false
+  }
+
+  // If switching out of plan mode, trigger the plan_mode_exit attachment
+  if (fromMode === 'plan' && toMode !== 'plan') {
+    STATE.needsPlanModeExitAttachment = true
+  }
+}
+
/** True when an auto_mode_exit attachment should accompany the next message
 *  (set by handleAutoModeTransition below). */
export function needsAutoModeExitAttachment(): boolean {
  return STATE.needsAutoModeExitAttachment
}

export function setNeedsAutoModeExitAttachment(value: boolean): void {
  STATE.needsAutoModeExitAttachment = value
}
+
+export function handleAutoModeTransition(
+  fromMode: string,
+  toMode: string,
+): void {
+  // Auto↔plan transitions are handled by prepareContextForPlanMode (auto may
+  // stay active through plan if opted in) and ExitPlanMode (restores mode).
+  // Skip both directions so this function only handles direct auto transitions.
+  if (
+    (fromMode === 'auto' && toMode === 'plan') ||
+    (fromMode === 'plan' && toMode === 'auto')
+  ) {
+    return
+  }
+  const fromIsAuto = fromMode === 'auto'
+  const toIsAuto = toMode === 'auto'
+
+  // If switching TO auto mode, clear any pending exit attachment
+  // This prevents sending both auto_mode and auto_mode_exit when user toggles quickly
+  if (toIsAuto && !fromIsAuto) {
+    STATE.needsAutoModeExitAttachment = false
+  }
+
+  // If switching out of auto mode, trigger the auto_mode_exit attachment
+  if (fromIsAuto && !toIsAuto) {
+    STATE.needsAutoModeExitAttachment = true
+  }
+}
+
// LSP plugin recommendation session tracking
export function hasShownLspRecommendationThisSession(): boolean {
  return STATE.lspRecommendationShownThisSession
}

export function setLspRecommendationShownThisSession(value: boolean): void {
  STATE.lspRecommendationShownThisSession = value
}

// SDK init event state
/** JSON schema supplied with the SDK init event; null until set. */
export function setInitJsonSchema(schema: Record<string, unknown>): void {
  STATE.initJsonSchema = schema
}

export function getInitJsonSchema(): Record<string, unknown> | null {
  return STATE.initJsonSchema
}
+
+export function registerHookCallbacks(
+  hooks: Partial<Record<HookEvent, RegisteredHookMatcher[]>>,
+): void {
+  if (!STATE.registeredHooks) {
+    STATE.registeredHooks = {}
+  }
+
+  // `registerHookCallbacks` may be called multiple times, so we need to merge (not overwrite)
+  for (const [event, matchers] of Object.entries(hooks)) {
+    const eventKey = event as HookEvent
+    if (!STATE.registeredHooks[eventKey]) {
+      STATE.registeredHooks[eventKey] = []
+    }
+    STATE.registeredHooks[eventKey]!.push(...matchers)
+  }
+}
+
/** Null when no hooks have been registered (or after clearRegisteredHooks). */
export function getRegisteredHooks(): Partial<
  Record<HookEvent, RegisteredHookMatcher[]>
> | null {
  return STATE.registeredHooks
}

export function clearRegisteredHooks(): void {
  STATE.registeredHooks = null
}
+
+export function clearRegisteredPluginHooks(): void {
+  if (!STATE.registeredHooks) {
+    return
+  }
+
+  const filtered: Partial<Record<HookEvent, RegisteredHookMatcher[]>> = {}
+  for (const [event, matchers] of Object.entries(STATE.registeredHooks)) {
+    // Keep only callback hooks (those without pluginRoot)
+    const callbackHooks = matchers.filter(m => !('pluginRoot' in m))
+    if (callbackHooks.length > 0) {
+      filtered[event as HookEvent] = callbackHooks
+    }
+  }
+
+  STATE.registeredHooks = Object.keys(filtered).length > 0 ? filtered : null
+}
+
/** Clear SDK init-event state (schema + registered hooks) together. */
export function resetSdkInitState(): void {
  STATE.initJsonSchema = null
  STATE.registeredHooks = null
}

/** Returns the live Map; mutations are visible globally. */
export function getPlanSlugCache(): Map<string, string> {
  return STATE.planSlugCache
}

/** Returns the live Set; mutations are visible globally. */
export function getSessionCreatedTeams(): Set<string> {
  return STATE.sessionCreatedTeams
}
+
+// Teleported session tracking for reliability logging
+export function setTeleportedSessionInfo(info: {
+  sessionId: string | null
+}): void {
+  STATE.teleportedSessionInfo = {
+    isTeleported: true,
+    hasLoggedFirstMessage: false,
+    sessionId: info.sessionId,
+  }
+}
+
+export function getTeleportedSessionInfo(): {
+  isTeleported: boolean
+  hasLoggedFirstMessage: boolean
+  sessionId: string | null
+} | null {
+  return STATE.teleportedSessionInfo
+}
+
+export function markFirstTeleportMessageLogged(): void {
+  if (STATE.teleportedSessionInfo) {
+    STATE.teleportedSessionInfo.hasLoggedFirstMessage = true
+  }
+}
+
+// Invoked skills tracking for preservation across compaction
+export type InvokedSkillInfo = {
+  skillName: string
+  skillPath: string
+  content: string
+  invokedAt: number
+  agentId: string | null
+}
+
+export function addInvokedSkill(
+  skillName: string,
+  skillPath: string,
+  content: string,
+  agentId: string | null = null,
+): void {
+  const key = `${agentId ?? ''}:${skillName}`
+  STATE.invokedSkills.set(key, {
+    skillName,
+    skillPath,
+    content,
+    invokedAt: Date.now(),
+    agentId,
+  })
+}
+
+export function getInvokedSkills(): Map<string, InvokedSkillInfo> {
+  return STATE.invokedSkills
+}
+
+export function getInvokedSkillsForAgent(
+  agentId: string | undefined | null,
+): Map<string, InvokedSkillInfo> {
+  const normalizedId = agentId ?? null
+  const filtered = new Map<string, InvokedSkillInfo>()
+  for (const [key, skill] of STATE.invokedSkills) {
+    if (skill.agentId === normalizedId) {
+      filtered.set(key, skill)
+    }
+  }
+  return filtered
+}
+
+export function clearInvokedSkills(
+  preservedAgentIds?: ReadonlySet<string>,
+): void {
+  if (!preservedAgentIds || preservedAgentIds.size === 0) {
+    STATE.invokedSkills.clear()
+    return
+  }
+  for (const [key, skill] of STATE.invokedSkills) {
+    if (skill.agentId === null || !preservedAgentIds.has(skill.agentId)) {
+      STATE.invokedSkills.delete(key)
+    }
+  }
+}
+
+export function clearInvokedSkillsForAgent(agentId: string): void {
+  for (const [key, skill] of STATE.invokedSkills) {
+    if (skill.agentId === agentId) {
+      STATE.invokedSkills.delete(key)
+    }
+  }
+}
+
+// Slow operations tracking for dev bar
+const MAX_SLOW_OPERATIONS = 10
+const SLOW_OPERATION_TTL_MS = 10000
+
+export function addSlowOperation(operation: string, durationMs: number): void {
+  if (process.env.USER_TYPE !== 'ant') return
+  // Skip tracking for editor sessions (user editing a prompt file in $EDITOR)
+  // These are intentionally slow since the user is drafting text
+  if (operation.includes('exec') && operation.includes('claude-prompt-')) {
+    return
+  }
+  const now = Date.now()
+  // Remove stale operations
+  STATE.slowOperations = STATE.slowOperations.filter(
+    op => now - op.timestamp < SLOW_OPERATION_TTL_MS,
+  )
+  // Add new operation
+  STATE.slowOperations.push({ operation, durationMs, timestamp: now })
+  // Keep only the most recent operations
+  if (STATE.slowOperations.length > MAX_SLOW_OPERATIONS) {
+    STATE.slowOperations = STATE.slowOperations.slice(-MAX_SLOW_OPERATIONS)
+  }
+}
+
+const EMPTY_SLOW_OPERATIONS: ReadonlyArray<{
+  operation: string
+  durationMs: number
+  timestamp: number
+}> = []
+
+export function getSlowOperations(): ReadonlyArray<{
+  operation: string
+  durationMs: number
+  timestamp: number
+}> {
+  // Most common case: nothing tracked. Return a stable reference so the
+  // caller's setState() can bail via Object.is instead of re-rendering at 2fps.
+  if (STATE.slowOperations.length === 0) {
+    return EMPTY_SLOW_OPERATIONS
+  }
+  const now = Date.now()
+  // Only allocate a new array when something actually expired; otherwise keep
+  // the reference stable across polls while ops are still fresh.
+  if (
+    STATE.slowOperations.some(op => now - op.timestamp >= SLOW_OPERATION_TTL_MS)
+  ) {
+    STATE.slowOperations = STATE.slowOperations.filter(
+      op => now - op.timestamp < SLOW_OPERATION_TTL_MS,
+    )
+    if (STATE.slowOperations.length === 0) {
+      return EMPTY_SLOW_OPERATIONS
+    }
+  }
+  // Safe to return directly: addSlowOperation() reassigns STATE.slowOperations
+  // before pushing, so the array held in React state is never mutated.
+  return STATE.slowOperations
+}
+
export function getMainThreadAgentType(): string | undefined {
  return STATE.mainThreadAgentType
}

export function setMainThreadAgentType(agentType: string | undefined): void {
  STATE.mainThreadAgentType = agentType
}

export function getIsRemoteMode(): boolean {
  return STATE.isRemoteMode
}

export function setIsRemoteMode(value: boolean): void {
  STATE.isRemoteMode = value
}

// System prompt section accessors

/** Returns the live cache Map; mutations are visible globally. */
export function getSystemPromptSectionCache(): Map<string, string | null> {
  return STATE.systemPromptSectionCache
}

// A null value is a cached "this section resolves to nothing" result,
// distinct from the key being absent.
export function setSystemPromptSectionCacheEntry(
  name: string,
  value: string | null,
): void {
  STATE.systemPromptSectionCache.set(name, value)
}

export function clearSystemPromptSectionState(): void {
  STATE.systemPromptSectionCache.clear()
}

// Last emitted date accessors (for detecting midnight date changes)

export function getLastEmittedDate(): string | null {
  return STATE.lastEmittedDate
}

export function setLastEmittedDate(date: string | null): void {
  STATE.lastEmittedDate = date
}
+}
+
export function getAdditionalDirectoriesForClaudeMd(): string[] {
  return STATE.additionalDirectoriesForClaudeMd
}

export function setAdditionalDirectoriesForClaudeMd(
  directories: string[],
): void {
  STATE.additionalDirectoriesForClaudeMd = directories
}

export function getAllowedChannels(): ChannelEntry[] {
  return STATE.allowedChannels
}

export function setAllowedChannels(entries: ChannelEntry[]): void {
  STATE.allowedChannels = entries
}

export function getHasDevChannels(): boolean {
  return STATE.hasDevChannels
}

export function setHasDevChannels(value: boolean): void {
  STATE.hasDevChannels = value
}

// ---- Prompt-cache eligibility. null appears to mean "not yet determined" —
// confirm against the code that computes these. ----

export function getPromptCache1hAllowlist(): string[] | null {
  return STATE.promptCache1hAllowlist
}

export function setPromptCache1hAllowlist(allowlist: string[] | null): void {
  STATE.promptCache1hAllowlist = allowlist
}

export function getPromptCache1hEligible(): boolean | null {
  return STATE.promptCache1hEligible
}

export function setPromptCache1hEligible(eligible: boolean | null): void {
  STATE.promptCache1hEligible = eligible
}

// ---- Beta header latches. null = not yet evaluated for this conversation;
// reset together by clearBetaHeaderLatches() below. ----

export function getAfkModeHeaderLatched(): boolean | null {
  return STATE.afkModeHeaderLatched
}

export function setAfkModeHeaderLatched(v: boolean): void {
  STATE.afkModeHeaderLatched = v
}

export function getFastModeHeaderLatched(): boolean | null {
  return STATE.fastModeHeaderLatched
}

export function setFastModeHeaderLatched(v: boolean): void {
  STATE.fastModeHeaderLatched = v
}

export function getCacheEditingHeaderLatched(): boolean | null {
  return STATE.cacheEditingHeaderLatched
}

export function setCacheEditingHeaderLatched(v: boolean): void {
  STATE.cacheEditingHeaderLatched = v
}

export function getThinkingClearLatched(): boolean | null {
  return STATE.thinkingClearLatched
}

export function setThinkingClearLatched(v: boolean): void {
  STATE.thinkingClearLatched = v
}
+
+/**
+ * Reset beta header latches to null. Called on /clear and /compact so a
+ * fresh conversation gets fresh header evaluation.
+ */
+export function clearBetaHeaderLatches(): void {
+  STATE.afkModeHeaderLatched = null
+  STATE.fastModeHeaderLatched = null
+  STATE.cacheEditingHeaderLatched = null
+  STATE.thinkingClearLatched = null
+}
+
/** Id of the in-flight prompt; null between prompts (see resetCostState). */
export function getPromptId(): string | null {
  return STATE.promptId
}

export function setPromptId(id: string | null): void {
  STATE.promptId = id
}
+

+ 539 - 0
src/bridge/bridgeApi.ts

@@ -0,0 +1,539 @@
+import axios from 'axios'
+
+import { debugBody, extractErrorDetail } from './debugUtils.js'
+import {
+  BRIDGE_LOGIN_INSTRUCTION,
+  type BridgeApiClient,
+  type BridgeConfig,
+  type PermissionResponseEvent,
+  type WorkResponse,
+} from './types.js'
+
/** Constructor dependencies for createBridgeApiClient. */
type BridgeApiDeps = {
  /** API origin prefixed to every `/v1/...` endpoint path. */
  baseUrl: string
  /** Current user OAuth access token, or undefined when logged out. */
  getAccessToken: () => string | undefined
  /** Sent as x-environment-runner-version on every request. */
  runnerVersion: string
  /** Optional sink for `[bridge:api]` debug lines. */
  onDebug?: (msg: string) => void
  /**
   * Called on 401 to attempt OAuth token refresh. Returns true if refreshed,
   * in which case the request is retried once. Injected because
   * handleOAuth401Error from utils/auth.ts transitively pulls in config.ts →
   * file.ts → permissions/filesystem.ts → sessionStorage.ts → commands.ts
   * (~1300 modules). Daemon callers using env-var tokens omit this — their
   * tokens don't refresh, so 401 goes straight to BridgeFatalError.
   */
  onAuth401?: (staleAccessToken: string) => Promise<boolean>
  /**
   * Returns the trusted device token to send as X-Trusted-Device-Token on
   * bridge API calls. Bridge sessions have SecurityTier=ELEVATED on the
   * server (CCR v2); when the server's enforcement flag is on,
   * ConnectBridgeWorker requires a trusted device at JWT-issuance.
   * Optional — when absent or returning undefined, the header is omitted
   * and the server falls through to its flag-off/no-op path. The CLI-side
   * gate is tengu_sessions_elevated_auth_enforcement (see trustedDevice.ts).
   */
  getTrustedDeviceToken?: () => string | undefined
}
+
+const BETA_HEADER = 'environments-2025-11-01'
+
+/** Allowlist pattern for server-provided IDs used in URL path segments. */
+const SAFE_ID_PATTERN = /^[a-zA-Z0-9_-]+$/
+
+/**
+ * Validate that a server-provided ID is safe to interpolate into a URL path.
+ * Prevents path traversal (e.g. `../../admin`) and injection via IDs that
+ * contain slashes, dots, or other special characters.
+ */
+export function validateBridgeId(id: string, label: string): string {
+  if (!id || !SAFE_ID_PATTERN.test(id)) {
+    throw new Error(`Invalid ${label}: contains unsafe characters`)
+  }
+  return id
+}
+
+/** Fatal bridge errors that should not be retried (e.g. auth failures). */
+export class BridgeFatalError extends Error {
+  readonly status: number
+  /** Server-provided error type, e.g. "environment_expired". */
+  readonly errorType: string | undefined
+  constructor(message: string, status: number, errorType?: string) {
+    super(message)
+    this.name = 'BridgeFatalError'
+    this.status = status
+    this.errorType = errorType
+  }
+}
+
/**
 * Build the HTTP client for the CCR bridge API.
 *
 * Every request uses `validateStatus: status < 500`, so 4xx responses
 * resolve normally and are routed through handleErrorStatus; 5xx and
 * network failures reject as axios errors and are left to callers' retry
 * logic. Calls authenticate with one of three credentials: the user OAuth
 * token (via withOAuthRetry), the environment secret (pollForWork), or a
 * per-work session token (ack/heartbeat/events).
 */
export function createBridgeApiClient(deps: BridgeApiDeps): BridgeApiClient {
  /** Forward a line to the injected debug sink, if any. */
  function debug(msg: string): void {
    deps.onDebug?.(msg)
  }

  // Streak counter for "no work" poll responses; only the first and every
  // EMPTY_POLL_LOG_INTERVAL-th empty poll is logged (see pollForWork).
  let consecutiveEmptyPolls = 0
  const EMPTY_POLL_LOG_INTERVAL = 100

  /**
   * Common request headers. `accessToken` is whatever credential the
   * caller authenticates with (OAuth token, environment secret, or session
   * token). The trusted-device header is added only when a token exists.
   */
  function getHeaders(accessToken: string): Record<string, string> {
    const headers: Record<string, string> = {
      Authorization: `Bearer ${accessToken}`,
      'Content-Type': 'application/json',
      'anthropic-version': '2023-06-01',
      'anthropic-beta': BETA_HEADER,
      'x-environment-runner-version': deps.runnerVersion,
    }
    const deviceToken = deps.getTrustedDeviceToken?.()
    if (deviceToken) {
      headers['X-Trusted-Device-Token'] = deviceToken
    }
    return headers
  }

  /** Current OAuth token, or throw the login instruction when logged out. */
  function resolveAuth(): string {
    const accessToken = deps.getAccessToken()
    if (!accessToken) {
      throw new Error(BRIDGE_LOGIN_INSTRUCTION)
    }
    return accessToken
  }

  /**
   * Execute an OAuth-authenticated request with a single retry on 401.
   * On 401, attempts token refresh via handleOAuth401Error (same pattern as
   * withRetry.ts for v1/messages). If refresh succeeds, retries the request
   * once with the new token. If refresh fails or the retry also returns 401,
   * the 401 response is returned for handleErrorStatus to throw BridgeFatalError.
   */
  async function withOAuthRetry<T>(
    fn: (accessToken: string) => Promise<{ status: number; data: T }>,
    context: string,
  ): Promise<{ status: number; data: T }> {
    const accessToken = resolveAuth()
    const response = await fn(accessToken)

    if (response.status !== 401) {
      return response
    }

    if (!deps.onAuth401) {
      debug(`[bridge:api] ${context}: 401 received, no refresh handler`)
      return response
    }

    // Attempt token refresh — matches the pattern in withRetry.ts
    debug(`[bridge:api] ${context}: 401 received, attempting token refresh`)
    const refreshed = await deps.onAuth401(accessToken)
    if (refreshed) {
      debug(`[bridge:api] ${context}: Token refreshed, retrying request`)
      const newToken = resolveAuth()
      const retryResponse = await fn(newToken)
      if (retryResponse.status !== 401) {
        return retryResponse
      }
      debug(`[bridge:api] ${context}: Retry after refresh also got 401`)
    } else {
      debug(`[bridge:api] ${context}: Token refresh failed`)
    }

    // Refresh failed — return 401 for handleErrorStatus to throw
    return response
  }

  return {
    async registerBridgeEnvironment(
      config: BridgeConfig,
    ): Promise<{ environment_id: string; environment_secret: string }> {
      debug(
        `[bridge:api] POST /v1/environments/bridge bridgeId=${config.bridgeId}`,
      )

      const response = await withOAuthRetry(
        (token: string) =>
          axios.post<{
            environment_id: string
            environment_secret: string
          }>(
            `${deps.baseUrl}/v1/environments/bridge`,
            {
              machine_name: config.machineName,
              directory: config.dir,
              branch: config.branch,
              git_repo_url: config.gitRepoUrl,
              // Advertise session capacity so claude.ai/code can show
              // "2/4 sessions" badges and only block the picker when
              // actually at capacity. Backends that don't yet accept
              // this field will silently ignore it.
              max_sessions: config.maxSessions,
              // worker_type lets claude.ai filter environments by origin
              // (e.g. assistant picker only shows assistant-mode workers).
              // Desktop cowork app sends "cowork"; we send a distinct value.
              metadata: { worker_type: config.workerType },
              // Idempotent re-registration: if we have a backend-issued
              // environment_id from a prior session (--session-id resume),
              // send it back so the backend reattaches instead of creating
              // a new env. The backend may still hand back a fresh ID if
              // the old one expired — callers must compare the response.
              ...(config.reuseEnvironmentId && {
                environment_id: config.reuseEnvironmentId,
              }),
            },
            {
              headers: getHeaders(token),
              timeout: 15_000,
              validateStatus: status => status < 500,
            },
          ),
        'Registration',
      )

      handleErrorStatus(response.status, response.data, 'Registration')
      debug(
        `[bridge:api] POST /v1/environments/bridge -> ${response.status} environment_id=${response.data.environment_id}`,
      )
      // NOTE: the ">>>" body log omits reuseEnvironmentId; the "<<<" log
      // includes the response body (environment_secret) — debugBody is
      // presumably responsible for any redaction. TODO confirm.
      debug(
        `[bridge:api] >>> ${debugBody({ machine_name: config.machineName, directory: config.dir, branch: config.branch, git_repo_url: config.gitRepoUrl, max_sessions: config.maxSessions, metadata: { worker_type: config.workerType } })}`,
      )
      debug(`[bridge:api] <<< ${debugBody(response.data)}`)
      return response.data
    },

    // Authenticates with the environment secret, not the user OAuth token —
    // hence no withOAuthRetry wrapper here.
    async pollForWork(
      environmentId: string,
      environmentSecret: string,
      signal?: AbortSignal,
      reclaimOlderThanMs?: number,
    ): Promise<WorkResponse | null> {
      validateBridgeId(environmentId, 'environmentId')

      // Save and reset so errors break the "consecutive empty" streak.
      // Restored below when the response is truly empty.
      const prevEmptyPolls = consecutiveEmptyPolls
      consecutiveEmptyPolls = 0

      const response = await axios.get<WorkResponse | null>(
        `${deps.baseUrl}/v1/environments/${environmentId}/work/poll`,
        {
          headers: getHeaders(environmentSecret),
          params:
            reclaimOlderThanMs !== undefined
              ? { reclaim_older_than_ms: reclaimOlderThanMs }
              : undefined,
          timeout: 10_000,
          signal,
          validateStatus: status => status < 500,
        },
      )

      handleErrorStatus(response.status, response.data, 'Poll')

      // Empty body or null = no work available
      if (!response.data) {
        consecutiveEmptyPolls = prevEmptyPolls + 1
        if (
          consecutiveEmptyPolls === 1 ||
          consecutiveEmptyPolls % EMPTY_POLL_LOG_INTERVAL === 0
        ) {
          debug(
            `[bridge:api] GET .../work/poll -> ${response.status} (no work, ${consecutiveEmptyPolls} consecutive empty polls)`,
          )
        }
        return null
      }

      debug(
        `[bridge:api] GET .../work/poll -> ${response.status} workId=${response.data.id} type=${response.data.data?.type}${response.data.data?.id ? ` sessionId=${response.data.data.id}` : ''}`,
      )
      debug(`[bridge:api] <<< ${debugBody(response.data)}`)
      return response.data
    },

    // Authenticates with the per-work session token.
    async acknowledgeWork(
      environmentId: string,
      workId: string,
      sessionToken: string,
    ): Promise<void> {
      validateBridgeId(environmentId, 'environmentId')
      validateBridgeId(workId, 'workId')

      debug(`[bridge:api] POST .../work/${workId}/ack`)

      const response = await axios.post(
        `${deps.baseUrl}/v1/environments/${environmentId}/work/${workId}/ack`,
        {},
        {
          headers: getHeaders(sessionToken),
          timeout: 10_000,
          validateStatus: s => s < 500,
        },
      )

      handleErrorStatus(response.status, response.data, 'Acknowledge')
      debug(`[bridge:api] POST .../work/${workId}/ack -> ${response.status}`)
    },

    async stopWork(
      environmentId: string,
      workId: string,
      force: boolean,
    ): Promise<void> {
      validateBridgeId(environmentId, 'environmentId')
      validateBridgeId(workId, 'workId')

      debug(`[bridge:api] POST .../work/${workId}/stop force=${force}`)

      const response = await withOAuthRetry(
        (token: string) =>
          axios.post(
            `${deps.baseUrl}/v1/environments/${environmentId}/work/${workId}/stop`,
            { force },
            {
              headers: getHeaders(token),
              timeout: 10_000,
              validateStatus: s => s < 500,
            },
          ),
        'StopWork',
      )

      handleErrorStatus(response.status, response.data, 'StopWork')
      debug(`[bridge:api] POST .../work/${workId}/stop -> ${response.status}`)
    },

    async deregisterEnvironment(environmentId: string): Promise<void> {
      validateBridgeId(environmentId, 'environmentId')

      debug(`[bridge:api] DELETE /v1/environments/bridge/${environmentId}`)

      const response = await withOAuthRetry(
        (token: string) =>
          axios.delete(
            `${deps.baseUrl}/v1/environments/bridge/${environmentId}`,
            {
              headers: getHeaders(token),
              timeout: 10_000,
              validateStatus: s => s < 500,
            },
          ),
        'Deregister',
      )

      handleErrorStatus(response.status, response.data, 'Deregister')
      debug(
        `[bridge:api] DELETE /v1/environments/bridge/${environmentId} -> ${response.status}`,
      )
    },

    async archiveSession(sessionId: string): Promise<void> {
      validateBridgeId(sessionId, 'sessionId')

      debug(`[bridge:api] POST /v1/sessions/${sessionId}/archive`)

      const response = await withOAuthRetry(
        (token: string) =>
          axios.post(
            `${deps.baseUrl}/v1/sessions/${sessionId}/archive`,
            {},
            {
              headers: getHeaders(token),
              timeout: 10_000,
              validateStatus: s => s < 500,
            },
          ),
        'ArchiveSession',
      )

      // 409 = already archived (idempotent, not an error)
      if (response.status === 409) {
        debug(
          `[bridge:api] POST /v1/sessions/${sessionId}/archive -> 409 (already archived)`,
        )
        return
      }

      handleErrorStatus(response.status, response.data, 'ArchiveSession')
      debug(
        `[bridge:api] POST /v1/sessions/${sessionId}/archive -> ${response.status}`,
      )
    },

    async reconnectSession(
      environmentId: string,
      sessionId: string,
    ): Promise<void> {
      validateBridgeId(environmentId, 'environmentId')
      validateBridgeId(sessionId, 'sessionId')

      debug(
        `[bridge:api] POST /v1/environments/${environmentId}/bridge/reconnect session_id=${sessionId}`,
      )

      const response = await withOAuthRetry(
        (token: string) =>
          axios.post(
            `${deps.baseUrl}/v1/environments/${environmentId}/bridge/reconnect`,
            { session_id: sessionId },
            {
              headers: getHeaders(token),
              timeout: 10_000,
              validateStatus: s => s < 500,
            },
          ),
        'ReconnectSession',
      )

      handleErrorStatus(response.status, response.data, 'ReconnectSession')
      debug(`[bridge:api] POST .../bridge/reconnect -> ${response.status}`)
    },

    // Authenticates with the per-work session token.
    // NOTE(review): the debug line below reads lease_extended/state off
    // response.data — assumes the server always returns 200 with a JSON
    // body here (a 204 would surface undefined fields). TODO confirm.
    async heartbeatWork(
      environmentId: string,
      workId: string,
      sessionToken: string,
    ): Promise<{ lease_extended: boolean; state: string }> {
      validateBridgeId(environmentId, 'environmentId')
      validateBridgeId(workId, 'workId')

      debug(`[bridge:api] POST .../work/${workId}/heartbeat`)

      const response = await axios.post<{
        lease_extended: boolean
        state: string
        last_heartbeat: string
        ttl_seconds: number
      }>(
        `${deps.baseUrl}/v1/environments/${environmentId}/work/${workId}/heartbeat`,
        {},
        {
          headers: getHeaders(sessionToken),
          timeout: 10_000,
          validateStatus: s => s < 500,
        },
      )

      handleErrorStatus(response.status, response.data, 'Heartbeat')
      debug(
        `[bridge:api] POST .../work/${workId}/heartbeat -> ${response.status} lease_extended=${response.data.lease_extended} state=${response.data.state}`,
      )
      return response.data
    },

    // Authenticates with the per-work session token; the event is wrapped
    // in a single-element `events` array as the endpoint expects a batch.
    async sendPermissionResponseEvent(
      sessionId: string,
      event: PermissionResponseEvent,
      sessionToken: string,
    ): Promise<void> {
      validateBridgeId(sessionId, 'sessionId')

      debug(
        `[bridge:api] POST /v1/sessions/${sessionId}/events type=${event.type}`,
      )

      const response = await axios.post(
        `${deps.baseUrl}/v1/sessions/${sessionId}/events`,
        { events: [event] },
        {
          headers: getHeaders(sessionToken),
          timeout: 10_000,
          validateStatus: s => s < 500,
        },
      )

      handleErrorStatus(
        response.status,
        response.data,
        'SendPermissionResponseEvent',
      )
      debug(
        `[bridge:api] POST /v1/sessions/${sessionId}/events -> ${response.status}`,
      )
      debug(`[bridge:api] >>> ${debugBody({ events: [event] })}`)
      debug(`[bridge:api] <<< ${debugBody(response.data)}`)
    },
  }
}
+
+function handleErrorStatus(
+  status: number,
+  data: unknown,
+  context: string,
+): void {
+  if (status === 200 || status === 204) {
+    return
+  }
+  const detail = extractErrorDetail(data)
+  const errorType = extractErrorTypeFromData(data)
+  switch (status) {
+    case 401:
+      throw new BridgeFatalError(
+        `${context}: Authentication failed (401)${detail ? `: ${detail}` : ''}. ${BRIDGE_LOGIN_INSTRUCTION}`,
+        401,
+        errorType,
+      )
+    case 403:
+      throw new BridgeFatalError(
+        isExpiredErrorType(errorType)
+          ? 'Remote Control session has expired. Please restart with `claude remote-control` or /remote-control.'
+          : `${context}: Access denied (403)${detail ? `: ${detail}` : ''}. Check your organization permissions.`,
+        403,
+        errorType,
+      )
+    case 404:
+      throw new BridgeFatalError(
+        detail ??
+          `${context}: Not found (404). Remote Control may not be available for this organization.`,
+        404,
+        errorType,
+      )
+    case 410:
+      throw new BridgeFatalError(
+        detail ??
+          'Remote Control session has expired. Please restart with `claude remote-control` or /remote-control.',
+        410,
+        errorType ?? 'environment_expired',
+      )
+    case 429:
+      throw new Error(`${context}: Rate limited (429). Polling too frequently.`)
+    default:
+      throw new Error(
+        `${context}: Failed with status ${status}${detail ? `: ${detail}` : ''}`,
+      )
+  }
+}
+
+/** Check whether an error type string indicates a session/environment expiry. */
+export function isExpiredErrorType(errorType: string | undefined): boolean {
+  if (!errorType) {
+    return false
+  }
+  return errorType.includes('expired') || errorType.includes('lifetime')
+}
+
+/**
+ * Check whether a BridgeFatalError is a suppressible 403 permission error.
+ * These are 403 errors for scopes like 'external_poll_sessions' or operations
+ * like StopWork that fail because the user's role lacks 'environments:manage'.
+ * They don't affect core functionality and shouldn't be shown to users.
+ */
+export function isSuppressible403(err: BridgeFatalError): boolean {
+  if (err.status !== 403) {
+    return false
+  }
+  return (
+    err.message.includes('external_poll_sessions') ||
+    err.message.includes('environments:manage')
+  )
+}
+
+function extractErrorTypeFromData(data: unknown): string | undefined {
+  if (data && typeof data === 'object') {
+    if (
+      'error' in data &&
+      data.error &&
+      typeof data.error === 'object' &&
+      'type' in data.error &&
+      typeof data.error.type === 'string'
+    ) {
+      return data.error.type
+    }
+  }
+  return undefined
+}

+ 48 - 0
src/bridge/bridgeConfig.ts

@@ -0,0 +1,48 @@
+/**
+ * Shared bridge auth/URL resolution. Consolidates the ant-only
+ * CLAUDE_BRIDGE_* dev overrides that were previously copy-pasted across
+ * a dozen files — inboundAttachments, BriefTool/upload, bridgeMain,
+ * initReplBridge, remoteBridgeCore, daemon workers, /rename,
+ * /remote-control.
+ *
+ * Two layers: *Override() returns the ant-only env var (or undefined);
+ * the non-Override versions fall through to the real OAuth store/config.
+ * Callers that compose with a different auth source (e.g. daemon workers
+ * using IPC auth) use the Override getters directly.
+ */
+
+import { getOauthConfig } from '../constants/oauth.js'
+import { getClaudeAIOAuthTokens } from '../utils/auth.js'
+
+/** Ant-only dev override: CLAUDE_BRIDGE_OAUTH_TOKEN, else undefined. */
+export function getBridgeTokenOverride(): string | undefined {
+  return (
+    (process.env.USER_TYPE === 'ant' &&
+      process.env.CLAUDE_BRIDGE_OAUTH_TOKEN) ||
+    undefined
+  )
+}
+
+/** Ant-only dev override: CLAUDE_BRIDGE_BASE_URL, else undefined. */
+export function getBridgeBaseUrlOverride(): string | undefined {
+  return (
+    (process.env.USER_TYPE === 'ant' && process.env.CLAUDE_BRIDGE_BASE_URL) ||
+    undefined
+  )
+}
+
+/**
+ * Access token for bridge API calls: dev override first, then the OAuth
+ * keychain. Undefined means "not logged in".
+ */
+export function getBridgeAccessToken(): string | undefined {
+  return getBridgeTokenOverride() ?? getClaudeAIOAuthTokens()?.accessToken
+}
+
+/**
+ * Base URL for bridge API calls: dev override first, then the production
+ * OAuth config. Always returns a URL.
+ */
+export function getBridgeBaseUrl(): string {
+  return getBridgeBaseUrlOverride() ?? getOauthConfig().BASE_API_URL
+}

+ 135 - 0
src/bridge/bridgeDebug.ts

@@ -0,0 +1,135 @@
+import { logForDebugging } from '../utils/debug.js'
+import { BridgeFatalError } from './bridgeApi.js'
+import type { BridgeApiClient } from './types.js'
+
+/**
+ * Ant-only fault injection for manually testing bridge recovery paths.
+ *
+ * Real failure modes this targets (BQ 2026-03-12, 7-day window):
+ *   poll 404 not_found_error   — 147K sessions/week, dead onEnvironmentLost gate
+ *   ws_closed 1002/1006        —  22K sessions/week, zombie poll after close
+ *   register transient failure —  residual: network blips during doReconnect
+ *
+ * Usage: /bridge-kick <subcommand> from the REPL while Remote Control is
+ * connected, then tail debug.log to watch the recovery machinery react.
+ *
+ * Module-level state is intentional here: one bridge per REPL process, the
+ * /bridge-kick slash command has no other way to reach into initBridgeCore's
+ * closures, and teardown clears the slot.
+ */
+
/** One-shot fault to inject on the next matching api call. */
type BridgeFault = {
  /** Which BridgeApiClient method the fault targets. */
  method:
    | 'pollForWork'
    | 'registerBridgeEnvironment'
    | 'reconnectSession'
    | 'heartbeatWork'
  /** Fatal errors go through handleErrorStatus → BridgeFatalError. Transient
   *  errors surface as plain axios rejections (5xx / network). Recovery code
   *  distinguishes the two: fatal → teardown, transient → retry/backoff. */
  kind: 'fatal' | 'transient'
  /** HTTP status carried by the injected error (BridgeFatalError.status for
   *  fatal; message text only for transient). */
  status: number
  /** Optional error type attached to a fatal BridgeFatalError. */
  errorType?: string
  /** Remaining injections. Decremented on consume; removed at 0. */
  count: number
}

/** Hooks exposed to the /bridge-kick slash command while a bridge is live. */
export type BridgeDebugHandle = {
  /** Invoke the transport's permanent-close handler directly. Tests the
   *  ws_closed → reconnectEnvironmentWithSession escalation (#22148). */
  fireClose: (code: number) => void
  /** Call reconnectEnvironmentWithSession() — same as SIGUSR2 but
   *  reachable from the slash command. */
  forceReconnect: () => void
  /** Queue a fault for the next N calls to the named api method. */
  injectFault: (fault: BridgeFault) => void
  /** Abort the at-capacity sleep so an injected poll fault lands
   *  immediately instead of up to 10min later. */
  wakePollLoop: () => void
  /** env/session IDs for the debug.log grep. */
  describe: () => string
}
+
+let debugHandle: BridgeDebugHandle | null = null
+const faultQueue: BridgeFault[] = []
+
+export function registerBridgeDebugHandle(h: BridgeDebugHandle): void {
+  debugHandle = h
+}
+
+export function clearBridgeDebugHandle(): void {
+  debugHandle = null
+  faultQueue.length = 0
+}
+
+export function getBridgeDebugHandle(): BridgeDebugHandle | null {
+  return debugHandle
+}
+
+export function injectBridgeFault(fault: BridgeFault): void {
+  faultQueue.push(fault)
+  logForDebugging(
+    `[bridge:debug] Queued fault: ${fault.method} ${fault.kind}/${fault.status}${fault.errorType ? `/${fault.errorType}` : ''} ×${fault.count}`,
+  )
+}
+
+/**
+ * Wrap a BridgeApiClient so each call first checks the fault queue. If a
+ * matching fault is queued, throw the specified error instead of calling
+ * through. Delegates everything else to the real client.
+ *
+ * Only called when USER_TYPE === 'ant' — zero overhead in external builds.
+ */
+export function wrapApiForFaultInjection(
+  api: BridgeApiClient,
+): BridgeApiClient {
+  function consume(method: BridgeFault['method']): BridgeFault | null {
+    const idx = faultQueue.findIndex(f => f.method === method)
+    if (idx === -1) return null
+    const fault = faultQueue[idx]!
+    fault.count--
+    if (fault.count <= 0) faultQueue.splice(idx, 1)
+    return fault
+  }
+
+  function throwFault(fault: BridgeFault, context: string): never {
+    logForDebugging(
+      `[bridge:debug] Injecting ${fault.kind} fault into ${context}: status=${fault.status} errorType=${fault.errorType ?? 'none'}`,
+    )
+    if (fault.kind === 'fatal') {
+      throw new BridgeFatalError(
+        `[injected] ${context} ${fault.status}`,
+        fault.status,
+        fault.errorType,
+      )
+    }
+    // Transient: mimic an axios rejection (5xx / network). No .status on
+    // the error itself — that's how the catch blocks distinguish.
+    throw new Error(`[injected transient] ${context} ${fault.status}`)
+  }
+
+  return {
+    ...api,
+    async pollForWork(envId, secret, signal, reclaimMs) {
+      const f = consume('pollForWork')
+      if (f) throwFault(f, 'Poll')
+      return api.pollForWork(envId, secret, signal, reclaimMs)
+    },
+    async registerBridgeEnvironment(config) {
+      const f = consume('registerBridgeEnvironment')
+      if (f) throwFault(f, 'Registration')
+      return api.registerBridgeEnvironment(config)
+    },
+    async reconnectSession(envId, sessionId) {
+      const f = consume('reconnectSession')
+      if (f) throwFault(f, 'ReconnectSession')
+      return api.reconnectSession(envId, sessionId)
+    },
+    async heartbeatWork(envId, workId, token) {
+      const f = consume('heartbeatWork')
+      if (f) throwFault(f, 'Heartbeat')
+      return api.heartbeatWork(envId, workId, token)
+    },
+  }
+}

+ 202 - 0
src/bridge/bridgeEnabled.ts

@@ -0,0 +1,202 @@
+import { feature } from 'bun:bundle'
+import {
+  checkGate_CACHED_OR_BLOCKING,
+  getDynamicConfig_CACHED_MAY_BE_STALE,
+  getFeatureValue_CACHED_MAY_BE_STALE,
+} from '../services/analytics/growthbook.js'
+// Namespace import breaks the bridgeEnabled → auth → config → bridgeEnabled
+// cycle — authModule.foo is a live binding, so by the time the helpers below
+// call it, auth.js is fully loaded. Previously used require() for the same
+// deferral, but require() hits a CJS cache that diverges from the ESM
+// namespace after mock.module() (daemon/auth.test.ts), breaking spyOn.
+import * as authModule from '../utils/auth.js'
+import { isEnvTruthy } from '../utils/envUtils.js'
+import { lt } from '../utils/semver.js'
+
/**
 * Runtime check for bridge mode entitlement.
 *
 * Remote Control requires a claude.ai subscription (the bridge auths to CCR
 * with the claude.ai OAuth token). isClaudeAISubscriber() excludes
 * Bedrock/Vertex/Foundry, apiKeyHelper/gateway deployments, env-var API keys,
 * and Console API logins — none of which have the OAuth token CCR needs.
 * See github.com/deshaw/anthropic-issues/issues/24.
 *
 * The `feature('BRIDGE_MODE')` guard ensures the GrowthBook string literal
 * is only referenced when bridge mode is enabled at build time.
 */
export function isBridgeEnabled(): boolean {
  // Positive ternary pattern — see docs/feature-gating.md.
  // Negative pattern (if (!feature(...)) return) does not eliminate
  // inline string literals from external builds.
  return feature('BRIDGE_MODE')
    ? isClaudeAISubscriber() &&
        getFeatureValue_CACHED_MAY_BE_STALE('tengu_ccr_bridge', false)
    : false
}

/**
 * Blocking entitlement check for Remote Control.
 *
 * Returns cached `true` immediately (fast path). If the disk cache says
 * `false` or is missing, awaits GrowthBook init and fetches the fresh
 * server value (slow path, max ~5s), then writes it to disk.
 *
 * Use at entitlement gates where a stale `false` would unfairly block access.
 * For user-facing error paths, prefer `getBridgeDisabledReason()` which gives
 * a specific diagnostic. For render-body UI visibility checks, use
 * `isBridgeEnabled()` instead.
 */
export async function isBridgeEnabledBlocking(): Promise<boolean> {
  // Same positive-ternary shape as isBridgeEnabled, for the same
  // build-time string-elimination reason.
  return feature('BRIDGE_MODE')
    ? isClaudeAISubscriber() &&
        (await checkGate_CACHED_OR_BLOCKING('tengu_ccr_bridge'))
    : false
}

/**
 * Diagnostic message for why Remote Control is unavailable, or null if
 * it's enabled. Call this instead of a bare `isBridgeEnabledBlocking()`
 * check when you need to show the user an actionable error.
 *
 * The GrowthBook gate targets on organizationUUID, which comes from
 * config.oauthAccount — populated by /api/oauth/profile during login.
 * That endpoint requires the user:profile scope. Tokens without it
 * (setup-token, CLAUDE_CODE_OAUTH_TOKEN env var, or pre-scope-expansion
 * logins) leave oauthAccount unpopulated, so the gate falls back to
 * false and users see a dead-end "not enabled" message with no hint
 * that re-login would fix it. See CC-1165 / gh-33105.
 */
export async function getBridgeDisabledReason(): Promise<string | null> {
  if (feature('BRIDGE_MODE')) {
    // Checks are ordered so the earliest failing precondition (and its
    // remediation hint) is what the user sees; the GrowthBook gate is
    // consulted only after the account prerequisites pass.
    if (!isClaudeAISubscriber()) {
      return 'Remote Control requires a claude.ai subscription. Run `claude auth login` to sign in with your claude.ai account.'
    }
    if (!hasProfileScope()) {
      return 'Remote Control requires a full-scope login token. Long-lived tokens (from `claude setup-token` or CLAUDE_CODE_OAUTH_TOKEN) are limited to inference-only for security reasons. Run `claude auth login` to use Remote Control.'
    }
    if (!getOauthAccountInfo()?.organizationUuid) {
      return 'Unable to determine your organization for Remote Control eligibility. Run `claude auth login` to refresh your account information.'
    }
    if (!(await checkGate_CACHED_OR_BLOCKING('tengu_ccr_bridge'))) {
      return 'Remote Control is not yet enabled for your account.'
    }
    return null
  }
  return 'Remote Control is not available in this build.'
}
+
+// try/catch: main.tsx:5698 calls isBridgeEnabled() while defining the Commander
+// program, before enableConfigs() runs. isClaudeAISubscriber() → getGlobalConfig()
+// throws "Config accessed before allowed" there. Pre-config, no OAuth token can
+// exist anyway — false is correct. Same swallow getFeatureValue_CACHED_MAY_BE_STALE
+// already does at growthbook.ts:775-780.
+function isClaudeAISubscriber(): boolean {
+  try {
+    return authModule.isClaudeAISubscriber()
+  } catch {
+    return false
+  }
+}
+function hasProfileScope(): boolean {
+  try {
+    return authModule.hasProfileScope()
+  } catch {
+    return false
+  }
+}
+function getOauthAccountInfo(): ReturnType<
+  typeof authModule.getOauthAccountInfo
+> {
+  try {
+    return authModule.getOauthAccountInfo()
+  } catch {
+    return undefined
+  }
+}
+
/**
 * Runtime check for the env-less (v2) REPL bridge path.
 * Returns true when the GrowthBook flag `tengu_bridge_repl_v2` is enabled.
 *
 * This gates which implementation initReplBridge uses — NOT whether bridge
 * is available at all (see isBridgeEnabled above). Daemon/print paths stay
 * on the env-based implementation regardless of this gate.
 */
export function isEnvLessBridgeEnabled(): boolean {
  // Positive ternary pattern — see docs/feature-gating.md.
  return feature('BRIDGE_MODE')
    ? getFeatureValue_CACHED_MAY_BE_STALE('tengu_bridge_repl_v2', false)
    : false
}

/**
 * Kill-switch for the `cse_*` → `session_*` client-side retag shim.
 *
 * The shim exists because compat/convert.go:27 validates TagSession and the
 * claude.ai frontend routes on `session_*`, while v2 worker endpoints hand out
 * `cse_*`. Once the server tags by environment_kind and the frontend accepts
 * `cse_*` directly, flip this to false to make toCompatSessionId a no-op.
 * Defaults to true — the shim stays active until explicitly disabled.
 */
export function isCseShimEnabled(): boolean {
  // Note the non-bridge build branch returns true (shim on), unlike the
  // other gates here which default off.
  return feature('BRIDGE_MODE')
    ? getFeatureValue_CACHED_MAY_BE_STALE(
        'tengu_bridge_repl_v2_cse_shim_enabled',
        true,
      )
    : true
}

/**
 * Returns an error message if the current CLI version is below the
 * minimum required for the v1 (env-based) Remote Control path, or null if the
 * version is fine. The v2 (env-less) path uses checkEnvLessBridgeMinVersion()
 * in envLessBridgeConfig.ts instead — the two implementations have independent
 * version floors.
 *
 * Uses cached (non-blocking) GrowthBook config. If GrowthBook hasn't
 * loaded yet, the default '0.0.0' means the check passes — a safe fallback.
 */
export function checkBridgeMinVersion(): string | null {
  // Positive pattern — see docs/feature-gating.md.
  // Negative pattern (if (!feature(...)) return) does not eliminate
  // inline string literals from external builds.
  if (feature('BRIDGE_MODE')) {
    const config = getDynamicConfig_CACHED_MAY_BE_STALE<{
      minVersion: string
    }>('tengu_bridge_min_version', { minVersion: '0.0.0' })
    // MACRO.VERSION is presumably a build-time-injected version constant —
    // confirm against the build tooling.
    if (config.minVersion && lt(MACRO.VERSION, config.minVersion)) {
      return `Your version of Claude Code (${MACRO.VERSION}) is too old for Remote Control.\nVersion ${config.minVersion} or higher is required. Run \`claude update\` to update.`
    }
  }
  return null
}

/**
 * Default for remoteControlAtStartup when the user hasn't explicitly set it.
 * When the CCR_AUTO_CONNECT build flag is present (ant-only) and the
 * tengu_cobalt_harbor GrowthBook gate is on, all sessions connect to CCR by
 * default — the user can still opt out by setting remoteControlAtStartup=false
 * in config (explicit settings always win over this default).
 *
 * Defined here rather than in config.ts to avoid a direct
 * config.ts → growthbook.ts import cycle (growthbook.ts → user.ts → config.ts).
 */
export function getCcrAutoConnectDefault(): boolean {
  return feature('CCR_AUTO_CONNECT')
    ? getFeatureValue_CACHED_MAY_BE_STALE('tengu_cobalt_harbor', false)
    : false
}

/**
 * Opt-in CCR mirror mode — every local session spawns an outbound-only
 * Remote Control session that receives forwarded events. Separate from
 * getCcrAutoConnectDefault (bidirectional Remote Control). Env var wins for
 * local opt-in; GrowthBook controls rollout.
 */
export function isCcrMirrorEnabled(): boolean {
  return feature('CCR_MIRROR')
    ? isEnvTruthy(process.env.CLAUDE_CODE_CCR_MIRROR) ||
        getFeatureValue_CACHED_MAY_BE_STALE('tengu_ccr_mirror', false)
    : false
}

+ 2999 - 0
src/bridge/bridgeMain.ts

@@ -0,0 +1,2999 @@
+import { feature } from 'bun:bundle'
+import { randomUUID } from 'crypto'
+import { hostname, tmpdir } from 'os'
+import { basename, join, resolve } from 'path'
+import { getRemoteSessionUrl } from '../constants/product.js'
+import { shutdownDatadog } from '../services/analytics/datadog.js'
+import { shutdown1PEventLogging } from '../services/analytics/firstPartyEventLogger.js'
+import { checkGate_CACHED_OR_BLOCKING } from '../services/analytics/growthbook.js'
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  logEvent,
+  logEventAsync,
+} from '../services/analytics/index.js'
+import { isInBundledMode } from '../utils/bundledMode.js'
+import { logForDebugging } from '../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
+import { isEnvTruthy, isInProtectedNamespace } from '../utils/envUtils.js'
+import { errorMessage } from '../utils/errors.js'
+import { truncateToWidth } from '../utils/format.js'
+import { logError } from '../utils/log.js'
+import { sleep } from '../utils/sleep.js'
+import { createAgentWorktree, removeAgentWorktree } from '../utils/worktree.js'
+import {
+  BridgeFatalError,
+  createBridgeApiClient,
+  isExpiredErrorType,
+  isSuppressible403,
+  validateBridgeId,
+} from './bridgeApi.js'
+import { formatDuration } from './bridgeStatusUtil.js'
+import { createBridgeLogger } from './bridgeUI.js'
+import { createCapacityWake } from './capacityWake.js'
+import { describeAxiosError } from './debugUtils.js'
+import { createTokenRefreshScheduler } from './jwtUtils.js'
+import { getPollIntervalConfig } from './pollConfig.js'
+import { toCompatSessionId, toInfraSessionId } from './sessionIdCompat.js'
+import { createSessionSpawner, safeFilenameId } from './sessionRunner.js'
+import { getTrustedDeviceToken } from './trustedDevice.js'
+import {
+  BRIDGE_LOGIN_ERROR,
+  type BridgeApiClient,
+  type BridgeConfig,
+  type BridgeLogger,
+  DEFAULT_SESSION_TIMEOUT_MS,
+  type SessionDoneStatus,
+  type SessionHandle,
+  type SessionSpawner,
+  type SessionSpawnOpts,
+  type SpawnMode,
+} from './types.js'
+import {
+  buildCCRv2SdkUrl,
+  buildSdkUrl,
+  decodeWorkSecret,
+  registerWorker,
+  sameSessionId,
+} from './workSecret.js'
+
/**
 * Timing knobs for runBridgeLoop's error handling, overridable per call
 * (defaults in DEFAULT_BACKOFF). Two independent error budgets: connection
 * errors (conn*) and all other errors (general*), each with an initial
 * delay, a cap, and a give-up horizon.
 */
export type BackoffConfig = {
  /** Initial delay after a connection error (ms). */
  connInitialMs: number
  /** Cap on the connection-error delay (ms); sleep detection uses 2× this. */
  connCapMs: number
  /** Sustained connection errors beyond this make the loop give up (ms). */
  connGiveUpMs: number
  /** Initial delay after a non-connection error (ms). */
  generalInitialMs: number
  /** Cap on the non-connection-error delay (ms). */
  generalCapMs: number
  /** Sustained non-connection errors beyond this make the loop give up (ms). */
  generalGiveUpMs: number
  /** SIGTERM→SIGKILL grace period on shutdown. Default 30s. */
  shutdownGraceMs?: number
  /** stopWorkWithRetry base delay (1s/2s/4s backoff). Default 1000ms. */
  stopWorkBaseDelayMs?: number
}
+
/**
 * Production defaults for BackoffConfig: connection errors start at 2s with
 * a 2-minute cap; other errors start at 500ms with a 30s cap; both budgets
 * give up after 10 minutes of sustained failure. The optional fields
 * (shutdownGraceMs, stopWorkBaseDelayMs) fall back to their per-field
 * documented defaults.
 */
const DEFAULT_BACKOFF: BackoffConfig = {
  connInitialMs: 2_000,
  connCapMs: 120_000, // 2 minutes
  connGiveUpMs: 600_000, // 10 minutes
  generalInitialMs: 500,
  generalCapMs: 30_000,
  generalGiveUpMs: 600_000, // 10 minutes
}
+
/** Status update interval for the live display (ms). */
const STATUS_UPDATE_INTERVAL_MS = 1_000
// NOTE(review): not referenced in this part of the file — presumably the
// default session cap for --spawn mode; confirm against the flag parser.
const SPAWN_SESSIONS_DEFAULT = 32
+
+/**
+ * GrowthBook gate for multi-session spawn modes (--spawn / --capacity / --create-session-in-dir).
+ * Sibling of tengu_ccr_bridge_multi_environment (multiple envs per host:dir) —
+ * this one enables multiple sessions per environment.
+ * Rollout staged via targeting rules: ants first, then gradual external.
+ *
+ * Uses the blocking gate check so a stale disk-cache miss doesn't unfairly
+ * deny access. The fast path (cache has true) is still instant; only the
+ * cold-start path awaits the server fetch, and that fetch also seeds the
+ * disk cache for next time.
+ */
+async function isMultiSessionSpawnEnabled(): Promise<boolean> {
+  return checkGate_CACHED_OR_BLOCKING('tengu_ccr_bridge_multi_session')
+}
+
+/**
+ * Returns the threshold for detecting system sleep/wake in the poll loop.
+ * Must exceed the max backoff cap — otherwise normal backoff delays trigger
+ * false sleep detection (resetting the error budget indefinitely). Using
+ * 2× the connection backoff cap, matching the pattern in WebSocketTransport
+ * and replBridge.
+ */
+function pollSleepDetectionThresholdMs(backoff: BackoffConfig): number {
+  return backoff.connCapMs * 2
+}
+
+/**
+ * Returns the args that must precede CLI flags when spawning a child claude
+ * process. In compiled binaries, process.execPath is the claude binary itself
+ * and args go directly to it. In npm installs (node running cli.js),
+ * process.execPath is the node runtime — the child spawn must pass the script
+ * path as the first arg, otherwise node interprets --sdk-url as a node option
+ * and exits with "bad option: --sdk-url". See anthropics/claude-code#28334.
+ */
+function spawnScriptArgs(): string[] {
+  if (isInBundledMode() || !process.argv[1]) {
+    return []
+  }
+  return [process.argv[1]]
+}
+
+/** Attempt to spawn a session; returns error string if spawn throws. */
+function safeSpawn(
+  spawner: SessionSpawner,
+  opts: SessionSpawnOpts,
+  dir: string,
+): SessionHandle | string {
+  try {
+    return spawner.spawn(opts, dir)
+  } catch (err) {
+    const errMsg = errorMessage(err)
+    logError(new Error(`Session spawn failed: ${errMsg}`))
+    return errMsg
+  }
+}
+
+export async function runBridgeLoop(
+  config: BridgeConfig,
+  environmentId: string,
+  environmentSecret: string,
+  api: BridgeApiClient,
+  spawner: SessionSpawner,
+  logger: BridgeLogger,
+  signal: AbortSignal,
+  backoffConfig: BackoffConfig = DEFAULT_BACKOFF,
+  initialSessionId?: string,
+  getAccessToken?: () => string | undefined | Promise<string | undefined>,
+): Promise<void> {
+  // Local abort controller so that onSessionDone can stop the poll loop.
+  // Linked to the incoming signal so external aborts also work.
+  const controller = new AbortController()
+  if (signal.aborted) {
+    controller.abort()
+  } else {
+    signal.addEventListener('abort', () => controller.abort(), { once: true })
+  }
+  const loopSignal = controller.signal
+
+  const activeSessions = new Map<string, SessionHandle>()
+  const sessionStartTimes = new Map<string, number>()
+  const sessionWorkIds = new Map<string, string>()
+  // Compat-surface ID (session_*) computed once at spawn and cached so
+  // cleanup and status-update ticks use the same key regardless of whether
+  // the tengu_bridge_repl_v2_cse_shim_enabled gate flips mid-session.
+  const sessionCompatIds = new Map<string, string>()
+  // Session ingress JWTs for heartbeat auth, keyed by sessionId.
+  // Stored separately from handle.accessToken because the token refresh
+  // scheduler overwrites that field with the OAuth token (~3h55m in).
+  const sessionIngressTokens = new Map<string, string>()
+  const sessionTimers = new Map<string, ReturnType<typeof setTimeout>>()
+  const completedWorkIds = new Set<string>()
+  const sessionWorktrees = new Map<
+    string,
+    {
+      worktreePath: string
+      worktreeBranch?: string
+      gitRoot?: string
+      hookBased?: boolean
+    }
+  >()
+  // Track sessions killed by the timeout watchdog so onSessionDone can
+  // distinguish them from server-initiated or shutdown interrupts.
+  const timedOutSessions = new Set<string>()
+  // Sessions that already have a title (server-set or bridge-derived) so
+  // onFirstUserMessage doesn't clobber a user-assigned --name / web rename.
+  // Keyed by compatSessionId to match logger.setSessionTitle's key.
+  const titledSessions = new Set<string>()
+  // Signal to wake the at-capacity sleep early when a session completes,
+  // so the bridge can immediately accept new work.
+  const capacityWake = createCapacityWake(loopSignal)
+
+  /**
+   * Heartbeat all active work items.
+   * Returns 'ok' if at least one heartbeat succeeded, 'auth_failed' if any
+   * got a 401/403 (JWT expired — re-queued via reconnectSession so the next
+   * poll delivers fresh work), or 'failed' if all failed for other reasons.
+   */
+  async function heartbeatActiveWorkItems(): Promise<
+    'ok' | 'auth_failed' | 'fatal' | 'failed'
+  > {
+    let anySuccess = false
+    let anyFatal = false
+    const authFailedSessions: string[] = []
+    for (const [sessionId] of activeSessions) {
+      const workId = sessionWorkIds.get(sessionId)
+      const ingressToken = sessionIngressTokens.get(sessionId)
+      if (!workId || !ingressToken) {
+        continue
+      }
+      try {
+        await api.heartbeatWork(environmentId, workId, ingressToken)
+        anySuccess = true
+      } catch (err) {
+        logForDebugging(
+          `[bridge:heartbeat] Failed for sessionId=${sessionId} workId=${workId}: ${errorMessage(err)}`,
+        )
+        if (err instanceof BridgeFatalError) {
+          logEvent('tengu_bridge_heartbeat_error', {
+            status:
+              err.status as unknown as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+            error_type: (err.status === 401 || err.status === 403
+              ? 'auth_failed'
+              : 'fatal') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+          })
+          if (err.status === 401 || err.status === 403) {
+            authFailedSessions.push(sessionId)
+          } else {
+            // 404/410 = environment expired or deleted — no point retrying
+            anyFatal = true
+          }
+        }
+      }
+    }
+    // JWT expired → trigger server-side re-dispatch. Without this, work stays
+    // ACK'd out of the Redis PEL and poll returns empty forever (CC-1263).
+    // The existingHandle path below delivers the fresh token to the child.
+    // sessionId is already in the format /bridge/reconnect expects: it comes
+    // from work.data.id, which matches the server's EnvironmentInstance store
+    // (cse_* under the compat gate, session_* otherwise).
+    for (const sessionId of authFailedSessions) {
+      logger.logVerbose(
+        `Session ${sessionId} token expired — re-queuing via bridge/reconnect`,
+      )
+      try {
+        await api.reconnectSession(environmentId, sessionId)
+        logForDebugging(
+          `[bridge:heartbeat] Re-queued sessionId=${sessionId} via bridge/reconnect`,
+        )
+      } catch (err) {
+        logger.logError(
+          `Failed to refresh session ${sessionId} token: ${errorMessage(err)}`,
+        )
+        logForDebugging(
+          `[bridge:heartbeat] reconnectSession(${sessionId}) failed: ${errorMessage(err)}`,
+          { level: 'error' },
+        )
+      }
+    }
+    if (anyFatal) {
+      return 'fatal'
+    }
+    if (authFailedSessions.length > 0) {
+      return 'auth_failed'
+    }
+    return anySuccess ? 'ok' : 'failed'
+  }
+
+  // Sessions spawned with CCR v2 env vars. v2 children cannot use OAuth
+  // tokens (CCR worker endpoints validate the JWT's session_id claim,
+  // register_worker.go:32), so onRefresh triggers server re-dispatch
+  // instead — the next poll delivers fresh work with a new JWT via the
+  // existingHandle path below.
+  const v2Sessions = new Set<string>()
+
+  // Proactive token refresh: schedules a timer 5min before the session
+  // ingress JWT expires. v1 delivers OAuth directly; v2 calls
+  // reconnectSession to trigger server re-dispatch (CC-1263: without
+  // this, v2 daemon sessions silently die at ~5h since the server does
+  // not auto-re-dispatch ACK'd work on lease expiry).
+  const tokenRefresh = getAccessToken
+    ? createTokenRefreshScheduler({
+        getAccessToken,
+        onRefresh: (sessionId, oauthToken) => {
+          const handle = activeSessions.get(sessionId)
+          if (!handle) {
+            return
+          }
+          if (v2Sessions.has(sessionId)) {
+            logger.logVerbose(
+              `Refreshing session ${sessionId} token via bridge/reconnect`,
+            )
+            void api
+              .reconnectSession(environmentId, sessionId)
+              .catch((err: unknown) => {
+                logger.logError(
+                  `Failed to refresh session ${sessionId} token: ${errorMessage(err)}`,
+                )
+                logForDebugging(
+                  `[bridge:token] reconnectSession(${sessionId}) failed: ${errorMessage(err)}`,
+                  { level: 'error' },
+                )
+              })
+          } else {
+            handle.updateAccessToken(oauthToken)
+          }
+        },
+        label: 'bridge',
+      })
+    : null
+  const loopStartTime = Date.now()
+  // Track all in-flight cleanup promises (stopWork, worktree removal) so
+  // the shutdown sequence can await them before process.exit().
+  const pendingCleanups = new Set<Promise<unknown>>()
+  function trackCleanup(p: Promise<unknown>): void {
+    pendingCleanups.add(p)
+    void p.finally(() => pendingCleanups.delete(p))
+  }
+  let connBackoff = 0
+  let generalBackoff = 0
+  let connErrorStart: number | null = null
+  let generalErrorStart: number | null = null
+  let lastPollErrorTime: number | null = null
+  let statusUpdateTimer: ReturnType<typeof setInterval> | null = null
+  // Set by BridgeFatalError and give-up paths so the shutdown block can
+  // skip the resume message (resume is impossible after env expiry/auth
+  // failure/sustained connection errors).
+  let fatalExit = false
+
+  logForDebugging(
+    `[bridge:work] Starting poll loop spawnMode=${config.spawnMode} maxSessions=${config.maxSessions} environmentId=${environmentId}`,
+  )
+  logForDiagnosticsNoPII('info', 'bridge_loop_started', {
+    max_sessions: config.maxSessions,
+    spawn_mode: config.spawnMode,
+  })
+
+  // For ant users, show where session debug logs will land so they can tail them.
+  // sessionRunner.ts uses the same base path. File appears once a session spawns.
+  if (process.env.USER_TYPE === 'ant') {
+    let debugGlob: string
+    if (config.debugFile) {
+      const ext = config.debugFile.lastIndexOf('.')
+      debugGlob =
+        ext > 0
+          ? `${config.debugFile.slice(0, ext)}-*${config.debugFile.slice(ext)}`
+          : `${config.debugFile}-*`
+    } else {
+      debugGlob = join(tmpdir(), 'claude', 'bridge-session-*.log')
+    }
+    logger.setDebugLogPath(debugGlob)
+  }
+
+  logger.printBanner(config, environmentId)
+
+  // Seed the logger's session count + spawn mode before any render. Without
+  // this, setAttached() below renders with the logger's default sessionMax=1,
+  // showing "Capacity: 0/1" until the status ticker kicks in (which is gated
+  // by !initialSessionId and only starts after the poll loop picks up work).
+  logger.updateSessionCount(0, config.maxSessions, config.spawnMode)
+
+  // If an initial session was pre-created, show its URL from the start so
+  // the user can click through immediately (matching /remote-control behavior).
+  if (initialSessionId) {
+    logger.setAttached(initialSessionId)
+  }
+
+  /** Refresh the inline status display. Shows idle or active depending on state. */
+  function updateStatusDisplay(): void {
+    // Push the session count (no-op when maxSessions === 1) so the
+    // next renderStatusLine tick shows the current count.
+    logger.updateSessionCount(
+      activeSessions.size,
+      config.maxSessions,
+      config.spawnMode,
+    )
+
+    // Push per-session activity into the multi-session display.
+    for (const [sid, handle] of activeSessions) {
+      const act = handle.currentActivity
+      if (act) {
+        logger.updateSessionActivity(sessionCompatIds.get(sid) ?? sid, act)
+      }
+    }
+
+    if (activeSessions.size === 0) {
+      logger.updateIdleStatus()
+      return
+    }
+
+    // Show the most recently started session that is still actively working.
+    // Sessions whose current activity is 'result' or 'error' are between
+    // turns — the CLI emitted its result but the process stays alive waiting
+    // for the next user message.  Skip updating so the status line keeps
+    // whatever state it had (Attached / session title).
+    const [sessionId, handle] = [...activeSessions.entries()].pop()!
+    const startTime = sessionStartTimes.get(sessionId)
+    if (!startTime) return
+
+    const activity = handle.currentActivity
+    if (!activity || activity.type === 'result' || activity.type === 'error') {
+      // Session is between turns — keep current status (Attached/titled).
+      // In multi-session mode, still refresh so bullet-list activities stay current.
+      if (config.maxSessions > 1) logger.refreshDisplay()
+      return
+    }
+
+    const elapsed = formatDuration(Date.now() - startTime)
+
+    // Build trail from recent tool activities (last 5)
+    const trail = handle.activities
+      .filter(a => a.type === 'tool_start')
+      .slice(-5)
+      .map(a => a.summary)
+
+    logger.updateSessionStatus(sessionId, elapsed, activity, trail)
+  }
+
+  /** Start the status display update ticker. */
+  function startStatusUpdates(): void {
+    stopStatusUpdates()
+    // Call immediately so the first transition (e.g. Connecting → Ready)
+    // happens without delay, avoiding concurrent timer races.
+    updateStatusDisplay()
+    statusUpdateTimer = setInterval(
+      updateStatusDisplay,
+      STATUS_UPDATE_INTERVAL_MS,
+    )
+  }
+
+  /** Stop the status display update ticker. */
+  function stopStatusUpdates(): void {
+    if (statusUpdateTimer) {
+      clearInterval(statusUpdateTimer)
+      statusUpdateTimer = null
+    }
+  }
+
+  function onSessionDone(
+    sessionId: string,
+    startTime: number,
+    handle: SessionHandle,
+  ): (status: SessionDoneStatus) => void {
+    return (rawStatus: SessionDoneStatus): void => {
+      const workId = sessionWorkIds.get(sessionId)
+      activeSessions.delete(sessionId)
+      sessionStartTimes.delete(sessionId)
+      sessionWorkIds.delete(sessionId)
+      sessionIngressTokens.delete(sessionId)
+      const compatId = sessionCompatIds.get(sessionId) ?? sessionId
+      sessionCompatIds.delete(sessionId)
+      logger.removeSession(compatId)
+      titledSessions.delete(compatId)
+      v2Sessions.delete(sessionId)
+      // Clear per-session timeout timer
+      const timer = sessionTimers.get(sessionId)
+      if (timer) {
+        clearTimeout(timer)
+        sessionTimers.delete(sessionId)
+      }
+      // Clear token refresh timer
+      tokenRefresh?.cancel(sessionId)
+      // Wake the at-capacity sleep so the bridge can accept new work immediately
+      capacityWake.wake()
+
+      // If the session was killed by the timeout watchdog, treat it as a
+      // failed session (not a server/shutdown interrupt) so we still call
+      // stopWork and archiveSession below.
+      const wasTimedOut = timedOutSessions.delete(sessionId)
+      const status: SessionDoneStatus =
+        wasTimedOut && rawStatus === 'interrupted' ? 'failed' : rawStatus
+      const durationMs = Date.now() - startTime
+
+      logForDebugging(
+        `[bridge:session] sessionId=${sessionId} workId=${workId ?? 'unknown'} exited status=${status} duration=${formatDuration(durationMs)}`,
+      )
+      logEvent('tengu_bridge_session_done', {
+        status:
+          status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        duration_ms: durationMs,
+      })
+      logForDiagnosticsNoPII('info', 'bridge_session_done', {
+        status,
+        duration_ms: durationMs,
+      })
+
+      // Clear the status display before printing final log
+      logger.clearStatus()
+      stopStatusUpdates()
+
+      // Build error message from stderr if available
+      const stderrSummary =
+        handle.lastStderr.length > 0 ? handle.lastStderr.join('\n') : undefined
+      let failureMessage: string | undefined
+
+      switch (status) {
+        case 'completed':
+          logger.logSessionComplete(sessionId, durationMs)
+          break
+        case 'failed':
+          // Skip failure log during shutdown — the child exits non-zero when
+          // killed, which is expected and not a real failure.
+          // Also skip for timeout-killed sessions — the timeout watchdog
+          // already logged a clear timeout message.
+          if (!wasTimedOut && !loopSignal.aborted) {
+            failureMessage = stderrSummary ?? 'Process exited with error'
+            logger.logSessionFailed(sessionId, failureMessage)
+            logError(new Error(`Bridge session failed: ${failureMessage}`))
+          }
+          break
+        case 'interrupted':
+          logger.logVerbose(`Session ${sessionId} interrupted`)
+          break
+      }
+
+      // Notify the server that this work item is done. Skip for interrupted
+      // sessions — interrupts are either server-initiated (the server already
+      // knows) or caused by bridge shutdown (which calls stopWork() separately).
+      if (status !== 'interrupted' && workId) {
+        trackCleanup(
+          stopWorkWithRetry(
+            api,
+            environmentId,
+            workId,
+            logger,
+            backoffConfig.stopWorkBaseDelayMs,
+          ),
+        )
+        completedWorkIds.add(workId)
+      }
+
+      // Clean up worktree if one was created for this session
+      const wt = sessionWorktrees.get(sessionId)
+      if (wt) {
+        sessionWorktrees.delete(sessionId)
+        trackCleanup(
+          removeAgentWorktree(
+            wt.worktreePath,
+            wt.worktreeBranch,
+            wt.gitRoot,
+            wt.hookBased,
+          ).catch((err: unknown) =>
+            logger.logVerbose(
+              `Failed to remove worktree ${wt.worktreePath}: ${errorMessage(err)}`,
+            ),
+          ),
+        )
+      }
+
+      // Lifecycle decision: in multi-session mode, keep the bridge running
+      // after a session completes. In single-session mode, abort the poll
+      // loop so the bridge exits cleanly.
+      if (status !== 'interrupted' && !loopSignal.aborted) {
+        if (config.spawnMode !== 'single-session') {
+          // Multi-session: archive the completed session so it doesn't linger
+          // as stale in the web UI. archiveSession is idempotent (409 if already
+          // archived), so double-archiving at shutdown is safe.
+          // sessionId arrived as cse_* from the work poll (infrastructure-layer
+          // tag). archiveSession hits /v1/sessions/{id}/archive which is the
+          // compat surface and validates TagSession (session_*). Re-tag — same
+          // UUID underneath.
+          trackCleanup(
+            api
+              .archiveSession(compatId)
+              .catch((err: unknown) =>
+                logger.logVerbose(
+                  `Failed to archive session ${sessionId}: ${errorMessage(err)}`,
+                ),
+              ),
+          )
+          logForDebugging(
+            `[bridge:session] Session ${status}, returning to idle (multi-session mode)`,
+          )
+        } else {
+          // Single-session: coupled lifecycle — tear down environment
+          logForDebugging(
+            `[bridge:session] Session ${status}, aborting poll loop to tear down environment`,
+          )
+          controller.abort()
+          return
+        }
+      }
+
+      if (!loopSignal.aborted) {
+        startStatusUpdates()
+      }
+    }
+  }
+
+  // Start the idle status display immediately — unless we have a pre-created
+  // session, in which case setAttached() already set up the display and the
+  // poll loop will start status updates when it picks up the session.
+  if (!initialSessionId) {
+    startStatusUpdates()
+  }
+
+  while (!loopSignal.aborted) {
+    // Fetched once per iteration — the GrowthBook cache refreshes every
+    // 5 min, so a loop running at the at-capacity rate picks up config
+    // changes within one sleep cycle.
+    const pollConfig = getPollIntervalConfig()
+
+    try {
+      const work = await api.pollForWork(
+        environmentId,
+        environmentSecret,
+        loopSignal,
+        pollConfig.reclaim_older_than_ms,
+      )
+
+      // Log reconnection if we were previously disconnected
+      const wasDisconnected =
+        connErrorStart !== null || generalErrorStart !== null
+      if (wasDisconnected) {
+        const disconnectedMs =
+          Date.now() - (connErrorStart ?? generalErrorStart ?? Date.now())
+        logger.logReconnected(disconnectedMs)
+        logForDebugging(
+          `[bridge:poll] Reconnected after ${formatDuration(disconnectedMs)}`,
+        )
+        logEvent('tengu_bridge_reconnected', {
+          disconnected_ms: disconnectedMs,
+        })
+      }
+
+      connBackoff = 0
+      generalBackoff = 0
+      connErrorStart = null
+      generalErrorStart = null
+      lastPollErrorTime = null
+
+      // Null response = no work available in the queue.
+      // Add a minimum delay to avoid hammering the server.
+      if (!work) {
+        // Use live check (not a snapshot) since sessions can end during poll.
+        const atCap = activeSessions.size >= config.maxSessions
+        if (atCap) {
+          const atCapMs = pollConfig.multisession_poll_interval_ms_at_capacity
+          // Heartbeat loops WITHOUT polling. When at-capacity polling is also
+          // enabled (atCapMs > 0), the loop tracks a deadline and breaks out
+          // to poll at that interval — heartbeat and poll compose instead of
+          // one suppressing the other. We break out to poll when:
+          //   - Poll deadline reached (atCapMs > 0 only)
+          //   - Auth fails (JWT expired → poll refreshes tokens)
+          //   - Capacity wake fires (session ended → poll for new work)
+          //   - Loop aborted (shutdown)
+          if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
+            logEvent('tengu_bridge_heartbeat_mode_entered', {
+              active_sessions: activeSessions.size,
+              heartbeat_interval_ms:
+                pollConfig.non_exclusive_heartbeat_interval_ms,
+            })
+            // Deadline computed once at entry — GB updates to atCapMs don't
+            // shift an in-flight deadline (next entry picks up the new value).
+            const pollDeadline = atCapMs > 0 ? Date.now() + atCapMs : null
+            let hbResult: 'ok' | 'auth_failed' | 'fatal' | 'failed' = 'ok'
+            let hbCycles = 0
+            while (
+              !loopSignal.aborted &&
+              activeSessions.size >= config.maxSessions &&
+              (pollDeadline === null || Date.now() < pollDeadline)
+            ) {
+              // Re-read config each cycle so GrowthBook updates take effect
+              const hbConfig = getPollIntervalConfig()
+              if (hbConfig.non_exclusive_heartbeat_interval_ms <= 0) break
+
+              // Capture capacity signal BEFORE the async heartbeat call so
+              // a session ending during the HTTP request is caught by the
+              // subsequent sleep (instead of being lost to a replaced controller).
+              const cap = capacityWake.signal()
+
+              hbResult = await heartbeatActiveWorkItems()
+              if (hbResult === 'auth_failed' || hbResult === 'fatal') {
+                cap.cleanup()
+                break
+              }
+
+              hbCycles++
+              await sleep(
+                hbConfig.non_exclusive_heartbeat_interval_ms,
+                cap.signal,
+              )
+              cap.cleanup()
+            }
+
+            // Determine exit reason for telemetry
+            const exitReason =
+              hbResult === 'auth_failed' || hbResult === 'fatal'
+                ? hbResult
+                : loopSignal.aborted
+                  ? 'shutdown'
+                  : activeSessions.size < config.maxSessions
+                    ? 'capacity_changed'
+                    : pollDeadline !== null && Date.now() >= pollDeadline
+                      ? 'poll_due'
+                      : 'config_disabled'
+            logEvent('tengu_bridge_heartbeat_mode_exited', {
+              reason:
+                exitReason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+              heartbeat_cycles: hbCycles,
+              active_sessions: activeSessions.size,
+            })
+            if (exitReason === 'poll_due') {
+              // bridgeApi throttles empty-poll logs (EMPTY_POLL_LOG_INTERVAL=100)
+              // so the once-per-10min poll_due poll is invisible at counter=2.
+              // Log it here so verification runs see both endpoints in the debug log.
+              logForDebugging(
+                `[bridge:poll] Heartbeat poll_due after ${hbCycles} cycles — falling through to pollForWork`,
+              )
+            }
+
+            // On auth_failed or fatal, sleep before polling to avoid a tight
+            // poll+heartbeat loop. Auth_failed: heartbeatActiveWorkItems
+            // already called reconnectSession — the sleep gives the server
+            // time to propagate the re-queue. Fatal (404/410): may be a
+            // single work item GCd while the environment is still valid.
+            // Use atCapMs if enabled, else the heartbeat interval as a floor
+            // (guaranteed > 0 here) so heartbeat-only configs don't tight-loop.
+            if (hbResult === 'auth_failed' || hbResult === 'fatal') {
+              const cap = capacityWake.signal()
+              await sleep(
+                atCapMs > 0
+                  ? atCapMs
+                  : pollConfig.non_exclusive_heartbeat_interval_ms,
+                cap.signal,
+              )
+              cap.cleanup()
+            }
+          } else if (atCapMs > 0) {
+            // Heartbeat disabled: slow poll as liveness signal.
+            const cap = capacityWake.signal()
+            await sleep(atCapMs, cap.signal)
+            cap.cleanup()
+          }
+        } else {
+          const interval =
+            activeSessions.size > 0
+              ? pollConfig.multisession_poll_interval_ms_partial_capacity
+              : pollConfig.multisession_poll_interval_ms_not_at_capacity
+          await sleep(interval, loopSignal)
+        }
+        continue
+      }
+
+      // At capacity — we polled to keep the heartbeat alive, but cannot
+      // accept new work right now. We still enter the switch below so that
+      // token refreshes for existing sessions are processed (the case
+      // 'session' handler checks for existing sessions before the inner
+      // capacity guard).
+      const atCapacityBeforeSwitch = activeSessions.size >= config.maxSessions
+
+      // Skip work items that have already been completed and stopped.
+      // The server may re-deliver stale work before processing our stop
+      // request, which would otherwise cause a duplicate session spawn.
+      if (completedWorkIds.has(work.id)) {
+        logForDebugging(
+          `[bridge:work] Skipping already-completed workId=${work.id}`,
+        )
+        // Respect capacity throttle — without a sleep here, persistent stale
+        // redeliveries would tight-loop at poll-request speed (the !work
+        // branch above is the only sleep, and work != null skips it).
+        if (atCapacityBeforeSwitch) {
+          const cap = capacityWake.signal()
+          if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
+            await heartbeatActiveWorkItems()
+            await sleep(
+              pollConfig.non_exclusive_heartbeat_interval_ms,
+              cap.signal,
+            )
+          } else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
+            await sleep(
+              pollConfig.multisession_poll_interval_ms_at_capacity,
+              cap.signal,
+            )
+          }
+          cap.cleanup()
+        } else {
+          await sleep(1000, loopSignal)
+        }
+        continue
+      }
+
+      // Decode the work secret for session spawning and to extract the JWT
+      // used for the ack call below.
+      let secret
+      try {
+        secret = decodeWorkSecret(work.secret)
+      } catch (err) {
+        const errMsg = errorMessage(err)
+        logger.logError(
+          `Failed to decode work secret for workId=${work.id}: ${errMsg}`,
+        )
+        logEvent('tengu_bridge_work_secret_failed', {})
+        // Can't ack (needs the JWT we failed to decode). stopWork uses OAuth,
+        // so it's callable here — prevents XAUTOCLAIM from re-delivering this
+        // poisoned item every reclaim_older_than_ms cycle.
+        completedWorkIds.add(work.id)
+        trackCleanup(
+          stopWorkWithRetry(
+            api,
+            environmentId,
+            work.id,
+            logger,
+            backoffConfig.stopWorkBaseDelayMs,
+          ),
+        )
+        // Respect capacity throttle before retrying — without a sleep here,
+        // repeated decode failures at capacity would tight-loop at
+        // poll-request speed (work != null skips the !work sleep above).
+        if (atCapacityBeforeSwitch) {
+          const cap = capacityWake.signal()
+          if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
+            await heartbeatActiveWorkItems()
+            await sleep(
+              pollConfig.non_exclusive_heartbeat_interval_ms,
+              cap.signal,
+            )
+          } else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
+            await sleep(
+              pollConfig.multisession_poll_interval_ms_at_capacity,
+              cap.signal,
+            )
+          }
+          cap.cleanup()
+        }
+        continue
+      }
+
+      // Explicitly acknowledge after committing to handle the work — NOT
+      // before. The at-capacity guard inside case 'session' can break
+      // without spawning; acking there would permanently lose the work.
+      // Ack failures are non-fatal: server re-delivers, and existingHandle
+      // / completedWorkIds paths handle the dedup.
+      const ackWork = async (): Promise<void> => {
+        logForDebugging(`[bridge:work] Acknowledging workId=${work.id}`)
+        try {
+          await api.acknowledgeWork(
+            environmentId,
+            work.id,
+            secret.session_ingress_token,
+          )
+        } catch (err) {
+          logForDebugging(
+            `[bridge:work] Acknowledge failed workId=${work.id}: ${errorMessage(err)}`,
+          )
+        }
+      }
+
+      const workType: string = work.data.type
+      switch (work.data.type) {
+        case 'healthcheck':
+          await ackWork()
+          logForDebugging('[bridge:work] Healthcheck received')
+          logger.logVerbose('Healthcheck received')
+          break
+        case 'session': {
+          const sessionId = work.data.id
+          try {
+            validateBridgeId(sessionId, 'session_id')
+          } catch {
+            await ackWork()
+            logger.logError(`Invalid session_id received: ${sessionId}`)
+            break
+          }
+
+          // If the session is already running, deliver the fresh token so
+          // the child process can reconnect its WebSocket with the new
+          // session ingress token. This handles the case where the server
+          // re-dispatches work for an existing session after the WS drops.
+          const existingHandle = activeSessions.get(sessionId)
+          if (existingHandle) {
+            existingHandle.updateAccessToken(secret.session_ingress_token)
+            sessionIngressTokens.set(sessionId, secret.session_ingress_token)
+            sessionWorkIds.set(sessionId, work.id)
+            // Re-schedule next refresh from the fresh JWT's expiry. onRefresh
+            // branches on v2Sessions so both v1 and v2 are safe here.
+            tokenRefresh?.schedule(sessionId, secret.session_ingress_token)
+            logForDebugging(
+              `[bridge:work] Updated access token for existing sessionId=${sessionId} workId=${work.id}`,
+            )
+            await ackWork()
+            break
+          }
+
+          // At capacity — token refresh for existing sessions is handled
+          // above, but we cannot spawn new ones. The post-switch capacity
+          // sleep will throttle the loop; just break here.
+          if (activeSessions.size >= config.maxSessions) {
+            logForDebugging(
+              `[bridge:work] At capacity (${activeSessions.size}/${config.maxSessions}), cannot spawn new session for workId=${work.id}`,
+            )
+            break
+          }
+
+          await ackWork()
+          const spawnStartTime = Date.now()
+
+          // CCR v2 path: register this bridge as the session worker, get the
+          // epoch, and point the child at /v1/code/sessions/{id}. The child
+          // already has the full v2 client (SSETransport + CCRClient) — same
+          // code path environment-manager launches in containers.
+          //
+          // v1 path: Session-Ingress WebSocket. Uses config.sessionIngressUrl
+          // (not secret.api_base_url, which may point to a remote proxy tunnel
+          // that doesn't know about locally-created sessions).
+          let sdkUrl: string
+          let useCcrV2 = false
+          let workerEpoch: number | undefined
+          // Server decides per-session via the work secret; env var is the
+          // ant-dev override (e.g. forcing v2 before the server flag is on).
+          if (
+            secret.use_code_sessions === true ||
+            isEnvTruthy(process.env.CLAUDE_BRIDGE_USE_CCR_V2)
+          ) {
+            sdkUrl = buildCCRv2SdkUrl(config.apiBaseUrl, sessionId)
+            // Retry once on transient failure (network blip, 500) before
+            // permanently giving up and killing the session.
+            for (let attempt = 1; attempt <= 2; attempt++) {
+              try {
+                workerEpoch = await registerWorker(
+                  sdkUrl,
+                  secret.session_ingress_token,
+                )
+                useCcrV2 = true
+                logForDebugging(
+                  `[bridge:session] CCR v2: registered worker sessionId=${sessionId} epoch=${workerEpoch} attempt=${attempt}`,
+                )
+                break
+              } catch (err) {
+                const errMsg = errorMessage(err)
+                if (attempt < 2) {
+                  logForDebugging(
+                    `[bridge:session] CCR v2: registerWorker attempt ${attempt} failed, retrying: ${errMsg}`,
+                  )
+                  await sleep(2_000, loopSignal)
+                  if (loopSignal.aborted) break
+                  continue
+                }
+                logger.logError(
+                  `CCR v2 worker registration failed for session ${sessionId}: ${errMsg}`,
+                )
+                logError(new Error(`registerWorker failed: ${errMsg}`))
+                completedWorkIds.add(work.id)
+                trackCleanup(
+                  stopWorkWithRetry(
+                    api,
+                    environmentId,
+                    work.id,
+                    logger,
+                    backoffConfig.stopWorkBaseDelayMs,
+                  ),
+                )
+              }
+            }
+            if (!useCcrV2) break
+          } else {
+            sdkUrl = buildSdkUrl(config.sessionIngressUrl, sessionId)
+          }
+
+          // In worktree mode, on-demand sessions get an isolated git worktree
+          // so concurrent sessions don't interfere with each other's file
+          // changes. The pre-created initial session (if any) runs in
+          // config.dir so the user's first session lands in the directory they
+          // invoked `rc` from — matching the old single-session UX.
+          // In same-dir and single-session modes, all sessions share config.dir.
+          // Capture spawnMode before the await below — the `w` key handler
+          // mutates config.spawnMode directly, and createAgentWorktree can
+          // take 1-2s, so reading config.spawnMode after the await can
+          // produce contradictory analytics (spawn_mode:'same-dir', in_worktree:true).
+          const spawnModeAtDecision = config.spawnMode
+          let sessionDir = config.dir
+          let worktreeCreateMs = 0
+          if (
+            spawnModeAtDecision === 'worktree' &&
+            (initialSessionId === undefined ||
+              !sameSessionId(sessionId, initialSessionId))
+          ) {
+            const wtStart = Date.now()
+            try {
+              const wt = await createAgentWorktree(
+                `bridge-${safeFilenameId(sessionId)}`,
+              )
+              worktreeCreateMs = Date.now() - wtStart
+              sessionWorktrees.set(sessionId, {
+                worktreePath: wt.worktreePath,
+                worktreeBranch: wt.worktreeBranch,
+                gitRoot: wt.gitRoot,
+                hookBased: wt.hookBased,
+              })
+              sessionDir = wt.worktreePath
+              logForDebugging(
+                `[bridge:session] Created worktree for sessionId=${sessionId} at ${wt.worktreePath}`,
+              )
+            } catch (err) {
+              const errMsg = errorMessage(err)
+              logger.logError(
+                `Failed to create worktree for session ${sessionId}: ${errMsg}`,
+              )
+              logError(new Error(`Worktree creation failed: ${errMsg}`))
+              completedWorkIds.add(work.id)
+              trackCleanup(
+                stopWorkWithRetry(
+                  api,
+                  environmentId,
+                  work.id,
+                  logger,
+                  backoffConfig.stopWorkBaseDelayMs,
+                ),
+              )
+              break
+            }
+          }
+
+          logForDebugging(
+            `[bridge:session] Spawning sessionId=${sessionId} sdkUrl=${sdkUrl}`,
+          )
+
+          // compat-surface session_* form for logger/Sessions-API calls.
+          // Work poll returns cse_* under v2 compat; convert before spawn so
+          // the onFirstUserMessage callback can close over it.
+          const compatSessionId = toCompatSessionId(sessionId)
+
+          const spawnResult = safeSpawn(
+            spawner,
+            {
+              sessionId,
+              sdkUrl,
+              accessToken: secret.session_ingress_token,
+              useCcrV2,
+              workerEpoch,
+              onFirstUserMessage: text => {
+                // Server-set titles (--name, web rename) win. fetchSessionTitle
+                // runs concurrently; if it already populated titledSessions,
+                // skip. If it hasn't resolved yet, the derived title sticks —
+                // acceptable since the server had no title at spawn time.
+                if (titledSessions.has(compatSessionId)) return
+                titledSessions.add(compatSessionId)
+                const title = deriveSessionTitle(text)
+                logger.setSessionTitle(compatSessionId, title)
+                logForDebugging(
+                  `[bridge:title] derived title for ${compatSessionId}: ${title}`,
+                )
+                void import('./createSession.js')
+                  .then(({ updateBridgeSessionTitle }) =>
+                    updateBridgeSessionTitle(compatSessionId, title, {
+                      baseUrl: config.apiBaseUrl,
+                    }),
+                  )
+                  .catch(err =>
+                    logForDebugging(
+                      `[bridge:title] failed to update title for ${compatSessionId}: ${err}`,
+                      { level: 'error' },
+                    ),
+                  )
+              },
+            },
+            sessionDir,
+          )
+          if (typeof spawnResult === 'string') {
+            logger.logError(
+              `Failed to spawn session ${sessionId}: ${spawnResult}`,
+            )
+            // Clean up worktree if one was created for this session
+            const wt = sessionWorktrees.get(sessionId)
+            if (wt) {
+              sessionWorktrees.delete(sessionId)
+              trackCleanup(
+                removeAgentWorktree(
+                  wt.worktreePath,
+                  wt.worktreeBranch,
+                  wt.gitRoot,
+                  wt.hookBased,
+                ).catch((err: unknown) =>
+                  logger.logVerbose(
+                    `Failed to remove worktree ${wt.worktreePath}: ${errorMessage(err)}`,
+                  ),
+                ),
+              )
+            }
+            completedWorkIds.add(work.id)
+            trackCleanup(
+              stopWorkWithRetry(
+                api,
+                environmentId,
+                work.id,
+                logger,
+                backoffConfig.stopWorkBaseDelayMs,
+              ),
+            )
+            break
+          }
+          const handle = spawnResult
+
+          const spawnDurationMs = Date.now() - spawnStartTime
+          logEvent('tengu_bridge_session_started', {
+            active_sessions: activeSessions.size,
+            spawn_mode:
+              spawnModeAtDecision as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+            in_worktree: sessionWorktrees.has(sessionId),
+            spawn_duration_ms: spawnDurationMs,
+            worktree_create_ms: worktreeCreateMs,
+            inProtectedNamespace: isInProtectedNamespace(),
+          })
+          logForDiagnosticsNoPII('info', 'bridge_session_started', {
+            spawn_mode: spawnModeAtDecision,
+            in_worktree: sessionWorktrees.has(sessionId),
+            spawn_duration_ms: spawnDurationMs,
+            worktree_create_ms: worktreeCreateMs,
+          })
+
+          activeSessions.set(sessionId, handle)
+          sessionWorkIds.set(sessionId, work.id)
+          sessionIngressTokens.set(sessionId, secret.session_ingress_token)
+          sessionCompatIds.set(sessionId, compatSessionId)
+
+          const startTime = Date.now()
+          sessionStartTimes.set(sessionId, startTime)
+
+          // Use a generic prompt description since we no longer get startup_context
+          logger.logSessionStart(sessionId, `Session ${sessionId}`)
+
+          // Compute the actual debug file path (mirrors sessionRunner.ts logic)
+          const safeId = safeFilenameId(sessionId)
+          let sessionDebugFile: string | undefined
+          if (config.debugFile) {
+            const ext = config.debugFile.lastIndexOf('.')
+            if (ext > 0) {
+              sessionDebugFile = `${config.debugFile.slice(0, ext)}-${safeId}${config.debugFile.slice(ext)}`
+            } else {
+              sessionDebugFile = `${config.debugFile}-${safeId}`
+            }
+          } else if (config.verbose || process.env.USER_TYPE === 'ant') {
+            sessionDebugFile = join(
+              tmpdir(),
+              'claude',
+              `bridge-session-${safeId}.log`,
+            )
+          }
+
+          if (sessionDebugFile) {
+            logger.logVerbose(`Debug log: ${sessionDebugFile}`)
+          }
+
+          // Register in the sessions Map before starting status updates so the
+          // first render tick shows the correct count and bullet list in sync.
+          logger.addSession(
+            compatSessionId,
+            getRemoteSessionUrl(compatSessionId, config.sessionIngressUrl),
+          )
+
+          // Start live status updates and transition to "Attached" state.
+          startStatusUpdates()
+          logger.setAttached(compatSessionId)
+
+          // One-shot title fetch. If the session already has a title (set via
+          // --name, web rename, or /remote-control), display it and mark as
+          // titled so the first-user-message fallback doesn't overwrite it.
+          // Otherwise onFirstUserMessage derives one from the first prompt.
+          void fetchSessionTitle(compatSessionId, config.apiBaseUrl)
+            .then(title => {
+              if (title && activeSessions.has(sessionId)) {
+                titledSessions.add(compatSessionId)
+                logger.setSessionTitle(compatSessionId, title)
+                logForDebugging(
+                  `[bridge:title] server title for ${compatSessionId}: ${title}`,
+                )
+              }
+            })
+            .catch(err =>
+              logForDebugging(
+                `[bridge:title] failed to fetch title for ${compatSessionId}: ${err}`,
+                { level: 'error' },
+              ),
+            )
+
+          // Start per-session timeout watchdog
+          const timeoutMs =
+            config.sessionTimeoutMs ?? DEFAULT_SESSION_TIMEOUT_MS
+          if (timeoutMs > 0) {
+            const timer = setTimeout(
+              onSessionTimeout,
+              timeoutMs,
+              sessionId,
+              timeoutMs,
+              logger,
+              timedOutSessions,
+              handle,
+            )
+            sessionTimers.set(sessionId, timer)
+          }
+
+          // Schedule proactive token refresh before the JWT expires.
+          // onRefresh branches on v2Sessions: v1 delivers OAuth to the
+          // child, v2 triggers server re-dispatch via reconnectSession.
+          if (useCcrV2) {
+            v2Sessions.add(sessionId)
+          }
+          tokenRefresh?.schedule(sessionId, secret.session_ingress_token)
+
+          void handle.done.then(onSessionDone(sessionId, startTime, handle))
+          break
+        }
+        default:
+          await ackWork()
+          // Gracefully ignore unknown work types. The backend may send new
+          // types before the bridge client is updated.
+          logForDebugging(
+            `[bridge:work] Unknown work type: ${workType}, skipping`,
+          )
+          break
+      }
+
+      // When at capacity, throttle the loop. The switch above still runs so
+      // existing-session token refreshes are processed, but we sleep here
+      // to avoid busy-looping. Include the capacity wake signal so the
+      // sleep is interrupted immediately when a session completes.
+      if (atCapacityBeforeSwitch) {
+        const cap = capacityWake.signal()
+        if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
+          await heartbeatActiveWorkItems()
+          await sleep(
+            pollConfig.non_exclusive_heartbeat_interval_ms,
+            cap.signal,
+          )
+        } else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
+          await sleep(
+            pollConfig.multisession_poll_interval_ms_at_capacity,
+            cap.signal,
+          )
+        }
+        cap.cleanup()
+      }
+    } catch (err) {
+      if (loopSignal.aborted) {
+        break
+      }
+
+      // Fatal errors (401/403) — no point retrying, auth won't fix itself
+      if (err instanceof BridgeFatalError) {
+        fatalExit = true
+        // Server-enforced expiry gets a clean status message, not an error
+        if (isExpiredErrorType(err.errorType)) {
+          logger.logStatus(err.message)
+        } else if (isSuppressible403(err)) {
+          // Cosmetic 403 errors (e.g., external_poll_sessions scope,
+          // environments:manage permission) — don't show to user
+          logForDebugging(`[bridge:work] Suppressed 403 error: ${err.message}`)
+        } else {
+          logger.logError(err.message)
+          logError(err)
+        }
+        logEvent('tengu_bridge_fatal_error', {
+          status: err.status,
+          error_type:
+            err.errorType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        })
+        logForDiagnosticsNoPII(
+          isExpiredErrorType(err.errorType) ? 'info' : 'error',
+          'bridge_fatal_error',
+          { status: err.status, error_type: err.errorType },
+        )
+        break
+      }
+
+      const errMsg = describeAxiosError(err)
+
+      if (isConnectionError(err) || isServerError(err)) {
+        const now = Date.now()
+
+        // Detect system sleep/wake: if the gap since the last poll error
+        // greatly exceeds the expected backoff, the machine likely slept.
+        // Reset error tracking so the bridge retries with a fresh budget.
+        if (
+          lastPollErrorTime !== null &&
+          now - lastPollErrorTime > pollSleepDetectionThresholdMs(backoffConfig)
+        ) {
+          logForDebugging(
+            `[bridge:work] Detected system sleep (${Math.round((now - lastPollErrorTime) / 1000)}s gap), resetting error budget`,
+          )
+          logForDiagnosticsNoPII('info', 'bridge_poll_sleep_detected', {
+            gapMs: now - lastPollErrorTime,
+          })
+          connErrorStart = null
+          connBackoff = 0
+          generalErrorStart = null
+          generalBackoff = 0
+        }
+        lastPollErrorTime = now
+
+        if (!connErrorStart) {
+          connErrorStart = now
+        }
+        const elapsed = now - connErrorStart
+        if (elapsed >= backoffConfig.connGiveUpMs) {
+          logger.logError(
+            `Server unreachable for ${Math.round(elapsed / 60_000)} minutes, giving up.`,
+          )
+          logEvent('tengu_bridge_poll_give_up', {
+            error_type:
+              'connection' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+            elapsed_ms: elapsed,
+          })
+          logForDiagnosticsNoPII('error', 'bridge_poll_give_up', {
+            error_type: 'connection',
+            elapsed_ms: elapsed,
+          })
+          fatalExit = true
+          break
+        }
+
+        // Reset the other track when switching error types
+        generalErrorStart = null
+        generalBackoff = 0
+
+        connBackoff = connBackoff
+          ? Math.min(connBackoff * 2, backoffConfig.connCapMs)
+          : backoffConfig.connInitialMs
+        const delay = addJitter(connBackoff)
+        logger.logVerbose(
+          `Connection error, retrying in ${formatDelay(delay)} (${Math.round(elapsed / 1000)}s elapsed): ${errMsg}`,
+        )
+        logger.updateReconnectingStatus(
+          formatDelay(delay),
+          formatDuration(elapsed),
+        )
+        // The poll_due heartbeat-loop exit leaves a healthy lease exposed to
+        // this backoff path. Heartbeat before each sleep so /poll outages
+        // (the VerifyEnvironmentSecretAuth DB path heartbeat was introduced
+        // to avoid) don't kill the 300s lease TTL. No-op when activeSessions
+        // is empty or heartbeat is disabled.
+        if (getPollIntervalConfig().non_exclusive_heartbeat_interval_ms > 0) {
+          await heartbeatActiveWorkItems()
+        }
+        await sleep(delay, loopSignal)
+      } else {
+        const now = Date.now()
+
+        // Sleep detection for general errors (same logic as connection errors)
+        if (
+          lastPollErrorTime !== null &&
+          now - lastPollErrorTime > pollSleepDetectionThresholdMs(backoffConfig)
+        ) {
+          logForDebugging(
+            `[bridge:work] Detected system sleep (${Math.round((now - lastPollErrorTime) / 1000)}s gap), resetting error budget`,
+          )
+          logForDiagnosticsNoPII('info', 'bridge_poll_sleep_detected', {
+            gapMs: now - lastPollErrorTime,
+          })
+          connErrorStart = null
+          connBackoff = 0
+          generalErrorStart = null
+          generalBackoff = 0
+        }
+        lastPollErrorTime = now
+
+        if (!generalErrorStart) {
+          generalErrorStart = now
+        }
+        const elapsed = now - generalErrorStart
+        if (elapsed >= backoffConfig.generalGiveUpMs) {
+          logger.logError(
+            `Persistent errors for ${Math.round(elapsed / 60_000)} minutes, giving up.`,
+          )
+          logEvent('tengu_bridge_poll_give_up', {
+            error_type:
+              'general' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+            elapsed_ms: elapsed,
+          })
+          logForDiagnosticsNoPII('error', 'bridge_poll_give_up', {
+            error_type: 'general',
+            elapsed_ms: elapsed,
+          })
+          fatalExit = true
+          break
+        }
+
+        // Reset the other track when switching error types
+        connErrorStart = null
+        connBackoff = 0
+
+        generalBackoff = generalBackoff
+          ? Math.min(generalBackoff * 2, backoffConfig.generalCapMs)
+          : backoffConfig.generalInitialMs
+        const delay = addJitter(generalBackoff)
+        logger.logVerbose(
+          `Poll failed, retrying in ${formatDelay(delay)} (${Math.round(elapsed / 1000)}s elapsed): ${errMsg}`,
+        )
+        logger.updateReconnectingStatus(
+          formatDelay(delay),
+          formatDuration(elapsed),
+        )
+        if (getPollIntervalConfig().non_exclusive_heartbeat_interval_ms > 0) {
+          await heartbeatActiveWorkItems()
+        }
+        await sleep(delay, loopSignal)
+      }
+    }
+  }
+
+  // Clean up
+  stopStatusUpdates()
+  logger.clearStatus()
+
+  const loopDurationMs = Date.now() - loopStartTime
+  logEvent('tengu_bridge_shutdown', {
+    active_sessions: activeSessions.size,
+    loop_duration_ms: loopDurationMs,
+  })
+  logForDiagnosticsNoPII('info', 'bridge_shutdown', {
+    active_sessions: activeSessions.size,
+    loop_duration_ms: loopDurationMs,
+  })
+
+  // Graceful shutdown: kill active sessions, report them as interrupted,
+  // archive sessions, then deregister the environment so the web UI shows
+  // the bridge as offline.
+
+  // Collect all session IDs to archive on exit. This includes:
+  // 1. Active sessions (snapshot before killing — onSessionDone clears maps)
+  // 2. The initial auto-created session (may never have had work dispatched)
+  // api.archiveSession is idempotent (409 if already archived), so
+  // double-archiving is safe.
+  const sessionsToArchive = new Set(activeSessions.keys())
+  if (initialSessionId) {
+    sessionsToArchive.add(initialSessionId)
+  }
+  // Snapshot before killing — onSessionDone clears sessionCompatIds.
+  const compatIdSnapshot = new Map(sessionCompatIds)
+
+  if (activeSessions.size > 0) {
+    logForDebugging(
+      `[bridge:shutdown] Shutting down ${activeSessions.size} active session(s)`,
+    )
+    logger.logStatus(
+      `Shutting down ${activeSessions.size} active session(s)\u2026`,
+    )
+
+    // Snapshot work IDs before killing — onSessionDone clears the maps when
+    // each child exits, so we need a copy for the stopWork calls below.
+    const shutdownWorkIds = new Map(sessionWorkIds)
+
+    for (const [sessionId, handle] of activeSessions.entries()) {
+      logForDebugging(
+        `[bridge:shutdown] Sending SIGTERM to sessionId=${sessionId}`,
+      )
+      handle.kill()
+    }
+
+    const timeout = new AbortController()
+    await Promise.race([
+      Promise.allSettled([...activeSessions.values()].map(h => h.done)),
+      sleep(backoffConfig.shutdownGraceMs ?? 30_000, timeout.signal),
+    ])
+    timeout.abort()
+
+    // SIGKILL any processes that didn't respond to SIGTERM within the grace window
+    for (const [sid, handle] of activeSessions.entries()) {
+      logForDebugging(`[bridge:shutdown] Force-killing stuck sessionId=${sid}`)
+      handle.forceKill()
+    }
+
+    // Clear any remaining session timeout and refresh timers
+    for (const timer of sessionTimers.values()) {
+      clearTimeout(timer)
+    }
+    sessionTimers.clear()
+    tokenRefresh?.cancelAll()
+
+    // Clean up any remaining worktrees from active sessions.
+    // Snapshot and clear the map first so onSessionDone (which may fire
+    // during the await below when handle.done resolves) won't try to
+    // remove the same worktrees again.
+    if (sessionWorktrees.size > 0) {
+      const remainingWorktrees = [...sessionWorktrees.values()]
+      sessionWorktrees.clear()
+      logForDebugging(
+        `[bridge:shutdown] Cleaning up ${remainingWorktrees.length} worktree(s)`,
+      )
+      await Promise.allSettled(
+        remainingWorktrees.map(wt =>
+          removeAgentWorktree(
+            wt.worktreePath,
+            wt.worktreeBranch,
+            wt.gitRoot,
+            wt.hookBased,
+          ),
+        ),
+      )
+    }
+
+    // Stop all active work items so the server knows they're done
+    await Promise.allSettled(
+      [...shutdownWorkIds.entries()].map(([sessionId, workId]) => {
+        return api
+          .stopWork(environmentId, workId, true)
+          .catch(err =>
+            logger.logVerbose(
+              `Failed to stop work ${workId} for session ${sessionId}: ${errorMessage(err)}`,
+            ),
+          )
+      }),
+    )
+  }
+
+  // Ensure all in-flight cleanup (stopWork, worktree removal) from
+  // onSessionDone completes before deregistering — otherwise
+  // process.exit() can kill them mid-flight.
+  if (pendingCleanups.size > 0) {
+    await Promise.allSettled([...pendingCleanups])
+  }
+
+  // In single-session mode with a known session, leave the session and
+  // environment alive so `claude remote-control --session-id=<id>` can resume.
+  // The backend GCs stale environments via a 4h TTL (BRIDGE_LAST_POLL_TTL).
+  // Archiving the session or deregistering the environment would make the
+  // printed resume command a lie — deregister deletes Firestore + Redis stream.
+  // Skip when the loop exited fatally (env expired, auth failed, give-up) —
+  // resume is impossible in those cases and the message would contradict the
+  // error already printed.
+  // feature('KAIROS') gate: --session-id is ant-only; without the gate,
+  // revert to the pre-PR behavior (archive + deregister on every shutdown).
+  if (
+    feature('KAIROS') &&
+    config.spawnMode === 'single-session' &&
+    initialSessionId &&
+    !fatalExit
+  ) {
+    logger.logStatus(
+      `Resume this session by running \`claude remote-control --continue\``,
+    )
+    logForDebugging(
+      `[bridge:shutdown] Skipping archive+deregister to allow resume of session ${initialSessionId}`,
+    )
+    return
+  }
+
+  // Archive all known sessions so they don't linger as idle/running on the
+  // server after the bridge goes offline.
+  if (sessionsToArchive.size > 0) {
+    logForDebugging(
+      `[bridge:shutdown] Archiving ${sessionsToArchive.size} session(s)`,
+    )
+    await Promise.allSettled(
+      [...sessionsToArchive].map(sessionId =>
+        api
+          .archiveSession(
+            compatIdSnapshot.get(sessionId) ?? toCompatSessionId(sessionId),
+          )
+          .catch(err =>
+            logger.logVerbose(
+              `Failed to archive session ${sessionId}: ${errorMessage(err)}`,
+            ),
+          ),
+      ),
+    )
+  }
+
+  // Deregister the environment so the web UI shows the bridge as offline
+  // and the Redis stream is cleaned up.
+  try {
+    await api.deregisterEnvironment(environmentId)
+    logForDebugging(
+      `[bridge:shutdown] Environment deregistered, bridge offline`,
+    )
+    logger.logVerbose('Environment deregistered.')
+  } catch (err) {
+    logger.logVerbose(`Failed to deregister environment: ${errorMessage(err)}`)
+  }
+
+  // Clear the crash-recovery pointer — the env is gone, pointer would be
+  // stale. The early return above (resumable SIGINT shutdown) skips this,
+  // leaving the pointer as a backup for the printed --session-id hint.
+  const { clearBridgePointer } = await import('./bridgePointer.js')
+  await clearBridgePointer(config.dir)
+
+  logger.logVerbose('Environment offline.')
+}
+
+const CONNECTION_ERROR_CODES = new Set([
+  'ECONNREFUSED',
+  'ECONNRESET',
+  'ETIMEDOUT',
+  'ENETUNREACH',
+  'EHOSTUNREACH',
+])
+
+export function isConnectionError(err: unknown): boolean {
+  if (
+    err &&
+    typeof err === 'object' &&
+    'code' in err &&
+    typeof err.code === 'string' &&
+    CONNECTION_ERROR_CODES.has(err.code)
+  ) {
+    return true
+  }
+  return false
+}
+
+/** Detect HTTP 5xx errors from axios (code: 'ERR_BAD_RESPONSE'). */
+export function isServerError(err: unknown): boolean {
+  return (
+    !!err &&
+    typeof err === 'object' &&
+    'code' in err &&
+    typeof err.code === 'string' &&
+    err.code === 'ERR_BAD_RESPONSE'
+  )
+}
+
+/** Add ±25% jitter to a delay value. */
+function addJitter(ms: number): number {
+  return Math.max(0, ms + ms * 0.25 * (2 * Math.random() - 1))
+}
+
+function formatDelay(ms: number): string {
+  return ms >= 1000 ? `${(ms / 1000).toFixed(1)}s` : `${Math.round(ms)}ms`
+}
+
+/**
+ * Retry stopWork with exponential backoff (3 attempts, 1s/2s/4s).
+ * Ensures the server learns the work item ended, preventing server-side zombies.
+ */
+async function stopWorkWithRetry(
+  api: BridgeApiClient,
+  environmentId: string,
+  workId: string,
+  logger: BridgeLogger,
+  baseDelayMs = 1000,
+): Promise<void> {
+  const MAX_ATTEMPTS = 3
+
+  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
+    try {
+      await api.stopWork(environmentId, workId, false)
+      logForDebugging(
+        `[bridge:work] stopWork succeeded for workId=${workId} on attempt ${attempt}/${MAX_ATTEMPTS}`,
+      )
+      return
+    } catch (err) {
+      // Auth/permission errors won't be fixed by retrying
+      if (err instanceof BridgeFatalError) {
+        if (isSuppressible403(err)) {
+          logForDebugging(
+            `[bridge:work] Suppressed stopWork 403 for ${workId}: ${err.message}`,
+          )
+        } else {
+          logger.logError(`Failed to stop work ${workId}: ${err.message}`)
+        }
+        logForDiagnosticsNoPII('error', 'bridge_stop_work_failed', {
+          attempts: attempt,
+          fatal: true,
+        })
+        return
+      }
+      const errMsg = errorMessage(err)
+      if (attempt < MAX_ATTEMPTS) {
+        const delay = addJitter(baseDelayMs * Math.pow(2, attempt - 1))
+        logger.logVerbose(
+          `Failed to stop work ${workId} (attempt ${attempt}/${MAX_ATTEMPTS}), retrying in ${formatDelay(delay)}: ${errMsg}`,
+        )
+        await sleep(delay)
+      } else {
+        logger.logError(
+          `Failed to stop work ${workId} after ${MAX_ATTEMPTS} attempts: ${errMsg}`,
+        )
+        logForDiagnosticsNoPII('error', 'bridge_stop_work_failed', {
+          attempts: MAX_ATTEMPTS,
+        })
+      }
+    }
+  }
+}
+
+function onSessionTimeout(
+  sessionId: string,
+  timeoutMs: number,
+  logger: BridgeLogger,
+  timedOutSessions: Set<string>,
+  handle: SessionHandle,
+): void {
+  logForDebugging(
+    `[bridge:session] sessionId=${sessionId} timed out after ${formatDuration(timeoutMs)}`,
+  )
+  logEvent('tengu_bridge_session_timeout', {
+    timeout_ms: timeoutMs,
+  })
+  logger.logSessionFailed(
+    sessionId,
+    `Session timed out after ${formatDuration(timeoutMs)}`,
+  )
+  timedOutSessions.add(sessionId)
+  handle.kill()
+}
+
/** Parsed CLI flags for `claude remote-control`; produced by parseArgs. */
export type ParsedArgs = {
  /** --verbose / -v: enable verbose output. */
  verbose: boolean
  /** --sandbox / --no-sandbox; last occurrence on the command line wins. */
  sandbox: boolean
  /** --debug-file: debug log path, resolved to an absolute path. */
  debugFile?: string
  /** --session-timeout: given in seconds on the CLI, stored as milliseconds. */
  sessionTimeoutMs?: number
  /** --permission-mode: validated against PERMISSION_MODES by the caller. */
  permissionMode?: string
  /** --name: name for the session. */
  name?: string
  /** Value passed to --spawn (if any); undefined if no --spawn flag was given. */
  spawnMode: SpawnMode | undefined
  /** Value passed to --capacity (if any); undefined if no --capacity flag was given. */
  capacity: number | undefined
  /** --[no-]create-session-in-dir override; undefined = use default (on). */
  createSessionInDir: boolean | undefined
  /** Resume an existing session instead of creating a new one. */
  sessionId?: string
  /** Resume the last session in this directory (reads bridge-pointer.json). */
  continueSession: boolean
  /** --help / -h: show usage instead of starting the bridge. */
  help: boolean
  /** Set when parsing or cross-validation failed; user-facing message. */
  error?: string
}
+
+const SPAWN_FLAG_VALUES = ['session', 'same-dir', 'worktree'] as const
+
+function parseSpawnValue(raw: string | undefined): SpawnMode | string {
+  if (raw === 'session') return 'single-session'
+  if (raw === 'same-dir') return 'same-dir'
+  if (raw === 'worktree') return 'worktree'
+  return `--spawn requires one of: ${SPAWN_FLAG_VALUES.join(', ')} (got: ${raw ?? '<missing>'})`
+}
+
+function parseCapacityValue(raw: string | undefined): number | string {
+  const n = raw === undefined ? NaN : parseInt(raw, 10)
+  if (isNaN(n) || n < 1) {
+    return `--capacity requires a positive integer (got: ${raw ?? '<missing>'})`
+  }
+  return n
+}
+
/**
 * Parse CLI arguments for `claude remote-control`.
 *
 * Both `--flag value` and `--flag=value` spellings are accepted for flags
 * that take a value. Never throws: any parse or cross-validation failure is
 * reported via the `error` field of the returned ParsedArgs, alongside
 * whatever was parsed so far.
 */
export function parseArgs(args: string[]): ParsedArgs {
  let verbose = false
  let sandbox = false
  let debugFile: string | undefined
  let sessionTimeoutMs: number | undefined
  let permissionMode: string | undefined
  let name: string | undefined
  let help = false
  let spawnMode: SpawnMode | undefined
  let capacity: number | undefined
  let createSessionInDir: boolean | undefined
  let sessionId: string | undefined
  let continueSession = false

  for (let i = 0; i < args.length; i++) {
    // Non-null assertion is safe: i < args.length inside the loop.
    const arg = args[i]!
    if (arg === '--help' || arg === '-h') {
      help = true
    } else if (arg === '--verbose' || arg === '-v') {
      verbose = true
    } else if (arg === '--sandbox') {
      sandbox = true
    } else if (arg === '--no-sandbox') {
      sandbox = false
    } else if (arg === '--debug-file' && i + 1 < args.length) {
      debugFile = resolve(args[++i]!)
    } else if (arg.startsWith('--debug-file=')) {
      debugFile = resolve(arg.slice('--debug-file='.length))
    } else if (arg === '--session-timeout' && i + 1 < args.length) {
      // Flag takes seconds; stored internally as milliseconds.
      sessionTimeoutMs = parseInt(args[++i]!, 10) * 1000
    } else if (arg.startsWith('--session-timeout=')) {
      sessionTimeoutMs =
        parseInt(arg.slice('--session-timeout='.length), 10) * 1000
    } else if (arg === '--permission-mode' && i + 1 < args.length) {
      permissionMode = args[++i]!
    } else if (arg.startsWith('--permission-mode=')) {
      permissionMode = arg.slice('--permission-mode='.length)
    } else if (arg === '--name' && i + 1 < args.length) {
      name = args[++i]!
    } else if (arg.startsWith('--name=')) {
      name = arg.slice('--name='.length)
    } else if (
      // --session-id / --continue are gated behind feature('KAIROS');
      // with the gate off they fall through to the unknown-argument error.
      feature('KAIROS') &&
      arg === '--session-id' &&
      i + 1 < args.length
    ) {
      sessionId = args[++i]!
      if (!sessionId) {
        return makeError('--session-id requires a value')
      }
    } else if (feature('KAIROS') && arg.startsWith('--session-id=')) {
      sessionId = arg.slice('--session-id='.length)
      if (!sessionId) {
        return makeError('--session-id requires a value')
      }
    } else if (feature('KAIROS') && (arg === '--continue' || arg === '-c')) {
      continueSession = true
    } else if (arg === '--spawn' || arg.startsWith('--spawn=')) {
      // --spawn may only appear once; the value is validated by parseSpawnValue.
      if (spawnMode !== undefined) {
        return makeError('--spawn may only be specified once')
      }
      const raw = arg.startsWith('--spawn=')
        ? arg.slice('--spawn='.length)
        : args[++i]
      const v = parseSpawnValue(raw)
      if (v === 'single-session' || v === 'same-dir' || v === 'worktree') {
        spawnMode = v
      } else {
        // parseSpawnValue returned an error message string.
        return makeError(v)
      }
    } else if (arg === '--capacity' || arg.startsWith('--capacity=')) {
      if (capacity !== undefined) {
        return makeError('--capacity may only be specified once')
      }
      const raw = arg.startsWith('--capacity=')
        ? arg.slice('--capacity='.length)
        : args[++i]
      const v = parseCapacityValue(raw)
      if (typeof v === 'number') capacity = v
      else return makeError(v)
    } else if (arg === '--create-session-in-dir') {
      createSessionInDir = true
    } else if (arg === '--no-create-session-in-dir') {
      createSessionInDir = false
    } else {
      return makeError(
        `Unknown argument: ${arg}\nRun 'claude remote-control --help' for usage.`,
      )
    }
  }

  // Note: gate check for --spawn/--capacity/--create-session-in-dir is in bridgeMain
  // (gate-aware error). Flag cross-validation happens here.

  // --capacity only makes sense for multi-session modes.
  if (spawnMode === 'single-session' && capacity !== undefined) {
    return makeError(
      `--capacity cannot be used with --spawn=session (single-session mode has fixed capacity 1).`,
    )
  }

  // --session-id / --continue resume a specific session on its original
  // environment; incompatible with spawn-related flags (which configure
  // fresh session creation), and mutually exclusive with each other.
  if (
    (sessionId || continueSession) &&
    (spawnMode !== undefined ||
      capacity !== undefined ||
      createSessionInDir !== undefined)
  ) {
    return makeError(
      `--session-id and --continue cannot be used with --spawn, --capacity, or --create-session-in-dir.`,
    )
  }
  if (sessionId && continueSession) {
    return makeError(`--session-id and --continue cannot be used together.`)
  }

  return {
    verbose,
    sandbox,
    debugFile,
    sessionTimeoutMs,
    permissionMode,
    name,
    spawnMode,
    capacity,
    createSessionInDir,
    sessionId,
    continueSession,
    help,
  }

  // Hoisted helper (function declaration, so the loop above can call it):
  // snapshots every flag parsed so far and attaches the error message.
  function makeError(error: string): ParsedArgs {
    return {
      verbose,
      sandbox,
      debugFile,
      sessionTimeoutMs,
      permissionMode,
      name,
      spawnMode,
      capacity,
      createSessionInDir,
      sessionId,
      continueSession,
      help,
      error,
    }
  }
}
+
/**
 * Print usage text for `claude remote-control` to stdout.
 *
 * The multi-session sections (--spawn / --capacity /
 * --[no-]create-session-in-dir and the server description/notes) are only
 * included when isMultiSessionSpawnEnabled() reports the gate as on, and
 * the resume flags (-c/--continue, --session-id) only when feature('KAIROS')
 * is enabled.
 */
async function printHelp(): Promise<void> {
  // Use EXTERNAL_PERMISSION_MODES for help text — internal modes (bubble)
  // are ant-only and auto is feature-gated; they're still accepted by validation.
  const { EXTERNAL_PERMISSION_MODES } = await import('../types/permissions.js')
  const modes = EXTERNAL_PERMISSION_MODES.join(', ')
  const showServer = await isMultiSessionSpawnEnabled()
  // Gate-dependent OPTIONS entries for the multi-session flags.
  const serverOptions = showServer
    ? `  --spawn <mode>                   Spawn mode: same-dir, worktree, session
                                   (default: same-dir)
  --capacity <N>                   Max concurrent sessions in worktree or
                                   same-dir mode (default: ${SPAWN_SESSIONS_DEFAULT})
  --[no-]create-session-in-dir     Pre-create a session in the current
                                   directory; in worktree mode this session
                                   stays in cwd while on-demand sessions get
                                   isolated worktrees (default: on)
`
    : ''
  // Gate-dependent DESCRIPTION paragraph explaining server/multi-session mode.
  const serverDescription = showServer
    ? `
  Remote Control runs as a persistent server that accepts multiple concurrent
  sessions in the current directory. One session is pre-created on start so
  you have somewhere to type immediately. Use --spawn=worktree to isolate
  each on-demand session in its own git worktree, or --spawn=session for
  the classic single-session mode (exits when that session ends). Press 'w'
  during runtime to toggle between same-dir and worktree.
`
    : ''
  // Gate-dependent NOTES line about worktree prerequisites.
  const serverNote = showServer
    ? `  - Worktree mode requires a git repository or WorktreeCreate/WorktreeRemove hooks
`
    : ''
  const help = `
Remote Control - Connect your local environment to claude.ai/code

USAGE
  claude remote-control [options]
OPTIONS
  --name <name>                    Name for the session (shown in claude.ai/code)
${
  feature('KAIROS')
    ? `  -c, --continue                   Resume the last session in this directory
  --session-id <id>                Resume a specific session by ID (cannot be
                                   used with spawn flags or --continue)
`
    : ''
}  --permission-mode <mode>         Permission mode for spawned sessions
                                   (${modes})
  --debug-file <path>              Write debug logs to file
  -v, --verbose                    Enable verbose output
  -h, --help                       Show this help
${serverOptions}
DESCRIPTION
  Remote Control allows you to control sessions on your local device from
  claude.ai/code (https://claude.ai/code). Run this command in the
  directory you want to work in, then connect from the Claude app or web.
${serverDescription}
NOTES
  - You must be logged in with a Claude account that has a subscription
  - Run \`claude\` first in the directory to accept the workspace trust dialog
${serverNote}`
  // biome-ignore lint/suspicious/noConsole: intentional help output
  console.log(help)
}
+
+const TITLE_MAX_LEN = 80
+
+/** Derive a session title from a user message: first line, truncated. */
+function deriveSessionTitle(text: string): string {
+  // Collapse whitespace — newlines/tabs would break the single-line status display.
+  const flat = text.replace(/\s+/g, ' ').trim()
+  return truncateToWidth(flat, TITLE_MAX_LEN)
+}
+
+/**
+ * One-shot fetch of a session's title via GET /v1/sessions/{id}.
+ *
+ * Uses `getBridgeSession` from createSession.ts (ccr-byoc headers + org UUID)
+ * rather than the environments-level bridgeApi client, whose headers make the
+ * Sessions API return 404. Returns undefined if the session has no title yet
+ * or the fetch fails — the caller falls back to deriving a title from the
+ * first user message.
+ */
+async function fetchSessionTitle(
+  compatSessionId: string,
+  baseUrl: string,
+): Promise<string | undefined> {
+  const { getBridgeSession } = await import('./createSession.js')
+  const session = await getBridgeSession(compatSessionId, { baseUrl })
+  return session?.title || undefined
+}
+
+export async function bridgeMain(args: string[]): Promise<void> {
+  const parsed = parseArgs(args)
+
+  if (parsed.help) {
+    await printHelp()
+    return
+  }
+  if (parsed.error) {
+    // biome-ignore lint/suspicious/noConsole: intentional error output
+    console.error(`Error: ${parsed.error}`)
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(1)
+  }
+
+  const {
+    verbose,
+    sandbox,
+    debugFile,
+    sessionTimeoutMs,
+    permissionMode,
+    name,
+    spawnMode: parsedSpawnMode,
+    capacity: parsedCapacity,
+    createSessionInDir: parsedCreateSessionInDir,
+    sessionId: parsedSessionId,
+    continueSession,
+  } = parsed
+  // Mutable so --continue can set it from the pointer file. The #20460
+  // resume flow below then treats it the same as an explicit --session-id.
+  let resumeSessionId = parsedSessionId
+  // When --continue found a pointer, this is the directory it came from
+  // (may be a worktree sibling, not `dir`). On resume-flow deterministic
+  // failure, clear THIS file so --continue doesn't keep hitting the same
+  // dead session. Undefined for explicit --session-id (leaves pointer alone).
+  let resumePointerDir: string | undefined
+
+  const usedMultiSessionFeature =
+    parsedSpawnMode !== undefined ||
+    parsedCapacity !== undefined ||
+    parsedCreateSessionInDir !== undefined
+
+  // Validate permission mode early so the user gets an error before
+  // the bridge starts polling for work.
+  if (permissionMode !== undefined) {
+    const { PERMISSION_MODES } = await import('../types/permissions.js')
+    const valid: readonly string[] = PERMISSION_MODES
+    if (!valid.includes(permissionMode)) {
+      // biome-ignore lint/suspicious/noConsole: intentional error output
+      console.error(
+        `Error: Invalid permission mode '${permissionMode}'. Valid modes: ${valid.join(', ')}`,
+      )
+      // eslint-disable-next-line custom-rules/no-process-exit
+      process.exit(1)
+    }
+  }
+
+  const dir = resolve('.')
+
+  // The bridge fast-path bypasses init.ts, so we must enable config reading
+  // before any code that transitively calls getGlobalConfig()
+  const { enableConfigs, checkHasTrustDialogAccepted } = await import(
+    '../utils/config.js'
+  )
+  enableConfigs()
+
+  // Initialize analytics and error reporting sinks. The bridge bypasses the
+  // setup() init flow, so we call initSinks() directly to attach sinks here.
+  const { initSinks } = await import('../utils/sinks.js')
+  initSinks()
+
+  // Gate-aware validation: --spawn / --capacity / --create-session-in-dir require
+  // the multi-session gate. parseArgs has already validated flag combinations;
+  // here we only check the gate since that requires an async GrowthBook call.
+  // Runs after enableConfigs() (GrowthBook cache reads global config) and after
+  // initSinks() so the denial event can be enqueued.
+  const multiSessionEnabled = await isMultiSessionSpawnEnabled()
+  if (usedMultiSessionFeature && !multiSessionEnabled) {
+    await logEventAsync('tengu_bridge_multi_session_denied', {
+      used_spawn: parsedSpawnMode !== undefined,
+      used_capacity: parsedCapacity !== undefined,
+      used_create_session_in_dir: parsedCreateSessionInDir !== undefined,
+    })
+    // logEventAsync only enqueues — process.exit() discards buffered events.
+    // Flush explicitly, capped at 500ms to match gracefulShutdown.ts.
+    // (sleep() doesn't unref its timer, but process.exit() follows immediately
+    // so the ref'd timer can't delay shutdown.)
+    await Promise.race([
+      Promise.all([shutdown1PEventLogging(), shutdownDatadog()]),
+      sleep(500, undefined, { unref: true }),
+    ]).catch(() => {})
+    // biome-ignore lint/suspicious/noConsole: intentional error output
+    console.error(
+      'Error: Multi-session Remote Control is not enabled for your account yet.',
+    )
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(1)
+  }
+
+  // Set the bootstrap CWD so that trust checks, project config lookups, and
+  // git utilities (getBranch, getRemoteUrl) resolve against the correct path.
+  const { setOriginalCwd, setCwdState } = await import('../bootstrap/state.js')
+  setOriginalCwd(dir)
+  setCwdState(dir)
+
+  // The bridge bypasses main.tsx (which renders the interactive TrustDialog via showSetupScreens),
+  // so we must verify trust was previously established by a normal `claude` session.
+  if (!checkHasTrustDialogAccepted()) {
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.error(
+      `Error: Workspace not trusted. Please run \`claude\` in ${dir} first to review and accept the workspace trust dialog.`,
+    )
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(1)
+  }
+
+  // Resolve auth
+  const { clearOAuthTokenCache, checkAndRefreshOAuthTokenIfNeeded } =
+    await import('../utils/auth.js')
+  const { getBridgeAccessToken, getBridgeBaseUrl } = await import(
+    './bridgeConfig.js'
+  )
+
+  const bridgeToken = getBridgeAccessToken()
+  if (!bridgeToken) {
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.error(BRIDGE_LOGIN_ERROR)
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(1)
+  }
+
+  // First-time remote dialog — explain what bridge does and get consent
+  const {
+    getGlobalConfig,
+    saveGlobalConfig,
+    getCurrentProjectConfig,
+    saveCurrentProjectConfig,
+  } = await import('../utils/config.js')
+  if (!getGlobalConfig().remoteDialogSeen) {
+    const readline = await import('readline')
+    const rl = readline.createInterface({
+      input: process.stdin,
+      output: process.stdout,
+    })
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.log(
+      '\nRemote Control lets you access this CLI session from the web (claude.ai/code)\nor the Claude app, so you can pick up where you left off on any device.\n\nYou can disconnect remote access anytime by running /remote-control again.\n',
+    )
+    const answer = await new Promise<string>(resolve => {
+      rl.question('Enable Remote Control? (y/n) ', resolve)
+    })
+    rl.close()
+    saveGlobalConfig(current => {
+      if (current.remoteDialogSeen) return current
+      return { ...current, remoteDialogSeen: true }
+    })
+    if (answer.toLowerCase() !== 'y' && answer.toLowerCase() !== 'yes') {
+      // eslint-disable-next-line custom-rules/no-process-exit
+      process.exit(0)
+    }
+  }
+
+  // --continue: resolve the most recent session from the crash-recovery
+  // pointer and chain into the #20460 --session-id flow. Worktree-aware:
+  // checks current dir first (fast path, zero exec), then fans out to git
+  // worktree siblings if that misses — the REPL bridge writes to
+  // getOriginalCwd() which EnterWorktreeTool/activeWorktreeSession can
+  // point at a worktree while the user's shell is at the repo root.
+  // KAIROS-gated at parseArgs — continueSession is always false in external
+  // builds, so this block tree-shakes.
+  if (feature('KAIROS') && continueSession) {
+    const { readBridgePointerAcrossWorktrees } = await import(
+      './bridgePointer.js'
+    )
+    const found = await readBridgePointerAcrossWorktrees(dir)
+    if (!found) {
+      // biome-ignore lint/suspicious/noConsole: intentional error output
+      console.error(
+        `Error: No recent session found in this directory or its worktrees. Run \`claude remote-control\` to start a new one.`,
+      )
+      // eslint-disable-next-line custom-rules/no-process-exit
+      process.exit(1)
+    }
+    const { pointer, dir: pointerDir } = found
+    const ageMin = Math.round(pointer.ageMs / 60_000)
+    const ageStr = ageMin < 60 ? `${ageMin}m` : `${Math.round(ageMin / 60)}h`
+    const fromWt = pointerDir !== dir ? ` from worktree ${pointerDir}` : ''
+    // biome-ignore lint/suspicious/noConsole: intentional info output
+    console.error(
+      `Resuming session ${pointer.sessionId} (${ageStr} ago)${fromWt}\u2026`,
+    )
+    resumeSessionId = pointer.sessionId
+    // Track where the pointer came from so the #20460 exit(1) paths below
+    // clear the RIGHT file on deterministic failure — otherwise --continue
+    // would keep hitting the same dead session. May be a worktree sibling.
+    resumePointerDir = pointerDir
+  }
+
+  // In production, baseUrl is the Anthropic API (from OAuth config).
+  // CLAUDE_BRIDGE_BASE_URL overrides this for ant local dev only.
+  const baseUrl = getBridgeBaseUrl()
+
+  // For non-localhost targets, require HTTPS to protect credentials.
+  if (
+    baseUrl.startsWith('http://') &&
+    !baseUrl.includes('localhost') &&
+    !baseUrl.includes('127.0.0.1')
+  ) {
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.error(
+      'Error: Remote Control base URL uses HTTP. Only HTTPS or localhost HTTP is allowed.',
+    )
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(1)
+  }
+
+  // Session ingress URL for WebSocket connections. In production this is the
+  // same as baseUrl (Envoy routes /v1/session_ingress/* to session-ingress).
+  // Locally, session-ingress runs on a different port (9413) than the
+  // contain-provide-api (8211), so CLAUDE_BRIDGE_SESSION_INGRESS_URL must be
+  // set explicitly. Ant-only, matching CLAUDE_BRIDGE_BASE_URL.
+  const sessionIngressUrl =
+    process.env.USER_TYPE === 'ant' &&
+    process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
+      ? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
+      : baseUrl
+
+  const { getBranch, getRemoteUrl, findGitRoot } = await import(
+    '../utils/git.js'
+  )
+
+  // Precheck worktree availability for the first-run dialog and the `w`
+  // toggle. Unconditional so we know upfront whether worktree is an option.
+  const { hasWorktreeCreateHook } = await import('../utils/hooks.js')
+  const worktreeAvailable = hasWorktreeCreateHook() || findGitRoot(dir) !== null
+
+  // Load saved per-project spawn-mode preference. Gated by multiSessionEnabled
+  // so a GrowthBook rollback cleanly reverts users to single-session —
+  // otherwise a saved pref would silently re-enable multi-session behavior
+  // (worktree isolation, 32 max sessions, w toggle) despite the gate being off.
+  // Also guard against a stale worktree pref left over from when this dir WAS
+  // a git repo (or the user copied config) — clear it on disk so the warning
+  // doesn't repeat on every launch.
+  let savedSpawnMode = multiSessionEnabled
+    ? getCurrentProjectConfig().remoteControlSpawnMode
+    : undefined
+  if (savedSpawnMode === 'worktree' && !worktreeAvailable) {
+    // biome-ignore lint/suspicious/noConsole: intentional warning output
+    console.error(
+      'Warning: Saved spawn mode is worktree but this directory is not a git repository. Falling back to same-dir.',
+    )
+    savedSpawnMode = undefined
+    saveCurrentProjectConfig(current => {
+      if (current.remoteControlSpawnMode === undefined) return current
+      return { ...current, remoteControlSpawnMode: undefined }
+    })
+  }
+
+  // First-run spawn-mode choice: ask once per project when the choice is
+  // meaningful (gate on, both modes available, no explicit override, not
+  // resuming). Saves to ProjectConfig so subsequent runs skip this.
+  if (
+    multiSessionEnabled &&
+    !savedSpawnMode &&
+    worktreeAvailable &&
+    parsedSpawnMode === undefined &&
+    !resumeSessionId &&
+    process.stdin.isTTY
+  ) {
+    const readline = await import('readline')
+    const rl = readline.createInterface({
+      input: process.stdin,
+      output: process.stdout,
+    })
+    // biome-ignore lint/suspicious/noConsole: intentional dialog output
+    console.log(
+      `\nClaude Remote Control is launching in spawn mode which lets you create new sessions in this project from Claude Code on Web or your Mobile app. Learn more here: https://code.claude.com/docs/en/remote-control\n\n` +
+        `Spawn mode for this project:\n` +
+        `  [1] same-dir \u2014 sessions share the current directory (default)\n` +
+        `  [2] worktree \u2014 each session gets an isolated git worktree\n\n` +
+        `This can be changed later or explicitly set with --spawn=same-dir or --spawn=worktree.\n`,
+    )
+    const answer = await new Promise<string>(resolve => {
+      rl.question('Choose [1/2] (default: 1): ', resolve)
+    })
+    rl.close()
+    const chosen: 'same-dir' | 'worktree' =
+      answer.trim() === '2' ? 'worktree' : 'same-dir'
+    savedSpawnMode = chosen
+    logEvent('tengu_bridge_spawn_mode_chosen', {
+      spawn_mode:
+        chosen as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    })
+    saveCurrentProjectConfig(current => {
+      if (current.remoteControlSpawnMode === chosen) return current
+      return { ...current, remoteControlSpawnMode: chosen }
+    })
+  }
+
+  // Determine effective spawn mode.
+  // Precedence: resume > explicit --spawn > saved project pref > gate default
+  // - resuming via --continue / --session-id: always single-session (resume
+  //   targets one specific session in its original directory)
+  // - explicit --spawn flag: use that value directly (does not persist)
+  // - saved ProjectConfig.remoteControlSpawnMode: set by first-run dialog or `w`
+  // - default with gate on: same-dir (persistent multi-session, shared cwd)
+  // - default with gate off: single-session (unchanged legacy behavior)
+  // Track how spawn mode was determined, for rollout analytics.
+  type SpawnModeSource = 'resume' | 'flag' | 'saved' | 'gate_default'
+  let spawnModeSource: SpawnModeSource
+  let spawnMode: SpawnMode
+  if (resumeSessionId) {
+    spawnMode = 'single-session'
+    spawnModeSource = 'resume'
+  } else if (parsedSpawnMode !== undefined) {
+    spawnMode = parsedSpawnMode
+    spawnModeSource = 'flag'
+  } else if (savedSpawnMode !== undefined) {
+    spawnMode = savedSpawnMode
+    spawnModeSource = 'saved'
+  } else {
+    spawnMode = multiSessionEnabled ? 'same-dir' : 'single-session'
+    spawnModeSource = 'gate_default'
+  }
+  const maxSessions =
+    spawnMode === 'single-session'
+      ? 1
+      : (parsedCapacity ?? SPAWN_SESSIONS_DEFAULT)
+  // Pre-create an empty session on start so the user has somewhere to type
+  // immediately, running in the current directory (exempted from worktree
+  // creation in the spawn loop). On by default; --no-create-session-in-dir
+  // opts out for a pure on-demand server where every session is isolated.
+  // The effectiveResumeSessionId guard at the creation site handles the
+  // resume case (skip creation when resume succeeded; fall through to
+  // fresh creation on env-mismatch fallback).
+  const preCreateSession = parsedCreateSessionInDir ?? true
+
+  // Without --continue: a leftover pointer means the previous run didn't
+  // shut down cleanly (crash, kill -9, terminal closed). Clear it so the
+  // stale env doesn't linger past its relevance. Runs in all modes
+  // (clearBridgePointer is a no-op when no file exists) — covers the
+  // gate-transition case where a user crashed in single-session mode then
+  // starts fresh in worktree mode. Only single-session mode writes new
+  // pointers.
+  if (!resumeSessionId) {
+    const { clearBridgePointer } = await import('./bridgePointer.js')
+    await clearBridgePointer(dir)
+  }
+
+  // Worktree mode requires either git or WorktreeCreate/WorktreeRemove hooks.
+  // Only reachable via explicit --spawn=worktree (default is same-dir);
+  // saved worktree pref was already guarded above.
+  if (spawnMode === 'worktree' && !worktreeAvailable) {
+    // biome-ignore lint/suspicious/noConsole: intentional error output
+    console.error(
+      `Error: Worktree mode requires a git repository or WorktreeCreate hooks configured. Use --spawn=session for single-session mode.`,
+    )
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(1)
+  }
+
+  const branch = await getBranch()
+  const gitRepoUrl = await getRemoteUrl()
+  const machineName = hostname()
+  const bridgeId = randomUUID()
+
+  const { handleOAuth401Error } = await import('../utils/auth.js')
+  const api = createBridgeApiClient({
+    baseUrl,
+    getAccessToken: getBridgeAccessToken,
+    runnerVersion: MACRO.VERSION,
+    onDebug: logForDebugging,
+    onAuth401: handleOAuth401Error,
+    getTrustedDeviceToken,
+  })
+
+  // When resuming a session via --session-id, fetch it to learn its
+  // environment_id and reuse that for registration (idempotent on the
+  // backend). Left undefined otherwise — the backend rejects
+  // client-generated UUIDs and will allocate a fresh environment.
+  // feature('KAIROS') gate: --session-id is ant-only; parseArgs already
+  // rejects the flag when the gate is off, so resumeSessionId is always
+  // undefined here in external builds — this guard is for tree-shaking.
+  let reuseEnvironmentId: string | undefined
+  if (feature('KAIROS') && resumeSessionId) {
+    try {
+      validateBridgeId(resumeSessionId, 'sessionId')
+    } catch {
+      // biome-ignore lint/suspicious/noConsole: intentional error output
+      console.error(
+        `Error: Invalid session ID "${resumeSessionId}". Session IDs must not contain unsafe characters.`,
+      )
+      // eslint-disable-next-line custom-rules/no-process-exit
+      process.exit(1)
+    }
+    // Proactively refresh the OAuth token — getBridgeSession uses raw axios
+    // without the withOAuthRetry 401-refresh logic. An expired-but-present
+    // token would otherwise produce a misleading "not found" error.
+    await checkAndRefreshOAuthTokenIfNeeded()
+    clearOAuthTokenCache()
+    const { getBridgeSession } = await import('./createSession.js')
+    const session = await getBridgeSession(resumeSessionId, {
+      baseUrl,
+      getAccessToken: getBridgeAccessToken,
+    })
+    if (!session) {
+      // Session gone on server → pointer is stale. Clear it so the user
+      // isn't re-prompted next launch. (Explicit --session-id leaves the
+      // pointer alone — it's an independent file they may not even have.)
+      // resumePointerDir may be a worktree sibling — clear THAT file.
+      if (resumePointerDir) {
+        const { clearBridgePointer } = await import('./bridgePointer.js')
+        await clearBridgePointer(resumePointerDir)
+      }
+      // biome-ignore lint/suspicious/noConsole: intentional error output
+      console.error(
+        `Error: Session ${resumeSessionId} not found. It may have been archived or expired, or your login may have lapsed (run \`claude /login\`).`,
+      )
+      // eslint-disable-next-line custom-rules/no-process-exit
+      process.exit(1)
+    }
+    if (!session.environment_id) {
+      if (resumePointerDir) {
+        const { clearBridgePointer } = await import('./bridgePointer.js')
+        await clearBridgePointer(resumePointerDir)
+      }
+      // biome-ignore lint/suspicious/noConsole: intentional error output
+      console.error(
+        `Error: Session ${resumeSessionId} has no environment_id. It may never have been attached to a bridge.`,
+      )
+      // eslint-disable-next-line custom-rules/no-process-exit
+      process.exit(1)
+    }
+    reuseEnvironmentId = session.environment_id
+    logForDebugging(
+      `[bridge:init] Resuming session ${resumeSessionId} on environment ${reuseEnvironmentId}`,
+    )
+  }
+
+  const config: BridgeConfig = {
+    dir,
+    machineName,
+    branch,
+    gitRepoUrl,
+    maxSessions,
+    spawnMode,
+    verbose,
+    sandbox,
+    bridgeId,
+    workerType: 'claude_code',
+    environmentId: randomUUID(),
+    reuseEnvironmentId,
+    apiBaseUrl: baseUrl,
+    sessionIngressUrl,
+    debugFile,
+    sessionTimeoutMs,
+  }
+
+  logForDebugging(
+    `[bridge:init] bridgeId=${bridgeId}${reuseEnvironmentId ? ` reuseEnvironmentId=${reuseEnvironmentId}` : ''} dir=${dir} branch=${branch} gitRepoUrl=${gitRepoUrl} machine=${machineName}`,
+  )
+  logForDebugging(
+    `[bridge:init] apiBaseUrl=${baseUrl} sessionIngressUrl=${sessionIngressUrl}`,
+  )
+  logForDebugging(
+    `[bridge:init] sandbox=${sandbox}${debugFile ? ` debugFile=${debugFile}` : ''}`,
+  )
+
+  // Register the bridge environment before entering the poll loop.
+  let environmentId: string
+  let environmentSecret: string
+  try {
+    const reg = await api.registerBridgeEnvironment(config)
+    environmentId = reg.environment_id
+    environmentSecret = reg.environment_secret
+  } catch (err) {
+    logEvent('tengu_bridge_registration_failed', {
+      status: err instanceof BridgeFatalError ? err.status : undefined,
+    })
+    // Registration failures are fatal — print a clean message instead of a stack trace.
    // biome-ignore lint/suspicious/noConsole: intentional console output
+    console.error(
+      err instanceof BridgeFatalError && err.status === 404
+        ? 'Remote Control environments are not available for your account.'
+        : `Error: ${errorMessage(err)}`,
+    )
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(1)
+  }
+
+  // Tracks whether the --session-id resume flow completed successfully.
+  // Used below to skip fresh session creation and seed initialSessionId.
+  // Cleared on env mismatch so we gracefully fall back to a new session.
+  let effectiveResumeSessionId: string | undefined
+  if (feature('KAIROS') && resumeSessionId) {
+    if (reuseEnvironmentId && environmentId !== reuseEnvironmentId) {
+      // Backend returned a different environment_id — the original env
+      // expired or was reaped. Reconnect won't work against the new env
+      // (session is bound to the old one). Log to sentry for visibility
+      // and fall through to fresh session creation on the new env.
+      logError(
+        new Error(
+          `Bridge resume env mismatch: requested ${reuseEnvironmentId}, backend returned ${environmentId}. Falling back to fresh session.`,
+        ),
+      )
+      // biome-ignore lint/suspicious/noConsole: intentional warning output
+      console.warn(
+        `Warning: Could not resume session ${resumeSessionId} — its environment has expired. Creating a fresh session instead.`,
+      )
+      // Don't deregister — we're going to use this new environment.
+      // effectiveResumeSessionId stays undefined → fresh session path below.
+    } else {
+      // Force-stop any stale worker instances for this session and re-queue
+      // it so our poll loop picks it up. Must happen after registration so
+      // the backend knows a live worker exists for the environment.
+      //
+      // The pointer stores a session_* ID but /bridge/reconnect looks
+      // sessions up by their infra tag (cse_*) when ccr_v2_compat_enabled
+      // is on. Try both; the conversion is a no-op if already cse_*.
+      const infraResumeId = toInfraSessionId(resumeSessionId)
+      const reconnectCandidates =
+        infraResumeId === resumeSessionId
+          ? [resumeSessionId]
+          : [resumeSessionId, infraResumeId]
+      let reconnected = false
+      let lastReconnectErr: unknown
+      for (const candidateId of reconnectCandidates) {
+        try {
+          await api.reconnectSession(environmentId, candidateId)
+          logForDebugging(
+            `[bridge:init] Session ${candidateId} re-queued via bridge/reconnect`,
+          )
+          effectiveResumeSessionId = resumeSessionId
+          reconnected = true
+          break
+        } catch (err) {
+          lastReconnectErr = err
+          logForDebugging(
+            `[bridge:init] reconnectSession(${candidateId}) failed: ${errorMessage(err)}`,
+          )
+        }
+      }
+      if (!reconnected) {
+        const err = lastReconnectErr
+
+        // Do NOT deregister on transient reconnect failure — at this point
+        // environmentId IS the session's own environment. Deregistering
+        // would make retry impossible. The backend's 4h TTL cleans up.
+        const isFatal = err instanceof BridgeFatalError
+        // Clear pointer only on fatal reconnect failure. Transient failures
+        // ("try running the same command again") should keep the pointer so
+        // next launch re-prompts — that IS the retry mechanism.
+        if (resumePointerDir && isFatal) {
+          const { clearBridgePointer } = await import('./bridgePointer.js')
+          await clearBridgePointer(resumePointerDir)
+        }
+        // biome-ignore lint/suspicious/noConsole: intentional error output
+        console.error(
+          isFatal
+            ? `Error: ${errorMessage(err)}`
+            : `Error: Failed to reconnect session ${resumeSessionId}: ${errorMessage(err)}\nThe session may still be resumable — try running the same command again.`,
+        )
+        // eslint-disable-next-line custom-rules/no-process-exit
+        process.exit(1)
+      }
+    }
+  }
+
+  logForDebugging(
+    `[bridge:init] Registered, server environmentId=${environmentId}`,
+  )
+  const startupPollConfig = getPollIntervalConfig()
+  logEvent('tengu_bridge_started', {
+    max_sessions: config.maxSessions,
+    has_debug_file: !!config.debugFile,
+    sandbox: config.sandbox,
+    verbose: config.verbose,
+    heartbeat_interval_ms:
+      startupPollConfig.non_exclusive_heartbeat_interval_ms,
+    spawn_mode:
+      config.spawnMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    spawn_mode_source:
+      spawnModeSource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    multi_session_gate: multiSessionEnabled,
+    pre_create_session: preCreateSession,
+    worktree_available: worktreeAvailable,
+  })
+  logForDiagnosticsNoPII('info', 'bridge_started', {
+    max_sessions: config.maxSessions,
+    sandbox: config.sandbox,
+    spawn_mode: config.spawnMode,
+  })
+
+  const spawner = createSessionSpawner({
+    execPath: process.execPath,
+    scriptArgs: spawnScriptArgs(),
+    env: process.env,
+    verbose,
+    sandbox,
+    debugFile,
+    permissionMode,
+    onDebug: logForDebugging,
+    onActivity: (sessionId, activity) => {
+      logForDebugging(
+        `[bridge:activity] sessionId=${sessionId} ${activity.type} ${activity.summary}`,
+      )
+    },
+    onPermissionRequest: (sessionId, request, _accessToken) => {
+      logForDebugging(
+        `[bridge:perm] sessionId=${sessionId} tool=${request.request.tool_name} request_id=${request.request_id} (not auto-approving)`,
+      )
+    },
+  })
+
+  const logger = createBridgeLogger({ verbose })
+  const { parseGitHubRepository } = await import('../utils/detectRepository.js')
+  const ownerRepo = gitRepoUrl ? parseGitHubRepository(gitRepoUrl) : null
+  // Use the repo name from the parsed owner/repo, or fall back to the dir basename
+  const repoName = ownerRepo ? ownerRepo.split('/').pop()! : basename(dir)
+  logger.setRepoInfo(repoName, branch)
+
+  // `w` toggle is available iff we're in a multi-session mode AND worktree
+  // is a valid option. When unavailable, the mode suffix and hint are hidden.
+  const toggleAvailable = spawnMode !== 'single-session' && worktreeAvailable
+  if (toggleAvailable) {
+    // Safe cast: spawnMode is not single-session (checked above), and the
+    // saved-worktree-in-non-git guard + exit check above ensure worktree
+    // is only reached when available.
+    logger.setSpawnModeDisplay(spawnMode as 'same-dir' | 'worktree')
+  }
+
+  // Listen for keys: space toggles QR code, w toggles spawn mode
+  const onStdinData = (data: Buffer): void => {
+    if (data[0] === 0x03 || data[0] === 0x04) {
+      // Ctrl+C / Ctrl+D — trigger graceful shutdown
+      process.emit('SIGINT')
+      return
+    }
+    if (data[0] === 0x20 /* space */) {
+      logger.toggleQr()
+      return
+    }
+    if (data[0] === 0x77 /* 'w' */) {
+      if (!toggleAvailable) return
+      const newMode: 'same-dir' | 'worktree' =
+        config.spawnMode === 'same-dir' ? 'worktree' : 'same-dir'
+      config.spawnMode = newMode
+      logEvent('tengu_bridge_spawn_mode_toggled', {
+        spawn_mode:
+          newMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+      })
+      logger.logStatus(
+        newMode === 'worktree'
+          ? 'Spawn mode: worktree (new sessions get isolated git worktrees)'
+          : 'Spawn mode: same-dir (new sessions share the current directory)',
+      )
+      logger.setSpawnModeDisplay(newMode)
+      logger.refreshDisplay()
+      saveCurrentProjectConfig(current => {
+        if (current.remoteControlSpawnMode === newMode) return current
+        return { ...current, remoteControlSpawnMode: newMode }
+      })
+      return
+    }
+  }
+  if (process.stdin.isTTY) {
+    process.stdin.setRawMode(true)
+    process.stdin.resume()
+    process.stdin.on('data', onStdinData)
+  }
+
+  const controller = new AbortController()
+  const onSigint = (): void => {
+    logForDebugging('[bridge:shutdown] SIGINT received, shutting down')
+    controller.abort()
+  }
+  const onSigterm = (): void => {
+    logForDebugging('[bridge:shutdown] SIGTERM received, shutting down')
+    controller.abort()
+  }
+  process.on('SIGINT', onSigint)
+  process.on('SIGTERM', onSigterm)
+
+  // Auto-create an empty session so the user has somewhere to type
+  // immediately (matching /remote-control behavior). Controlled by
+  // preCreateSession: on by default; --no-create-session-in-dir opts out.
+  // When a --session-id resume succeeded, skip creation entirely — the
+  // session already exists and bridge/reconnect has re-queued it.
+  // When resume was requested but failed on env mismatch, effectiveResumeSessionId
+  // is undefined, so we fall through to fresh session creation (honoring the
+  // "Creating a fresh session instead" warning printed above).
+  let initialSessionId: string | null =
+    feature('KAIROS') && effectiveResumeSessionId
+      ? effectiveResumeSessionId
+      : null
+  if (preCreateSession && !(feature('KAIROS') && effectiveResumeSessionId)) {
+    const { createBridgeSession } = await import('./createSession.js')
+    try {
+      initialSessionId = await createBridgeSession({
+        environmentId,
+        title: name,
+        events: [],
+        gitRepoUrl,
+        branch,
+        signal: controller.signal,
+        baseUrl,
+        getAccessToken: getBridgeAccessToken,
+        permissionMode,
+      })
+      if (initialSessionId) {
+        logForDebugging(
+          `[bridge:init] Created initial session ${initialSessionId}`,
+        )
+      }
+    } catch (err) {
+      logForDebugging(
+        `[bridge:init] Session creation failed (non-fatal): ${errorMessage(err)}`,
+      )
+    }
+  }
+
+  // Crash-recovery pointer: write immediately so kill -9 at any point
+  // after this leaves a recoverable trail. Covers both fresh sessions and
+  // resumed ones (so a second crash after resume is still recoverable).
+  // Cleared when runBridgeLoop falls through to archive+deregister; left in
+  // place on the SIGINT resumable-shutdown return (backup for when the user
+  // closes the terminal before copying the printed --session-id hint).
+  // Refreshed hourly so a 5h+ session that crashes still has a fresh
+  // pointer (staleness checks file mtime, backend TTL is rolling-from-poll).
+  let pointerRefreshTimer: ReturnType<typeof setInterval> | null = null
+  // Single-session only: --continue forces single-session mode on resume,
+  // so a pointer written in multi-session mode would contradict the user's
+  // config when they try to resume. The resumable-shutdown path is also
+  // gated to single-session (line ~1254) so the pointer would be orphaned.
+  if (initialSessionId && spawnMode === 'single-session') {
+    const { writeBridgePointer } = await import('./bridgePointer.js')
+    const pointerPayload = {
+      sessionId: initialSessionId,
+      environmentId,
+      source: 'standalone' as const,
+    }
+    await writeBridgePointer(config.dir, pointerPayload)
+    pointerRefreshTimer = setInterval(
+      writeBridgePointer,
+      60 * 60 * 1000,
+      config.dir,
+      pointerPayload,
+    )
+    // Don't let the interval keep the process alive on its own.
+    pointerRefreshTimer.unref?.()
+  }
+
+  try {
+    await runBridgeLoop(
+      config,
+      environmentId,
+      environmentSecret,
+      api,
+      spawner,
+      logger,
+      controller.signal,
+      undefined,
+      initialSessionId ?? undefined,
+      async () => {
+        // Clear the memoized OAuth token cache so we re-read from secure
+        // storage, picking up tokens refreshed by child processes.
+        clearOAuthTokenCache()
+        // Proactively refresh the token if it's expired on disk too.
+        await checkAndRefreshOAuthTokenIfNeeded()
+        return getBridgeAccessToken()
+      },
+    )
+  } finally {
+    if (pointerRefreshTimer !== null) {
+      clearInterval(pointerRefreshTimer)
+    }
+    process.off('SIGINT', onSigint)
+    process.off('SIGTERM', onSigterm)
+    process.stdin.off('data', onStdinData)
+    if (process.stdin.isTTY) {
+      process.stdin.setRawMode(false)
+    }
+    process.stdin.pause()
+  }
+
+  // The bridge bypasses init.ts (and its graceful shutdown handler), so we
+  // must exit explicitly.
+  // eslint-disable-next-line custom-rules/no-process-exit
+  process.exit(0)
+}
+
+// ─── Headless bridge (daemon worker) ────────────────────────────────────────
+
/**
 * Thrown by runBridgeHeadless for configuration issues the supervisor should
 * NOT retry (trust not accepted, worktree unavailable, http-not-https). The
 * daemon worker catches this and exits with EXIT_CODE_PERMANENT so the
 * supervisor parks the worker instead of respawning it on backoff.
 */
export class BridgeHeadlessPermanentError extends Error {
  constructor(message: string) {
    super(message)
    // Explicit literal (not `this.constructor.name`) so the class is still
    // identifiable in logs if bundling/minification mangles the class name.
    this.name = 'BridgeHeadlessPermanentError'
  }
}
+
export type HeadlessBridgeOpts = {
  /** Workspace directory the bridge serves; the worker chdirs here on start. */
  dir: string
  /** Optional title for the pre-created session (createBridgeSession `title`). */
  name?: string
  /** Where new sessions run: the shared directory or isolated git worktrees. */
  spawnMode: 'same-dir' | 'worktree'
  /** Max concurrent sessions — becomes BridgeConfig.maxSessions. */
  capacity: number
  /** Forwarded to the session spawner and to session pre-creation. */
  permissionMode?: string
  /** Whether spawned sessions run sandboxed. */
  sandbox: boolean
  /** Passed through to BridgeConfig.sessionTimeoutMs when set. */
  sessionTimeoutMs?: number
  /** When true, pre-create an empty session so the user can type immediately. */
  createSessionOnStart: boolean
  /** Access-token supplier (supervisor's AuthManager, delivered via IPC). */
  getAccessToken: () => string | undefined
  // Invoked by the API client on HTTP 401 — presumably resolves true once a
  // refreshed token is available. TODO(review): confirm against createBridgeApiClient.
  onAuth401: (failedToken: string) => Promise<boolean>
  /** Line logger — routed to the worker's stdout pipe. */
  log: (s: string) => void
}
+
/**
 * Non-interactive bridge entrypoint for the `remoteControl` daemon worker.
 *
 * Linear subset of bridgeMain(): no readline dialogs, no stdin key handlers,
 * no TUI, no process.exit(). Config comes from the caller (daemon.json), auth
 * comes via IPC (supervisor's AuthManager), logs go to the worker's stdout
 * pipe. Throws on fatal errors — the worker catches and maps permanent vs
 * transient to the right exit code.
 *
 * Resolves cleanly when `signal` aborts and the poll loop tears down.
 *
 * @param opts   Caller-supplied configuration; see HeadlessBridgeOpts.
 * @param signal Abort to tear down the poll loop and resolve.
 * @throws BridgeHeadlessPermanentError for non-retryable config problems
 *         (untrusted workspace, plain-HTTP base URL, worktree unavailable).
 * @throws Error for transient failures the supervisor should backoff-retry
 *         (missing token, registration failure).
 */
export async function runBridgeHeadless(
  opts: HeadlessBridgeOpts,
  signal: AbortSignal,
): Promise<void> {
  const { dir, log } = opts

  // Worker inherits the supervisor's CWD. chdir first so git utilities
  // (getBranch/getRemoteUrl) — which read from bootstrap CWD state set
  // below — resolve against the right repo.
  process.chdir(dir)
  const { setOriginalCwd, setCwdState } = await import('../bootstrap/state.js')
  setOriginalCwd(dir)
  setCwdState(dir)

  // Config system and telemetry sinks must be live before the trust check
  // and any logging below.
  const { enableConfigs, checkHasTrustDialogAccepted } = await import(
    '../utils/config.js'
  )
  enableConfigs()
  const { initSinks } = await import('../utils/sinks.js')
  initSinks()

  // Permanent: accepting trust requires an interactive run in that directory.
  if (!checkHasTrustDialogAccepted()) {
    throw new BridgeHeadlessPermanentError(
      `Workspace not trusted: ${dir}. Run \`claude\` in that directory first to accept the trust dialog.`,
    )
  }

  if (!opts.getAccessToken()) {
    // Transient — supervisor's AuthManager may pick up a token on next cycle.
    throw new Error(BRIDGE_LOGIN_ERROR)
  }

  // Refuse plain HTTP for anything but local loopback hosts.
  const { getBridgeBaseUrl } = await import('./bridgeConfig.js')
  const baseUrl = getBridgeBaseUrl()
  if (
    baseUrl.startsWith('http://') &&
    !baseUrl.includes('localhost') &&
    !baseUrl.includes('127.0.0.1')
  ) {
    throw new BridgeHeadlessPermanentError(
      'Remote Control base URL uses HTTP. Only HTTPS or localhost HTTP is allowed.',
    )
  }
  // Ingress URL override is ant-only; everyone else uses the API base URL.
  const sessionIngressUrl =
    process.env.USER_TYPE === 'ant' &&
    process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
      ? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
      : baseUrl

  const { getBranch, getRemoteUrl, findGitRoot } = await import(
    '../utils/git.js'
  )
  const { hasWorktreeCreateHook } = await import('../utils/hooks.js')

  // Worktree mode needs either a git repo or a WorktreeCreate hook — with
  // neither, spawning would fail every time, so park the worker (permanent).
  if (opts.spawnMode === 'worktree') {
    const worktreeAvailable =
      hasWorktreeCreateHook() || findGitRoot(dir) !== null
    if (!worktreeAvailable) {
      throw new BridgeHeadlessPermanentError(
        `Worktree mode requires a git repository or WorktreeCreate hooks. Directory ${dir} has neither.`,
      )
    }
  }

  const branch = await getBranch()
  const gitRepoUrl = await getRemoteUrl()
  const machineName = hostname()
  const bridgeId = randomUUID()

  // Mirrors bridgeMain's config, with the interactive-only knobs pinned
  // (verbose=false) and the rest sourced from opts.
  const config: BridgeConfig = {
    dir,
    machineName,
    branch,
    gitRepoUrl,
    maxSessions: opts.capacity,
    spawnMode: opts.spawnMode,
    verbose: false,
    sandbox: opts.sandbox,
    bridgeId,
    workerType: 'claude_code',
    environmentId: randomUUID(),
    apiBaseUrl: baseUrl,
    sessionIngressUrl,
    sessionTimeoutMs: opts.sessionTimeoutMs,
  }

  const api = createBridgeApiClient({
    baseUrl,
    getAccessToken: opts.getAccessToken,
    runnerVersion: MACRO.VERSION,
    onDebug: log,
    onAuth401: opts.onAuth401,
    getTrustedDeviceToken,
  })

  // Register the bridge environment before entering the poll loop.
  let environmentId: string
  let environmentSecret: string
  try {
    const reg = await api.registerBridgeEnvironment(config)
    environmentId = reg.environment_id
    environmentSecret = reg.environment_secret
  } catch (err) {
    // Transient — let supervisor backoff-retry.
    throw new Error(`Bridge registration failed: ${errorMessage(err)}`)
  }

  const spawner = createSessionSpawner({
    execPath: process.execPath,
    scriptArgs: spawnScriptArgs(),
    env: process.env,
    verbose: false,
    sandbox: opts.sandbox,
    permissionMode: opts.permissionMode,
    onDebug: log,
  })

  const logger = createHeadlessBridgeLogger(log)
  logger.printBanner(config, environmentId)

  // Optionally pre-create an empty session (failure is non-fatal — the poll
  // loop still runs and sessions can be created remotely).
  let initialSessionId: string | undefined
  if (opts.createSessionOnStart) {
    const { createBridgeSession } = await import('./createSession.js')
    try {
      const sid = await createBridgeSession({
        environmentId,
        title: opts.name,
        events: [],
        gitRepoUrl,
        branch,
        signal,
        baseUrl,
        getAccessToken: opts.getAccessToken,
        permissionMode: opts.permissionMode,
      })
      if (sid) {
        initialSessionId = sid
        log(`created initial session ${sid}`)
      }
    } catch (err) {
      log(`session pre-creation failed (non-fatal): ${errorMessage(err)}`)
    }
  }

  // Runs until `signal` aborts; resolves on teardown (no process.exit here —
  // the daemon worker owns process lifecycle).
  await runBridgeLoop(
    config,
    environmentId,
    environmentSecret,
    api,
    spawner,
    logger,
    signal,
    undefined,
    initialSessionId,
    async () => opts.getAccessToken(),
  )
}
+
+/** BridgeLogger adapter that routes everything to a single line-log fn. */
+function createHeadlessBridgeLogger(log: (s: string) => void): BridgeLogger {
+  const noop = (): void => {}
+  return {
+    printBanner: (cfg, envId) =>
+      log(
+        `registered environmentId=${envId} dir=${cfg.dir} spawnMode=${cfg.spawnMode} capacity=${cfg.maxSessions}`,
+      ),
+    logSessionStart: (id, _prompt) => log(`session start ${id}`),
+    logSessionComplete: (id, ms) => log(`session complete ${id} (${ms}ms)`),
+    logSessionFailed: (id, err) => log(`session failed ${id}: ${err}`),
+    logStatus: log,
+    logVerbose: log,
+    logError: s => log(`error: ${s}`),
+    logReconnected: ms => log(`reconnected after ${ms}ms`),
+    addSession: (id, _url) => log(`session attached ${id}`),
+    removeSession: id => log(`session detached ${id}`),
+    updateIdleStatus: noop,
+    updateReconnectingStatus: noop,
+    updateSessionStatus: noop,
+    updateSessionActivity: noop,
+    updateSessionCount: noop,
+    updateFailedStatus: noop,
+    setSpawnModeDisplay: noop,
+    setRepoInfo: noop,
+    setDebugLogPath: noop,
+    setAttached: noop,
+    setSessionTitle: noop,
+    clearStatus: noop,
+    toggleQr: noop,
+    refreshDisplay: noop,
+  }
+}

+ 461 - 0
src/bridge/bridgeMessaging.ts

@@ -0,0 +1,461 @@
+/**
+ * Shared transport-layer helpers for bridge message handling.
+ *
+ * Extracted from replBridge.ts so both the env-based core (initBridgeCore)
+ * and the env-less core (initEnvLessBridgeCore) can use the same ingress
+ * parsing, control-request handling, and echo-dedup machinery.
+ *
 * Nothing here closes over bridge-specific state. All collaborators
 * (transport, sessionId, UUID sets, callbacks) are passed in explicitly as
 * params, so both cores can share these helpers without hidden coupling.
+ */
+
+import { randomUUID } from 'crypto'
+import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
+import type {
+  SDKControlRequest,
+  SDKControlResponse,
+} from '../entrypoints/sdk/controlTypes.js'
+import type { SDKResultSuccess } from '../entrypoints/sdk/coreTypes.js'
+import { logEvent } from '../services/analytics/index.js'
+import { EMPTY_USAGE } from '../services/api/emptyUsage.js'
+import type { Message } from '../types/message.js'
+import { normalizeControlMessageKeys } from '../utils/controlMessageCompat.js'
+import { logForDebugging } from '../utils/debug.js'
+import { stripDisplayTagsAllowEmpty } from '../utils/displayTags.js'
+import { errorMessage } from '../utils/errors.js'
+import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
+import { jsonParse } from '../utils/slowOperations.js'
+import type { ReplBridgeTransport } from './replBridgeTransport.js'
+
+// ─── Type guards ─────────────────────────────────────────────────────────────
+
+/** Type predicate for parsed WebSocket messages. SDKMessage is a
+ *  discriminated union on `type` — validating the discriminant is
+ *  sufficient for the predicate; callers narrow further via the union. */
+export function isSDKMessage(value: unknown): value is SDKMessage {
+  return (
+    value !== null &&
+    typeof value === 'object' &&
+    'type' in value &&
+    typeof value.type === 'string'
+  )
+}
+
+/** Type predicate for control_response messages from the server. */
+export function isSDKControlResponse(
+  value: unknown,
+): value is SDKControlResponse {
+  return (
+    value !== null &&
+    typeof value === 'object' &&
+    'type' in value &&
+    value.type === 'control_response' &&
+    'response' in value
+  )
+}
+
+/** Type predicate for control_request messages from the server. */
+export function isSDKControlRequest(
+  value: unknown,
+): value is SDKControlRequest {
+  return (
+    value !== null &&
+    typeof value === 'object' &&
+    'type' in value &&
+    value.type === 'control_request' &&
+    'request_id' in value &&
+    'request' in value
+  )
+}
+
+/**
+ * True for message types that should be forwarded to the bridge transport.
+ * The server only wants user/assistant turns and slash-command system events;
+ * everything else (tool_result, progress, etc.) is internal REPL chatter.
+ */
+export function isEligibleBridgeMessage(m: Message): boolean {
+  // Virtual messages (REPL inner calls) are display-only — bridge/SDK
+  // consumers see the REPL tool_use/result which summarizes the work.
+  if ((m.type === 'user' || m.type === 'assistant') && m.isVirtual) {
+    return false
+  }
+  return (
+    m.type === 'user' ||
+    m.type === 'assistant' ||
+    (m.type === 'system' && m.subtype === 'local_command')
+  )
+}
+
+/**
+ * Extract title-worthy text from a Message for onUserMessage. Returns
+ * undefined for messages that shouldn't title the session: non-user, meta
+ * (nudges), tool results, compact summaries, non-human origins (task
+ * notifications, channel messages), or pure display-tag content
+ * (<ide_opened_file>, <session-start-hook>, etc.).
+ *
+ * Synthetic interrupts ([Request interrupted by user]) are NOT filtered here —
+ * isSyntheticMessage lives in messages.ts (heavy import, pulls command
+ * registry). The initialMessages path in initReplBridge checks it; the
+ * writeMessages path reaching an interrupt as the *first* message is
+ * implausible (an interrupt implies a prior prompt already flowed through).
+ */
+export function extractTitleText(m: Message): string | undefined {
+  if (m.type !== 'user' || m.isMeta || m.toolUseResult || m.isCompactSummary)
+    return undefined
+  if (m.origin && m.origin.kind !== 'human') return undefined
+  const content = m.message.content
+  let raw: string | undefined
+  if (typeof content === 'string') {
+    raw = content
+  } else {
+    for (const block of content) {
+      if (block.type === 'text') {
+        raw = block.text
+        break
+      }
+    }
+  }
+  if (!raw) return undefined
+  const clean = stripDisplayTagsAllowEmpty(raw)
+  return clean || undefined
+}
+
+// ─── Ingress routing ─────────────────────────────────────────────────────────
+
/**
 * Parse an ingress WebSocket message and route it to the appropriate handler.
 * Ignores messages whose UUID is in recentPostedUUIDs (echoes of what we sent)
 * or in recentInboundUUIDs (re-deliveries we've already forwarded — e.g.
 * server replayed history after a transport swap lost the seq-num cursor).
 *
 * @param data Raw WebSocket frame payload (JSON text).
 * @param recentPostedUUIDs UUIDs of messages we posted — used to drop echoes.
 * @param recentInboundUUIDs UUIDs already forwarded — used to drop re-deliveries.
 * @param onInboundMessage Receives deduped inbound user messages (may be async).
 * @param onPermissionResponse Receives control_response frames.
 * @param onControlRequest Receives server-initiated control_request frames.
 */
export function handleIngressMessage(
  data: string,
  recentPostedUUIDs: BoundedUUIDSet,
  recentInboundUUIDs: BoundedUUIDSet,
  onInboundMessage: ((msg: SDKMessage) => void | Promise<void>) | undefined,
  onPermissionResponse?: ((response: SDKControlResponse) => void) | undefined,
  onControlRequest?: ((request: SDKControlRequest) => void) | undefined,
): void {
  // The try wraps routing as well as parsing, so a synchronously-throwing
  // handler is logged (under the "Failed to parse" message) instead of
  // crashing the socket read path.
  try {
    const parsed: unknown = normalizeControlMessageKeys(jsonParse(data))

    // control_response is not an SDKMessage — check before the type guard
    if (isSDKControlResponse(parsed)) {
      logForDebugging('[bridge:repl] Ingress message type=control_response')
      onPermissionResponse?.(parsed)
      return
    }

    // control_request from the server (initialize, set_model, can_use_tool).
    // Must respond promptly or the server kills the WS (~10-14s timeout).
    if (isSDKControlRequest(parsed)) {
      logForDebugging(
        `[bridge:repl] Inbound control_request subtype=${parsed.request.subtype}`,
      )
      onControlRequest?.(parsed)
      return
    }

    // Anything without a string `type` discriminant is silently dropped.
    if (!isSDKMessage(parsed)) return

    // Check for UUID to detect echoes of our own messages
    const uuid =
      'uuid' in parsed && typeof parsed.uuid === 'string'
        ? parsed.uuid
        : undefined

    if (uuid && recentPostedUUIDs.has(uuid)) {
      logForDebugging(
        `[bridge:repl] Ignoring echo: type=${parsed.type} uuid=${uuid}`,
      )
      return
    }

    // Defensive dedup: drop inbound prompts we've already forwarded. The
    // SSE seq-num carryover (lastTransportSequenceNum) is the primary fix
    // for history-replay; this catches edge cases where that negotiation
    // fails (server ignores from_sequence_num, transport died before
    // receiving any frames, etc).
    if (uuid && recentInboundUUIDs.has(uuid)) {
      logForDebugging(
        `[bridge:repl] Ignoring re-delivered inbound: type=${parsed.type} uuid=${uuid}`,
      )
      return
    }

    logForDebugging(
      `[bridge:repl] Ingress message type=${parsed.type}${uuid ? ` uuid=${uuid}` : ''}`,
    )

    if (parsed.type === 'user') {
      // Record the UUID *before* forwarding so a re-delivery racing the
      // (possibly async) handler is still caught by the dedup above.
      if (uuid) recentInboundUUIDs.add(uuid)
      logEvent('tengu_bridge_message_received', {
        is_repl: true,
      })
      // Fire-and-forget — handler may be async (attachment resolution).
      void onInboundMessage?.(parsed)
    } else {
      logForDebugging(
        `[bridge:repl] Ignoring non-user inbound message: type=${parsed.type}`,
      )
    }
  } catch (err) {
    logForDebugging(
      `[bridge:repl] Failed to parse ingress message: ${errorMessage(err)}`,
    )
  }
}
+
+// ─── Server-initiated control requests ───────────────────────────────────────
+
export type ServerControlRequestHandlers = {
  /** Transport to write control_responses to; null → requests are dropped (logged). */
  transport: ReplBridgeTransport | null
  /** Session the response is attributed to (stamped as session_id on the event). */
  sessionId: string
  /**
   * When true, all mutable requests (interrupt, set_model, set_permission_mode,
   * set_max_thinking_tokens) reply with an error instead of false-success.
   * initialize still replies success — the server kills the connection otherwise.
   * Used by the outbound-only bridge mode and the SDK's /bridge subpath so claude.ai sees a
   * proper error instead of "action succeeded but nothing happened locally".
   */
  outboundOnly?: boolean
  /** Invoked for `interrupt` requests before replying success. */
  onInterrupt?: () => void
  /** Invoked for `set_model`; undefined means "clear model override". */
  onSetModel?: (model: string | undefined) => void
  /** Invoked for `set_max_thinking_tokens`; null clears the cap. */
  onSetMaxThinkingTokens?: (maxTokens: number | null) => void
  /**
   * Invoked for `set_permission_mode`. Returns a policy verdict so the
   * caller can reply with an error control_response when the mode is refused.
   */
  onSetPermissionMode?: (
    mode: PermissionMode,
  ) => { ok: true } | { ok: false; error: string }
}

/** Error text sent back for mutable control requests in outbound-only mode. */
const OUTBOUND_ONLY_ERROR =
  'This session is outbound-only. Enable Remote Control locally to allow inbound control.'
+
+/**
+ * Respond to inbound control_request messages from the server. The server
+ * sends these for session lifecycle events (initialize, set_model) and
+ * for turn-level coordination (interrupt, set_max_thinking_tokens). If we
+ * don't respond, the server hangs and kills the WS after ~10-14s.
+ *
+ * Previously a closure inside initBridgeCore's onWorkReceived; now takes
+ * collaborators as params so both cores can use it.
+ */
+export function handleServerControlRequest(
+  request: SDKControlRequest,
+  handlers: ServerControlRequestHandlers,
+): void {
+  const {
+    transport,
+    sessionId,
+    outboundOnly,
+    onInterrupt,
+    onSetModel,
+    onSetMaxThinkingTokens,
+    onSetPermissionMode,
+  } = handlers
+  if (!transport) {
+    logForDebugging(
+      '[bridge:repl] Cannot respond to control_request: transport not configured',
+    )
+    return
+  }
+
+  let response: SDKControlResponse
+
+  // Outbound-only: reply error for mutable requests so claude.ai doesn't show
+  // false success. initialize must still succeed (server kills the connection
+  // if it doesn't — see comment above).
+  if (outboundOnly && request.request.subtype !== 'initialize') {
+    response = {
+      type: 'control_response',
+      response: {
+        subtype: 'error',
+        request_id: request.request_id,
+        error: OUTBOUND_ONLY_ERROR,
+      },
+    }
+    const event = { ...response, session_id: sessionId }
+    void transport.write(event)
+    logForDebugging(
+      `[bridge:repl] Rejected ${request.request.subtype} (outbound-only) request_id=${request.request_id}`,
+    )
+    return
+  }
+
+  switch (request.request.subtype) {
+    case 'initialize':
+      // Respond with minimal capabilities — the REPL handles
+      // commands, models, and account info itself.
+      response = {
+        type: 'control_response',
+        response: {
+          subtype: 'success',
+          request_id: request.request_id,
+          response: {
+            commands: [],
+            output_style: 'normal',
+            available_output_styles: ['normal'],
+            models: [],
+            account: {},
+            pid: process.pid,
+          },
+        },
+      }
+      break
+
+    case 'set_model':
+      onSetModel?.(request.request.model)
+      response = {
+        type: 'control_response',
+        response: {
+          subtype: 'success',
+          request_id: request.request_id,
+        },
+      }
+      break
+
+    case 'set_max_thinking_tokens':
+      onSetMaxThinkingTokens?.(request.request.max_thinking_tokens)
+      response = {
+        type: 'control_response',
+        response: {
+          subtype: 'success',
+          request_id: request.request_id,
+        },
+      }
+      break
+
+    case 'set_permission_mode': {
+      // The callback returns a policy verdict so we can send an error
+      // control_response without importing isAutoModeGateEnabled /
+      // isBypassPermissionsModeDisabled here (bootstrap-isolation). If no
+      // callback is registered (daemon context, which doesn't wire this —
+      // see daemonBridge.ts), return an error verdict rather than a silent
+      // false-success: the mode is never actually applied in that context,
+      // so success would lie to the client.
+      const verdict = onSetPermissionMode?.(request.request.mode) ?? {
+        ok: false,
+        error:
+          'set_permission_mode is not supported in this context (onSetPermissionMode callback not registered)',
+      }
+      if (verdict.ok) {
+        response = {
+          type: 'control_response',
+          response: {
+            subtype: 'success',
+            request_id: request.request_id,
+          },
+        }
+      } else {
+        response = {
+          type: 'control_response',
+          response: {
+            subtype: 'error',
+            request_id: request.request_id,
+            error: verdict.error,
+          },
+        }
+      }
+      break
+    }
+
+    case 'interrupt':
+      onInterrupt?.()
+      response = {
+        type: 'control_response',
+        response: {
+          subtype: 'success',
+          request_id: request.request_id,
+        },
+      }
+      break
+
+    default:
+      // Unknown subtype — respond with error so the server doesn't
+      // hang waiting for a reply that never comes.
+      response = {
+        type: 'control_response',
+        response: {
+          subtype: 'error',
+          request_id: request.request_id,
+          error: `REPL bridge does not handle control_request subtype: ${request.request.subtype}`,
+        },
+      }
+  }
+
+  const event = { ...response, session_id: sessionId }
+  void transport.write(event)
+  logForDebugging(
+    `[bridge:repl] Sent control_response for ${request.request.subtype} request_id=${request.request_id} result=${response.response.subtype}`,
+  )
+}
+
+// ─── Result message (for session archival on teardown) ───────────────────────
+
+/**
+ * Build a minimal `SDKResultSuccess` message for session archival.
+ * The server needs this event before a WS close to trigger archival.
+ */
+export function makeResultMessage(sessionId: string): SDKResultSuccess {
+  return {
+    type: 'result',
+    subtype: 'success',
+    duration_ms: 0,
+    duration_api_ms: 0,
+    is_error: false,
+    num_turns: 0,
+    result: '',
+    stop_reason: null,
+    total_cost_usd: 0,
+    usage: { ...EMPTY_USAGE },
+    modelUsage: {},
+    permission_denials: [],
+    session_id: sessionId,
+    uuid: randomUUID(),
+  }
+}
+
+// ─── BoundedUUIDSet (echo-dedup ring buffer) ─────────────────────────────────
+
+/**
+ * FIFO-bounded set backed by a circular buffer. Evicts the oldest entry
+ * when capacity is reached, keeping memory usage constant at O(capacity).
+ *
+ * Messages are added in chronological order, so evicted entries are always
+ * the oldest. The caller relies on external ordering (the hook's
+ * lastWrittenIndexRef) as the primary dedup — this set is a secondary
+ * safety net for echo filtering and race-condition dedup.
+ */
+export class BoundedUUIDSet {
+  private readonly capacity: number
+  private readonly ring: (string | undefined)[]
+  private readonly set = new Set<string>()
+  private writeIdx = 0
+
+  constructor(capacity: number) {
+    this.capacity = capacity
+    this.ring = new Array<string | undefined>(capacity)
+  }
+
+  add(uuid: string): void {
+    if (this.set.has(uuid)) return
+    // Evict the entry at the current write position (if occupied)
+    const evicted = this.ring[this.writeIdx]
+    if (evicted !== undefined) {
+      this.set.delete(evicted)
+    }
+    this.ring[this.writeIdx] = uuid
+    this.set.add(uuid)
+    this.writeIdx = (this.writeIdx + 1) % this.capacity
+  }
+
+  has(uuid: string): boolean {
+    return this.set.has(uuid)
+  }
+
+  clear(): void {
+    this.set.clear()
+    this.ring.fill(undefined)
+    this.writeIdx = 0
+  }
+}

+ 43 - 0
src/bridge/bridgePermissionCallbacks.ts

@@ -0,0 +1,43 @@
+import type { PermissionUpdate } from '../utils/permissions/PermissionUpdateSchema.js'
+
+/** Decision payload for a bridge permission prompt. */
+type BridgePermissionResponse = {
+  /** Discriminant: 'allow' permits the tool use, 'deny' blocks it. */
+  behavior: 'allow' | 'deny'
+  /** Replacement tool input to apply instead of the original, when present. */
+  updatedInput?: Record<string, unknown>
+  /** Permission rule updates accompanying the decision. */
+  updatedPermissions?: PermissionUpdate[]
+  /** Human-readable note (presumably a denial reason — confirm with callers). */
+  message?: string
+}
+
+/** Callback surface for routing permission prompts over the bridge. */
+type BridgePermissionCallbacks = {
+  /** Emit a permission prompt for a tool use, keyed by `requestId`. */
+  sendRequest(
+    requestId: string,
+    toolName: string,
+    input: Record<string, unknown>,
+    toolUseId: string,
+    description: string,
+    permissionSuggestions?: PermissionUpdate[],
+    blockedPath?: string,
+  ): void
+  /** Deliver the decision for a previously sent `requestId`. */
+  sendResponse(requestId: string, response: BridgePermissionResponse): void
+  /** Cancel a pending control_request so the web app can dismiss its prompt. */
+  cancelRequest(requestId: string): void
+  /** Register a handler for the response to `requestId`. */
+  onResponse(
+    requestId: string,
+    handler: (response: BridgePermissionResponse) => void,
+  ): () => void // returns unsubscribe
+}
+
+/** Type predicate for validating a parsed control_response payload
+ *  as a BridgePermissionResponse. Checks the required `behavior`
+ *  discriminant rather than using an unsafe `as` cast. */
+function isBridgePermissionResponse(
+  value: unknown,
+): value is BridgePermissionResponse {
+  if (!value || typeof value !== 'object') return false
+  return (
+    'behavior' in value &&
+    (value.behavior === 'allow' || value.behavior === 'deny')
+  )
+}
+
+export { isBridgePermissionResponse }
+export type { BridgePermissionCallbacks, BridgePermissionResponse }

+ 210 - 0
src/bridge/bridgePointer.ts

@@ -0,0 +1,210 @@
+import { mkdir, readFile, stat, unlink, writeFile } from 'fs/promises'
+import { dirname, join } from 'path'
+import { z } from 'zod/v4'
+import { logForDebugging } from '../utils/debug.js'
+import { isENOENT } from '../utils/errors.js'
+import { getWorktreePathsPortable } from '../utils/getWorktreePathsPortable.js'
+import { lazySchema } from '../utils/lazySchema.js'
+import {
+  getProjectsDir,
+  sanitizePath,
+} from '../utils/sessionStoragePortable.js'
+import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
+
+/**
+ * Upper bound on worktree fanout. git worktree list is naturally bounded
+ * (50 is a LOT), but this caps the parallel stat() burst and guards against
+ * pathological setups. Above this, --continue falls back to current-dir-only.
+ */
+const MAX_WORKTREE_FANOUT = 50
+
+/**
+ * Crash-recovery pointer for Remote Control sessions.
+ *
+ * Written immediately after a bridge session is created, periodically
+ * refreshed during the session, and cleared on clean shutdown. If the
+ * process dies unclean (crash, kill -9, terminal closed), the pointer
+ * persists. On next startup, `claude remote-control` detects it and offers
+ * to resume via the --session-id flow from #20460.
+ *
+ * Staleness is checked against the file's mtime (not an embedded timestamp)
+ * so that a periodic re-write with the same content serves as a refresh —
+ * matches the backend's rolling BRIDGE_LAST_POLL_TTL (4h) semantics. A
+ * bridge that's been polling for 5+ hours and then crashes still has a
+ * fresh pointer as long as the refresh ran within the window.
+ *
+ * Scoped per working directory (alongside transcript JSONL files) so two
+ * concurrent bridges in different repos don't clobber each other.
+ */
+
+/** Pointer staleness TTL: 4 hours, mirroring the backend's rolling BRIDGE_LAST_POLL_TTL (see module doc above). */
+export const BRIDGE_POINTER_TTL_MS = 4 * 60 * 60 * 1000
+
+/** On-disk JSON shape of the pointer file (validated on every read). */
+const BridgePointerSchema = lazySchema(() =>
+  z.object({
+    sessionId: z.string(),
+    environmentId: z.string(),
+    // Which bridge flavor wrote the pointer: standalone CLI vs in-REPL.
+    source: z.enum(['standalone', 'repl']),
+  }),
+)
+
+/** Parsed pointer payload as inferred from the zod schema. */
+export type BridgePointer = z.infer<ReturnType<typeof BridgePointerSchema>>
+
+export function getBridgePointerPath(dir: string): string {
+  return join(getProjectsDir(), sanitizePath(dir), 'bridge-pointer.json')
+}
+
+/**
+ * Write the pointer. Also used to refresh mtime during long sessions —
+ * calling with the same IDs is a cheap no-content-change write that bumps
+ * the staleness clock. Best-effort — a crash-recovery file must never
+ * itself cause a crash. Logs and swallows on error.
+ */
+export async function writeBridgePointer(
+  dir: string,
+  pointer: BridgePointer,
+): Promise<void> {
+  const path = getBridgePointerPath(dir)
+  try {
+    // recursive:true makes the parent-dir create idempotent (no EEXIST).
+    await mkdir(dirname(path), { recursive: true })
+    await writeFile(path, jsonStringify(pointer), 'utf8')
+    logForDebugging(`[bridge:pointer] wrote ${path}`)
+  } catch (err: unknown) {
+    // Deliberate swallow — see doc comment: crash-recovery must not crash.
+    logForDebugging(`[bridge:pointer] write failed: ${err}`, { level: 'warn' })
+  }
+}
+
+/**
+ * Read the pointer and its age (ms since last write). Operates directly
+ * and handles errors — no existence check (CLAUDE.md TOCTOU rule). Returns
+ * null on any failure: missing file, corrupted JSON, schema mismatch, or
+ * stale (mtime > 4h ago). Stale/invalid pointers are deleted so they don't
+ * keep re-prompting after the backend has already GC'd the env.
+ */
+export async function readBridgePointer(
+  dir: string,
+): Promise<(BridgePointer & { ageMs: number }) | null> {
+  const path = getBridgePointerPath(dir)
+  let raw: string
+  let mtimeMs: number
+  try {
+    // stat for mtime (staleness anchor), then read. Two syscalls, but both
+    // are needed — mtime IS the data we return, not a TOCTOU guard.
+    mtimeMs = (await stat(path)).mtimeMs
+    raw = await readFile(path, 'utf8')
+  } catch {
+    // Missing or unreadable file — nothing to recover.
+    return null
+  }
+
+  // safeJsonParse maps corrupt JSON to null, which then fails schema
+  // validation — one unified invalid path.
+  const parsed = BridgePointerSchema().safeParse(safeJsonParse(raw))
+  if (!parsed.success) {
+    logForDebugging(`[bridge:pointer] invalid schema, clearing: ${path}`)
+    await clearBridgePointer(dir)
+    return null
+  }
+
+  // Clamp negative ages (mtime in the future due to clock skew) to 0.
+  const ageMs = Math.max(0, Date.now() - mtimeMs)
+  if (ageMs > BRIDGE_POINTER_TTL_MS) {
+    logForDebugging(`[bridge:pointer] stale (>4h mtime), clearing: ${path}`)
+    await clearBridgePointer(dir)
+    return null
+  }
+
+  return { ...parsed.data, ageMs }
+}
+
+/**
+ * Worktree-aware read for `--continue`. The REPL bridge writes its pointer
+ * to `getOriginalCwd()` which EnterWorktreeTool/activeWorktreeSession can
+ * mutate to a worktree path — but `claude remote-control --continue` runs
+ * with `resolve('.')` = shell CWD. This fans out across git worktree
+ * siblings to find the freshest pointer, matching /resume's semantics.
+ *
+ * Fast path: checks `dir` first. Only shells out to `git worktree list` if
+ * that misses — the common case (pointer in launch dir) is one stat, zero
+ * exec. Fanout reads run in parallel; capped at MAX_WORKTREE_FANOUT.
+ *
+ * Returns the pointer AND the dir it was found in, so the caller can clear
+ * the right file on resume failure.
+ */
+export async function readBridgePointerAcrossWorktrees(
+  dir: string,
+): Promise<{ pointer: BridgePointer & { ageMs: number }; dir: string } | null> {
+  // Fast path: current dir. Covers standalone bridge (always matches) and
+  // REPL bridge when no worktree mutation happened.
+  const here = await readBridgePointer(dir)
+  if (here) {
+    return { pointer: here, dir }
+  }
+
+  // Fanout: scan worktree siblings. getWorktreePathsPortable has a 5s
+  // timeout and returns [] on any error (not a git repo, git not installed).
+  const worktrees = await getWorktreePathsPortable(dir)
+  // 0–1 entries: nothing beyond the dir we already checked (or not a repo).
+  if (worktrees.length <= 1) return null
+  if (worktrees.length > MAX_WORKTREE_FANOUT) {
+    logForDebugging(
+      `[bridge:pointer] ${worktrees.length} worktrees exceeds fanout cap ${MAX_WORKTREE_FANOUT}, skipping`,
+    )
+    return null
+  }
+
+  // Dedupe against `dir` so we don't re-stat it. sanitizePath normalizes
+  // case/separators so worktree-list output matches our fast-path key even
+  // on Windows where git may emit C:/ vs stored c:/.
+  const dirKey = sanitizePath(dir)
+  const candidates = worktrees.filter(wt => sanitizePath(wt) !== dirKey)
+
+  // Parallel stat+read. Each readBridgePointer is a stat() that ENOENTs
+  // for worktrees with no pointer (cheap) plus a ~100-byte read for the
+  // rare ones that have one. Promise.all → latency ≈ slowest single stat.
+  const results = await Promise.all(
+    candidates.map(async wt => {
+      const p = await readBridgePointer(wt)
+      return p ? { pointer: p, dir: wt } : null
+    }),
+  )
+
+  // Pick freshest (lowest ageMs). The pointer stores environmentId so
+  // resume reconnects to the right env regardless of which worktree
+  // --continue was invoked from.
+  let freshest: {
+    pointer: BridgePointer & { ageMs: number }
+    dir: string
+  } | null = null
+  for (const r of results) {
+    // Strict < keeps the first of two equally-fresh pointers (stable pick).
+    if (r && (!freshest || r.pointer.ageMs < freshest.pointer.ageMs)) {
+      freshest = r
+    }
+  }
+  if (freshest) {
+    logForDebugging(
+      `[bridge:pointer] fanout found pointer in worktree ${freshest.dir} (ageMs=${freshest.pointer.ageMs})`,
+    )
+  }
+  return freshest
+}
+
+/**
+ * Delete the pointer. Idempotent — ENOENT is expected when the process
+ * shut down clean previously.
+ */
+export async function clearBridgePointer(dir: string): Promise<void> {
+  const path = getBridgePointerPath(dir)
+  try {
+    await unlink(path)
+    logForDebugging(`[bridge:pointer] cleared ${path}`)
+  } catch (err: unknown) {
+    if (!isENOENT(err)) {
+      logForDebugging(`[bridge:pointer] clear failed: ${err}`, {
+        level: 'warn',
+      })
+    }
+  }
+}
+
+function safeJsonParse(raw: string): unknown {
+  try {
+    return jsonParse(raw)
+  } catch {
+    return null
+  }
+}

+ 163 - 0
src/bridge/bridgeStatusUtil.ts

@@ -0,0 +1,163 @@
+import {
+  getClaudeAiBaseUrl,
+  getRemoteSessionUrl,
+} from '../constants/product.js'
+import { stringWidth } from '../ink/stringWidth.js'
+import { formatDuration, truncateToWidth } from '../utils/format.js'
+import { getGraphemeSegmenter } from '../utils/intl.js'
+
+/** Bridge status state machine states. */
+export type StatusState =
+  | 'idle' // bridge up, no session attached — rendered as "Ready"
+  | 'attached' // a session is connected — rendered as "Connected"
+  | 'titled' // NOTE(review): presumably set once the session gains a title — confirm in the renderer
+  | 'reconnecting' // rendered by updateReconnectingStatus, not renderStatusLine
+  | 'failed' // rendered by updateFailedStatus, not renderStatusLine
+
+/** How long a tool activity line stays visible after last tool_start (ms). */
+export const TOOL_DISPLAY_EXPIRY_MS = 30_000
+
+/** Interval for the shimmer animation tick (ms). */
+export const SHIMMER_INTERVAL_MS = 150
+
+export function timestamp(): string {
+  const now = new Date()
+  const h = String(now.getHours()).padStart(2, '0')
+  const m = String(now.getMinutes()).padStart(2, '0')
+  const s = String(now.getSeconds()).padStart(2, '0')
+  return `${h}:${m}:${s}`
+}
+
+export { formatDuration, truncateToWidth as truncatePrompt }
+
+/** Abbreviate a tool activity summary for the trail display. */
+export function abbreviateActivity(summary: string): string {
+  return truncateToWidth(summary, 30)
+}
+
+/** Build the connect URL shown when the bridge is idle. */
+export function buildBridgeConnectUrl(
+  environmentId: string,
+  ingressUrl?: string,
+): string {
+  const baseUrl = getClaudeAiBaseUrl(undefined, ingressUrl)
+  return `${baseUrl}/code?bridge=${environmentId}`
+}
+
+/**
+ * Build the session URL shown when a session is attached. Delegates to
+ * getRemoteSessionUrl for the cse_→session_ prefix translation, then appends
+ * the v1-specific ?bridge={environmentId} query.
+ */
+export function buildBridgeSessionUrl(
+  sessionId: string,
+  environmentId: string,
+  ingressUrl?: string,
+): string {
+  return `${getRemoteSessionUrl(sessionId, ingressUrl)}?bridge=${environmentId}`
+}
+
+/** Compute the glimmer index for a reverse-sweep shimmer animation. */
+export function computeGlimmerIndex(
+  tick: number,
+  messageWidth: number,
+): number {
+  const cycleLength = messageWidth + 20
+  return messageWidth + 10 - (tick % cycleLength)
+}
+
+/**
+ * Split text into three segments by visual column position for shimmer rendering.
+ *
+ * Uses grapheme segmentation and `stringWidth` so the split is correct for
+ * multi-byte characters, emoji, and CJK glyphs.
+ *
+ * Returns `{ before, shimmer, after }` strings. Both renderers (chalk in
+ * bridgeUI.ts and React/Ink in bridge.tsx) apply their own coloring to
+ * these segments.
+ */
+export function computeShimmerSegments(
+  text: string,
+  glimmerIndex: number,
+): { before: string; shimmer: string; after: string } {
+  const messageWidth = stringWidth(text)
+  // The shimmer window is 3 columns wide, centered on glimmerIndex.
+  const shimmerStart = glimmerIndex - 1
+  const shimmerEnd = glimmerIndex + 1
+
+  // When shimmer is offscreen, return all text as "before"
+  if (shimmerStart >= messageWidth || shimmerEnd < 0) {
+    return { before: text, shimmer: '', after: '' }
+  }
+
+  // Split into at most 3 segments by visual column position
+  // (shimmerStart goes negative as the reverse sweep exits on the left).
+  const clampedStart = Math.max(0, shimmerStart)
+  let colPos = 0
+  let before = ''
+  let shimmer = ''
+  let after = ''
+  for (const { segment } of getGraphemeSegmenter().segment(text)) {
+    const segWidth = stringWidth(segment)
+    if (colPos + segWidth <= clampedStart) {
+      before += segment // grapheme ends before the shimmer window
+    } else if (colPos > shimmerEnd) {
+      after += segment // grapheme starts past the window
+    } else {
+      shimmer += segment // overlaps (or straddles) the 3-column window
+    }
+    colPos += segWidth
+  }
+
+  return { before, shimmer, after }
+}
+
+/** Computed bridge status label and color from connection state. */
+export type BridgeStatusInfo = {
+  label:
+    | 'Remote Control failed'
+    | 'Remote Control reconnecting'
+    | 'Remote Control active'
+    | 'Remote Control connecting\u2026'
+  color: 'error' | 'warning' | 'success'
+}
+
+/** Derive a status label and color from the bridge connection state. */
+export function getBridgeStatus({
+  error,
+  connected,
+  sessionActive,
+  reconnecting,
+}: {
+  error: string | undefined
+  connected: boolean
+  sessionActive: boolean
+  reconnecting: boolean
+}): BridgeStatusInfo {
+  if (error) return { label: 'Remote Control failed', color: 'error' }
+  if (reconnecting)
+    return { label: 'Remote Control reconnecting', color: 'warning' }
+  if (sessionActive || connected)
+    return { label: 'Remote Control active', color: 'success' }
+  return { label: 'Remote Control connecting\u2026', color: 'warning' }
+}
+
+/** Footer text shown when bridge is idle (Ready state). */
+export function buildIdleFooterText(url: string): string {
+  return `Code everywhere with the Claude app or ${url}`
+}
+
+/** Footer text shown when a session is active (Connected state). */
+export function buildActiveFooterText(url: string): string {
+  return `Continue coding in the Claude app or ${url}`
+}
+
+/** Footer text shown when the bridge has failed. */
+export const FAILED_FOOTER_TEXT = 'Something went wrong, please try again'
+
+/**
+ * Wrap text in an OSC 8 terminal hyperlink. Zero visual width for layout purposes.
+ * strip-ansi (used by stringWidth) correctly strips these sequences, so
+ * countVisualLines in bridgeUI.ts remains accurate.
+ */
+export function wrapWithOsc8Link(text: string, url: string): string {
+  return `\x1b]8;;${url}\x07${text}\x1b]8;;\x07`
+}

+ 530 - 0
src/bridge/bridgeUI.ts

@@ -0,0 +1,530 @@
+import chalk from 'chalk'
+import { toString as qrToString } from 'qrcode'
+import {
+  BRIDGE_FAILED_INDICATOR,
+  BRIDGE_READY_INDICATOR,
+  BRIDGE_SPINNER_FRAMES,
+} from '../constants/figures.js'
+import { stringWidth } from '../ink/stringWidth.js'
+import { logForDebugging } from '../utils/debug.js'
+import {
+  buildActiveFooterText,
+  buildBridgeConnectUrl,
+  buildBridgeSessionUrl,
+  buildIdleFooterText,
+  FAILED_FOOTER_TEXT,
+  formatDuration,
+  type StatusState,
+  TOOL_DISPLAY_EXPIRY_MS,
+  timestamp,
+  truncatePrompt,
+  wrapWithOsc8Link,
+} from './bridgeStatusUtil.js'
+import type {
+  BridgeConfig,
+  BridgeLogger,
+  SessionActivity,
+  SpawnMode,
+} from './types.js'
+
+const QR_OPTIONS = {
+  type: 'utf8' as const,
+  errorCorrectionLevel: 'L' as const,
+  small: true,
+}
+
+/** Generate a QR code and return its lines. */
+async function generateQr(url: string): Promise<string[]> {
+  const qr = await qrToString(url, QR_OPTIONS)
+  return qr.split('\n').filter((line: string) => line.length > 0)
+}
+
+export function createBridgeLogger(options: {
+  verbose: boolean
+  write?: (s: string) => void
+}): BridgeLogger {
+  const write = options.write ?? ((s: string) => process.stdout.write(s))
+  const verbose = options.verbose
+
+  // Track how many status lines are currently displayed at the bottom
+  let statusLineCount = 0
+
+  // Status state machine
+  let currentState: StatusState = 'idle'
+  let currentStateText = 'Ready'
+  let repoName = ''
+  let branch = ''
+  let debugLogPath = ''
+
+  // Connect URL (built in printBanner with correct base for staging/prod)
+  let connectUrl = ''
+  let cachedIngressUrl = ''
+  let cachedEnvironmentId = ''
+  let activeSessionUrl: string | null = null
+
+  // QR code lines for the current URL
+  let qrLines: string[] = []
+  let qrVisible = false
+
+  // Tool activity for the second status line
+  let lastToolSummary: string | null = null
+  let lastToolTime = 0
+
+  // Session count indicator (shown when multi-session mode is enabled)
+  let sessionActive = 0
+  let sessionMax = 1
+  // Spawn mode shown in the session-count line + gates the `w` hint
+  let spawnModeDisplay: 'same-dir' | 'worktree' | null = null
+  let spawnMode: SpawnMode = 'single-session'
+
+  // Per-session display info for the multi-session bullet list (keyed by compat sessionId)
+  const sessionDisplayInfo = new Map<
+    string,
+    { title?: string; url: string; activity?: SessionActivity }
+  >()
+
+  // Connecting spinner state
+  let connectingTimer: ReturnType<typeof setInterval> | null = null
+  let connectingTick = 0
+
+  /**
+   * Count how many visual terminal rows a string occupies, accounting for
+   * line wrapping. Each `\n` is one row, and content wider than the terminal
+   * wraps to additional rows.
+   */
+  function countVisualLines(text: string): number {
+    // eslint-disable-next-line custom-rules/prefer-use-terminal-size
+    const cols = process.stdout.columns || 80 // non-React CLI context
+    let count = 0
+    // Split on newlines to get logical lines
+    for (const logical of text.split('\n')) {
+      if (logical.length === 0) {
+        // Empty segment between consecutive \n — counts as 1 row
+        count++
+        continue
+      }
+      const width = stringWidth(logical)
+      count += Math.max(1, Math.ceil(width / cols))
+    }
+    // The trailing \n in "line\n" produces an empty last element — don't count it
+    // because the cursor sits at the start of the next line, not a new visual row.
+    if (text.endsWith('\n')) {
+      count--
+    }
+    return count
+  }
+
+  /** Write a status line and track its visual line count. */
+  function writeStatus(text: string): void {
+    write(text)
+    statusLineCount += countVisualLines(text)
+  }
+
+  /** Clear any currently displayed status lines. */
+  function clearStatusLines(): void {
+    if (statusLineCount <= 0) return
+    logForDebugging(`[bridge:ui] clearStatusLines count=${statusLineCount}`)
+    // Move cursor up to the start of the status block, then erase everything below
+    write(`\x1b[${statusLineCount}A`) // cursor up N lines
+    write('\x1b[J') // erase from cursor to end of screen
+    statusLineCount = 0
+  }
+
+  /** Print a permanent log line, clearing status first and restoring after. */
+  function printLog(line: string): void {
+    clearStatusLines()
+    write(line)
+  }
+
+  /** Regenerate the QR code with the given URL. */
+  function regenerateQr(url: string): void {
+    generateQr(url)
+      .then(lines => {
+        qrLines = lines
+        renderStatusLine()
+      })
+      .catch(e => {
+        logForDebugging(`QR code generation failed: ${e}`, { level: 'error' })
+      })
+  }
+
+  /** Render the connecting spinner line (shown before first updateIdleStatus). */
+  function renderConnectingLine(): void {
+    clearStatusLines()
+
+    const frame =
+      BRIDGE_SPINNER_FRAMES[connectingTick % BRIDGE_SPINNER_FRAMES.length]!
+    let suffix = ''
+    if (repoName) {
+      suffix += chalk.dim(' \u00b7 ') + chalk.dim(repoName)
+    }
+    if (branch) {
+      suffix += chalk.dim(' \u00b7 ') + chalk.dim(branch)
+    }
+    writeStatus(
+      `${chalk.yellow(frame)} ${chalk.yellow('Connecting')}${suffix}\n`,
+    )
+  }
+
+  /** Start the connecting spinner. Stopped by first updateIdleStatus(). */
+  function startConnecting(): void {
+    stopConnecting()
+    renderConnectingLine()
+    connectingTimer = setInterval(() => {
+      connectingTick++
+      renderConnectingLine()
+    }, 150)
+  }
+
+  /** Stop the connecting spinner. */
+  function stopConnecting(): void {
+    if (connectingTimer) {
+      clearInterval(connectingTimer)
+      connectingTimer = null
+    }
+  }
+
+  /** Render and write the current status lines based on state. */
+  function renderStatusLine(): void {
+    if (currentState === 'reconnecting' || currentState === 'failed') {
+      // These states are handled separately (updateReconnectingStatus /
+      // updateFailedStatus). Return before clearing so callers like toggleQr
+      // and setSpawnModeDisplay don't blank the display during these states.
+      return
+    }
+
+    clearStatusLines()
+
+    const isIdle = currentState === 'idle'
+
+    // QR code above the status line
+    if (qrVisible) {
+      for (const line of qrLines) {
+        writeStatus(`${chalk.dim(line)}\n`)
+      }
+    }
+
+    // Determine indicator and colors based on state
+    const indicator = BRIDGE_READY_INDICATOR
+    const indicatorColor = isIdle ? chalk.green : chalk.cyan
+    const baseColor = isIdle ? chalk.green : chalk.cyan
+    const stateText = baseColor(currentStateText)
+
+    // Build the suffix with repo and branch
+    let suffix = ''
+    if (repoName) {
+      suffix += chalk.dim(' \u00b7 ') + chalk.dim(repoName)
+    }
+    // In worktree mode each session gets its own branch, so showing the
+    // bridge's branch would be misleading.
+    if (branch && spawnMode !== 'worktree') {
+      suffix += chalk.dim(' \u00b7 ') + chalk.dim(branch)
+    }
+
+    if (process.env.USER_TYPE === 'ant' && debugLogPath) {
+      writeStatus(
+        `${chalk.yellow('[ANT-ONLY] Logs:')} ${chalk.dim(debugLogPath)}\n`,
+      )
+    }
+    writeStatus(`${indicatorColor(indicator)} ${stateText}${suffix}\n`)
+
+    // Session count and per-session list (multi-session mode only)
+    if (sessionMax > 1) {
+      const modeHint =
+        spawnMode === 'worktree'
+          ? 'New sessions will be created in an isolated worktree'
+          : 'New sessions will be created in the current directory'
+      writeStatus(
+        `    ${chalk.dim(`Capacity: ${sessionActive}/${sessionMax} \u00b7 ${modeHint}`)}\n`,
+      )
+      for (const [, info] of sessionDisplayInfo) {
+        const titleText = info.title
+          ? truncatePrompt(info.title, 35)
+          : chalk.dim('Attached')
+        const titleLinked = wrapWithOsc8Link(titleText, info.url)
+        const act = info.activity
+        const showAct = act && act.type !== 'result' && act.type !== 'error'
+        const actText = showAct
+          ? chalk.dim(` ${truncatePrompt(act.summary, 40)}`)
+          : ''
+        writeStatus(`    ${titleLinked}${actText}
+`)
+      }
+    }
+
+    // Mode line for spawn modes with a single slot (or true single-session mode)
+    if (sessionMax === 1) {
+      const modeText =
+        spawnMode === 'single-session'
+          ? 'Single session \u00b7 exits when complete'
+          : spawnMode === 'worktree'
+            ? `Capacity: ${sessionActive}/1 \u00b7 New sessions will be created in an isolated worktree`
+            : `Capacity: ${sessionActive}/1 \u00b7 New sessions will be created in the current directory`
+      writeStatus(`    ${chalk.dim(modeText)}\n`)
+    }
+
+    // Tool activity line for single-session mode
+    if (
+      sessionMax === 1 &&
+      !isIdle &&
+      lastToolSummary &&
+      Date.now() - lastToolTime < TOOL_DISPLAY_EXPIRY_MS
+    ) {
+      writeStatus(`  ${chalk.dim(truncatePrompt(lastToolSummary, 60))}\n`)
+    }
+
+    // Blank line separator before footer
+    const url = activeSessionUrl ?? connectUrl
+    if (url) {
+      writeStatus('\n')
+      const footerText = isIdle
+        ? buildIdleFooterText(url)
+        : buildActiveFooterText(url)
+      const qrHint = qrVisible
+        ? chalk.dim.italic('space to hide QR code')
+        : chalk.dim.italic('space to show QR code')
+      const toggleHint = spawnModeDisplay
+        ? chalk.dim.italic(' \u00b7 w to toggle spawn mode')
+        : ''
+      writeStatus(`${chalk.dim(footerText)}\n`)
+      writeStatus(`${qrHint}${toggleHint}\n`)
+    }
+  }
+
+  return {
+    printBanner(config: BridgeConfig, environmentId: string): void {
+      cachedIngressUrl = config.sessionIngressUrl
+      cachedEnvironmentId = environmentId
+      connectUrl = buildBridgeConnectUrl(environmentId, cachedIngressUrl)
+      regenerateQr(connectUrl)
+
+      if (verbose) {
+        write(chalk.dim(`Remote Control`) + ` v${MACRO.VERSION}\n`)
+      }
+      if (verbose) {
+        if (config.spawnMode !== 'single-session') {
+          write(chalk.dim(`Spawn mode: `) + `${config.spawnMode}\n`)
+          write(
+            chalk.dim(`Max concurrent sessions: `) + `${config.maxSessions}\n`,
+          )
+        }
+        write(chalk.dim(`Environment ID: `) + `${environmentId}\n`)
+      }
+      if (config.sandbox) {
+        write(chalk.dim(`Sandbox: `) + `${chalk.green('Enabled')}\n`)
+      }
+      write('\n')
+
+      // Start connecting spinner — first updateIdleStatus() will stop it
+      startConnecting()
+    },
+
    logSessionStart(sessionId: string, prompt: string): void {
      // Only logged under --verbose; the prompt is truncated for one line.
      if (verbose) {
        const short = truncatePrompt(prompt, 80)
        printLog(
          chalk.dim(`[${timestamp()}]`) +
            ` Session started: ${chalk.white(`"${short}"`)} (${chalk.dim(sessionId)})\n`,
        )
      }
    },

    logSessionComplete(sessionId: string, durationMs: number): void {
      // Always logged (not verbose-gated), with a human-readable duration.
      printLog(
        chalk.dim(`[${timestamp()}]`) +
          ` Session ${chalk.green('completed')} (${formatDuration(durationMs)}) ${chalk.dim(sessionId)}\n`,
      )
    },

    logSessionFailed(sessionId: string, error: string): void {
      printLog(
        chalk.dim(`[${timestamp()}]`) +
          ` Session ${chalk.red('failed')}: ${error} ${chalk.dim(sessionId)}\n`,
      )
    },

    logStatus(message: string): void {
      // Timestamped one-line status message, always shown.
      printLog(chalk.dim(`[${timestamp()}]`) + ` ${message}\n`)
    },

    logVerbose(message: string): void {
      // Whole line (timestamp + message) dimmed, unlike logStatus.
      if (verbose) {
        printLog(chalk.dim(`[${timestamp()}] ${message}`) + '\n')
      }
    },

    logError(message: string): void {
      printLog(chalk.red(`[${timestamp()}] Error: ${message}`) + '\n')
    },

    logReconnected(disconnectedMs: number): void {
      printLog(
        chalk.dim(`[${timestamp()}]`) +
          ` ${chalk.green('Reconnected')} after ${formatDuration(disconnectedMs)}\n`,
      )
    },
+
    setRepoInfo(repo: string, branchName: string): void {
      // Stored for later renders; shown as dim suffixes in status output.
      repoName = repo
      branch = branchName
    },

    setDebugLogPath(path: string): void {
      debugLogPath = path
    },
+
    updateIdleStatus(): void {
      stopConnecting()

      // Reset per-session display state back to the environment-level view.
      currentState = 'idle'
      currentStateText = 'Ready'
      lastToolSummary = null
      lastToolTime = 0
      activeSessionUrl = null
      regenerateQr(connectUrl)
      renderStatusLine()
    },

    setAttached(sessionId: string): void {
      stopConnecting()
      currentState = 'attached'
      currentStateText = 'Connected'
      lastToolSummary = null
      lastToolTime = 0
      // Multi-session: keep footer/QR on the environment connect URL so users
      // can spawn more sessions. Per-session links are in the bullet list.
      if (sessionMax <= 1) {
        activeSessionUrl = buildBridgeSessionUrl(
          sessionId,
          cachedEnvironmentId,
          cachedIngressUrl,
        )
        regenerateQr(activeSessionUrl)
      }
      renderStatusLine()
    },
+
    updateReconnectingStatus(delayStr: string, elapsedStr: string): void {
      stopConnecting()
      clearStatusLines()
      currentState = 'reconnecting'

      // QR code above the status line
      if (qrVisible) {
        for (const line of qrLines) {
          writeStatus(`${chalk.dim(line)}\n`)
        }
      }

      // Reuses the connecting spinner frames/tick for the reconnect animation.
      const frame =
        BRIDGE_SPINNER_FRAMES[connectingTick % BRIDGE_SPINNER_FRAMES.length]!
      connectingTick++
      writeStatus(
        `${chalk.yellow(frame)} ${chalk.yellow('Reconnecting')} ${chalk.dim('\u00b7')} ${chalk.dim(`retrying in ${delayStr}`)} ${chalk.dim('\u00b7')} ${chalk.dim(`disconnected ${elapsedStr}`)}\n`,
      )
    },

    updateFailedStatus(error: string): void {
      stopConnecting()
      clearStatusLines()
      currentState = 'failed'

      // Append dim repo/branch context when known.
      let suffix = ''
      if (repoName) {
        suffix += chalk.dim(' \u00b7 ') + chalk.dim(repoName)
      }
      if (branch) {
        suffix += chalk.dim(' \u00b7 ') + chalk.dim(branch)
      }

      writeStatus(
        `${chalk.red(BRIDGE_FAILED_INDICATOR)} ${chalk.red('Remote Control Failed')}${suffix}\n`,
      )
      writeStatus(`${chalk.dim(FAILED_FOOTER_TEXT)}\n`)

      if (error) {
        writeStatus(`${chalk.red(error)}\n`)
      }
    },
+
    updateSessionStatus(
      _sessionId: string,
      _elapsed: string,
      activity: SessionActivity,
      _trail: string[],
    ): void {
      // Cache tool activity for the second status line
      if (activity.type === 'tool_start') {
        lastToolSummary = activity.summary
        lastToolTime = Date.now()
      }
      renderStatusLine()
    },

    clearStatus(): void {
      stopConnecting()
      clearStatusLines()
    },

    toggleQr(): void {
      qrVisible = !qrVisible
      renderStatusLine()
    },

    updateSessionCount(active: number, max: number, mode: SpawnMode): void {
      // No-op when nothing changed, avoiding pointless state churn.
      if (sessionActive === active && sessionMax === max && spawnMode === mode)
        return
      sessionActive = active
      sessionMax = max
      spawnMode = mode
      // Don't re-render here — the status ticker calls renderStatusLine
      // on its own cadence, and the next tick will pick up the new values.
    },

    setSpawnModeDisplay(mode: 'same-dir' | 'worktree' | null): void {
      if (spawnModeDisplay === mode) return
      spawnModeDisplay = mode
      // Also sync the #21118-added spawnMode so the next render shows correct
      // mode hint + branch visibility. Don't render here — matches
      // updateSessionCount: called before printBanner (initial setup) and
      // again from the `w` handler (which follows with refreshDisplay).
      if (mode) spawnMode = mode
    },
+
    addSession(sessionId: string, url: string): void {
      sessionDisplayInfo.set(sessionId, { url })
    },

    updateSessionActivity(sessionId: string, activity: SessionActivity): void {
      // Silently ignore sessions never registered via addSession().
      const info = sessionDisplayInfo.get(sessionId)
      if (!info) return
      info.activity = activity
    },

    setSessionTitle(sessionId: string, title: string): void {
      const info = sessionDisplayInfo.get(sessionId)
      if (!info) return
      info.title = title
      // Guard against reconnecting/failed — renderStatusLine clears then returns
      // early for those states, which would erase the spinner/error.
      if (currentState === 'reconnecting' || currentState === 'failed') return
      if (sessionMax === 1) {
        // Single-session: show title in the main status line too.
        currentState = 'titled'
        currentStateText = truncatePrompt(title, 40)
      }
      renderStatusLine()
    },

    removeSession(sessionId: string): void {
      sessionDisplayInfo.delete(sessionId)
    },

    refreshDisplay(): void {
      // Skip during reconnecting/failed — renderStatusLine clears then returns
      // early for those states, which would erase the spinner/error.
      if (currentState === 'reconnecting' || currentState === 'failed') return
      renderStatusLine()
    },
+  }
+}

+ 56 - 0
src/bridge/capacityWake.ts

@@ -0,0 +1,56 @@
+/**
+ * Shared capacity-wake primitive for bridge poll loops.
+ *
+ * Both replBridge.ts and bridgeMain.ts need to sleep while "at capacity"
+ * but wake early when either (a) the outer loop signal aborts (shutdown),
+ * or (b) capacity frees up (session done / transport lost). This module
+ * encapsulates the mutable wake-controller + two-signal merger that both
+ * poll loops previously duplicated byte-for-byte.
+ */
+
/** Merged abort signal for one at-capacity sleep, plus its listener cleanup. */
export type CapacitySignal = { signal: AbortSignal; cleanup: () => void }

export type CapacityWake = {
  /**
   * Create a signal that aborts when either the outer loop signal or the
   * capacity-wake controller fires. Returns the merged signal and a cleanup
   * function that removes listeners when the sleep resolves normally
   * (without abort).
   */
  signal(): CapacitySignal
  /**
   * Abort the current at-capacity sleep and arm a fresh controller so the
   * poll loop immediately re-checks for new work.
   */
  wake(): void
}
+
+export function createCapacityWake(outerSignal: AbortSignal): CapacityWake {
+  let wakeController = new AbortController()
+
+  function wake(): void {
+    wakeController.abort()
+    wakeController = new AbortController()
+  }
+
+  function signal(): CapacitySignal {
+    const merged = new AbortController()
+    const abort = (): void => merged.abort()
+    if (outerSignal.aborted || wakeController.signal.aborted) {
+      merged.abort()
+      return { signal: merged.signal, cleanup: () => {} }
+    }
+    outerSignal.addEventListener('abort', abort, { once: true })
+    const capSig = wakeController.signal
+    capSig.addEventListener('abort', abort, { once: true })
+    return {
+      signal: merged.signal,
+      cleanup: () => {
+        outerSignal.removeEventListener('abort', abort)
+        capSig.removeEventListener('abort', abort)
+      },
+    }
+  }
+
+  return { signal, wake }
+}

+ 168 - 0
src/bridge/codeSessionApi.ts

@@ -0,0 +1,168 @@
+/**
+ * Thin HTTP wrappers for the CCR v2 code-session API.
+ *
+ * Separate file from remoteBridgeCore.ts so the SDK /bridge subpath can
+ * export createCodeSession + fetchRemoteCredentials without bundling the
+ * heavy CLI tree (analytics, transport, etc.). Callers supply explicit
+ * accessToken + baseUrl — no implicit auth or config reads.
+ */
+
+import axios from 'axios'
+import { logForDebugging } from '../utils/debug.js'
+import { errorMessage } from '../utils/errors.js'
+import { jsonStringify } from '../utils/slowOperations.js'
+import { extractErrorDetail } from './debugUtils.js'
+
+const ANTHROPIC_VERSION = '2023-06-01'
+
+function oauthHeaders(accessToken: string): Record<string, string> {
+  return {
+    Authorization: `Bearer ${accessToken}`,
+    'Content-Type': 'application/json',
+    'anthropic-version': ANTHROPIC_VERSION,
+  }
+}
+
+export async function createCodeSession(
+  baseUrl: string,
+  accessToken: string,
+  title: string,
+  timeoutMs: number,
+  tags?: string[],
+): Promise<string | null> {
+  const url = `${baseUrl}/v1/code/sessions`
+  let response
+  try {
+    response = await axios.post(
+      url,
+      // bridge: {} is the positive signal for the oneof runner — omitting it
+      // (or sending environment_id: "") now 400s. BridgeRunner is an empty
+      // message today; it's a placeholder for future bridge-specific options.
+      { title, bridge: {}, ...(tags?.length ? { tags } : {}) },
+      {
+        headers: oauthHeaders(accessToken),
+        timeout: timeoutMs,
+        validateStatus: s => s < 500,
+      },
+    )
+  } catch (err: unknown) {
+    logForDebugging(
+      `[code-session] Session create request failed: ${errorMessage(err)}`,
+    )
+    return null
+  }
+
+  if (response.status !== 200 && response.status !== 201) {
+    const detail = extractErrorDetail(response.data)
+    logForDebugging(
+      `[code-session] Session create failed ${response.status}${detail ? `: ${detail}` : ''}`,
+    )
+    return null
+  }
+
+  const data: unknown = response.data
+  if (
+    !data ||
+    typeof data !== 'object' ||
+    !('session' in data) ||
+    !data.session ||
+    typeof data.session !== 'object' ||
+    !('id' in data.session) ||
+    typeof data.session.id !== 'string' ||
+    !data.session.id.startsWith('cse_')
+  ) {
+    logForDebugging(
+      `[code-session] No session.id (cse_*) in response: ${jsonStringify(data).slice(0, 200)}`,
+    )
+    return null
+  }
+  return data.session.id
+}
+
/**
 * Credentials from POST /bridge. JWT is opaque — do not decode.
 * Each /bridge call bumps worker_epoch server-side (it IS the register).
 */
export type RemoteCredentials = {
  // Opaque bearer credential for the worker — treat as a secret.
  worker_jwt: string
  // Base URL the worker should use for subsequent API calls.
  api_base_url: string
  // Lifetime of worker_jwt (presumably seconds, per OAuth convention —
  // confirm against the server contract).
  expires_in: number
  // Registration counter bumped by the server on each /bridge call.
  worker_epoch: number
}
+
+export async function fetchRemoteCredentials(
+  sessionId: string,
+  baseUrl: string,
+  accessToken: string,
+  timeoutMs: number,
+  trustedDeviceToken?: string,
+): Promise<RemoteCredentials | null> {
+  const url = `${baseUrl}/v1/code/sessions/${sessionId}/bridge`
+  const headers = oauthHeaders(accessToken)
+  if (trustedDeviceToken) {
+    headers['X-Trusted-Device-Token'] = trustedDeviceToken
+  }
+  let response
+  try {
+    response = await axios.post(
+      url,
+      {},
+      {
+        headers,
+        timeout: timeoutMs,
+        validateStatus: s => s < 500,
+      },
+    )
+  } catch (err: unknown) {
+    logForDebugging(
+      `[code-session] /bridge request failed: ${errorMessage(err)}`,
+    )
+    return null
+  }
+
+  if (response.status !== 200) {
+    const detail = extractErrorDetail(response.data)
+    logForDebugging(
+      `[code-session] /bridge failed ${response.status}${detail ? `: ${detail}` : ''}`,
+    )
+    return null
+  }
+
+  const data: unknown = response.data
+  if (
+    data === null ||
+    typeof data !== 'object' ||
+    !('worker_jwt' in data) ||
+    typeof data.worker_jwt !== 'string' ||
+    !('expires_in' in data) ||
+    typeof data.expires_in !== 'number' ||
+    !('api_base_url' in data) ||
+    typeof data.api_base_url !== 'string' ||
+    !('worker_epoch' in data)
+  ) {
+    logForDebugging(
+      `[code-session] /bridge response malformed (need worker_jwt, expires_in, api_base_url, worker_epoch): ${jsonStringify(data).slice(0, 200)}`,
+    )
+    return null
+  }
+  // protojson serializes int64 as a string to avoid JS precision loss;
+  // Go may also return a number depending on encoder settings.
+  const rawEpoch = data.worker_epoch
+  const epoch = typeof rawEpoch === 'string' ? Number(rawEpoch) : rawEpoch
+  if (
+    typeof epoch !== 'number' ||
+    !Number.isFinite(epoch) ||
+    !Number.isSafeInteger(epoch)
+  ) {
+    logForDebugging(
+      `[code-session] /bridge worker_epoch invalid: ${jsonStringify(rawEpoch)}`,
+    )
+    return null
+  }
+  return {
+    worker_jwt: data.worker_jwt,
+    api_base_url: data.api_base_url,
+    expires_in: data.expires_in,
+    worker_epoch: epoch,
+  }
+}

+ 384 - 0
src/bridge/createSession.ts

@@ -0,0 +1,384 @@
+import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
+import { logForDebugging } from '../utils/debug.js'
+import { errorMessage } from '../utils/errors.js'
+import { extractErrorDetail } from './debugUtils.js'
+import { toCompatSessionId } from './sessionIdCompat.js'
+
// Where the session's code comes from — currently only git repositories.
type GitSource = {
  type: 'git_repository'
  url: string
  // Branch (or other revision) to start from; omitted when unknown.
  revision?: string
}

// Outcome descriptor: the GitHub repo plus the claude/* branch names the
// session's results are associated with (consumed server-side — confirm
// exact semantics against the /v1/sessions contract).
type GitOutcome = {
  type: 'git_repository'
  git_info: { type: 'github'; repo: string; branches: string[] }
}

// Events must be wrapped in { type: 'event', data: <sdk_message> } for the
// POST /v1/sessions endpoint (discriminated union format).
type SessionEvent = {
  type: 'event'
  data: SDKMessage
}
+
+/**
+ * Create a session on a bridge environment via POST /v1/sessions.
+ *
+ * Used by both `claude remote-control` (empty session so the user has somewhere to
+ * type immediately) and `/remote-control` (session pre-populated with conversation
+ * history).
+ *
+ * Returns the session ID on success, or null if creation fails (non-fatal).
+ */
+export async function createBridgeSession({
+  environmentId,
+  title,
+  events,
+  gitRepoUrl,
+  branch,
+  signal,
+  baseUrl: baseUrlOverride,
+  getAccessToken,
+  permissionMode,
+}: {
+  environmentId: string
+  title?: string
+  events: SessionEvent[]
+  gitRepoUrl: string | null
+  branch: string
+  signal: AbortSignal
+  baseUrl?: string
+  getAccessToken?: () => string | undefined
+  permissionMode?: string
+}): Promise<string | null> {
+  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
+  const { getOrganizationUUID } = await import('../services/oauth/client.js')
+  const { getOauthConfig } = await import('../constants/oauth.js')
+  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
+  const { parseGitHubRepository } = await import('../utils/detectRepository.js')
+  const { getDefaultBranch } = await import('../utils/git.js')
+  const { getMainLoopModel } = await import('../utils/model/model.js')
+  const { default: axios } = await import('axios')
+
+  const accessToken =
+    getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
+  if (!accessToken) {
+    logForDebugging('[bridge] No access token for session creation')
+    return null
+  }
+
+  const orgUUID = await getOrganizationUUID()
+  if (!orgUUID) {
+    logForDebugging('[bridge] No org UUID for session creation')
+    return null
+  }
+
+  // Build git source and outcome context
+  let gitSource: GitSource | null = null
+  let gitOutcome: GitOutcome | null = null
+
+  if (gitRepoUrl) {
+    const { parseGitRemote } = await import('../utils/detectRepository.js')
+    const parsed = parseGitRemote(gitRepoUrl)
+    if (parsed) {
+      const { host, owner, name } = parsed
+      const revision = branch || (await getDefaultBranch()) || undefined
+      gitSource = {
+        type: 'git_repository',
+        url: `https://${host}/${owner}/${name}`,
+        revision,
+      }
+      gitOutcome = {
+        type: 'git_repository',
+        git_info: {
+          type: 'github',
+          repo: `${owner}/${name}`,
+          branches: [`claude/${branch || 'task'}`],
+        },
+      }
+    } else {
+      // Fallback: try parseGitHubRepository for owner/repo format
+      const ownerRepo = parseGitHubRepository(gitRepoUrl)
+      if (ownerRepo) {
+        const [owner, name] = ownerRepo.split('/')
+        if (owner && name) {
+          const revision = branch || (await getDefaultBranch()) || undefined
+          gitSource = {
+            type: 'git_repository',
+            url: `https://github.com/${owner}/${name}`,
+            revision,
+          }
+          gitOutcome = {
+            type: 'git_repository',
+            git_info: {
+              type: 'github',
+              repo: `${owner}/${name}`,
+              branches: [`claude/${branch || 'task'}`],
+            },
+          }
+        }
+      }
+    }
+  }
+
+  const requestBody = {
+    ...(title !== undefined && { title }),
+    events,
+    session_context: {
+      sources: gitSource ? [gitSource] : [],
+      outcomes: gitOutcome ? [gitOutcome] : [],
+      model: getMainLoopModel(),
+    },
+    environment_id: environmentId,
+    source: 'remote-control',
+    ...(permissionMode && { permission_mode: permissionMode }),
+  }
+
+  const headers = {
+    ...getOAuthHeaders(accessToken),
+    'anthropic-beta': 'ccr-byoc-2025-07-29',
+    'x-organization-uuid': orgUUID,
+  }
+
+  const url = `${baseUrlOverride ?? getOauthConfig().BASE_API_URL}/v1/sessions`
+  let response
+  try {
+    response = await axios.post(url, requestBody, {
+      headers,
+      signal,
+      validateStatus: s => s < 500,
+    })
+  } catch (err: unknown) {
+    logForDebugging(
+      `[bridge] Session creation request failed: ${errorMessage(err)}`,
+    )
+    return null
+  }
+  const isSuccess = response.status === 200 || response.status === 201
+
+  if (!isSuccess) {
+    const detail = extractErrorDetail(response.data)
+    logForDebugging(
+      `[bridge] Session creation failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
+    )
+    return null
+  }
+
+  const sessionData: unknown = response.data
+  if (
+    !sessionData ||
+    typeof sessionData !== 'object' ||
+    !('id' in sessionData) ||
+    typeof sessionData.id !== 'string'
+  ) {
+    logForDebugging('[bridge] No session ID in response')
+    return null
+  }
+
+  return sessionData.id
+}
+
+/**
+ * Fetch a bridge session via GET /v1/sessions/{id}.
+ *
+ * Returns the session's environment_id (for `--session-id` resume) and title.
+ * Uses the same org-scoped headers as create/archive — the environments-level
+ * client in bridgeApi.ts uses a different beta header and no org UUID, which
+ * makes the Sessions API return 404.
+ */
+export async function getBridgeSession(
+  sessionId: string,
+  opts?: { baseUrl?: string; getAccessToken?: () => string | undefined },
+): Promise<{ environment_id?: string; title?: string } | null> {
+  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
+  const { getOrganizationUUID } = await import('../services/oauth/client.js')
+  const { getOauthConfig } = await import('../constants/oauth.js')
+  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
+  const { default: axios } = await import('axios')
+
+  const accessToken =
+    opts?.getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
+  if (!accessToken) {
+    logForDebugging('[bridge] No access token for session fetch')
+    return null
+  }
+
+  const orgUUID = await getOrganizationUUID()
+  if (!orgUUID) {
+    logForDebugging('[bridge] No org UUID for session fetch')
+    return null
+  }
+
+  const headers = {
+    ...getOAuthHeaders(accessToken),
+    'anthropic-beta': 'ccr-byoc-2025-07-29',
+    'x-organization-uuid': orgUUID,
+  }
+
+  const url = `${opts?.baseUrl ?? getOauthConfig().BASE_API_URL}/v1/sessions/${sessionId}`
+  logForDebugging(`[bridge] Fetching session ${sessionId}`)
+
+  let response
+  try {
+    response = await axios.get<{ environment_id?: string; title?: string }>(
+      url,
+      { headers, timeout: 10_000, validateStatus: s => s < 500 },
+    )
+  } catch (err: unknown) {
+    logForDebugging(
+      `[bridge] Session fetch request failed: ${errorMessage(err)}`,
+    )
+    return null
+  }
+
+  if (response.status !== 200) {
+    const detail = extractErrorDetail(response.data)
+    logForDebugging(
+      `[bridge] Session fetch failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
+    )
+    return null
+  }
+
+  return response.data
+}
+
+/**
+ * Archive a bridge session via POST /v1/sessions/{id}/archive.
+ *
+ * The CCR server never auto-archives sessions — archival is always an
+ * explicit client action. Both `claude remote-control` (standalone bridge) and the
+ * always-on `/remote-control` REPL bridge call this during shutdown to archive any
+ * sessions that are still alive.
+ *
+ * The archive endpoint accepts sessions in any status (running, idle,
+ * requires_action, pending) and returns 409 if already archived, making
+ * it safe to call even if the server-side runner already archived the
+ * session.
+ *
+ * Callers must handle errors — this function has no try/catch; 5xx,
+ * timeouts, and network errors throw. Archival is best-effort during
+ * cleanup; call sites wrap with .catch().
+ */
+export async function archiveBridgeSession(
+  sessionId: string,
+  opts?: {
+    baseUrl?: string
+    getAccessToken?: () => string | undefined
+    timeoutMs?: number
+  },
+): Promise<void> {
+  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
+  const { getOrganizationUUID } = await import('../services/oauth/client.js')
+  const { getOauthConfig } = await import('../constants/oauth.js')
+  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
+  const { default: axios } = await import('axios')
+
+  const accessToken =
+    opts?.getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
+  if (!accessToken) {
+    logForDebugging('[bridge] No access token for session archive')
+    return
+  }
+
+  const orgUUID = await getOrganizationUUID()
+  if (!orgUUID) {
+    logForDebugging('[bridge] No org UUID for session archive')
+    return
+  }
+
+  const headers = {
+    ...getOAuthHeaders(accessToken),
+    'anthropic-beta': 'ccr-byoc-2025-07-29',
+    'x-organization-uuid': orgUUID,
+  }
+
+  const url = `${opts?.baseUrl ?? getOauthConfig().BASE_API_URL}/v1/sessions/${sessionId}/archive`
+  logForDebugging(`[bridge] Archiving session ${sessionId}`)
+
+  const response = await axios.post(
+    url,
+    {},
+    {
+      headers,
+      timeout: opts?.timeoutMs ?? 10_000,
+      validateStatus: s => s < 500,
+    },
+  )
+
+  if (response.status === 200) {
+    logForDebugging(`[bridge] Session ${sessionId} archived successfully`)
+  } else {
+    const detail = extractErrorDetail(response.data)
+    logForDebugging(
+      `[bridge] Session archive failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
+    )
+  }
+}
+
+/**
+ * Update the title of a bridge session via PATCH /v1/sessions/{id}.
+ *
+ * Called when the user renames a session via /rename while a bridge
+ * connection is active, so the title stays in sync on claude.ai/code.
+ *
+ * Errors are swallowed — title sync is best-effort.
+ */
+export async function updateBridgeSessionTitle(
+  sessionId: string,
+  title: string,
+  opts?: { baseUrl?: string; getAccessToken?: () => string | undefined },
+): Promise<void> {
+  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
+  const { getOrganizationUUID } = await import('../services/oauth/client.js')
+  const { getOauthConfig } = await import('../constants/oauth.js')
+  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
+  const { default: axios } = await import('axios')
+
+  const accessToken =
+    opts?.getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
+  if (!accessToken) {
+    logForDebugging('[bridge] No access token for session title update')
+    return
+  }
+
+  const orgUUID = await getOrganizationUUID()
+  if (!orgUUID) {
+    logForDebugging('[bridge] No org UUID for session title update')
+    return
+  }
+
+  const headers = {
+    ...getOAuthHeaders(accessToken),
+    'anthropic-beta': 'ccr-byoc-2025-07-29',
+    'x-organization-uuid': orgUUID,
+  }
+
+  // Compat gateway only accepts session_* (compat/convert.go:27). v2 callers
+  // pass raw cse_*; retag here so all callers can pass whatever they hold.
+  // Idempotent for v1's session_* and bridgeMain's pre-converted compatSessionId.
+  const compatId = toCompatSessionId(sessionId)
+  const url = `${opts?.baseUrl ?? getOauthConfig().BASE_API_URL}/v1/sessions/${compatId}`
+  logForDebugging(`[bridge] Updating session title: ${compatId} → ${title}`)
+
+  try {
+    const response = await axios.patch(
+      url,
+      { title },
+      { headers, timeout: 10_000, validateStatus: s => s < 500 },
+    )
+
+    if (response.status === 200) {
+      logForDebugging(`[bridge] Session title updated successfully`)
+    } else {
+      const detail = extractErrorDetail(response.data)
+      logForDebugging(
+        `[bridge] Session title update failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
+      )
+    }
+  } catch (err: unknown) {
+    logForDebugging(
+      `[bridge] Session title update request failed: ${errorMessage(err)}`,
+    )
+  }
+}

+ 141 - 0
src/bridge/debugUtils.ts

@@ -0,0 +1,141 @@
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  logEvent,
+} from '../services/analytics/index.js'
+import { logForDebugging } from '../utils/debug.js'
+import { errorMessage } from '../utils/errors.js'
+import { jsonStringify } from '../utils/slowOperations.js'
+
// Max characters of any debug-logged payload before truncation.
const DEBUG_MSG_LIMIT = 2000
+
+const SECRET_FIELD_NAMES = [
+  'session_ingress_token',
+  'environment_secret',
+  'access_token',
+  'secret',
+  'token',
+]
+
+const SECRET_PATTERN = new RegExp(
+  `"(${SECRET_FIELD_NAMES.join('|')})"\\s*:\\s*"([^"]*)"`,
+  'g',
+)
+
+const REDACT_MIN_LENGTH = 16
+
+export function redactSecrets(s: string): string {
+  return s.replace(SECRET_PATTERN, (_match, field: string, value: string) => {
+    if (value.length < REDACT_MIN_LENGTH) {
+      return `"${field}":"[REDACTED]"`
+    }
+    const redacted = `${value.slice(0, 8)}...${value.slice(-4)}`
+    return `"${field}":"${redacted}"`
+  })
+}
+
+/** Truncate a string for debug logging, collapsing newlines. */
+export function debugTruncate(s: string): string {
+  const flat = s.replace(/\n/g, '\\n')
+  if (flat.length <= DEBUG_MSG_LIMIT) {
+    return flat
+  }
+  return flat.slice(0, DEBUG_MSG_LIMIT) + `... (${flat.length} chars)`
+}
+
+/** Truncate a JSON-serializable value for debug logging. */
+export function debugBody(data: unknown): string {
+  const raw = typeof data === 'string' ? data : jsonStringify(data)
+  const s = redactSecrets(raw)
+  if (s.length <= DEBUG_MSG_LIMIT) {
+    return s
+  }
+  return s.slice(0, DEBUG_MSG_LIMIT) + `... (${s.length} chars)`
+}
+
+/**
+ * Extract a descriptive error message from an axios error (or any error).
+ * For HTTP errors, appends the server's response body message if available,
+ * since axios's default message only includes the status code.
+ */
+export function describeAxiosError(err: unknown): string {
+  const msg = errorMessage(err)
+  if (err && typeof err === 'object' && 'response' in err) {
+    const response = (err as { response?: { data?: unknown } }).response
+    if (response?.data && typeof response.data === 'object') {
+      const data = response.data as Record<string, unknown>
+      const detail =
+        typeof data.message === 'string'
+          ? data.message
+          : typeof data.error === 'object' &&
+              data.error &&
+              'message' in data.error &&
+              typeof (data.error as Record<string, unknown>).message ===
+                'string'
+            ? (data.error as Record<string, unknown>).message
+            : undefined
+      if (detail) {
+        return `${msg}: ${detail}`
+      }
+    }
+  }
+  return msg
+}
+
+/**
+ * Extract the HTTP status code from an axios error, if present.
+ * Returns undefined for non-HTTP errors (e.g. network failures).
+ */
+export function extractHttpStatus(err: unknown): number | undefined {
+  if (
+    err &&
+    typeof err === 'object' &&
+    'response' in err &&
+    (err as { response?: { status?: unknown } }).response &&
+    typeof (err as { response: { status?: unknown } }).response.status ===
+      'number'
+  ) {
+    return (err as { response: { status: number } }).response.status
+  }
+  return undefined
+}
+
+/**
+ * Pull a human-readable message out of an API error response body.
+ * Checks `data.message` first, then `data.error.message`.
+ */
+export function extractErrorDetail(data: unknown): string | undefined {
+  if (!data || typeof data !== 'object') return undefined
+  if ('message' in data && typeof data.message === 'string') {
+    return data.message
+  }
+  if (
+    'error' in data &&
+    data.error !== null &&
+    typeof data.error === 'object' &&
+    'message' in data.error &&
+    typeof data.error.message === 'string'
+  ) {
+    return data.error.message
+  }
+  return undefined
+}
+
+/**
+ * Log a bridge init skip — debug message + `tengu_bridge_repl_skipped`
+ * analytics event. Centralizes the event name and the AnalyticsMetadata
+ * cast so call sites don't each repeat the 5-line boilerplate.
+ */
+export function logBridgeSkip(
+  reason: string,
+  debugMsg?: string,
+  v2?: boolean,
+): void {
+  if (debugMsg) {
+    logForDebugging(debugMsg)
+  }
+  logEvent('tengu_bridge_repl_skipped', {
+    reason:
+      reason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    ...(v2 !== undefined && { v2 }),
+  })
+}

+ 165 - 0
src/bridge/envLessBridgeConfig.ts

@@ -0,0 +1,165 @@
+import { z } from 'zod/v4'
+import { getFeatureValue_DEPRECATED } from '../services/analytics/growthbook.js'
+import { lazySchema } from '../utils/lazySchema.js'
+import { lt } from '../utils/semver.js'
+import { isEnvLessBridgeEnabled } from './bridgeEnabled.js'
+
/**
 * Timing/tuning knobs for the env-less (v2) bridge path. Fetched from
 * GrowthBook (`tengu_bridge_repl_v2_config`) by getEnvLessBridgeConfig;
 * falls back to DEFAULT_ENV_LESS_BRIDGE_CONFIG on validation failure.
 */
export type EnvLessBridgeConfig = {
  // withRetry — init-phase backoff (createSession, POST /bridge, recovery /bridge)
  init_retry_max_attempts: number
  init_retry_base_delay_ms: number
  init_retry_jitter_fraction: number
  init_retry_max_delay_ms: number
  // axios timeout for POST /sessions, POST /bridge, POST /archive
  http_timeout_ms: number
  // BoundedUUIDSet ring size (echo + re-delivery dedup)
  uuid_dedup_buffer_size: number
  // CCRClient worker heartbeat cadence. Server TTL is 60s — 20s gives 3× margin.
  heartbeat_interval_ms: number
  // ±fraction of interval — per-beat jitter to spread fleet load.
  heartbeat_jitter_fraction: number
  // Fire proactive JWT refresh this long before expires_in. Larger buffer =
  // more frequent refresh (refresh cadence ≈ expires_in - buffer).
  token_refresh_buffer_ms: number
  // Archive POST timeout in teardown(). Distinct from http_timeout_ms because
  // gracefulShutdown races runCleanupFunctions() against a 2s cap — a 10s
  // axios timeout on a slow/stalled archive burns the whole budget on a
  // request that forceExit will kill anyway.
  teardown_archive_timeout_ms: number
  // Deadline for onConnect after transport.connect(). If neither onConnect
  // nor onClose fires before this, emit tengu_bridge_repl_connect_timeout
  // — the only telemetry for the ~1% of sessions that emit `started` then
  // go silent (no error, no event, just nothing).
  connect_timeout_ms: number
  // Semver floor for the env-less bridge path. Separate from the v1
  // tengu_bridge_min_version config so a v2-specific bug can force upgrades
  // without blocking v1 (env-based) clients, and vice versa.
  min_version: string
  // When true, tell users their claude.ai app may be too old to see v2
  // sessions — lets us roll the v2 bridge before the app ships the new
  // session-list query.
  should_show_app_upgrade_message: boolean
}
+
/**
 * Hard-coded fallback used when the remote config is missing or fails
 * schema validation. Values match the per-field defaults declared in
 * envLessBridgeConfigSchema below.
 */
export const DEFAULT_ENV_LESS_BRIDGE_CONFIG: EnvLessBridgeConfig = {
  init_retry_max_attempts: 3,
  init_retry_base_delay_ms: 500,
  init_retry_jitter_fraction: 0.25,
  init_retry_max_delay_ms: 4000,
  http_timeout_ms: 10_000,
  uuid_dedup_buffer_size: 2000,
  heartbeat_interval_ms: 20_000,
  heartbeat_jitter_fraction: 0.1,
  token_refresh_buffer_ms: 300_000,
  teardown_archive_timeout_ms: 1500,
  connect_timeout_ms: 15_000,
  min_version: '0.0.0',
  should_show_app_upgrade_message: false,
}
+
// Floors reject the whole object on violation (fall back to DEFAULT) rather
// than partially trusting — same defense-in-depth as pollConfig.ts.
const envLessBridgeConfigSchema = lazySchema(() =>
  z.object({
    init_retry_max_attempts: z.number().int().min(1).max(10).default(3),
    init_retry_base_delay_ms: z.number().int().min(100).default(500),
    init_retry_jitter_fraction: z.number().min(0).max(1).default(0.25),
    init_retry_max_delay_ms: z.number().int().min(500).default(4000),
    http_timeout_ms: z.number().int().min(2000).default(10_000),
    uuid_dedup_buffer_size: z.number().int().min(100).max(50_000).default(2000),
    // Server TTL is 60s. Floor 5s prevents thrash; cap 30s keeps ≥2× margin.
    heartbeat_interval_ms: z
      .number()
      .int()
      .min(5000)
      .max(30_000)
      .default(20_000),
    // ±fraction per beat. Cap 0.5: at max interval (30s) × 1.5 = 45s worst case,
    // still under the 60s TTL.
    heartbeat_jitter_fraction: z.number().min(0).max(0.5).default(0.1),
    // Floor 30s prevents tight-looping. Cap 30min rejects buffer-vs-delay
    // semantic inversion: ops entering expires_in-5min (the *delay until
    // refresh*) instead of 5min (the *buffer before expiry*) yields
    // delayMs = expires_in - buffer ≈ 5min instead of ≈4h. Both are positive
    // durations so .min() alone can't distinguish; .max() catches the
    // inverted value since buffer ≥ 30min is nonsensical for a multi-hour JWT.
    token_refresh_buffer_ms: z
      .number()
      .int()
      .min(30_000)
      .max(1_800_000)
      .default(300_000),
    // Cap 2000 keeps this under gracefulShutdown's 2s cleanup race — a higher
    // timeout just lies to axios since forceExit kills the socket regardless.
    teardown_archive_timeout_ms: z
      .number()
      .int()
      .min(500)
      .max(2000)
      .default(1500),
    // Observed p99 connect is ~2-3s; 15s is ~5× headroom. Floor 5s bounds
    // false-positive rate under transient slowness; cap 60s bounds how long
    // a truly-stalled session stays dark.
    connect_timeout_ms: z.number().int().min(5_000).max(60_000).default(15_000),
    // Parseability check: the lt() result is deliberately ignored — the
    // refine passes iff lt() does not throw (presumably lt throws on
    // malformed semver input; TODO confirm against utils/semver.js).
    min_version: z
      .string()
      .refine(v => {
        try {
          lt(v, '0.0.0')
          return true
        } catch {
          return false
        }
      })
      .default('0.0.0'),
    should_show_app_upgrade_message: z.boolean().default(false),
  }),
)
+
+/**
+ * Fetch the env-less bridge timing config from GrowthBook. Read once per
+ * initEnvLessBridgeCore call — config is fixed for the lifetime of a bridge
+ * session.
+ *
+ * Uses the blocking getter (not _CACHED_MAY_BE_STALE) because /remote-control
+ * runs well after GrowthBook init — initializeGrowthBook() resolves instantly,
+ * so there's no startup penalty, and we get the fresh in-memory remoteEval
+ * value instead of the stale-on-first-read disk cache. The _DEPRECATED suffix
+ * warns against startup-path usage, which this isn't.
+ */
+export async function getEnvLessBridgeConfig(): Promise<EnvLessBridgeConfig> {
+  const raw = await getFeatureValue_DEPRECATED<unknown>(
+    'tengu_bridge_repl_v2_config',
+    DEFAULT_ENV_LESS_BRIDGE_CONFIG,
+  )
+  const parsed = envLessBridgeConfigSchema().safeParse(raw)
+  return parsed.success ? parsed.data : DEFAULT_ENV_LESS_BRIDGE_CONFIG
+}
+
+/**
+ * Returns an error message if the current CLI version is below the minimum
+ * required for the env-less (v2) bridge path, or null if the version is fine.
+ *
+ * v2 analogue of checkBridgeMinVersion() — reads from tengu_bridge_repl_v2_config
+ * instead of tengu_bridge_min_version so the two implementations can enforce
+ * independent floors.
+ */
+export async function checkEnvLessBridgeMinVersion(): Promise<string | null> {
+  const cfg = await getEnvLessBridgeConfig()
+  if (cfg.min_version && lt(MACRO.VERSION, cfg.min_version)) {
+    return `Your version of Claude Code (${MACRO.VERSION}) is too old for Remote Control.\nVersion ${cfg.min_version} or higher is required. Run \`claude update\` to update.`
+  }
+  return null
+}
+
+/**
+ * Whether to nudge users toward upgrading their claude.ai app when a
+ * Remote Control session starts. True only when the v2 bridge is active
+ * AND the should_show_app_upgrade_message config bit is set — lets us
+ * roll the v2 bridge before the app ships the new session-list query.
+ */
+export async function shouldShowAppUpgradeMessage(): Promise<boolean> {
+  if (!isEnvLessBridgeEnabled()) return false
+  const cfg = await getEnvLessBridgeConfig()
+  return cfg.should_show_app_upgrade_message
+}

+ 71 - 0
src/bridge/flushGate.ts

@@ -0,0 +1,71 @@
+/**
+ * State machine for gating message writes during an initial flush.
+ *
+ * When a bridge session starts, historical messages are flushed to the
+ * server via a single HTTP POST. During that flush, new messages must
+ * be queued to prevent them from arriving at the server interleaved
+ * with the historical messages.
+ *
+ * Lifecycle:
+ *   start() → enqueue() returns true, items are queued
+ *   end()   → returns queued items for draining, enqueue() returns false
+ *   drop()  → discards queued items (permanent transport close)
+ *   deactivate() → clears active flag without dropping items
+ *                   (transport replacement — new transport will drain)
+ */
+export class FlushGate<T> {
+  private _active = false
+  private _pending: T[] = []
+
+  get active(): boolean {
+    return this._active
+  }
+
+  get pendingCount(): number {
+    return this._pending.length
+  }
+
+  /** Mark flush as in-progress. enqueue() will start queuing items. */
+  start(): void {
+    this._active = true
+  }
+
+  /**
+   * End the flush and return any queued items for draining.
+   * Caller is responsible for sending the returned items.
+   */
+  end(): T[] {
+    this._active = false
+    return this._pending.splice(0)
+  }
+
+  /**
+   * If flush is active, queue the items and return true.
+   * If flush is not active, return false (caller should send directly).
+   */
+  enqueue(...items: T[]): boolean {
+    if (!this._active) return false
+    this._pending.push(...items)
+    return true
+  }
+
+  /**
+   * Discard all queued items (permanent transport close).
+   * Returns the number of items dropped.
+   */
+  drop(): number {
+    this._active = false
+    const count = this._pending.length
+    this._pending.length = 0
+    return count
+  }
+
+  /**
+   * Clear the active flag without dropping queued items.
+   * Used when the transport is replaced (onWorkReceived) — the new
+   * transport's flush will drain the pending items.
+   */
+  deactivate(): void {
+    this._active = false
+  }
+}

+ 175 - 0
src/bridge/inboundAttachments.ts

@@ -0,0 +1,175 @@
+/**
+ * Resolve file_uuid attachments on inbound bridge user messages.
+ *
+ * Web composer uploads via cookie-authed /api/{org}/upload, sends file_uuid
+ * alongside the message. Here we fetch each via GET /api/oauth/files/{uuid}/content
+ * (oauth-authed, same store), write to ~/.claude/uploads/{sessionId}/, and
+ * return @path refs to prepend. Claude's Read tool takes it from there.
+ *
+ * Best-effort: any failure (no token, network, non-2xx, disk) logs debug and
+ * skips that attachment. The message still reaches Claude, just without @path.
+ */
+
+import type { ContentBlockParam } from '@anthropic-ai/sdk/resources/messages.mjs'
+import axios from 'axios'
+import { randomUUID } from 'crypto'
+import { mkdir, writeFile } from 'fs/promises'
+import { basename, join } from 'path'
+import { z } from 'zod/v4'
+import { getSessionId } from '../bootstrap/state.js'
+import { logForDebugging } from '../utils/debug.js'
+import { getClaudeConfigHomeDir } from '../utils/envUtils.js'
+import { lazySchema } from '../utils/lazySchema.js'
+import { getBridgeAccessToken, getBridgeBaseUrl } from './bridgeConfig.js'
+
// Axios timeout for each attachment content download.
const DOWNLOAD_TIMEOUT_MS = 30_000

// Debug-log with a module-specific prefix so failures are greppable.
function debug(msg: string): void {
  logForDebugging(`[bridge:inbound-attach] ${msg}`)
}
+
// Shape of one entry in an inbound message's file_attachments array.
// Wrapped in lazySchema — presumably defers zod object construction until
// first use; TODO confirm against utils/lazySchema.js.
const attachmentSchema = lazySchema(() =>
  z.object({
    file_uuid: z.string(),
    file_name: z.string(),
  }),
)
const attachmentsArraySchema = lazySchema(() => z.array(attachmentSchema()))

// One parsed attachment: { file_uuid, file_name }.
export type InboundAttachment = z.infer<ReturnType<typeof attachmentSchema>>
+
+/** Pull file_attachments off a loosely-typed inbound message. */
+export function extractInboundAttachments(msg: unknown): InboundAttachment[] {
+  if (typeof msg !== 'object' || msg === null || !('file_attachments' in msg)) {
+    return []
+  }
+  const parsed = attachmentsArraySchema().safeParse(msg.file_attachments)
+  return parsed.success ? parsed.data : []
+}
+
+/**
+ * Strip path components and keep only filename-safe chars. file_name comes
+ * from the network (web composer), so treat it as untrusted even though the
+ * composer controls it.
+ */
+function sanitizeFileName(name: string): string {
+  const base = basename(name).replace(/[^a-zA-Z0-9._-]/g, '_')
+  return base || 'attachment'
+}
+
// Per-session download directory: <claude config home>/uploads/<sessionId>.
function uploadsDir(): string {
  return join(getClaudeConfigHomeDir(), 'uploads', getSessionId())
}
+
/**
 * Fetch + write one attachment. Returns the absolute path on success,
 * undefined on any failure.
 *
 * Failure modes (all logged via debug(), none thrown): missing oauth token,
 * bad base URL, non-200 response, network error/timeout, disk write error.
 */
async function resolveOne(att: InboundAttachment): Promise<string | undefined> {
  const token = getBridgeAccessToken()
  if (!token) {
    debug('skip: no oauth token')
    return undefined
  }

  let data: Buffer
  try {
    // getOauthConfig() (via getBridgeBaseUrl) throws on a non-allowlisted
    // CLAUDE_CODE_CUSTOM_OAUTH_URL — keep it inside the try so a bad
    // FedStart URL degrades to "no @path" instead of crashing print.ts's
    // reader loop (which has no catch around the await).
    const url = `${getBridgeBaseUrl()}/api/oauth/files/${encodeURIComponent(att.file_uuid)}/content`
    const response = await axios.get(url, {
      headers: { Authorization: `Bearer ${token}` },
      responseType: 'arraybuffer',
      timeout: DOWNLOAD_TIMEOUT_MS,
      // Never throw on HTTP status — non-200 is handled explicitly below
      // so it gets the status-specific debug line instead of the catch.
      validateStatus: () => true,
    })
    if (response.status !== 200) {
      debug(`fetch ${att.file_uuid} failed: status=${response.status}`)
      return undefined
    }
    data = Buffer.from(response.data)
  } catch (e) {
    debug(`fetch ${att.file_uuid} threw: ${e}`)
    return undefined
  }

  // uuid-prefix makes collisions impossible across messages and within one
  // (same filename, different files). 8 chars is enough — this isn't security.
  const safeName = sanitizeFileName(att.file_name)
  const prefix = (
    att.file_uuid.slice(0, 8) || randomUUID().slice(0, 8)
  ).replace(/[^a-zA-Z0-9_-]/g, '_')
  const dir = uploadsDir()
  const outPath = join(dir, `${prefix}-${safeName}`)

  try {
    // recursive:true makes mkdir idempotent across attachments/messages.
    await mkdir(dir, { recursive: true })
    await writeFile(outPath, data)
  } catch (e) {
    debug(`write ${outPath} failed: ${e}`)
    return undefined
  }

  debug(`resolved ${att.file_uuid} → ${outPath} (${data.length} bytes)`)
  return outPath
}
+
+/**
+ * Resolve all attachments on an inbound message to a prefix string of
+ * @path refs. Empty string if none resolved.
+ */
+export async function resolveInboundAttachments(
+  attachments: InboundAttachment[],
+): Promise<string> {
+  if (attachments.length === 0) return ''
+  debug(`resolving ${attachments.length} attachment(s)`)
+  const paths = await Promise.all(attachments.map(resolveOne))
+  const ok = paths.filter((p): p is string => p !== undefined)
+  if (ok.length === 0) return ''
+  // Quoted form — extractAtMentionedFiles truncates unquoted @refs at the
+  // first space, which breaks any home dir with spaces (/Users/John Smith/).
+  return ok.map(p => `@"${p}"`).join(' ') + ' '
+}
+
+/**
+ * Prepend @path refs to content, whichever form it's in.
+ * Targets the LAST text block — processUserInputBase reads inputString
+ * from processedBlocks[processedBlocks.length - 1], so putting refs in
+ * block[0] means they're silently ignored for [text, image] content.
+ */
+export function prependPathRefs(
+  content: string | Array<ContentBlockParam>,
+  prefix: string,
+): string | Array<ContentBlockParam> {
+  if (!prefix) return content
+  if (typeof content === 'string') return prefix + content
+  const i = content.findLastIndex(b => b.type === 'text')
+  if (i !== -1) {
+    const b = content[i]!
+    if (b.type === 'text') {
+      return [
+        ...content.slice(0, i),
+        { ...b, text: prefix + b.text },
+        ...content.slice(i + 1),
+      ]
+    }
+  }
+  // No text block — append one at the end so it's last.
+  return [...content, { type: 'text', text: prefix.trimEnd() }]
+}
+
+/**
+ * Convenience: extract + resolve + prepend. No-op when the message has no
+ * file_attachments field (fast path — no network, returns same reference).
+ */
+export async function resolveAndPrepend(
+  msg: unknown,
+  content: string | Array<ContentBlockParam>,
+): Promise<string | Array<ContentBlockParam>> {
+  const attachments = extractInboundAttachments(msg)
+  if (attachments.length === 0) return content
+  const prefix = await resolveInboundAttachments(attachments)
+  return prependPathRefs(content, prefix)
+}

+ 80 - 0
src/bridge/inboundMessages.ts

@@ -0,0 +1,80 @@
+import type {
+  Base64ImageSource,
+  ContentBlockParam,
+  ImageBlockParam,
+} from '@anthropic-ai/sdk/resources/messages.mjs'
+import type { UUID } from 'crypto'
+import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
+import { detectImageFormatFromBase64 } from '../utils/imageResizer.js'
+
+/**
+ * Process an inbound user message from the bridge, extracting content
+ * and UUID for enqueueing. Supports both string content and
+ * ContentBlockParam[] (e.g. messages containing images).
+ *
+ * Normalizes image blocks from bridge clients that may use camelCase
+ * `mediaType` instead of snake_case `media_type` (mobile-apps#5825).
+ *
+ * Returns the extracted fields, or undefined if the message should be
+ * skipped (non-user type, missing/empty content).
+ */
+export function extractInboundMessageFields(
+  msg: SDKMessage,
+):
+  | { content: string | Array<ContentBlockParam>; uuid: UUID | undefined }
+  | undefined {
+  if (msg.type !== 'user') return undefined
+  const content = msg.message?.content
+  if (!content) return undefined
+  if (Array.isArray(content) && content.length === 0) return undefined
+
+  const uuid =
+    'uuid' in msg && typeof msg.uuid === 'string'
+      ? (msg.uuid as UUID)
+      : undefined
+
+  return {
+    content: Array.isArray(content) ? normalizeImageBlocks(content) : content,
+    uuid,
+  }
+}
+
+/**
+ * Normalize image content blocks from bridge clients. iOS/web clients may
+ * send `mediaType` (camelCase) instead of `media_type` (snake_case), or
+ * omit the field entirely. Without normalization, the bad block poisons
+ * the session — every subsequent API call fails with
+ * "media_type: Field required".
+ *
+ * Fast-path scan returns the original array reference when no
+ * normalization is needed (zero allocation on the happy path).
+ */
+export function normalizeImageBlocks(
+  blocks: Array<ContentBlockParam>,
+): Array<ContentBlockParam> {
+  if (!blocks.some(isMalformedBase64Image)) return blocks
+
+  return blocks.map(block => {
+    if (!isMalformedBase64Image(block)) return block
+    const src = block.source as unknown as Record<string, unknown>
+    const mediaType =
+      typeof src.mediaType === 'string' && src.mediaType
+        ? src.mediaType
+        : detectImageFormatFromBase64(block.source.data)
+    return {
+      ...block,
+      source: {
+        type: 'base64' as const,
+        media_type: mediaType as Base64ImageSource['media_type'],
+        data: block.source.data,
+      },
+    }
+  })
+}
+
+function isMalformedBase64Image(
+  block: ContentBlockParam,
+): block is ImageBlockParam & { source: Base64ImageSource } {
+  if (block.type !== 'image' || block.source?.type !== 'base64') return false
+  return !(block.source as unknown as Record<string, unknown>).media_type
+}

+ 569 - 0
src/bridge/initReplBridge.ts

@@ -0,0 +1,569 @@
+/**
+ * REPL-specific wrapper around initBridgeCore. Owns the parts that read
+ * bootstrap state — gates, cwd, session ID, git context, OAuth, title
+ * derivation — then delegates to the bootstrap-free core.
+ *
+ * Split out of replBridge.ts because the sessionStorage import
+ * (getCurrentSessionTitle) transitively pulls in src/commands.ts → the
+ * entire slash command + React component tree (~1300 modules). Keeping
+ * initBridgeCore in a file that doesn't touch sessionStorage lets
+ * daemonBridge.ts import the core without bloating the Agent SDK bundle.
+ *
+ * Called via dynamic import by useReplBridge (auto-start) and print.ts
+ * (SDK -p mode via query.enableRemoteControl).
+ */
+
+import { feature } from 'bun:bundle'
+import { hostname } from 'os'
+import { getOriginalCwd, getSessionId } from '../bootstrap/state.js'
+import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
+import type { SDKControlResponse } from '../entrypoints/sdk/controlTypes.js'
+import { getFeatureValue_CACHED_WITH_REFRESH } from '../services/analytics/growthbook.js'
+import { getOrganizationUUID } from '../services/oauth/client.js'
+import {
+  isPolicyAllowed,
+  waitForPolicyLimitsToLoad,
+} from '../services/policyLimits/index.js'
+import type { Message } from '../types/message.js'
+import {
+  checkAndRefreshOAuthTokenIfNeeded,
+  getClaudeAIOAuthTokens,
+  handleOAuth401Error,
+} from '../utils/auth.js'
+import { getGlobalConfig, saveGlobalConfig } from '../utils/config.js'
+import { logForDebugging } from '../utils/debug.js'
+import { stripDisplayTagsAllowEmpty } from '../utils/displayTags.js'
+import { errorMessage } from '../utils/errors.js'
+import { getBranch, getRemoteUrl } from '../utils/git.js'
+import { toSDKMessages } from '../utils/messages/mappers.js'
+import {
+  getContentText,
+  getMessagesAfterCompactBoundary,
+  isSyntheticMessage,
+} from '../utils/messages.js'
+import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
+import { getCurrentSessionTitle } from '../utils/sessionStorage.js'
+import {
+  extractConversationText,
+  generateSessionTitle,
+} from '../utils/sessionTitle.js'
+import { generateShortWordSlug } from '../utils/words.js'
+import {
+  getBridgeAccessToken,
+  getBridgeBaseUrl,
+  getBridgeTokenOverride,
+} from './bridgeConfig.js'
+import {
+  checkBridgeMinVersion,
+  isBridgeEnabledBlocking,
+  isCseShimEnabled,
+  isEnvLessBridgeEnabled,
+} from './bridgeEnabled.js'
+import {
+  archiveBridgeSession,
+  createBridgeSession,
+  updateBridgeSessionTitle,
+} from './createSession.js'
+import { logBridgeSkip } from './debugUtils.js'
+import { checkEnvLessBridgeMinVersion } from './envLessBridgeConfig.js'
+import { getPollIntervalConfig } from './pollConfig.js'
+import type { BridgeState, ReplBridgeHandle } from './replBridge.js'
+import { initBridgeCore } from './replBridge.js'
+import { setCseShimGate } from './sessionIdCompat.js'
+import type { BridgeWorkerType } from './types.js'
+
/**
 * Options for initReplBridge. Every field is optional; callbacks wire the
 * bridge's inbound events into the hosting REPL / SDK session.
 */
export type InitBridgeOptions = {
  onInboundMessage?: (msg: SDKMessage) => void | Promise<void>
  onPermissionResponse?: (response: SDKControlResponse) => void
  onInterrupt?: () => void
  onSetModel?: (model: string | undefined) => void
  onSetMaxThinkingTokens?: (maxTokens: number | null) => void
  onSetPermissionMode?: (
    mode: PermissionMode,
  ) => { ok: true } | { ok: false; error: string }
  onStateChange?: (state: BridgeState, detail?: string) => void
  initialMessages?: Message[]
  // Explicit session name from `/remote-control <name>`. When set, overrides
  // the title derived from the conversation or /rename.
  initialName?: string
  // Fresh view of the full conversation at call time. Used by onUserMessage's
  // count-3 derivation to call generateSessionTitle over the full conversation.
  // Optional — print.ts's SDK enableRemoteControl path has no REPL message
  // array; count-3 falls back to the single message text when absent.
  getMessages?: () => Message[]
  // UUIDs already flushed in a prior bridge session. Messages with these
  // UUIDs are excluded from the initial flush to avoid poisoning the
  // server (duplicate UUIDs across sessions cause the WS to be killed).
  // Mutated in place — newly flushed UUIDs are added after each flush.
  previouslyFlushedUUIDs?: Set<string>
  /** See BridgeCoreParams.perpetual. */
  perpetual?: boolean
  /**
   * When true, the bridge only forwards events outbound (no SSE inbound
   * stream). Used by CCR mirror mode — local sessions visible on claude.ai
   * without enabling inbound control.
   */
  outboundOnly?: boolean
  tags?: string[]
}
+
+export async function initReplBridge(
+  options?: InitBridgeOptions,
+): Promise<ReplBridgeHandle | null> {
+  const {
+    onInboundMessage,
+    onPermissionResponse,
+    onInterrupt,
+    onSetModel,
+    onSetMaxThinkingTokens,
+    onSetPermissionMode,
+    onStateChange,
+    initialMessages,
+    getMessages,
+    previouslyFlushedUUIDs,
+    initialName,
+    perpetual,
+    outboundOnly,
+    tags,
+  } = options ?? {}
+
+  // Wire the cse_ shim kill switch so toCompatSessionId respects the
+  // GrowthBook gate. Daemon/SDK paths skip this — shim defaults to active.
+  setCseShimGate(isCseShimEnabled)
+
+  // 1. Runtime gate
+  if (!(await isBridgeEnabledBlocking())) {
+    logBridgeSkip('not_enabled', '[bridge:repl] Skipping: bridge not enabled')
+    return null
+  }
+
+  // 1b. Minimum version check — deferred to after the v1/v2 branch below,
+  // since each implementation has its own floor (tengu_bridge_min_version
+  // for v1, tengu_bridge_repl_v2_config.min_version for v2).
+
+  // 2. Check OAuth — must be signed in with claude.ai. Runs before the
+  // policy check so console-auth users get the actionable "/login" hint
+  // instead of a misleading policy error from a stale/wrong-org cache.
+  if (!getBridgeAccessToken()) {
+    logBridgeSkip('no_oauth', '[bridge:repl] Skipping: no OAuth tokens')
+    onStateChange?.('failed', '/login')
+    return null
+  }
+
+  // 3. Check organization policy — remote control may be disabled
+  await waitForPolicyLimitsToLoad()
+  if (!isPolicyAllowed('allow_remote_control')) {
+    logBridgeSkip(
+      'policy_denied',
+      '[bridge:repl] Skipping: allow_remote_control policy not allowed',
+    )
+    onStateChange?.('failed', "disabled by your organization's policy")
+    return null
+  }
+
+  // When CLAUDE_BRIDGE_OAUTH_TOKEN is set (ant-only local dev), the bridge
+  // uses that token directly via getBridgeAccessToken() — keychain state is
+  // irrelevant. Skip 2b/2c to preserve that decoupling: an expired keychain
+  // token shouldn't block a bridge connection that doesn't use it.
+  if (!getBridgeTokenOverride()) {
+    // 2a. Cross-process backoff. If N prior processes already saw this exact
+    // dead token (matched by expiresAt), skip silently — no event, no refresh
+    // attempt. The count threshold tolerates transient refresh failures (auth
+    // server 5xx, lockfile errors per auth.ts:1437/1444/1485): each process
+    // independently retries until 3 consecutive failures prove the token dead.
+    // Mirrors useReplBridge's MAX_CONSECUTIVE_INIT_FAILURES for in-process.
+    // The expiresAt key is content-addressed: /login → new token → new expiresAt
+    // → this stops matching without any explicit clear.
+    const cfg = getGlobalConfig()
+    if (
+      cfg.bridgeOauthDeadExpiresAt != null &&
+      (cfg.bridgeOauthDeadFailCount ?? 0) >= 3 &&
+      getClaudeAIOAuthTokens()?.expiresAt === cfg.bridgeOauthDeadExpiresAt
+    ) {
+      logForDebugging(
+        `[bridge:repl] Skipping: cross-process backoff (dead token seen ${cfg.bridgeOauthDeadFailCount} times)`,
+      )
+      return null
+    }
+
+    // 2b. Proactively refresh if expired. Mirrors bridgeMain.ts:2096 — the REPL
+    // bridge fires at useEffect mount BEFORE any v1/messages call, making this
+    // usually the first OAuth request of the session. Without this, ~9% of
+    // registrations hit the server with a >8h-expired token → 401 → withOAuthRetry
+    // recovers, but the server logs a 401 we can avoid. VPN egress IPs observed
+    // at 30:1 401:200 when many unrelated users cluster at the 8h TTL boundary.
+    //
+    // Fresh-token cost: one memoized read + one Date.now() comparison (~µs).
+    // checkAndRefreshOAuthTokenIfNeeded clears its own cache in every path that
+    // touches the keychain (refresh success, lockfile race, throw), so no
+    // explicit clearOAuthTokenCache() here — that would force a blocking
+    // keychain spawn on the 91%+ fresh-token path.
+    await checkAndRefreshOAuthTokenIfNeeded()
+
+    // 2c. Skip if token is still expired post-refresh-attempt. Env-var / FD
+    // tokens (auth.ts:894-917) have expiresAt=null → never trip this. But a
+    // keychain token whose refresh token is dead (password change, org left,
+    // token GC'd) has expiresAt<now AND refresh just failed — the client would
+    // otherwise loop 401 forever: withOAuthRetry → handleOAuth401Error →
+    // refresh fails again → retry with same stale token → 401 again.
+    // Datadog 2026-03-08: single IPs generating 2,879 such 401s/day. Skip the
+    // guaranteed-fail API call; useReplBridge surfaces the failure.
+    //
+    // Intentionally NOT using isOAuthTokenExpired here — that has a 5-minute
+    // proactive-refresh buffer, which is the right heuristic for "should
+    // refresh soon" but wrong for "provably unusable". A token with 3min left
+    // + transient refresh endpoint blip (5xx/timeout/wifi-reconnect) would
+    // falsely trip a buffered check; the still-valid token would connect fine.
+    // Check actual expiry instead: past-expiry AND refresh-failed → truly dead.
+    const tokens = getClaudeAIOAuthTokens()
+    if (tokens && tokens.expiresAt !== null && tokens.expiresAt <= Date.now()) {
+      logBridgeSkip(
+        'oauth_expired_unrefreshable',
+        '[bridge:repl] Skipping: OAuth token expired and refresh failed (re-login required)',
+      )
+      onStateChange?.('failed', '/login')
+      // Persist for the next process. Increments failCount when re-discovering
+      // the same dead token (matched by expiresAt); resets to 1 for a different
+      // token. Once count reaches 3, step 2a's early-return fires and this path
+      // is never reached again — writes are capped at 3 per dead token.
+      // Local const captures the narrowed type (closure loses !==null narrowing).
+      const deadExpiresAt = tokens.expiresAt
+      saveGlobalConfig(c => ({
+        ...c,
+        bridgeOauthDeadExpiresAt: deadExpiresAt,
+        bridgeOauthDeadFailCount:
+          c.bridgeOauthDeadExpiresAt === deadExpiresAt
+            ? (c.bridgeOauthDeadFailCount ?? 0) + 1
+            : 1,
+      }))
+      return null
+    }
+  }
+
+  // 4. Compute baseUrl — needed by both v1 (env-based) and v2 (env-less)
+  // paths. Hoisted above the v2 gate so both can use it.
+  const baseUrl = getBridgeBaseUrl()
+
+  // 5. Derive session title. Precedence: explicit initialName → /rename
+  // (session storage) → last meaningful user message → generated slug.
+  // Cosmetic only (claude.ai session list); the model never sees it.
+  // Two flags: `hasExplicitTitle` (initialName or /rename — never auto-
+  // overwrite) vs. `hasTitle` (any title, including auto-derived — blocks
+  // the count-1 re-derivation but not count-3). The onUserMessage callback
+  // (wired to both v1 and v2 below) derives from the 1st prompt and again
+  // from the 3rd so mobile/web show a title that reflects more context.
+  // The slug fallback (e.g. "remote-control-graceful-unicorn") makes
+  // auto-started sessions distinguishable in the claude.ai list before the
+  // first prompt.
+  let title = `remote-control-${generateShortWordSlug()}`
+  let hasTitle = false
+  let hasExplicitTitle = false
+  if (initialName) {
+    title = initialName
+    hasTitle = true
+    hasExplicitTitle = true
+  } else {
+    const sessionId = getSessionId()
+    const customTitle = sessionId
+      ? getCurrentSessionTitle(sessionId)
+      : undefined
+    if (customTitle) {
+      title = customTitle
+      hasTitle = true
+      hasExplicitTitle = true
+    } else if (initialMessages && initialMessages.length > 0) {
+      // Find the last user message that has meaningful content. Skip meta
+      // (nudges), tool results, compact summaries ("This session is being
+      // continued…"), non-human origins (task notifications, channel pushes),
+      // and synthetic interrupts ([Request interrupted by user]) — none are
+      // human-authored. Same filter as extractTitleText + isSyntheticMessage.
+      for (let i = initialMessages.length - 1; i >= 0; i--) {
+        const msg = initialMessages[i]!
+        if (
+          msg.type !== 'user' ||
+          msg.isMeta ||
+          msg.toolUseResult ||
+          msg.isCompactSummary ||
+          (msg.origin && msg.origin.kind !== 'human') ||
+          isSyntheticMessage(msg)
+        )
+          continue
+        const rawContent = getContentText(msg.message.content)
+        if (!rawContent) continue
+        const derived = deriveTitle(rawContent)
+        if (!derived) continue
+        title = derived
+        hasTitle = true
+        break
+      }
+    }
+  }
+
+  // Shared by both v1 and v2 — fires on every title-worthy user message until
+  // it returns true. At count 1: deriveTitle placeholder immediately, then
+  // generateSessionTitle (Haiku, sentence-case) fire-and-forget upgrade. At
+  // count 3: re-generate over the full conversation. Skips entirely if the
+  // title is explicit (/remote-control <name> or /rename) — re-checks
+  // sessionStorage at call time so /rename between messages isn't clobbered.
+  // Skips count 1 if initialMessages already derived (that title is fresh);
+  // still refreshes at count 3. v2 passes cse_*; updateBridgeSessionTitle
+  // retags internally.
+  let userMessageCount = 0
+  let lastBridgeSessionId: string | undefined
+  let genSeq = 0
+  const patch = (
+    derived: string,
+    bridgeSessionId: string,
+    atCount: number,
+  ): void => {
+    hasTitle = true
+    title = derived
+    logForDebugging(
+      `[bridge:repl] derived title from message ${atCount}: ${derived}`,
+    )
+    void updateBridgeSessionTitle(bridgeSessionId, derived, {
+      baseUrl,
+      getAccessToken: getBridgeAccessToken,
+    }).catch(() => {})
+  }
+  // Fire-and-forget Haiku generation with post-await guards. Re-checks /rename
+  // (sessionStorage), v1 env-lost (lastBridgeSessionId), and same-session
+  // out-of-order resolution (genSeq — count-1's Haiku resolving after count-3
+  // would clobber the richer title). generateSessionTitle never rejects.
+  const generateAndPatch = (input: string, bridgeSessionId: string): void => {
+    const gen = ++genSeq
+    const atCount = userMessageCount
+    void generateSessionTitle(input, AbortSignal.timeout(15_000)).then(
+      generated => {
+        if (
+          generated &&
+          gen === genSeq &&
+          lastBridgeSessionId === bridgeSessionId &&
+          !getCurrentSessionTitle(getSessionId())
+        ) {
+          patch(generated, bridgeSessionId, atCount)
+        }
+      },
+    )
+  }
+  const onUserMessage = (text: string, bridgeSessionId: string): boolean => {
+    if (hasExplicitTitle || getCurrentSessionTitle(getSessionId())) {
+      return true
+    }
+    // v1 env-lost re-creates the session with a new ID. Reset the count so
+    // the new session gets its own count-3 derivation; hasTitle stays true
+    // (new session was created via getCurrentTitle(), which reads the count-1
+    // title from this closure), so count-1 of the fresh cycle correctly skips.
+    if (
+      lastBridgeSessionId !== undefined &&
+      lastBridgeSessionId !== bridgeSessionId
+    ) {
+      userMessageCount = 0
+    }
+    lastBridgeSessionId = bridgeSessionId
+    userMessageCount++
+    if (userMessageCount === 1 && !hasTitle) {
+      const placeholder = deriveTitle(text)
+      if (placeholder) patch(placeholder, bridgeSessionId, userMessageCount)
+      generateAndPatch(text, bridgeSessionId)
+    } else if (userMessageCount === 3) {
+      const msgs = getMessages?.()
+      const input = msgs
+        ? extractConversationText(getMessagesAfterCompactBoundary(msgs))
+        : text
+      generateAndPatch(input, bridgeSessionId)
+    }
+    // Also re-latches if v1 env-lost resets the transport's done flag past 3.
+    return userMessageCount >= 3
+  }
+
+  const initialHistoryCap = getFeatureValue_CACHED_WITH_REFRESH(
+    'tengu_bridge_initial_history_cap',
+    200,
+    5 * 60 * 1000,
+  )
+
+  // Fetch orgUUID before the v1/v2 branch — both paths need it. v1 for
+  // environment registration; v2 for archive (which lives at the compat
+  // /v1/sessions/{id}/archive, not /v1/code/sessions). Without it, v2
+  // archive 404s and sessions stay alive in CCR after /exit.
+  const orgUUID = await getOrganizationUUID()
+  if (!orgUUID) {
+    logBridgeSkip('no_org_uuid', '[bridge:repl] Skipping: no org UUID')
+    onStateChange?.('failed', '/login')
+    return null
+  }
+
+  // ── GrowthBook gate: env-less bridge ──────────────────────────────────
+  // When enabled, skips the Environments API layer entirely (no register/
+  // poll/ack/heartbeat) and connects directly via POST /bridge → worker_jwt.
+  // See server PR #292605 (renamed in #293280). REPL-only — daemon/print stay
+  // on env-based.
+  //
+  // NAMING: "env-less" is distinct from "CCR v2" (the /worker/* transport).
+  // The env-based path below can ALSO use CCR v2 via CLAUDE_CODE_USE_CCR_V2.
+  // tengu_bridge_repl_v2 gates env-less (no poll loop), not transport version.
+  //
+  // perpetual (assistant-mode session continuity via bridge-pointer.json) is
+  // env-coupled and not yet implemented here — fall back to env-based when set
+  // so KAIROS users don't silently lose cross-restart continuity.
+  if (isEnvLessBridgeEnabled() && !perpetual) {
+    const versionError = await checkEnvLessBridgeMinVersion()
+    if (versionError) {
+      logBridgeSkip(
+        'version_too_old',
+        `[bridge:repl] Skipping: ${versionError}`,
+        true,
+      )
+      onStateChange?.('failed', 'run `claude update` to upgrade')
+      return null
+    }
+    logForDebugging(
+      '[bridge:repl] Using env-less bridge path (tengu_bridge_repl_v2)',
+    )
+    const { initEnvLessBridgeCore } = await import('./remoteBridgeCore.js')
+    return initEnvLessBridgeCore({
+      baseUrl,
+      orgUUID,
+      title,
+      getAccessToken: getBridgeAccessToken,
+      onAuth401: handleOAuth401Error,
+      toSDKMessages,
+      initialHistoryCap,
+      initialMessages,
+      // v2 always creates a fresh server session (new cse_* id), so
+      // previouslyFlushedUUIDs is not passed — there's no cross-session
+      // UUID collision risk, and the ref persists across enable→disable→
+      // re-enable cycles which would cause the new session to receive zero
+      // history (all UUIDs already in the set from the prior enable).
+      // v1 handles this by calling previouslyFlushedUUIDs.clear() on fresh
+      // session creation (replBridge.ts:768); v2 skips the param entirely.
+      onInboundMessage,
+      onUserMessage,
+      onPermissionResponse,
+      onInterrupt,
+      onSetModel,
+      onSetMaxThinkingTokens,
+      onSetPermissionMode,
+      onStateChange,
+      outboundOnly,
+      tags,
+    })
+  }
+
+  // ── v1 path: env-based (register/poll/ack/heartbeat) ──────────────────
+
+  const versionError = checkBridgeMinVersion()
+  if (versionError) {
+    logBridgeSkip('version_too_old', `[bridge:repl] Skipping: ${versionError}`)
+    onStateChange?.('failed', 'run `claude update` to upgrade')
+    return null
+  }
+
+  // Gather git context — this is the bootstrap-read boundary.
+  // Everything from here down is passed explicitly to bridgeCore.
+  const branch = await getBranch()
+  const gitRepoUrl = await getRemoteUrl()
+  const sessionIngressUrl =
+    process.env.USER_TYPE === 'ant' &&
+    process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
+      ? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
+      : baseUrl
+
+  // Assistant-mode sessions advertise a distinct worker_type so the web UI
+  // can filter them into a dedicated picker. KAIROS guard keeps the
+  // assistant module out of external builds entirely.
+  let workerType: BridgeWorkerType = 'claude_code'
+  if (feature('KAIROS')) {
+    /* eslint-disable @typescript-eslint/no-require-imports */
+    const { isAssistantMode } =
+      require('../assistant/index.js') as typeof import('../assistant/index.js')
+    /* eslint-enable @typescript-eslint/no-require-imports */
+    if (isAssistantMode()) {
+      workerType = 'claude_code_assistant'
+    }
+  }
+
+  // 6. Delegate. BridgeCoreHandle is a structural superset of
+  // ReplBridgeHandle (adds writeSdkMessages which REPL callers don't use),
+  // so no adapter needed — just the narrower type on the way out.
+  return initBridgeCore({
+    dir: getOriginalCwd(),
+    machineName: hostname(),
+    branch,
+    gitRepoUrl,
+    title,
+    baseUrl,
+    sessionIngressUrl,
+    workerType,
+    getAccessToken: getBridgeAccessToken,
+    createSession: opts =>
+      createBridgeSession({
+        ...opts,
+        events: [],
+        baseUrl,
+        getAccessToken: getBridgeAccessToken,
+      }),
+    archiveSession: sessionId =>
+      archiveBridgeSession(sessionId, {
+        baseUrl,
+        getAccessToken: getBridgeAccessToken,
+        // gracefulShutdown.ts:407 races runCleanupFunctions against 2s.
+        // Teardown also does stopWork (parallel) + deregister (sequential),
+        // so archive can't have the full budget. 1.5s matches v2's
+        // teardown_archive_timeout_ms default.
+        timeoutMs: 1500,
+      }).catch((err: unknown) => {
+        // archiveBridgeSession has no try/catch — 5xx/timeout/network throw
+        // straight through. Previously swallowed silently, making archive
+        // failures BQ-invisible and undiagnosable from debug logs.
+        logForDebugging(
+          `[bridge:repl] archiveBridgeSession threw: ${errorMessage(err)}`,
+          { level: 'error' },
+        )
+      }),
+    // getCurrentTitle is read on reconnect-after-env-lost to re-title the new
+    // session. /rename writes to session storage; onUserMessage mutates
+    // `title` directly — both paths are picked up here.
+    getCurrentTitle: () => getCurrentSessionTitle(getSessionId()) ?? title,
+    onUserMessage,
+    toSDKMessages,
+    onAuth401: handleOAuth401Error,
+    getPollIntervalConfig,
+    initialHistoryCap,
+    initialMessages,
+    previouslyFlushedUUIDs,
+    onInboundMessage,
+    onPermissionResponse,
+    onInterrupt,
+    onSetModel,
+    onSetMaxThinkingTokens,
+    onSetPermissionMode,
+    onStateChange,
+    perpetual,
+  })
+}
+
+// Hard cap on derived titles — truncation below appends an ellipsis so the
+// result is at most exactly this long.
+const TITLE_MAX_LEN = 50
+
+/**
+ * Quick placeholder title: strip display tags, take the first sentence,
+ * collapse whitespace, truncate to 50 chars. Returns undefined if the result
+ * is empty (e.g. message was only <local-command-stdout>). Replaced by
+ * generateSessionTitle once Haiku resolves (~1-15s).
+ *
+ * @param raw Raw user-message text, possibly containing injected display tags.
+ * @returns Single-line title of at most 50 chars, or undefined if nothing
+ *   usable remains after tag stripping.
+ */
+function deriveTitle(raw: string): string | undefined {
+  // Strip <ide_opened_file>, <session-start-hook>, etc. — these appear in
+  // user messages when IDE/hooks inject context. stripDisplayTagsAllowEmpty
+  // returns '' (not the original) so pure-tag messages are skipped.
+  const clean = stripDisplayTagsAllowEmpty(raw)
+  // First sentence is usually the intent; rest is often context/detail.
+  // Capture group instead of lookbehind — keeps YARR JIT happy.
+  // (`.` doesn't cross newlines, so the match must complete on the first
+  // line; otherwise the whole cleaned text falls through and is flattened.)
+  const firstSentence = /^(.*?[.!?])\s/.exec(clean)?.[1] ?? clean
+  // Collapse newlines/tabs — titles are single-line in the claude.ai list.
+  const flat = firstSentence.replace(/\s+/g, ' ').trim()
+  if (!flat) return undefined
+  // 49 chars + U+2026 ellipsis keeps the result at exactly TITLE_MAX_LEN.
+  return flat.length > TITLE_MAX_LEN
+    ? flat.slice(0, TITLE_MAX_LEN - 1) + '\u2026'
+    : flat
+}

+ 256 - 0
src/bridge/jwtUtils.ts

@@ -0,0 +1,256 @@
+import { logEvent } from '../services/analytics/index.js'
+import { logForDebugging } from '../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
+import { errorMessage } from '../utils/errors.js'
+import { jsonParse } from '../utils/slowOperations.js'
+
+/** Format a millisecond duration as a human-readable string (e.g. "5m 30s"). */
+function formatDuration(ms: number): string {
+  if (ms < 60_000) return `${Math.round(ms / 1000)}s`
+  const m = Math.floor(ms / 60_000)
+  const s = Math.round((ms % 60_000) / 1000)
+  return s > 0 ? `${m}m ${s}s` : `${m}m`
+}
+
+/**
+ * Decode a JWT's payload segment without verifying the signature.
+ * Strips the `sk-ant-si-` session-ingress prefix if present.
+ * Returns the parsed JSON payload as `unknown`, or `null` if the
+ * token is malformed or the payload is not valid JSON.
+ */
+export function decodeJwtPayload(token: string): unknown | null {
+  const jwt = token.startsWith('sk-ant-si-')
+    ? token.slice('sk-ant-si-'.length)
+    : token
+  const parts = jwt.split('.')
+  if (parts.length !== 3 || !parts[1]) return null
+  try {
+    return jsonParse(Buffer.from(parts[1], 'base64url').toString('utf8'))
+  } catch {
+    return null
+  }
+}
+
+/**
+ * Decode the `exp` (expiry) claim from a JWT without verifying the signature.
+ * @returns The `exp` value in Unix seconds, or `null` if unparseable
+ */
+export function decodeJwtExpiry(token: string): number | null {
+  const payload = decodeJwtPayload(token)
+  if (
+    payload !== null &&
+    typeof payload === 'object' &&
+    'exp' in payload &&
+    typeof payload.exp === 'number'
+  ) {
+    return payload.exp
+  }
+  return null
+}
+
+/** Refresh buffer: request a new token this long before its expiry. */
+const TOKEN_REFRESH_BUFFER_MS = 5 * 60 * 1000
+
+/** Fallback refresh interval when the new token's expiry is unknown. */
+const FALLBACK_REFRESH_INTERVAL_MS = 30 * 60 * 1000 // 30 minutes
+
+/** Max consecutive failures before giving up on the refresh chain. */
+const MAX_REFRESH_FAILURES = 3
+
+/** Retry delay when getAccessToken returns undefined (see doRefresh). */
+const REFRESH_RETRY_DELAY_MS = 60_000
+
+/**
+ * Creates a token refresh scheduler that proactively refreshes session tokens
+ * before they expire. Used by both the standalone bridge and the REPL bridge.
+ *
+ * When a token is about to expire, the scheduler calls `onRefresh` with the
+ * session ID and the bridge's OAuth access token. The caller is responsible
+ * for delivering the token to the appropriate transport (child process stdin
+ * for standalone bridge, WebSocket reconnect for REPL bridge).
+ */
+export function createTokenRefreshScheduler({
+  getAccessToken,
+  onRefresh,
+  label,
+  refreshBufferMs = TOKEN_REFRESH_BUFFER_MS,
+}: {
+  getAccessToken: () => string | undefined | Promise<string | undefined>
+  onRefresh: (sessionId: string, oauthToken: string) => void
+  label: string
+  /** How long before expiry to fire refresh. Defaults to 5 min. */
+  refreshBufferMs?: number
+}): {
+  schedule: (sessionId: string, token: string) => void
+  scheduleFromExpiresIn: (sessionId: string, expiresInSeconds: number) => void
+  cancel: (sessionId: string) => void
+  cancelAll: () => void
+} {
+  // At most one tracked timer per session; schedule()/doRefresh() overwrite.
+  const timers = new Map<string, ReturnType<typeof setTimeout>>()
+  // Consecutive getAccessToken misses per session; reset on success.
+  const failureCounts = new Map<string, number>()
+  // Generation counter per session — incremented by schedule() and cancel()
+  // so that in-flight async doRefresh() calls can detect when they've been
+  // superseded and should skip setting follow-up timers.
+  const generations = new Map<string, number>()
+
+  /** Bump and return the session's generation, invalidating in-flight work. */
+  function nextGeneration(sessionId: string): number {
+    const gen = (generations.get(sessionId) ?? 0) + 1
+    generations.set(sessionId, gen)
+    return gen
+  }
+
+  /** Schedule a refresh derived from the JWT's own decoded `exp` claim. */
+  function schedule(sessionId: string, token: string): void {
+    const expiry = decodeJwtExpiry(token)
+    if (!expiry) {
+      // Token is not a decodable JWT (e.g. an OAuth token passed from the
+      // REPL bridge WebSocket open handler).  Preserve any existing timer
+      // (such as the follow-up refresh set by doRefresh) so the refresh
+      // chain is not broken.
+      logForDebugging(
+        `[${label}:token] Could not decode JWT expiry for sessionId=${sessionId}, token prefix=${token.slice(0, 15)}…, keeping existing timer`,
+      )
+      return
+    }
+
+    // Clear any existing refresh timer — we have a concrete expiry to replace it.
+    const existing = timers.get(sessionId)
+    if (existing) {
+      clearTimeout(existing)
+    }
+
+    // Bump generation to invalidate any in-flight async doRefresh.
+    const gen = nextGeneration(sessionId)
+
+    const expiryDate = new Date(expiry * 1000).toISOString()
+    const delayMs = expiry * 1000 - Date.now() - refreshBufferMs
+    if (delayMs <= 0) {
+      // Already inside (or past) the buffer window — don't arm a timer,
+      // refresh right now.
+      logForDebugging(
+        `[${label}:token] Token for sessionId=${sessionId} expires=${expiryDate} (past or within buffer), refreshing immediately`,
+      )
+      void doRefresh(sessionId, gen)
+      return
+    }
+
+    logForDebugging(
+      `[${label}:token] Scheduled token refresh for sessionId=${sessionId} in ${formatDuration(delayMs)} (expires=${expiryDate}, buffer=${refreshBufferMs / 1000}s)`,
+    )
+
+    // Extra setTimeout args are forwarded to doRefresh as its parameters.
+    const timer = setTimeout(doRefresh, delayMs, sessionId, gen)
+    timers.set(sessionId, timer)
+  }
+
+  /**
+   * Schedule refresh using an explicit TTL (seconds until expiry) rather
+   * than decoding a JWT's exp claim. Used by callers whose JWT is opaque
+   * (e.g. POST /v1/code/sessions/{id}/bridge returns expires_in directly).
+   */
+  function scheduleFromExpiresIn(
+    sessionId: string,
+    expiresInSeconds: number,
+  ): void {
+    const existing = timers.get(sessionId)
+    if (existing) clearTimeout(existing)
+    const gen = nextGeneration(sessionId)
+    // Clamp to 30s floor — if refreshBufferMs exceeds the server's expires_in
+    // (e.g. very large buffer for frequent-refresh testing, or server shortens
+    // expires_in unexpectedly), unclamped delayMs ≤ 0 would tight-loop.
+    const delayMs = Math.max(expiresInSeconds * 1000 - refreshBufferMs, 30_000)
+    logForDebugging(
+      `[${label}:token] Scheduled token refresh for sessionId=${sessionId} in ${formatDuration(delayMs)} (expires_in=${expiresInSeconds}s, buffer=${refreshBufferMs / 1000}s)`,
+    )
+    const timer = setTimeout(doRefresh, delayMs, sessionId, gen)
+    timers.set(sessionId, timer)
+  }
+
+  /**
+   * Fetch a fresh OAuth token and deliver it via onRefresh, then arm a
+   * follow-up timer. `gen` is the generation captured when this call was
+   * scheduled; if it no longer matches by the time the await resolves, the
+   * call was superseded and bails without side effects.
+   */
+  async function doRefresh(sessionId: string, gen: number): Promise<void> {
+    let oauthToken: string | undefined
+    try {
+      oauthToken = await getAccessToken()
+    } catch (err) {
+      // Fall through with oauthToken undefined — handled by the retry path.
+      logForDebugging(
+        `[${label}:token] getAccessToken threw for sessionId=${sessionId}: ${errorMessage(err)}`,
+        { level: 'error' },
+      )
+    }
+
+    // If the session was cancelled or rescheduled while we were awaiting,
+    // the generation will have changed — bail out to avoid orphaned timers.
+    if (generations.get(sessionId) !== gen) {
+      logForDebugging(
+        `[${label}:token] doRefresh for sessionId=${sessionId} stale (gen ${gen} vs ${generations.get(sessionId)}), skipping`,
+      )
+      return
+    }
+
+    if (!oauthToken) {
+      const failures = (failureCounts.get(sessionId) ?? 0) + 1
+      failureCounts.set(sessionId, failures)
+      logForDebugging(
+        `[${label}:token] No OAuth token available for refresh, sessionId=${sessionId} (failure ${failures}/${MAX_REFRESH_FAILURES})`,
+        { level: 'error' },
+      )
+      logForDiagnosticsNoPII('error', 'bridge_token_refresh_no_oauth')
+      // Schedule a retry so the refresh chain can recover if the token
+      // becomes available again (e.g. transient cache clear during refresh).
+      // Cap retries to avoid spamming on genuine failures.
+      if (failures < MAX_REFRESH_FAILURES) {
+        const retryTimer = setTimeout(
+          doRefresh,
+          REFRESH_RETRY_DELAY_MS,
+          sessionId,
+          gen,
+        )
+        timers.set(sessionId, retryTimer)
+      }
+      return
+    }
+
+    // Reset failure counter on successful token retrieval
+    failureCounts.delete(sessionId)
+
+    logForDebugging(
+      `[${label}:token] Refreshing token for sessionId=${sessionId}: new token prefix=${oauthToken.slice(0, 15)}…`,
+    )
+    logEvent('tengu_bridge_token_refreshed', {})
+    onRefresh(sessionId, oauthToken)
+
+    // Schedule a follow-up refresh so long-running sessions stay authenticated.
+    // Without this, the initial one-shot timer leaves the session vulnerable
+    // to token expiry if it runs past the first refresh window.
+    // NOTE(review): if onRefresh synchronously calls schedule() with the new
+    // token, the set() below clobbers that fresher timer's map entry without
+    // clearing its timeout — both timers still fire and the stale-gen one
+    // no-ops on the check above, so behavior stays correct, but cancel() can
+    // then only clear the map-tracked timer. Confirm no caller does this.
+    const timer = setTimeout(
+      doRefresh,
+      FALLBACK_REFRESH_INTERVAL_MS,
+      sessionId,
+      gen,
+    )
+    timers.set(sessionId, timer)
+    logForDebugging(
+      `[${label}:token] Scheduled follow-up refresh for sessionId=${sessionId} in ${formatDuration(FALLBACK_REFRESH_INTERVAL_MS)}`,
+    )
+  }
+
+  /** Cancel the session's pending refresh and invalidate in-flight work. */
+  function cancel(sessionId: string): void {
+    // Bump generation to invalidate any in-flight async doRefresh.
+    nextGeneration(sessionId)
+    const timer = timers.get(sessionId)
+    if (timer) {
+      clearTimeout(timer)
+      timers.delete(sessionId)
+    }
+    failureCounts.delete(sessionId)
+  }
+
+  /** Cancel every session's refresh; the scheduler stays usable afterwards. */
+  function cancelAll(): void {
+    // Bump all generations so in-flight doRefresh calls are invalidated.
+    for (const sessionId of generations.keys()) {
+      nextGeneration(sessionId)
+    }
+    for (const timer of timers.values()) {
+      clearTimeout(timer)
+    }
+    timers.clear()
+    failureCounts.clear()
+  }
+
+  return { schedule, scheduleFromExpiresIn, cancel, cancelAll }
+}

+ 110 - 0
src/bridge/pollConfig.ts

@@ -0,0 +1,110 @@
+import { z } from 'zod/v4'
+import { getFeatureValue_CACHED_WITH_REFRESH } from '../services/analytics/growthbook.js'
+import { lazySchema } from '../utils/lazySchema.js'
+import {
+  DEFAULT_POLL_CONFIG,
+  type PollIntervalConfig,
+} from './pollConfigDefaults.js'
+
+// .min(100) on the seek-work intervals restores the old Math.max(..., 100)
+// defense-in-depth floor against fat-fingered GrowthBook values. Unlike a
+// clamp, Zod rejects the whole object on violation — a config with one bad
+// field falls back to DEFAULT_POLL_CONFIG entirely rather than being
+// partially trusted.
+//
+// The at_capacity intervals use a 0-or-≥100 refinement: 0 means "disabled"
+// (heartbeat-only mode), ≥100 is the fat-finger floor. Values 1–99 are
+// rejected so unit confusion (ops thinks seconds, enters 10) doesn't poll
+// every 10ms against the VerifyEnvironmentSecretAuth DB path.
+//
+// The object-level refines require at least one at-capacity liveness
+// mechanism enabled: heartbeat OR the relevant poll interval. Without this,
+// the hb=0, atCapMs=0 drift config (ops disables heartbeat without
+// restoring at_capacity) falls through every throttle site with no sleep —
+// tight-looping /poll at HTTP-round-trip speed.
+// Shared error params for the 0-or-≥100 refinements below.
+const zeroOrAtLeast100 = {
+  message: 'must be 0 (disabled) or ≥100ms',
+}
+// NOTE(review): lazySchema presumably defers z.object() construction to the
+// first call — see ../utils/lazySchema.js; confirm if import-cost matters.
+const pollIntervalConfigSchema = lazySchema(() =>
+  z
+    .object({
+      poll_interval_ms_not_at_capacity: z.number().int().min(100),
+      // 0 = no at-capacity polling. Independent of heartbeat — both can be
+      // enabled (heartbeat runs, periodically breaks out to poll).
+      poll_interval_ms_at_capacity: z
+        .number()
+        .int()
+        .refine(v => v === 0 || v >= 100, zeroOrAtLeast100),
+      // 0 = disabled; positive value = heartbeat at this interval while at
+      // capacity. Runs alongside at-capacity polling, not instead of it.
+      // Named non_exclusive to distinguish from the old heartbeat_interval_ms
+      // (either-or semantics in pre-#22145 clients). .default(0) so existing
+      // GrowthBook configs without this field parse successfully.
+      non_exclusive_heartbeat_interval_ms: z.number().int().min(0).default(0),
+      // Multisession (bridgeMain.ts) intervals. Defaults match the
+      // single-session values so existing configs without these fields
+      // preserve current behavior.
+      multisession_poll_interval_ms_not_at_capacity: z
+        .number()
+        .int()
+        .min(100)
+        .default(
+          DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_not_at_capacity,
+        ),
+      multisession_poll_interval_ms_partial_capacity: z
+        .number()
+        .int()
+        .min(100)
+        .default(
+          DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_partial_capacity,
+        ),
+      multisession_poll_interval_ms_at_capacity: z
+        .number()
+        .int()
+        .refine(v => v === 0 || v >= 100, zeroOrAtLeast100)
+        .default(DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_at_capacity),
+      // .min(1) matches the server's ge=1 constraint (work_v1.py:230).
+      reclaim_older_than_ms: z.number().int().min(1).default(5000),
+      session_keepalive_interval_v2_ms: z
+        .number()
+        .int()
+        .min(0)
+        .default(120_000),
+    })
+    .refine(
+      cfg =>
+        cfg.non_exclusive_heartbeat_interval_ms > 0 ||
+        cfg.poll_interval_ms_at_capacity > 0,
+      {
+        message:
+          'at-capacity liveness requires non_exclusive_heartbeat_interval_ms > 0 or poll_interval_ms_at_capacity > 0',
+      },
+    )
+    .refine(
+      cfg =>
+        cfg.non_exclusive_heartbeat_interval_ms > 0 ||
+        cfg.multisession_poll_interval_ms_at_capacity > 0,
+      {
+        message:
+          'at-capacity liveness requires non_exclusive_heartbeat_interval_ms > 0 or multisession_poll_interval_ms_at_capacity > 0',
+      },
+    ),
+)
+
+/**
+ * Fetch the bridge poll interval config from GrowthBook with a 5-minute
+ * refresh window. Validates the served JSON against the schema; falls back
+ * to defaults if the flag is absent, malformed, or partially-specified.
+ *
+ * Shared by bridgeMain.ts (standalone) and replBridge.ts (REPL) so ops
+ * can tune both poll rates fleet-wide with a single config push.
+ *
+ * @returns The validated config, or DEFAULT_POLL_CONFIG on any parse failure.
+ */
+export function getPollIntervalConfig(): PollIntervalConfig {
+  const raw = getFeatureValue_CACHED_WITH_REFRESH<unknown>(
+    'tengu_bridge_poll_interval_config',
+    DEFAULT_POLL_CONFIG,
+    5 * 60 * 1000,
+  )
+  // Invalid configs are rejected wholesale (no partial trust) — see the
+  // schema comments above. Failure is silent by design: defaults are safe.
+  const parsed = pollIntervalConfigSchema().safeParse(raw)
+  return parsed.success ? parsed.data : DEFAULT_POLL_CONFIG
+}

+ 82 - 0
src/bridge/pollConfigDefaults.ts

@@ -0,0 +1,82 @@
+/**
+ * Bridge poll interval defaults. Extracted from pollConfig.ts so callers
+ * that don't need live GrowthBook tuning (daemon via Agent SDK) can avoid
+ * the growthbook.ts → config.ts → file.ts → sessionStorage.ts → commands.ts
+ * transitive dependency chain.
+ */
+
+/**
+ * Poll interval when actively seeking work (no transport / below maxSessions).
+ * Governs user-visible "connecting…" latency on initial work pickup and
+ * recovery speed after the server re-dispatches a work item.
+ */
+const POLL_INTERVAL_MS_NOT_AT_CAPACITY = 2000
+
+/**
+ * Poll interval when the transport is connected. Runs independently of
+ * heartbeat — when both are enabled, the heartbeat loop breaks out to poll
+ * at this interval. Set to 0 to disable at-capacity polling entirely.
+ *
+ * Server-side constraints that bound this value:
+ * - BRIDGE_LAST_POLL_TTL = 4h (Redis key expiry → environment auto-archived)
+ * - max_poll_stale_seconds = 24h (session-creation health gate, currently disabled)
+ *
+ * 10 minutes gives 24× headroom on the Redis TTL while still picking up
+ * server-initiated token-rotation redispatches within one poll cycle.
+ * The transport auto-reconnects internally for 10 minutes on transient WS
+ * failures, so poll is not the recovery path — it's strictly a liveness
+ * signal plus a backstop for permanent close.
+ */
+const POLL_INTERVAL_MS_AT_CAPACITY = 600_000
+
+/**
+ * Multisession bridge (bridgeMain.ts) poll intervals. Defaults match the
+ * single-session values so existing GrowthBook configs without these fields
+ * preserve current behavior. Ops can tune these independently via the
+ * tengu_bridge_poll_interval_config GB flag.
+ */
+const MULTISESSION_POLL_INTERVAL_MS_NOT_AT_CAPACITY =
+  POLL_INTERVAL_MS_NOT_AT_CAPACITY
+const MULTISESSION_POLL_INTERVAL_MS_PARTIAL_CAPACITY =
+  POLL_INTERVAL_MS_NOT_AT_CAPACITY
+const MULTISESSION_POLL_INTERVAL_MS_AT_CAPACITY = POLL_INTERVAL_MS_AT_CAPACITY
+
+/**
+ * Shape of the tengu_bridge_poll_interval_config GrowthBook flag. Field
+ * semantics and validation floors live with the Zod schema in pollConfig.ts;
+ * safe defaults are in DEFAULT_POLL_CONFIG below.
+ */
+export type PollIntervalConfig = {
+  poll_interval_ms_not_at_capacity: number
+  poll_interval_ms_at_capacity: number
+  non_exclusive_heartbeat_interval_ms: number
+  multisession_poll_interval_ms_not_at_capacity: number
+  multisession_poll_interval_ms_partial_capacity: number
+  multisession_poll_interval_ms_at_capacity: number
+  reclaim_older_than_ms: number
+  session_keepalive_interval_v2_ms: number
+}
+
+/** Safe fallback used when the GB flag is absent or fails schema validation. */
+export const DEFAULT_POLL_CONFIG: PollIntervalConfig = {
+  poll_interval_ms_not_at_capacity: POLL_INTERVAL_MS_NOT_AT_CAPACITY,
+  poll_interval_ms_at_capacity: POLL_INTERVAL_MS_AT_CAPACITY,
+  // 0 = disabled. When > 0, at-capacity loops send per-work-item heartbeats
+  // at this interval. Independent of poll_interval_ms_at_capacity — both may
+  // run (heartbeat periodically yields to poll). 60s gives 5× headroom under
+  // the server's 300s heartbeat TTL. Named non_exclusive to distinguish from
+  // the old heartbeat_interval_ms field (either-or semantics in pre-#22145
+  // clients — heartbeat suppressed poll). Old clients ignore this key; ops
+  // can set both fields during rollout.
+  non_exclusive_heartbeat_interval_ms: 0,
+  multisession_poll_interval_ms_not_at_capacity:
+    MULTISESSION_POLL_INTERVAL_MS_NOT_AT_CAPACITY,
+  multisession_poll_interval_ms_partial_capacity:
+    MULTISESSION_POLL_INTERVAL_MS_PARTIAL_CAPACITY,
+  multisession_poll_interval_ms_at_capacity:
+    MULTISESSION_POLL_INTERVAL_MS_AT_CAPACITY,
+  // Poll query param: reclaim unacknowledged work items older than this.
+  // Matches the server's DEFAULT_RECLAIM_OLDER_THAN_MS (work_service.py:24).
+  // Enables picking up stale-pending work after JWT expiry, when the prior
+  // ack failed because the session_ingress_token was already stale.
+  reclaim_older_than_ms: 5000,
+  // 0 = disabled. When > 0, push a silent {type:'keep_alive'} frame to
+  // session-ingress at this interval so upstream proxies don't GC an idle
+  // remote-control session. 2 min is the default. _v2: bridge-only gate
+  // (pre-v2 clients read the old key, new clients ignore it).
+  session_keepalive_interval_v2_ms: 120_000,
+}

+ 1008 - 0
src/bridge/remoteBridgeCore.ts

@@ -0,0 +1,1008 @@
+// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
+/**
+ * Env-less Remote Control bridge core.
+ *
+ * "Env-less" = no Environments API layer. Distinct from "CCR v2" (the
+ * /worker/* transport protocol) — the env-based path (replBridge.ts) can also
+ * use CCR v2 transport via CLAUDE_CODE_USE_CCR_V2. This file is about removing
+ * the poll/dispatch layer, not about which transport protocol is underneath.
+ *
+ * Unlike initBridgeCore (env-based, ~2400 lines), this connects directly
+ * to the session-ingress layer without the Environments API work-dispatch
+ * layer:
+ *
+ *   1. POST /v1/code/sessions              (OAuth, no env_id)  → session.id
+ *   2. POST /v1/code/sessions/{id}/bridge  (OAuth)             → {worker_jwt, expires_in, api_base_url, worker_epoch}
+ *      Each /bridge call bumps epoch — it IS the register. No separate /worker/register.
+ *   3. createV2ReplTransport(worker_jwt, worker_epoch)         → SSE + CCRClient
+ *   4. createTokenRefreshScheduler                             → proactive /bridge re-call (new JWT + new epoch)
+ *   5. 401 on SSE → rebuild transport with fresh /bridge credentials (same seq-num)
+ *
+ * No register/poll/ack/stop/heartbeat/deregister environment lifecycle.
+ * The Environments API historically existed because CCR's /worker/*
+ * endpoints required a session_id+role=worker JWT that only the work-dispatch
+ * layer could mint. Server PR #292605 (renamed in #293280) adds the /bridge endpoint as a direct
+ * OAuth→worker_jwt exchange, making the env layer optional for REPL sessions.
+ *
+ * Gated by `tengu_bridge_repl_v2` GrowthBook flag in initReplBridge.ts.
+ * REPL-only — daemon/print stay on env-based.
+ */
+
+import { feature } from 'bun:bundle'
+import axios from 'axios'
+import {
+  createV2ReplTransport,
+  type ReplBridgeTransport,
+} from './replBridgeTransport.js'
+import { buildCCRv2SdkUrl } from './workSecret.js'
+import { toCompatSessionId } from './sessionIdCompat.js'
+import { FlushGate } from './flushGate.js'
+import { createTokenRefreshScheduler } from './jwtUtils.js'
+import { getTrustedDeviceToken } from './trustedDevice.js'
+import {
+  getEnvLessBridgeConfig,
+  type EnvLessBridgeConfig,
+} from './envLessBridgeConfig.js'
+import {
+  handleIngressMessage,
+  handleServerControlRequest,
+  makeResultMessage,
+  isEligibleBridgeMessage,
+  extractTitleText,
+  BoundedUUIDSet,
+} from './bridgeMessaging.js'
+import { logBridgeSkip } from './debugUtils.js'
+import { logForDebugging } from '../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
+import { isInProtectedNamespace } from '../utils/envUtils.js'
+import { errorMessage } from '../utils/errors.js'
+import { sleep } from '../utils/sleep.js'
+import { registerCleanup } from '../utils/cleanupRegistry.js'
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  logEvent,
+} from '../services/analytics/index.js'
+import type { ReplBridgeHandle, BridgeState } from './replBridge.js'
+import type { Message } from '../types/message.js'
+import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
+import type {
+  SDKControlRequest,
+  SDKControlResponse,
+} from '../entrypoints/sdk/controlTypes.js'
+import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
+
+const ANTHROPIC_VERSION = '2023-06-01'
+
+// Telemetry discriminator for ws_connected. 'initial' is the default and
+// never passed to rebuildTransport (which can only be called post-init);
+// Exclude<> makes that constraint explicit at both signatures.
+type ConnectCause = 'initial' | 'proactive_refresh' | 'auth_401_recovery'
+
+function oauthHeaders(accessToken: string): Record<string, string> {
+  return {
+    Authorization: `Bearer ${accessToken}`,
+    'Content-Type': 'application/json',
+    'anthropic-version': ANTHROPIC_VERSION,
+  }
+}
+
+export type EnvLessBridgeParams = {
+  baseUrl: string
+  orgUUID: string
+  title: string
+  getAccessToken: () => string | undefined
+  onAuth401?: (staleAccessToken: string) => Promise<boolean>
+  /**
+   * Converts internal Message[] → SDKMessage[] for writeMessages() and the
+   * initial-flush/drain paths. Injected rather than imported — mappers.ts
+   * transitively pulls in src/commands.ts (entire command registry + React
+   * tree) which would bloat bundles that don't already have it.
+   */
+  toSDKMessages: (messages: Message[]) => SDKMessage[]
+  initialHistoryCap: number
+  initialMessages?: Message[]
+  onInboundMessage?: (msg: SDKMessage) => void | Promise<void>
+  /**
+   * Fired on each title-worthy user message seen in writeMessages() until
+   * the callback returns true (done). Mirrors replBridge.ts's onUserMessage —
+   * caller derives a title and PATCHes /v1/sessions/{id} so auto-started
+   * sessions don't stay at the generic fallback. The caller owns the
+   * derive-at-count-1-and-3 policy; the transport just keeps calling until
+   * told to stop. sessionId is the raw cse_* — updateBridgeSessionTitle
+   * retags internally.
+   */
+  onUserMessage?: (text: string, sessionId: string) => boolean
+  onPermissionResponse?: (response: SDKControlResponse) => void
+  onInterrupt?: () => void
+  onSetModel?: (model: string | undefined) => void
+  onSetMaxThinkingTokens?: (maxTokens: number | null) => void
+  onSetPermissionMode?: (
+    mode: PermissionMode,
+  ) => { ok: true } | { ok: false; error: string }
+  onStateChange?: (state: BridgeState, detail?: string) => void
+  /**
+   * When true, skip opening the SSE read stream — only the CCRClient write
+   * path is activated. Threaded to createV2ReplTransport and
+   * handleServerControlRequest.
+   */
+  outboundOnly?: boolean
+  /** Free-form tags for session categorization (e.g. ['ccr-mirror']). */
+  tags?: string[]
+}
+
+/**
+ * Create a session, fetch a worker JWT, connect the v2 transport.
+ *
+ * Returns null on any pre-flight failure (session create failed, /bridge
+ * failed, transport setup failed). Caller (initReplBridge) surfaces this
+ * as a generic "initialization failed" state.
+ */
+export async function initEnvLessBridgeCore(
+  params: EnvLessBridgeParams,
+): Promise<ReplBridgeHandle | null> {
+  const {
+    baseUrl,
+    orgUUID,
+    title,
+    getAccessToken,
+    onAuth401,
+    toSDKMessages,
+    initialHistoryCap,
+    initialMessages,
+    onInboundMessage,
+    onUserMessage,
+    onPermissionResponse,
+    onInterrupt,
+    onSetModel,
+    onSetMaxThinkingTokens,
+    onSetPermissionMode,
+    onStateChange,
+    outboundOnly,
+    tags,
+  } = params
+
+  const cfg = await getEnvLessBridgeConfig()
+
+  // ── 1. Create session (POST /v1/code/sessions, no env_id) ───────────────
+  const accessToken = getAccessToken()
+  if (!accessToken) {
+    logForDebugging('[remote-bridge] No OAuth token')
+    return null
+  }
+
+  const createdSessionId = await withRetry(
+    () =>
+      createCodeSession(baseUrl, accessToken, title, cfg.http_timeout_ms, tags),
+    'createCodeSession',
+    cfg,
+  )
+  if (!createdSessionId) {
+    onStateChange?.('failed', 'Session creation failed — see debug log')
+    logBridgeSkip('v2_session_create_failed', undefined, true)
+    return null
+  }
+  const sessionId: string = createdSessionId
+  logForDebugging(`[remote-bridge] Created session ${sessionId}`)
+  logForDiagnosticsNoPII('info', 'bridge_repl_v2_session_created')
+
+  // ── 2. Fetch bridge credentials (POST /bridge → worker_jwt, expires_in, api_base_url) ──
+  const credentials = await withRetry(
+    () =>
+      fetchRemoteCredentials(
+        sessionId,
+        baseUrl,
+        accessToken,
+        cfg.http_timeout_ms,
+      ),
+    'fetchRemoteCredentials',
+    cfg,
+  )
+  if (!credentials) {
+    onStateChange?.('failed', 'Remote credentials fetch failed — see debug log')
+    logBridgeSkip('v2_remote_creds_failed', undefined, true)
+    void archiveSession(
+      sessionId,
+      baseUrl,
+      accessToken,
+      orgUUID,
+      cfg.http_timeout_ms,
+    )
+    return null
+  }
+  logForDebugging(
+    `[remote-bridge] Fetched bridge credentials (expires_in=${credentials.expires_in}s)`,
+  )
+
+  // ── 3. Build v2 transport (SSETransport + CCRClient) ────────────────────
+  const sessionUrl = buildCCRv2SdkUrl(credentials.api_base_url, sessionId)
+  logForDebugging(`[remote-bridge] v2 session URL: ${sessionUrl}`)
+
+  let transport: ReplBridgeTransport
+  try {
+    transport = await createV2ReplTransport({
+      sessionUrl,
+      ingressToken: credentials.worker_jwt,
+      sessionId,
+      epoch: credentials.worker_epoch,
+      heartbeatIntervalMs: cfg.heartbeat_interval_ms,
+      heartbeatJitterFraction: cfg.heartbeat_jitter_fraction,
+      // Per-instance closure — keeps the worker JWT out of
+      // process.env.CLAUDE_CODE_SESSION_ACCESS_TOKEN, which mcp/client.ts
+      // reads ungatedly and would otherwise send to user-configured ws/http
+      // MCP servers. Frozen-at-construction is correct: transport is fully
+      // rebuilt on refresh (rebuildTransport below).
+      getAuthToken: () => credentials.worker_jwt,
+      outboundOnly,
+    })
+  } catch (err) {
+    logForDebugging(
+      `[remote-bridge] v2 transport setup failed: ${errorMessage(err)}`,
+      { level: 'error' },
+    )
+    onStateChange?.('failed', `Transport setup failed: ${errorMessage(err)}`)
+    logBridgeSkip('v2_transport_setup_failed', undefined, true)
+    void archiveSession(
+      sessionId,
+      baseUrl,
+      accessToken,
+      orgUUID,
+      cfg.http_timeout_ms,
+    )
+    return null
+  }
+  logForDebugging(
+    `[remote-bridge] v2 transport created (epoch=${credentials.worker_epoch})`,
+  )
+  onStateChange?.('ready')
+
+  // ── 4. State ────────────────────────────────────────────────────────────
+
+  // Echo dedup: messages we POST come back on the read stream. Seeded with
+  // initial message UUIDs so server echoes of flushed history are recognized.
+  // Both sets cover initial UUIDs — recentPostedUUIDs is a 2000-cap ring buffer
+  // and could evict them after enough live writes; initialMessageUUIDs is the
+  // unbounded fallback. Defense-in-depth; mirrors replBridge.ts.
+  const recentPostedUUIDs = new BoundedUUIDSet(cfg.uuid_dedup_buffer_size)
+  const initialMessageUUIDs = new Set<string>()
+  if (initialMessages) {
+    for (const msg of initialMessages) {
+      initialMessageUUIDs.add(msg.uuid)
+      recentPostedUUIDs.add(msg.uuid)
+    }
+  }
+
+  // Defensive dedup for re-delivered inbound prompts (seq-num negotiation
+  // edge cases, server history replay after transport swap).
+  const recentInboundUUIDs = new BoundedUUIDSet(cfg.uuid_dedup_buffer_size)
+
+  // FlushGate: queue live writes while the history flush POST is in flight,
+  // so the server receives [history..., live...] in order.
+  const flushGate = new FlushGate<Message>()
+
+  let initialFlushDone = false
+  let tornDown = false
+  let authRecoveryInFlight = false
+  // Latch for onUserMessage — flips true when the callback returns true
+  // (policy says "done deriving"). sessionId is const (no re-create path —
+  // rebuildTransport swaps JWT/epoch, same session), so no reset needed.
+  let userMessageCallbackDone = !onUserMessage
+
+  // Telemetry: why did onConnect fire? Set by rebuildTransport before
+  // wireTransportCallbacks; read asynchronously by onConnect. Race-safe
+  // because authRecoveryInFlight serializes rebuild callers, and a fresh
+  // initEnvLessBridgeCore() call gets a fresh closure defaulting to 'initial'.
+  let connectCause: ConnectCause = 'initial'
+
+  // Deadline for onConnect after transport.connect(). Cleared by onConnect
+  // (connected) and onClose (got a close — not silent). If neither fires
+  // before cfg.connect_timeout_ms, onConnectTimeout emits — the only
+  // signal for the `started → (silence)` gap.
+  let connectDeadline: ReturnType<typeof setTimeout> | undefined
+  function onConnectTimeout(cause: ConnectCause): void {
+    if (tornDown) return
+    logEvent('tengu_bridge_repl_connect_timeout', {
+      v2: true,
+      elapsed_ms: cfg.connect_timeout_ms,
+      cause:
+        cause as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    })
+  }
+
+  // ── 5. JWT refresh scheduler ────────────────────────────────────────────
+  // Schedule a callback 5min before expiry (per response.expires_in). On fire,
+  // re-fetch /bridge with OAuth → rebuild transport with fresh credentials.
+  // Each /bridge call bumps epoch server-side, so a JWT-only swap would leave
+  // the old CCRClient heartbeating with a stale epoch → 409 within 20s.
+  // JWT is opaque — do not decode.
+  const refresh = createTokenRefreshScheduler({
+    refreshBufferMs: cfg.token_refresh_buffer_ms,
+    getAccessToken: async () => {
+      // Unconditionally refresh OAuth before calling /bridge — getAccessToken()
+      // returns expired tokens as non-null strings (doesn't check expiresAt),
+      // so truthiness doesn't mean valid. Pass the stale token to onAuth401
+      // so handleOAuth401Error's keychain-comparison can detect parallel refresh.
+      const stale = getAccessToken()
+      if (onAuth401) await onAuth401(stale ?? '')
+      return getAccessToken() ?? stale
+    },
+    onRefresh: (sid, oauthToken) => {
+      void (async () => {
+        // Laptop wake: overdue proactive timer + SSE 401 fire ~simultaneously.
+        // Claim the flag BEFORE the /bridge fetch so the other path skips
+        // entirely — prevents double epoch bump (each /bridge call bumps; if
+        // both fetch, the first rebuild gets a stale epoch and 409s).
+        if (authRecoveryInFlight || tornDown) {
+          logForDebugging(
+            '[remote-bridge] Recovery already in flight, skipping proactive refresh',
+          )
+          return
+        }
+        authRecoveryInFlight = true
+        try {
+          const fresh = await withRetry(
+            () =>
+              fetchRemoteCredentials(
+                sid,
+                baseUrl,
+                oauthToken,
+                cfg.http_timeout_ms,
+              ),
+            'fetchRemoteCredentials (proactive)',
+            cfg,
+          )
+          if (!fresh || tornDown) return
+          await rebuildTransport(fresh, 'proactive_refresh')
+          logForDebugging(
+            '[remote-bridge] Transport rebuilt (proactive refresh)',
+          )
+        } catch (err) {
+          logForDebugging(
+            `[remote-bridge] Proactive refresh rebuild failed: ${errorMessage(err)}`,
+            { level: 'error' },
+          )
+          logForDiagnosticsNoPII(
+            'error',
+            'bridge_repl_v2_proactive_refresh_failed',
+          )
+          if (!tornDown) {
+            onStateChange?.('failed', `Refresh failed: ${errorMessage(err)}`)
+          }
+        } finally {
+          authRecoveryInFlight = false
+        }
+      })()
+    },
+    label: 'remote',
+  })
+  refresh.scheduleFromExpiresIn(sessionId, credentials.expires_in)
+
+  // ── 6. Wire callbacks (extracted so transport-rebuild can re-wire) ──────
+  function wireTransportCallbacks(): void {
+    transport.setOnConnect(() => {
+      clearTimeout(connectDeadline)
+      logForDebugging('[remote-bridge] v2 transport connected')
+      logForDiagnosticsNoPII('info', 'bridge_repl_v2_transport_connected')
+      logEvent('tengu_bridge_repl_ws_connected', {
+        v2: true,
+        cause:
+          connectCause as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+      })
+
+      if (!initialFlushDone && initialMessages && initialMessages.length > 0) {
+        initialFlushDone = true
+        // Capture current transport — if 401/teardown happens mid-flush,
+        // the stale .finally() must not drain the gate or signal connected.
+        // (Same guard pattern as replBridge.ts:1119.)
+        const flushTransport = transport
+        void flushHistory(initialMessages)
+          .catch(e =>
+            logForDebugging(`[remote-bridge] flushHistory failed: ${e}`),
+          )
+          .finally(() => {
+            // authRecoveryInFlight catches the v1-vs-v2 asymmetry: v1 nulls
+            // transport synchronously in setOnClose (replBridge.ts:1175), so
+            // transport !== flushTransport trips immediately. v2 doesn't null —
+            // transport reassigned only at rebuildTransport:346, 3 awaits deep.
+            // authRecoveryInFlight is set synchronously at rebuildTransport entry.
+            if (
+              transport !== flushTransport ||
+              tornDown ||
+              authRecoveryInFlight
+            ) {
+              return
+            }
+            drainFlushGate()
+            onStateChange?.('connected')
+          })
+      } else if (!flushGate.active) {
+        onStateChange?.('connected')
+      }
+    })
+
+    transport.setOnData((data: string) => {
+      handleIngressMessage(
+        data,
+        recentPostedUUIDs,
+        recentInboundUUIDs,
+        onInboundMessage,
+        // Remote client answered the permission prompt — the turn resumes.
+        // Without this the server stays on requires_action until the next
+        // user message or turn-end result.
+        onPermissionResponse
+          ? res => {
+              transport.reportState('running')
+              onPermissionResponse(res)
+            }
+          : undefined,
+        req =>
+          handleServerControlRequest(req, {
+            transport,
+            sessionId,
+            onInterrupt,
+            onSetModel,
+            onSetMaxThinkingTokens,
+            onSetPermissionMode,
+            outboundOnly,
+          }),
+      )
+    })
+
+    transport.setOnClose((code?: number) => {
+      clearTimeout(connectDeadline)
+      if (tornDown) return
+      logForDebugging(`[remote-bridge] v2 transport closed (code=${code})`)
+      logEvent('tengu_bridge_repl_ws_closed', { code, v2: true })
+      // onClose fires only for TERMINAL failures: 401 (JWT invalid),
+      // 4090 (CCR epoch mismatch), 4091 (CCR init failed), or SSE 10-min
+      // reconnect budget exhausted. Transient disconnects are handled
+      // transparently inside SSETransport. 401 we can recover from (fetch
+      // fresh JWT, rebuild transport); all other codes are dead-ends.
+      if (code === 401 && !authRecoveryInFlight) {
+        void recoverFromAuthFailure()
+        return
+      }
+      onStateChange?.('failed', `Transport closed (code ${code})`)
+    })
+  }
+
+  // ── 7. Transport rebuild (shared by proactive refresh + 401 recovery) ──
+  // Every /bridge call bumps epoch server-side. Both refresh paths must
+  // rebuild the transport with the new epoch — a JWT-only swap leaves the
+  // old CCRClient heartbeating stale epoch → 409. SSE resumes from the old
+  // transport's high-water-mark seq-num so no server-side replay.
+  // Caller MUST set authRecoveryInFlight = true before calling (synchronously,
+  // before any await) and clear it in a finally. This function doesn't manage
+  // the flag — moving it here would be too late to prevent a double /bridge
+  // fetch, and each fetch bumps epoch.
+  async function rebuildTransport(
+    fresh: RemoteCredentials,
+    cause: Exclude<ConnectCause, 'initial'>,
+  ): Promise<void> {
+    connectCause = cause
+    // Queue writes during rebuild — once /bridge returns, the old transport's
+    // epoch is stale and its next write/heartbeat 409s. Without this gate,
+    // writeMessages adds UUIDs to recentPostedUUIDs then writeBatch silently
+    // no-ops (closed uploader after 409) → permanent silent message loss.
+    flushGate.start()
+    try {
+      const seq = transport.getLastSequenceNum()
+      transport.close()
+      transport = await createV2ReplTransport({
+        sessionUrl: buildCCRv2SdkUrl(fresh.api_base_url, sessionId),
+        ingressToken: fresh.worker_jwt,
+        sessionId,
+        epoch: fresh.worker_epoch,
+        heartbeatIntervalMs: cfg.heartbeat_interval_ms,
+        heartbeatJitterFraction: cfg.heartbeat_jitter_fraction,
+        initialSequenceNum: seq,
+        getAuthToken: () => fresh.worker_jwt,
+        outboundOnly,
+      })
+      if (tornDown) {
+        // Teardown fired during the async createV2ReplTransport window.
+        // Don't wire/connect/schedule — we'd re-arm timers after cancelAll()
+        // and fire onInboundMessage into a torn-down bridge.
+        transport.close()
+        return
+      }
+      wireTransportCallbacks()
+      transport.connect()
+      connectDeadline = setTimeout(
+        onConnectTimeout,
+        cfg.connect_timeout_ms,
+        connectCause,
+      )
+      refresh.scheduleFromExpiresIn(sessionId, fresh.expires_in)
+      // Drain queued writes into the new uploader. Runs before
+      // ccr.initialize() resolves (transport.connect() is fire-and-forget),
+      // but the uploader serializes behind the initial PUT /worker. If
+      // init fails (4091), events drop — but only recentPostedUUIDs
+      // (per-instance) is populated, so re-enabling the bridge re-flushes.
+      drainFlushGate()
+    } finally {
+      // End the gate on failure paths too — drainFlushGate already ended
+      // it on success. Queued messages are dropped (transport still dead).
+      flushGate.drop()
+    }
+  }
+
+  // ── 8. 401 recovery (OAuth refresh + rebuild) ───────────────────────────
+  async function recoverFromAuthFailure(): Promise<void> {
+    // setOnClose already guards `!authRecoveryInFlight` but that check and
+    // this set must be atomic against onRefresh — claim synchronously before
+    // any await. Laptop wake fires both paths ~simultaneously.
+    if (authRecoveryInFlight) return
+    authRecoveryInFlight = true
+    onStateChange?.('reconnecting', 'JWT expired — refreshing')
+    logForDebugging('[remote-bridge] 401 on SSE — attempting JWT refresh')
+    try {
+      // Unconditionally try OAuth refresh — getAccessToken() returns expired
+      // tokens as non-null strings, so !oauthToken doesn't catch expiry.
+      // Pass the stale token so handleOAuth401Error's keychain-comparison
+      // can detect if another tab already refreshed.
+      const stale = getAccessToken()
+      if (onAuth401) await onAuth401(stale ?? '')
+      const oauthToken = getAccessToken() ?? stale
+      if (!oauthToken || tornDown) {
+        if (!tornDown) {
+          onStateChange?.('failed', 'JWT refresh failed: no OAuth token')
+        }
+        return
+      }
+
+      const fresh = await withRetry(
+        () =>
+          fetchRemoteCredentials(
+            sessionId,
+            baseUrl,
+            oauthToken,
+            cfg.http_timeout_ms,
+          ),
+        'fetchRemoteCredentials (recovery)',
+        cfg,
+      )
+      if (!fresh || tornDown) {
+        if (!tornDown) {
+          onStateChange?.('failed', 'JWT refresh failed after 401')
+        }
+        return
+      }
+      // If 401 interrupted the initial flush, writeBatch may have silently
+      // no-op'd on the closed uploader (ccr.close() ran in the SSE wrapper
+      // before our setOnClose callback). Reset so the new onConnect re-flushes.
+      // (v1 scopes initialFlushDone inside the per-transport closure at
+      // replBridge.ts:1027 so it resets naturally; v2 has it at outer scope.)
+      initialFlushDone = false
+      await rebuildTransport(fresh, 'auth_401_recovery')
+      logForDebugging('[remote-bridge] Transport rebuilt after 401')
+    } catch (err) {
+      logForDebugging(
+        `[remote-bridge] 401 recovery failed: ${errorMessage(err)}`,
+        { level: 'error' },
+      )
+      logForDiagnosticsNoPII('error', 'bridge_repl_v2_jwt_refresh_failed')
+      if (!tornDown) {
+        onStateChange?.('failed', `JWT refresh failed: ${errorMessage(err)}`)
+      }
+    } finally {
+      authRecoveryInFlight = false
+    }
+  }
+
+  wireTransportCallbacks()
+
+  // Start flushGate BEFORE connect so writeMessages() during handshake
+  // queues instead of racing the history POST.
+  if (initialMessages && initialMessages.length > 0) {
+    flushGate.start()
+  }
+  transport.connect()
+  connectDeadline = setTimeout(
+    onConnectTimeout,
+    cfg.connect_timeout_ms,
+    connectCause,
+  )
+
+  // ── 8. History flush + drain helpers ────────────────────────────────────
+  function drainFlushGate(): void {
+    const msgs = flushGate.end()
+    if (msgs.length === 0) return
+    for (const msg of msgs) recentPostedUUIDs.add(msg.uuid)
+    const events = toSDKMessages(msgs).map(m => ({
+      ...m,
+      session_id: sessionId,
+    }))
+    if (msgs.some(m => m.type === 'user')) {
+      transport.reportState('running')
+    }
+    logForDebugging(
+      `[remote-bridge] Drained ${msgs.length} queued message(s) after flush`,
+    )
+    void transport.writeBatch(events)
+  }
+
+  async function flushHistory(msgs: Message[]): Promise<void> {
+    // v2 always creates a fresh server session (unconditional createCodeSession
+    // above) — no session reuse, no double-post risk. Unlike v1, we do NOT
+    // filter by previouslyFlushedUUIDs: that set persists across REPL enable/
+    // disable cycles (useRef), so it would wrongly suppress history on re-enable.
+    const eligible = msgs.filter(isEligibleBridgeMessage)
+    const capped =
+      initialHistoryCap > 0 && eligible.length > initialHistoryCap
+        ? eligible.slice(-initialHistoryCap)
+        : eligible
+    if (capped.length < eligible.length) {
+      logForDebugging(
+        `[remote-bridge] Capped initial flush: ${eligible.length} -> ${capped.length} (cap=${initialHistoryCap})`,
+      )
+    }
+    const events = toSDKMessages(capped).map(m => ({
+      ...m,
+      session_id: sessionId,
+    }))
+    if (events.length === 0) return
+    // Mid-turn init: if Remote Control is enabled while a query is running,
+    // the last eligible message is a user prompt or tool_result (both 'user'
+    // type). Without this the init PUT's 'idle' sticks until the next user-
+    // type message forwards via writeMessages — which for a pure-text turn
+    // is never (only assistant chunks stream post-init). Check eligible (pre-
+    // cap), not capped: the cap may truncate to a user message even when the
+    // actual trailing message is assistant.
+    if (eligible.at(-1)?.type === 'user') {
+      transport.reportState('running')
+    }
+    logForDebugging(`[remote-bridge] Flushing ${events.length} history events`)
+    await transport.writeBatch(events)
+  }
+
+  // ── 9. Teardown ───────────────────────────────────────────────────────────
+  // On SIGINT/SIGTERM/⁠/exit, gracefulShutdown races runCleanupFunctions()
+  // against a 2s cap before forceExit kills the process. Budget accordingly:
+  //   - archive: teardown_archive_timeout_ms (default 1500, cap 2000)
+  //   - result write: fire-and-forget, archive latency covers the drain
+  //   - 401 retry: only if first archive 401s, shares the same budget
+  async function teardown(): Promise<void> {
+    if (tornDown) return
+    tornDown = true
+    refresh.cancelAll()
+    clearTimeout(connectDeadline)
+    flushGate.drop()
+
+    // Fire the result message before archive — transport.write() only awaits
+    // enqueue (SerialBatchEventUploader resolves once buffered, drain is
+    // async). Archiving before close() gives the uploader's drain loop a
+    // window (typical archive ≈ 100-500ms) to POST the result without an
+    // explicit sleep. close() sets closed=true which interrupts drain at the
+    // next while-check, so close-before-archive drops the result.
+    transport.reportState('idle')
+    void transport.write(makeResultMessage(sessionId))
+
+    let token = getAccessToken()
+    let status = await archiveSession(
+      sessionId,
+      baseUrl,
+      token,
+      orgUUID,
+      cfg.teardown_archive_timeout_ms,
+    )
+
+    // Token is usually fresh (refresh scheduler runs 5min before expiry) but
+    // laptop-wake past the refresh window leaves getAccessToken() returning a
+    // stale string. Retry once on 401 — onAuth401 (= handleOAuth401Error)
+    // clears keychain cache + force-refreshes. No proactive refresh on the
+    // happy path: handleOAuth401Error force-refreshes even valid tokens,
+    // which would waste budget 99% of the time. try/catch mirrors
+    // recoverFromAuthFailure: keychain reads can throw (macOS locked after
+    // wake); an uncaught throw here would skip transport.close + telemetry.
+    if (status === 401 && onAuth401) {
+      try {
+        await onAuth401(token ?? '')
+        token = getAccessToken()
+        status = await archiveSession(
+          sessionId,
+          baseUrl,
+          token,
+          orgUUID,
+          cfg.teardown_archive_timeout_ms,
+        )
+      } catch (err) {
+        logForDebugging(
+          `[remote-bridge] Teardown 401 retry threw: ${errorMessage(err)}`,
+          { level: 'error' },
+        )
+      }
+    }
+
+    transport.close()
+
+    const archiveStatus: ArchiveTelemetryStatus =
+      status === 'no_token'
+        ? 'skipped_no_token'
+        : status === 'timeout' || status === 'error'
+          ? 'network_error'
+          : status >= 500
+            ? 'server_5xx'
+            : status >= 400
+              ? 'server_4xx'
+              : 'ok'
+
+    logForDebugging(`[remote-bridge] Torn down (archive=${status})`)
+    logForDiagnosticsNoPII('info', 'bridge_repl_v2_teardown')
+    logEvent(
+      feature('CCR_MIRROR') && outboundOnly
+        ? 'tengu_ccr_mirror_teardown'
+        : 'tengu_bridge_repl_teardown',
+      {
+        v2: true,
+        archive_status:
+          archiveStatus as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        archive_ok: typeof status === 'number' && status < 400,
+        archive_http_status: typeof status === 'number' ? status : undefined,
+        archive_timeout: status === 'timeout',
+        archive_no_token: status === 'no_token',
+      },
+    )
+  }
+  const unregister = registerCleanup(teardown)
+
+  if (feature('CCR_MIRROR') && outboundOnly) {
+    logEvent('tengu_ccr_mirror_started', {
+      v2: true,
+      expires_in_s: credentials.expires_in,
+    })
+  } else {
+    logEvent('tengu_bridge_repl_started', {
+      has_initial_messages: !!(initialMessages && initialMessages.length > 0),
+      v2: true,
+      expires_in_s: credentials.expires_in,
+      inProtectedNamespace: isInProtectedNamespace(),
+    })
+  }
+
+  // ── 10. Handle ──────────────────────────────────────────────────────────
+  return {
+    bridgeSessionId: sessionId,
+    environmentId: '',
+    sessionIngressUrl: credentials.api_base_url,
+    writeMessages(messages) {
+      const filtered = messages.filter(
+        m =>
+          isEligibleBridgeMessage(m) &&
+          !initialMessageUUIDs.has(m.uuid) &&
+          !recentPostedUUIDs.has(m.uuid),
+      )
+      if (filtered.length === 0) return
+
+      // Fire onUserMessage for title derivation. Scan before the flushGate
+      // check — prompts are title-worthy even if they queue. Keeps calling
+      // on every title-worthy message until the callback returns true; the
+      // caller owns the policy (derive at 1st and 3rd, skip if explicit).
+      if (!userMessageCallbackDone) {
+        for (const m of filtered) {
+          const text = extractTitleText(m)
+          if (text !== undefined && onUserMessage?.(text, sessionId)) {
+            userMessageCallbackDone = true
+            break
+          }
+        }
+      }
+
+      if (flushGate.enqueue(...filtered)) {
+        logForDebugging(
+          `[remote-bridge] Queued ${filtered.length} message(s) during flush`,
+        )
+        return
+      }
+
+      for (const msg of filtered) recentPostedUUIDs.add(msg.uuid)
+      const events = toSDKMessages(filtered).map(m => ({
+        ...m,
+        session_id: sessionId,
+      }))
+      // v2 does not derive worker_status from events server-side (unlike v1
+      // session-ingress session_status_updater.go). Push it from here so the
+      // CCR web session list shows Running instead of stuck on Idle. A user
+      // message in the batch marks turn start. CCRClient.reportState dedupes
+      // consecutive same-state pushes.
+      if (filtered.some(m => m.type === 'user')) {
+        transport.reportState('running')
+      }
+      logForDebugging(`[remote-bridge] Sending ${filtered.length} message(s)`)
+      void transport.writeBatch(events)
+    },
+    writeSdkMessages(messages: SDKMessage[]) {
+      const filtered = messages.filter(
+        m => !m.uuid || !recentPostedUUIDs.has(m.uuid),
+      )
+      if (filtered.length === 0) return
+      for (const msg of filtered) {
+        if (msg.uuid) recentPostedUUIDs.add(msg.uuid)
+      }
+      const events = filtered.map(m => ({ ...m, session_id: sessionId }))
+      void transport.writeBatch(events)
+    },
+    sendControlRequest(request: SDKControlRequest) {
+      if (authRecoveryInFlight) {
+        logForDebugging(
+          `[remote-bridge] Dropping control_request during 401 recovery: ${request.request_id}`,
+        )
+        return
+      }
+      const event = { ...request, session_id: sessionId }
+      if (request.request.subtype === 'can_use_tool') {
+        transport.reportState('requires_action')
+      }
+      void transport.write(event)
+      logForDebugging(
+        `[remote-bridge] Sent control_request request_id=${request.request_id}`,
+      )
+    },
+    sendControlResponse(response: SDKControlResponse) {
+      if (authRecoveryInFlight) {
+        logForDebugging(
+          '[remote-bridge] Dropping control_response during 401 recovery',
+        )
+        return
+      }
+      const event = { ...response, session_id: sessionId }
+      transport.reportState('running')
+      void transport.write(event)
+      logForDebugging('[remote-bridge] Sent control_response')
+    },
+    sendControlCancelRequest(requestId: string) {
+      if (authRecoveryInFlight) {
+        logForDebugging(
+          `[remote-bridge] Dropping control_cancel_request during 401 recovery: ${requestId}`,
+        )
+        return
+      }
+      const event = {
+        type: 'control_cancel_request' as const,
+        request_id: requestId,
+        session_id: sessionId,
+      }
+      // Hook/classifier/channel/recheck resolved the permission locally —
+      // interactiveHandler calls only cancelRequest (no sendResponse) on
+      // those paths, so without this the server stays on requires_action.
+      transport.reportState('running')
+      void transport.write(event)
+      logForDebugging(
+        `[remote-bridge] Sent control_cancel_request request_id=${requestId}`,
+      )
+    },
+    sendResult() {
+      if (authRecoveryInFlight) {
+        logForDebugging('[remote-bridge] Dropping result during 401 recovery')
+        return
+      }
+      transport.reportState('idle')
+      void transport.write(makeResultMessage(sessionId))
+      logForDebugging(`[remote-bridge] Sent result`)
+    },
+    async teardown() {
+      unregister()
+      await teardown()
+    },
+  }
+}
+
+// ─── Session API (v2 /code/sessions, no env) ─────────────────────────────────
+
+/** Retry an async init call with exponential backoff + jitter. */
+async function withRetry<T>(
+  fn: () => Promise<T | null>,
+  label: string,
+  cfg: EnvLessBridgeConfig,
+): Promise<T | null> {
+  const max = cfg.init_retry_max_attempts
+  for (let attempt = 1; attempt <= max; attempt++) {
+    const result = await fn()
+    if (result !== null) return result
+    if (attempt < max) {
+      const base = cfg.init_retry_base_delay_ms * 2 ** (attempt - 1)
+      const jitter =
+        base * cfg.init_retry_jitter_fraction * (2 * Math.random() - 1)
+      const delay = Math.min(base + jitter, cfg.init_retry_max_delay_ms)
+      logForDebugging(
+        `[remote-bridge] ${label} failed (attempt ${attempt}/${max}), retrying in ${Math.round(delay)}ms`,
+      )
+      await sleep(delay)
+    }
+  }
+  return null
+}
+
+// Moved to codeSessionApi.ts so the SDK /bridge subpath can bundle them
+// without pulling in this file's heavy CLI tree (analytics, transport).
+export {
+  createCodeSession,
+  type RemoteCredentials,
+} from './codeSessionApi.js'
+import {
+  createCodeSession,
+  fetchRemoteCredentials as fetchRemoteCredentialsRaw,
+  type RemoteCredentials,
+} from './codeSessionApi.js'
+import { getBridgeBaseUrlOverride } from './bridgeConfig.js'
+
+// CLI-side wrapper that applies the CLAUDE_BRIDGE_BASE_URL dev override and
+// injects the trusted-device token (both are env/GrowthBook reads that the
+// SDK-facing codeSessionApi.ts export must stay free of).
+export async function fetchRemoteCredentials(
+  sessionId: string,
+  baseUrl: string,
+  accessToken: string,
+  timeoutMs: number,
+): Promise<RemoteCredentials | null> {
+  const fetched = await fetchRemoteCredentialsRaw(
+    sessionId,
+    baseUrl,
+    accessToken,
+    timeoutMs,
+    getTrustedDeviceToken(),
+  )
+  if (!fetched) return null
+  if (getBridgeBaseUrlOverride()) {
+    // Dev override active: keep subsequent calls pointed at the overridden
+    // host instead of whatever api_base_url the server handed back.
+    return { ...fetched, api_base_url: baseUrl }
+  }
+  return fetched
+}
+
+// Raw outcome of archiveSession: the numeric HTTP status whenever the server
+// responded at all (validateStatus accepts everything), or a categorical
+// failure for the no-token / timed-out / other-network-error cases.
+type ArchiveStatus = number | 'timeout' | 'error' | 'no_token'
+
+// Single categorical for BQ `GROUP BY archive_status`. The booleans on
+// _teardown predate this and are redundant with it (except archive_timeout,
+// which distinguishes ECONNABORTED from other network errors — both map to
+// 'network_error' here since the dominant cause in a 1.5s window is timeout).
+type ArchiveTelemetryStatus =
+  | 'ok'
+  | 'skipped_no_token'
+  | 'network_error'
+  | 'server_4xx'
+  | 'server_5xx'
+
+async function archiveSession(
+  sessionId: string,
+  baseUrl: string,
+  accessToken: string | undefined,
+  orgUUID: string,
+  timeoutMs: number,
+): Promise<ArchiveStatus> {
+  if (!accessToken) return 'no_token'
+  // Archive lives at the compat layer (/v1/sessions/*, not /v1/code/sessions).
+  // compat.parseSessionID only accepts TagSession (session_*), so retag cse_*.
+  // anthropic-beta + x-organization-uuid are required — without them the
+  // compat gateway 404s before reaching the handler.
+  //
+  // Unlike bridgeMain.ts (which caches compatId in sessionCompatIds to keep
+  // in-memory titledSessions/logger keys consistent across a mid-session
+  // gate flip), this compatId is only a server URL path segment — no
+  // in-memory state. Fresh compute matches whatever the server currently
+  // validates: if the gate is OFF, the server has been updated to accept
+  // cse_* and we correctly send it.
+  const compatId = toCompatSessionId(sessionId)
+  try {
+    const response = await axios.post(
+      `${baseUrl}/v1/sessions/${compatId}/archive`,
+      {},
+      {
+        headers: {
+          ...oauthHeaders(accessToken),
+          'anthropic-beta': 'ccr-byoc-2025-07-29',
+          'x-organization-uuid': orgUUID,
+        },
+        timeout: timeoutMs,
+        validateStatus: () => true,
+      },
+    )
+    logForDebugging(
+      `[remote-bridge] Archive ${compatId} status=${response.status}`,
+    )
+    return response.status
+  } catch (err) {
+    const msg = errorMessage(err)
+    logForDebugging(`[remote-bridge] Archive failed: ${msg}`)
+    return axios.isAxiosError(err) && err.code === 'ECONNABORTED'
+      ? 'timeout'
+      : 'error'
+  }
+}

+ 2406 - 0
src/bridge/replBridge.ts

@@ -0,0 +1,2406 @@
+// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
+import { randomUUID } from 'crypto'
+import {
+  createBridgeApiClient,
+  BridgeFatalError,
+  isExpiredErrorType,
+  isSuppressible403,
+} from './bridgeApi.js'
+import type { BridgeConfig, BridgeApiClient } from './types.js'
+import { logForDebugging } from '../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  logEvent,
+} from '../services/analytics/index.js'
+import { registerCleanup } from '../utils/cleanupRegistry.js'
+import {
+  handleIngressMessage,
+  handleServerControlRequest,
+  makeResultMessage,
+  isEligibleBridgeMessage,
+  extractTitleText,
+  BoundedUUIDSet,
+} from './bridgeMessaging.js'
+import {
+  decodeWorkSecret,
+  buildSdkUrl,
+  buildCCRv2SdkUrl,
+  sameSessionId,
+} from './workSecret.js'
+import { toCompatSessionId, toInfraSessionId } from './sessionIdCompat.js'
+import { updateSessionBridgeId } from '../utils/concurrentSessions.js'
+import { getTrustedDeviceToken } from './trustedDevice.js'
+import { HybridTransport } from '../cli/transports/HybridTransport.js'
+import {
+  type ReplBridgeTransport,
+  createV1ReplTransport,
+  createV2ReplTransport,
+} from './replBridgeTransport.js'
+import { updateSessionIngressAuthToken } from '../utils/sessionIngressAuth.js'
+import { isEnvTruthy, isInProtectedNamespace } from '../utils/envUtils.js'
+import { validateBridgeId } from './bridgeApi.js'
+import {
+  describeAxiosError,
+  extractHttpStatus,
+  logBridgeSkip,
+} from './debugUtils.js'
+import type { Message } from '../types/message.js'
+import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
+import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
+import type {
+  SDKControlRequest,
+  SDKControlResponse,
+} from '../entrypoints/sdk/controlTypes.js'
+import { createCapacityWake, type CapacitySignal } from './capacityWake.js'
+import { FlushGate } from './flushGate.js'
+import {
+  DEFAULT_POLL_CONFIG,
+  type PollIntervalConfig,
+} from './pollConfigDefaults.js'
+import { errorMessage } from '../utils/errors.js'
+import { sleep } from '../utils/sleep.js'
+import {
+  wrapApiForFaultInjection,
+  registerBridgeDebugHandle,
+  clearBridgeDebugHandle,
+  injectBridgeFault,
+} from './bridgeDebug.js'
+
+/**
+ * Handle returned by the bridge init functions. The write/send methods are
+ * fire-and-forget: transport writes are issued with `void` and errors are
+ * logged, not thrown. teardown() is the only async member.
+ */
+export type ReplBridgeHandle = {
+  bridgeSessionId: string
+  environmentId: string
+  sessionIngressUrl: string
+  /** Forward internal Message[] to the remote side (filtered and deduped). */
+  writeMessages(messages: Message[]): void
+  /** Forward already-converted SDKMessage[] — no Message→SDK mapping. */
+  writeSdkMessages(messages: SDKMessage[]): void
+  sendControlRequest(request: SDKControlRequest): void
+  sendControlResponse(response: SDKControlResponse): void
+  sendControlCancelRequest(requestId: string): void
+  /** Report end-of-turn (idle worker state + result message) to the server. */
+  sendResult(): void
+  /** Tear down transports and release server-side session resources. */
+  teardown(): Promise<void>
+}
+
+/** Coarse lifecycle state surfaced through onStateChange. */
+export type BridgeState = 'ready' | 'connected' | 'reconnecting' | 'failed'
+
+/**
+ * Explicit-param input to initBridgeCore. Everything initReplBridge reads
+ * from bootstrap state (cwd, session ID, git, OAuth) becomes a field here.
+ * A daemon caller (Agent SDK, PR 4) that never runs main.tsx fills these
+ * in itself.
+ */
+export type BridgeCoreParams = {
+  /** Working directory; also keys the on-disk crash-recovery bridge pointer. */
+  dir: string
+  /** Machine display name forwarded in the environment registration payload. */
+  machineName: string
+  /** Git branch forwarded to environment registration and session creation. */
+  branch: string
+  /** Git remote URL (null when not a git repo); forwarded like `branch`. */
+  gitRepoUrl: string | null
+  /** Initial session title. getCurrentTitle defaults to returning this. */
+  title: string
+  /** API base URL for the bridge HTTP client. */
+  baseUrl: string
+  /** Session-ingress endpoint recorded in the environment registration. */
+  sessionIngressUrl: string
+  /**
+   * Opaque string sent as metadata.worker_type. Use BridgeWorkerType for
+   * the two CLI-originated values; daemon callers may send any string the
+   * backend recognizes (it's just a filter key on the web side).
+   */
+  workerType: string
+  /** OAuth access-token getter handed to createBridgeApiClient. */
+  getAccessToken: () => string | undefined
+  /**
+   * POST /v1/sessions. Injected because `createSession.ts` lazy-loads
+   * `auth.ts`/`model.ts`/`oauth/client.ts` and `bun --outfile` inlines
+   * dynamic imports — the lazy-load doesn't help, the whole REPL tree ends
+   * up in the Agent SDK bundle.
+   *
+   * REPL wrapper passes `createBridgeSession` from `createSession.ts`.
+   * Daemon wrapper passes `createBridgeSessionLean` from `sessionApi.ts`
+   * (HTTP-only, orgUUID+model supplied by the daemon caller).
+   *
+   * Receives `gitRepoUrl`+`branch` so the REPL wrapper can build the git
+   * source/outcome for claude.ai's session card. Daemon ignores them.
+   */
+  createSession: (opts: {
+    environmentId: string
+    title: string
+    gitRepoUrl: string | null
+    branch: string
+    signal: AbortSignal
+  }) => Promise<string | null>
+  /**
+   * POST /v1/sessions/{id}/archive. Same injection rationale. Best-effort;
+   * the callback MUST NOT throw.
+   */
+  archiveSession: (sessionId: string) => Promise<void>
+  /**
+   * Invoked on reconnect-after-env-lost to refresh the title. REPL wrapper
+   * reads session storage (picks up /rename); daemon returns the static
+   * title. Defaults to () => title.
+   */
+  getCurrentTitle?: () => string
+  /**
+   * Converts internal Message[] → SDKMessage[] for writeMessages() and the
+   * initial-flush/drain paths. REPL wrapper passes the real toSDKMessages
+   * from utils/messages/mappers.ts. Daemon callers that only use
+   * writeSdkMessages() and pass no initialMessages can omit this — those
+   * code paths are unreachable.
+   *
+   * Injected rather than imported because mappers.ts transitively pulls in
+   * src/commands.ts via messages.ts → api.ts → prompts.ts, dragging the
+   * entire command registry + React tree into the Agent SDK bundle.
+   */
+  toSDKMessages?: (messages: Message[]) => SDKMessage[]
+  /**
+   * OAuth 401 refresh handler passed to createBridgeApiClient. REPL wrapper
+   * passes handleOAuth401Error; daemon passes its AuthManager's handler.
+   * Injected because utils/auth.ts transitively pulls in the command
+   * registry via config.ts → file.ts → permissions/filesystem.ts →
+   * sessionStorage.ts → commands.ts.
+   */
+  onAuth401?: (staleAccessToken: string) => Promise<boolean>
+  /**
+   * Poll interval config getter for the work-poll heartbeat loop. REPL
+   * wrapper passes the GrowthBook-backed getPollIntervalConfig (allows ops
+   * to live-tune poll rates fleet-wide). Daemon passes a static config
+   * with a 60s heartbeat (5× headroom under the 300s work-lease TTL).
+   * Injected because growthbook.ts transitively pulls in the command
+   * registry via the same config.ts chain.
+   */
+  getPollIntervalConfig?: () => PollIntervalConfig
+  /**
+   * Max initial messages to replay on connect. REPL wrapper reads from the
+   * tengu_bridge_initial_history_cap GrowthBook flag. Daemon passes no
+   * initialMessages so this is never read. Default 200 matches the flag
+   * default.
+   */
+  initialHistoryCap?: number
+  // Same REPL-flush machinery as InitBridgeOptions — daemon omits these.
+  initialMessages?: Message[]
+  previouslyFlushedUUIDs?: Set<string>
+  onInboundMessage?: (msg: SDKMessage) => void
+  onPermissionResponse?: (response: SDKControlResponse) => void
+  onInterrupt?: () => void
+  onSetModel?: (model: string | undefined) => void
+  onSetMaxThinkingTokens?: (maxTokens: number | null) => void
+  /**
+   * Returns a policy verdict so this module can emit an error control_response
+   * without importing the policy checks itself (bootstrap-isolation constraint).
+   * The callback must guard `auto` (isAutoModeGateEnabled) and
+   * `bypassPermissions` (isBypassPermissionsModeDisabled AND
+   * isBypassPermissionsModeAvailable) BEFORE calling transitionPermissionMode —
+   * that function's internal auto-gate check is a defensive throw, not a
+   * graceful guard, and its side-effect order is setAutoModeActive(true) then
+   * throw, which corrupts the 3-way invariant documented in src/CLAUDE.md if
+   * the callback lets the throw escape here.
+   */
+  onSetPermissionMode?: (
+    mode: PermissionMode,
+  ) => { ok: true } | { ok: false; error: string }
+  /** Observer for BridgeState transitions; 'failed' carries an error detail. */
+  onStateChange?: (state: BridgeState, detail?: string) => void
+  /**
+   * Fires on each real user message to flow through writeMessages() until
+   * the callback returns true (done). Mirrors remoteBridgeCore.ts's
+   * onUserMessage so the REPL bridge can derive a session title from early
+   * prompts when none was set at init time (e.g. user runs /remote-control
+   * on an empty conversation, then types). Tool-result wrappers, meta
+   * messages, and display-tag-only messages are skipped. Receives
+   * currentSessionId so the wrapper can PATCH the title without a closure
+   * dance to reach the not-yet-returned handle. The caller owns the
+   * derive-at-count-1-and-3 policy; the transport just keeps calling until
+   * told to stop. Not fired for the writeSdkMessages daemon path (daemon
+   * sets its own title at init). Distinct from SessionSpawnOpts's
+   * onFirstUserMessage (spawn-bridge, PR #21250), which stays fire-once.
+   */
+  onUserMessage?: (text: string, sessionId: string) => boolean
+  /** See InitBridgeOptions.perpetual. */
+  perpetual?: boolean
+  /**
+   * Seeds lastTransportSequenceNum — the SSE event-stream high-water mark
+   * that's carried across transport swaps within one process. Daemon callers
+   * pass the value they persisted at shutdown so the FIRST SSE connect of a
+   * fresh process sends from_sequence_num and the server doesn't replay full
+   * history. REPL callers omit (fresh session each run → 0 is correct).
+   */
+  initialSSESequenceNum?: number
+}
+
+/**
+ * Superset of ReplBridgeHandle. Adds getSSESequenceNum for daemon callers
+ * that persist the SSE seq-num across process restarts and pass it back as
+ * initialSSESequenceNum on the next start. REPL callers can ignore it — a
+ * fresh session each run starts the stream at 0.
+ */
+export type BridgeCoreHandle = ReplBridgeHandle & {
+  /**
+   * Current SSE sequence-number high-water mark. Updates as transports
+   * swap. Daemon callers persist this on shutdown and pass it back as
+   * initialSSESequenceNum on next start.
+   */
+  getSSESequenceNum(): number
+}
+
+/**
+ * Poll error recovery constants. When the work poll starts failing (e.g.
+ * server 500s), we use exponential backoff and give up after this timeout.
+ * This is deliberately long — the server is the authority on when a session
+ * is truly dead. As long as the server accepts our poll, we keep waiting
+ * for it to re-dispatch the work item.
+ */
+const POLL_ERROR_INITIAL_DELAY_MS = 2_000 // first backoff step
+const POLL_ERROR_MAX_DELAY_MS = 60_000 // backoff ceiling per retry
+const POLL_ERROR_GIVE_UP_MS = 15 * 60 * 1000 // total retry budget before giving up
+
+// Monotonically increasing counter for distinguishing init calls in logs
+let initSequence = 0
+
+/**
+ * Bootstrap-free core: env registration → session creation → poll loop →
+ * ingress WS → teardown. Reads nothing from bootstrap/state or
+ * sessionStorage — all context comes from params. Caller (initReplBridge
+ * below, or a daemon in PR 4) has already passed entitlement gates and
+ * gathered git/auth/title.
+ *
+ * Returns null on registration or session-creation failure.
+ */
+export async function initBridgeCore(
+  params: BridgeCoreParams,
+): Promise<BridgeCoreHandle | null> {
+  const {
+    dir,
+    machineName,
+    branch,
+    gitRepoUrl,
+    title,
+    baseUrl,
+    sessionIngressUrl,
+    workerType,
+    getAccessToken,
+    createSession,
+    archiveSession,
+    getCurrentTitle = () => title,
+    toSDKMessages = () => {
+      throw new Error(
+        'BridgeCoreParams.toSDKMessages not provided. Pass it if you use writeMessages() or initialMessages — daemon callers that only use writeSdkMessages() never hit this path.',
+      )
+    },
+    onAuth401,
+    getPollIntervalConfig = () => DEFAULT_POLL_CONFIG,
+    initialHistoryCap = 200,
+    initialMessages,
+    previouslyFlushedUUIDs,
+    onInboundMessage,
+    onPermissionResponse,
+    onInterrupt,
+    onSetModel,
+    onSetMaxThinkingTokens,
+    onSetPermissionMode,
+    onStateChange,
+    onUserMessage,
+    perpetual,
+    initialSSESequenceNum = 0,
+  } = params
+
+  const seq = ++initSequence
+
+  // bridgePointer import hoisted: perpetual mode reads it before register;
+  // non-perpetual writes it after session create; both use clear at teardown.
+  const { writeBridgePointer, clearBridgePointer, readBridgePointer } =
+    await import('./bridgePointer.js')
+
+  // Perpetual mode: read the crash-recovery pointer and treat it as prior
+  // state. The pointer is written unconditionally after session create
+  // (crash-recovery for all sessions); perpetual mode just skips the
+  // teardown clear so it survives clean exits too. Only reuse 'repl'
+  // pointers — a crashed standalone bridge (`claude remote-control`)
+  // writes source:'standalone' with a different workerType.
+  const rawPrior = perpetual ? await readBridgePointer(dir) : null
+  const prior = rawPrior?.source === 'repl' ? rawPrior : null
+
+  logForDebugging(
+    `[bridge:repl] initBridgeCore #${seq} starting (initialMessages=${initialMessages?.length ?? 0}${prior ? ` perpetual prior=env:${prior.environmentId}` : ''})`,
+  )
+
+  // 5. Register bridge environment
+  const rawApi = createBridgeApiClient({
+    baseUrl,
+    getAccessToken,
+    runnerVersion: MACRO.VERSION,
+    onDebug: logForDebugging,
+    onAuth401,
+    getTrustedDeviceToken,
+  })
+  // Ant-only: interpose so /bridge-kick can inject poll/register/heartbeat
+  // failures. Zero cost in external builds (rawApi passes through unchanged).
+  const api =
+    process.env.USER_TYPE === 'ant' ? wrapApiForFaultInjection(rawApi) : rawApi
+
+  const bridgeConfig: BridgeConfig = {
+    dir,
+    machineName,
+    branch,
+    gitRepoUrl,
+    maxSessions: 1,
+    spawnMode: 'single-session',
+    verbose: false,
+    sandbox: false,
+    bridgeId: randomUUID(),
+    workerType,
+    environmentId: randomUUID(),
+    reuseEnvironmentId: prior?.environmentId,
+    apiBaseUrl: baseUrl,
+    sessionIngressUrl,
+  }
+
+  let environmentId: string
+  let environmentSecret: string
+  try {
+    const reg = await api.registerBridgeEnvironment(bridgeConfig)
+    environmentId = reg.environment_id
+    environmentSecret = reg.environment_secret
+  } catch (err) {
+    logBridgeSkip(
+      'registration_failed',
+      `[bridge:repl] Environment registration failed: ${errorMessage(err)}`,
+    )
+    // Stale pointer may be the cause (expired/deleted env) — clear it so
+    // the next start doesn't retry the same dead ID.
+    if (prior) {
+      await clearBridgePointer(dir)
+    }
+    onStateChange?.('failed', errorMessage(err))
+    return null
+  }
+
+  logForDebugging(`[bridge:repl] Environment registered: ${environmentId}`)
+  logForDiagnosticsNoPII('info', 'bridge_repl_env_registered')
+  logEvent('tengu_bridge_repl_env_registered', {})
+
+  /**
+   * Reconnect-in-place: if the just-registered environmentId matches what
+   * was requested, call reconnectSession to force-stop stale workers and
+   * re-queue the session. Used at init (perpetual mode — env is alive but
+   * idle after clean teardown) and in doReconnect() Strategy 1 (env lost
+   * then resurrected). Returns true on success; caller falls back to
+   * fresh session creation on false.
+   */
+  async function tryReconnectInPlace(
+    requestedEnvId: string,
+    sessionId: string,
+  ): Promise<boolean> {
+    if (environmentId !== requestedEnvId) {
+      logForDebugging(
+        `[bridge:repl] Env mismatch (requested ${requestedEnvId}, got ${environmentId}) — cannot reconnect in place`,
+      )
+      return false
+    }
+    // The pointer stores what createBridgeSession returned (session_*,
+    // compat/convert.go:41). /bridge/reconnect is an environments-layer
+    // endpoint — once the server's ccr_v2_compat_enabled gate is on it
+    // looks sessions up by their infra tag (cse_*) and returns "Session
+    // not found" for the session_* costume. We don't know the gate state
+    // pre-poll, so try both; the re-tag is a no-op if the ID is already
+    // cse_* (doReconnect Strategy 1 path — currentSessionId never mutates
+    // to cse_* but future-proof the check).
+    const infraId = toInfraSessionId(sessionId)
+    const candidates =
+      infraId === sessionId ? [sessionId] : [sessionId, infraId]
+    for (const id of candidates) {
+      try {
+        await api.reconnectSession(environmentId, id)
+        logForDebugging(
+          `[bridge:repl] Reconnected session ${id} in place on env ${environmentId}`,
+        )
+        return true
+      } catch (err) {
+        logForDebugging(
+          `[bridge:repl] reconnectSession(${id}) failed: ${errorMessage(err)}`,
+        )
+      }
+    }
+    logForDebugging(
+      '[bridge:repl] reconnectSession exhausted — falling through to fresh session',
+    )
+    return false
+  }
+
+  // Perpetual init: env is alive but has no queued work after clean
+  // teardown. reconnectSession re-queues it. doReconnect() has the same
+  // call but only fires on poll 404 (env dead);
+  // here the env is alive but idle.
+  const reusedPriorSession = prior
+    ? await tryReconnectInPlace(prior.environmentId, prior.sessionId)
+    : false
+  if (prior && !reusedPriorSession) {
+    await clearBridgePointer(dir)
+  }
+
+  // 6. Create session on the bridge. Initial messages are NOT included as
+  // session creation events because those use STREAM_ONLY persistence and
+  // are published before the CCR UI subscribes, so they get lost. Instead,
+  // initial messages are flushed via the ingress WebSocket once it connects.
+
+  // Mutable session ID — updated when the environment+session pair is
+  // re-created after a connection loss.
+  let currentSessionId: string
+
+
+  if (reusedPriorSession && prior) {
+    currentSessionId = prior.sessionId
+    logForDebugging(
+      `[bridge:repl] Perpetual session reused: ${currentSessionId}`,
+    )
+    // Server already has all initialMessages from the prior CLI run. Mark
+    // them as previously-flushed so the initial flush filter excludes them
+    // (previouslyFlushedUUIDs is a fresh Set on every CLI start). Duplicate
+    // UUIDs cause the server to kill the WebSocket.
+    if (initialMessages && previouslyFlushedUUIDs) {
+      for (const msg of initialMessages) {
+        previouslyFlushedUUIDs.add(msg.uuid)
+      }
+    }
+  } else {
+    const createdSessionId = await createSession({
+      environmentId,
+      title,
+      gitRepoUrl,
+      branch,
+      signal: AbortSignal.timeout(15_000),
+    })
+
+    if (!createdSessionId) {
+      logForDebugging(
+        '[bridge:repl] Session creation failed, deregistering environment',
+      )
+      logEvent('tengu_bridge_repl_session_failed', {})
+      await api.deregisterEnvironment(environmentId).catch(() => {})
+      onStateChange?.('failed', 'Session creation failed')
+      return null
+    }
+
+    currentSessionId = createdSessionId
+    logForDebugging(`[bridge:repl] Session created: ${currentSessionId}`)
+  }
+
+  // Crash-recovery pointer: written now so a kill -9 at any point after
+  // this leaves a recoverable trail. Cleared in teardown (non-perpetual)
+  // or left alone (perpetual mode — pointer survives clean exit too).
+  // `claude remote-control --continue` from the same directory will detect
+  // it and offer to resume.
+  await writeBridgePointer(dir, {
+    sessionId: currentSessionId,
+    environmentId,
+    source: 'repl',
+  })
+  logForDiagnosticsNoPII('info', 'bridge_repl_session_created')
+  logEvent('tengu_bridge_repl_started', {
+    has_initial_messages: !!(initialMessages && initialMessages.length > 0),
+    inProtectedNamespace: isInProtectedNamespace(),
+  })
+
+  // UUIDs of initial messages. Used for dedup in writeMessages to avoid
+  // re-sending messages that were already flushed on WebSocket open.
+  const initialMessageUUIDs = new Set<string>()
+  if (initialMessages) {
+    for (const msg of initialMessages) {
+      initialMessageUUIDs.add(msg.uuid)
+    }
+  }
+
+  // Bounded ring buffer of UUIDs for messages we've already sent to the
+  // server via the ingress WebSocket. Serves two purposes:
+  //  1. Echo filtering — ignore our own messages bouncing back on the WS.
+  //  2. Secondary dedup in writeMessages — catch race conditions where
+  //     the hook's index-based tracking isn't sufficient.
+  //
+  // Seeded with initialMessageUUIDs so that when the server echoes back
+  // the initial conversation context over the ingress WebSocket, those
+  // messages are recognized as echoes and not re-injected into the REPL.
+  //
+  // Capacity of 2000 covers well over any realistic echo window (echoes
+  // arrive within milliseconds) and any messages that might be re-encountered
+  // after compaction. The hook's lastWrittenIndexRef is the primary dedup;
+  // this is a safety net.
+  const recentPostedUUIDs = new BoundedUUIDSet(2000)
+  for (const uuid of initialMessageUUIDs) {
+    recentPostedUUIDs.add(uuid)
+  }
+
+  // Bounded set of INBOUND prompt UUIDs we've already forwarded to the REPL.
+  // Defensive dedup for when the server re-delivers prompts (seq-num
+  // negotiation failure, server edge cases, transport swap races). The
+  // seq-num carryover below is the primary fix; this is the safety net.
+  const recentInboundUUIDs = new BoundedUUIDSet(2000)
+
+  // 7. Start poll loop for work items — this is what makes the session
+  // "live" on claude.ai. When a user types there, the backend dispatches
+  // a work item to our environment. We poll for it, get the ingress token,
+  // and connect the ingress WebSocket.
+  //
+  // The poll loop keeps running: when work arrives it connects the ingress
+  // WebSocket, and if the WebSocket drops unexpectedly (code != 1000) it
+  // resumes polling to get a fresh ingress token and reconnect.
+  const pollController = new AbortController()
+  // Adapter over either HybridTransport (v1: WS reads + POST writes to
+  // Session-Ingress) or SSETransport+CCRClient (v2: SSE reads + POST
+  // writes to CCR /worker/*). The v1/v2 choice is made in onWorkReceived:
+  // server-driven via secret.use_code_sessions, with CLAUDE_BRIDGE_USE_CCR_V2
+  // as an ant-dev override.
+  let transport: ReplBridgeTransport | null = null
+  // Bumped on every onWorkReceived. Captured in createV2ReplTransport's .then()
+  // closure to detect stale resolutions: if two calls race while transport is
+  // null, both registerWorker() (bumping server epoch), and whichever resolves
+  // SECOND is the correct one — but the transport !== null check gets this
+  // backwards (first-to-resolve installs, second discards). The generation
+  // counter catches it independent of transport state.
  let v2Generation = 0 // bumped by doReconnect to invalidate stale in-flight v2 handshakes
  // SSE sequence-number high-water mark carried across transport swaps.
  // Without this, each new SSETransport starts at 0, sends no
  // from_sequence_num / Last-Event-ID on its first connect, and the server
  // replays the entire session event history — every prompt ever sent
  // re-delivered as fresh inbound messages on every onWorkReceived.
  //
  // Seed only when we actually reconnected the prior session. If
  // `reusedPriorSession` is false we fell through to `createSession()` —
  // the caller's persisted seq-num belongs to a dead session and applying
  // it to the fresh stream (starting at 1) silently drops events. Same
  // hazard as doReconnect Strategy 2; same fix as the reset there.
  let lastTransportSequenceNum = reusedPriorSession ? initialSSESequenceNum : 0
  // Track the current work ID so teardown can call stopWork
  let currentWorkId: string | null = null
  // Session ingress JWT for the current work item — used for heartbeat auth.
  let currentIngressToken: string | null = null
  // Signal to wake the at-capacity sleep early when the transport is lost,
  // so the poll loop immediately switches back to fast polling for new work.
  const capacityWake = createCapacityWake(pollController.signal)
  const wakePollLoop = capacityWake.wake
  const capacitySignal = capacityWake.signal
  // Gates message writes during the initial flush to prevent ordering
  // races where new messages arrive at the server interleaved with history.
  const flushGate = new FlushGate<Message>()

  // Latch for onUserMessage — flips true when the callback returns true
  // (policy says "done deriving"). If no callback, skip scanning entirely
  // (daemon path — no title derivation needed).
  let userMessageCallbackDone = !onUserMessage
  // Shared counter for environment re-creations, used by both
  // onEnvironmentLost and the abnormal-close handler.
  const MAX_ENVIRONMENT_RECREATIONS = 3
  let environmentRecreations = 0
  // In-flight doReconnect() promise — reentrancy guard so concurrent
  // callers of reconnectEnvironmentWithSession share a single attempt.
  let reconnectPromise: Promise<boolean> | null = null
+
+  /**
+   * Recover from onEnvironmentLost (poll returned 404 — env was reaped
+   * server-side). Tries two strategies in order:
+   *
+   *   1. Reconnect-in-place: idempotent re-register with reuseEnvironmentId
+   *      → if the backend returns the same env ID, call reconnectSession()
+   *      to re-queue the existing session. currentSessionId stays the same;
+   *      the URL on the user's phone stays valid; previouslyFlushedUUIDs is
+   *      preserved so history isn't re-sent.
+   *
+   *   2. Fresh session fallback: if the backend returns a different env ID
+   *      (original TTL-expired, e.g. laptop slept >4h) or reconnectSession()
+   *      throws, archive the old session and create a new one on the
+   *      now-registered env. Old behavior before #20460 primitives landed.
+   *
+   * Uses a promise-based reentrancy guard so concurrent callers share the
+   * same reconnection attempt.
+   */
+  async function reconnectEnvironmentWithSession(): Promise<boolean> {
+    if (reconnectPromise) {
+      return reconnectPromise
+    }
+    reconnectPromise = doReconnect()
+    try {
+      return await reconnectPromise
+    } finally {
+      reconnectPromise = null
+    }
+  }
+
  /**
   * The actual recovery work behind reconnectEnvironmentWithSession().
   *
   * Resolves true when the bridge is usable again (session reconnected in
   * place, the poll loop recovered on its own during one of the awaits, or
   * a fresh session was created), false when recovery failed or teardown
   * aborted it mid-flight. Returns false rather than throwing on every
   * failure path.
   */
  async function doReconnect(): Promise<boolean> {
    environmentRecreations++
    // Invalidate any in-flight v2 handshake — the environment is being
    // recreated, so a stale transport arriving post-reconnect would be
    // pointed at a dead session.
    v2Generation++
    logForDebugging(
      `[bridge:repl] Reconnecting after env lost (attempt ${environmentRecreations}/${MAX_ENVIRONMENT_RECREATIONS})`,
    )

    // NOTE(review): the counter is incremented before this check, so on the
    // give-up pass the log line above reads "attempt 4/3". Cosmetic only.
    if (environmentRecreations > MAX_ENVIRONMENT_RECREATIONS) {
      logForDebugging(
        `[bridge:repl] Environment reconnect limit reached (${MAX_ENVIRONMENT_RECREATIONS}), giving up`,
      )
      return false
    }

    // Close the stale transport. Capture seq BEFORE close — if Strategy 1
    // (tryReconnectInPlace) succeeds we keep the SAME session, and the
    // next transport must resume where this one left off, not replay from
    // the last transport-swap checkpoint.
    if (transport) {
      const seq = transport.getLastSequenceNum()
      if (seq > lastTransportSequenceNum) {
        lastTransportSequenceNum = seq
      }
      transport.close()
      transport = null
    }
    // Transport is gone — wake the poll loop out of its at-capacity
    // heartbeat sleep so it can fast-poll for re-dispatched work.
    wakePollLoop()
    // Reset flush gate so writeMessages() hits the !transport guard
    // instead of silently queuing into a dead buffer.
    flushGate.drop()

    // Release the current work item (force=false — we may want the session
    // back). Best-effort: the env is probably gone, so this likely 404s.
    if (currentWorkId) {
      const workIdBeingCleared = currentWorkId
      await api
        .stopWork(environmentId, workIdBeingCleared, false)
        .catch(() => {})
      // When doReconnect runs concurrently with the poll loop (ws_closed
      // handler case — void-called, unlike the awaited onEnvironmentLost
      // path), onWorkReceived can fire during the stopWork await and set
      // a fresh currentWorkId. If it did, the poll loop has already
      // recovered on its own — defer to it rather than proceeding to
      // archiveSession, which would destroy the session its new
      // transport is connected to.
      if (currentWorkId !== workIdBeingCleared) {
        logForDebugging(
          '[bridge:repl] Poll loop recovered during stopWork await — deferring to it',
        )
        environmentRecreations = 0
        return true
      }
      currentWorkId = null
      currentIngressToken = null
    }

    // Bail out if teardown started while we were awaiting
    if (pollController.signal.aborted) {
      logForDebugging('[bridge:repl] Reconnect aborted by teardown')
      return false
    }

    // Strategy 1: idempotent re-register with the server-issued env ID.
    // If the backend resurrects the same env (fresh secret), we can
    // reconnect the existing session. If it hands back a different ID, the
    // original env is truly gone and we fall through to a fresh session.
    const requestedEnvId = environmentId
    bridgeConfig.reuseEnvironmentId = requestedEnvId
    try {
      const reg = await api.registerBridgeEnvironment(bridgeConfig)
      environmentId = reg.environment_id
      environmentSecret = reg.environment_secret
    } catch (err) {
      // Clear the reuse hint on failure too — see the comment below.
      bridgeConfig.reuseEnvironmentId = undefined
      logForDebugging(
        `[bridge:repl] Environment re-registration failed: ${errorMessage(err)}`,
      )
      return false
    }
    // Clear before any await — a stale value would poison the next fresh
    // registration if doReconnect runs again.
    bridgeConfig.reuseEnvironmentId = undefined

    logForDebugging(
      `[bridge:repl] Re-registered: requested=${requestedEnvId} got=${environmentId}`,
    )

    // Bail out if teardown started while we were registering
    if (pollController.signal.aborted) {
      logForDebugging(
        '[bridge:repl] Reconnect aborted after env registration, cleaning up',
      )
      await api.deregisterEnvironment(environmentId).catch(() => {})
      return false
    }

    // Same race as above, narrower window: poll loop may have set up a
    // transport during the registerBridgeEnvironment await. Bail before
    // tryReconnectInPlace/archiveSession kill it server-side.
    if (transport !== null) {
      logForDebugging(
        '[bridge:repl] Poll loop recovered during registerBridgeEnvironment await — deferring to it',
      )
      environmentRecreations = 0
      return true
    }

    // Strategy 1: same helper as perpetual init. currentSessionId stays
    // the same on success; URL on mobile/web stays valid;
    // previouslyFlushedUUIDs preserved (no re-flush).
    if (await tryReconnectInPlace(requestedEnvId, currentSessionId)) {
      logEvent('tengu_bridge_repl_reconnected_in_place', {})
      environmentRecreations = 0
      return true
    }
    // Env differs → TTL-expired/reaped; or reconnect failed.
    // Don't deregister — we have a fresh secret for this env either way.
    if (environmentId !== requestedEnvId) {
      logEvent('tengu_bridge_repl_env_expired_fresh_session', {})
    }

    // Strategy 2: fresh session on the now-registered environment.
    // Archive the old session first — it's orphaned (bound to a dead env,
    // or reconnectSession rejected it). Don't deregister the env — we just
    // got a fresh secret for it and are about to use it.
    await archiveSession(currentSessionId)

    // Bail out if teardown started while we were archiving
    if (pollController.signal.aborted) {
      logForDebugging(
        '[bridge:repl] Reconnect aborted after archive, cleaning up',
      )
      await api.deregisterEnvironment(environmentId).catch(() => {})
      return false
    }

    // Re-read the current title in case the user renamed the session.
    // REPL wrapper reads session storage; daemon wrapper returns the
    // original title (nothing to refresh).
    const currentTitle = getCurrentTitle()

    // Create a new session on the now-registered environment
    const newSessionId = await createSession({
      environmentId,
      title: currentTitle,
      gitRepoUrl,
      branch,
      signal: AbortSignal.timeout(15_000),
    })

    // NOTE(review): on this failure path currentSessionId still names the
    // session archived above; callers appear to treat false as fatal and
    // tear down (see the ws_closed handler) — confirm no caller retries
    // against the stale ID.
    if (!newSessionId) {
      logForDebugging(
        '[bridge:repl] Session creation failed during reconnection',
      )
      return false
    }

    // Bail out if teardown started during session creation (up to 15s)
    if (pollController.signal.aborted) {
      logForDebugging(
        '[bridge:repl] Reconnect aborted after session creation, cleaning up',
      )
      await archiveSession(newSessionId)
      return false
    }

    currentSessionId = newSessionId
    // Re-publish to the PID file so peer dedup (peerRegistry.ts) picks up the
    // new ID — setReplBridgeHandle only fires at init/teardown, not reconnect.
    void updateSessionBridgeId(toCompatSessionId(newSessionId)).catch(() => {})
    // Reset per-session transport state IMMEDIATELY after the session swap,
    // before any await. If this runs after `await writeBridgePointer` below,
    // there's a window where handle.bridgeSessionId already returns session B
    // but getSSESequenceNum() still returns session A's seq — a daemon
    // persistState() in that window writes {bridgeSessionId: B, seq: OLD_A},
    // which PASSES the session-ID validation check and defeats it entirely.
    //
    // The SSE seq-num is scoped to the session's event stream — carrying it
    // over leaves the transport's lastSequenceNum stuck high (seq only
    // advances when received > last), and its next internal reconnect would
    // send from_sequence_num=OLD_SEQ against a stream starting at 1 → all
    // events in the gap silently dropped. Inbound UUID dedup is also
    // session-scoped.
    lastTransportSequenceNum = 0
    recentInboundUUIDs.clear()
    // Title derivation is session-scoped too: if the user typed during the
    // createSession await above, the callback fired against the OLD archived
    // session ID (PATCH lost) and the new session got `currentTitle` captured
    // BEFORE they typed. Reset so the next prompt can re-derive. Self-
    // correcting: if the caller's policy is already done (explicit title or
    // count ≥ 3), it returns true on the first post-reset call and re-latches.
    userMessageCallbackDone = !onUserMessage
    logForDebugging(`[bridge:repl] Re-created session: ${currentSessionId}`)

    // Rewrite the crash-recovery pointer with the new IDs so a crash after
    // this point resumes the right session. (The reconnect-in-place path
    // above doesn't touch the pointer — same session, same env.)
    await writeBridgePointer(dir, {
      sessionId: currentSessionId,
      environmentId,
      source: 'repl',
    })

    // Clear flushed UUIDs so initial messages are re-sent to the new session.
    // UUIDs are scoped per-session on the server, so re-flushing is safe.
    previouslyFlushedUUIDs?.clear()


    // Reset the counter so independent reconnections hours apart don't
    // exhaust the limit — it guards against rapid consecutive failures,
    // not lifetime total.
    environmentRecreations = 0

    return true
  }
+
+  // Helper: get the current OAuth access token for session ingress auth.
+  // Unlike the JWT path, OAuth tokens are refreshed by the standard OAuth
+  // flow — no proactive scheduler needed.
+  function getOAuthToken(): string | undefined {
+    return getAccessToken()
+  }
+
+  // Drain any messages that were queued during the initial flush.
+  // Called after writeBatch completes (or fails) so queued messages
+  // are sent in order after the historical messages.
+  function drainFlushGate(): void {
+    const msgs = flushGate.end()
+    if (msgs.length === 0) return
+    if (!transport) {
+      logForDebugging(
+        `[bridge:repl] Cannot drain ${msgs.length} pending message(s): no transport`,
+      )
+      return
+    }
+    for (const msg of msgs) {
+      recentPostedUUIDs.add(msg.uuid)
+    }
+    const sdkMessages = toSDKMessages(msgs)
+    const events = sdkMessages.map(sdkMsg => ({
+      ...sdkMsg,
+      session_id: currentSessionId,
+    }))
+    logForDebugging(
+      `[bridge:repl] Drained ${msgs.length} pending message(s) after flush`,
+    )
+    void transport.writeBatch(events)
+  }
+
+  // Teardown reference — set after definition below. All callers are async
+  // callbacks that run after assignment, so the reference is always valid.
+  let doTeardownImpl: (() => Promise<void>) | null = null
+  function triggerTeardown(): void {
+    void doTeardownImpl?.()
+  }
+
+  /**
+   * Body of the transport's setOnClose callback, hoisted to initBridgeCore
+   * scope so /bridge-kick can fire it directly. setOnClose wraps this with
+   * a stale-transport guard; debugFireClose calls it bare.
+   *
+   * With autoReconnect:true, this only fires on: clean close (1000),
+   * permanent server rejection (4001/1002/4003), or 10-min budget
+   * exhaustion. Transient drops are retried internally by the transport.
+   */
+  function handleTransportPermanentClose(closeCode: number | undefined): void {
+    logForDebugging(
+      `[bridge:repl] Transport permanently closed: code=${closeCode}`,
+    )
+    logEvent('tengu_bridge_repl_ws_closed', {
+      code: closeCode,
+    })
+    // Capture SSE seq high-water mark before nulling. When called from
+    // setOnClose the guard guarantees transport !== null; when fired from
+    // /bridge-kick it may already be null (e.g. fired twice) — skip.
+    if (transport) {
+      const closedSeq = transport.getLastSequenceNum()
+      if (closedSeq > lastTransportSequenceNum) {
+        lastTransportSequenceNum = closedSeq
+      }
+      transport = null
+    }
+    // Transport is gone — wake the poll loop out of its at-capacity
+    // heartbeat sleep so it's fast-polling by the time the reconnect
+    // below completes and the server re-queues work.
+    wakePollLoop()
+    // Reset flush state so writeMessages() hits the !transport guard
+    // (with a warning log) instead of silently queuing into a buffer
+    // that will never be drained. Unlike onWorkReceived (which
+    // preserves pending messages for the new transport), onClose is
+    // a permanent close — no new transport will drain these.
+    const dropped = flushGate.drop()
+    if (dropped > 0) {
+      logForDebugging(
+        `[bridge:repl] Dropping ${dropped} pending message(s) on transport close (code=${closeCode})`,
+        { level: 'warn' },
+      )
+    }
+
+    if (closeCode === 1000) {
+      // Clean close — session ended normally. Tear down the bridge.
+      onStateChange?.('failed', 'session ended')
+      pollController.abort()
+      triggerTeardown()
+      return
+    }
+
+    // Transport reconnect budget exhausted or permanent server
+    // rejection. By this point the env has usually been reaped
+    // server-side (BQ 2026-03-12: ~98% of ws_closed never recover
+    // via poll alone). stopWork(force=false) can't re-dispatch work
+    // from an archived env; reconnectEnvironmentWithSession can
+    // re-activate it via POST /bridge/reconnect, or fall through
+    // to a fresh session if the env is truly gone. The poll loop
+    // (already woken above) picks up the re-queued work once
+    // doReconnect completes.
+    onStateChange?.(
+      'reconnecting',
+      `Remote Control connection lost (code ${closeCode})`,
+    )
+    logForDebugging(
+      `[bridge:repl] Transport reconnect budget exhausted (code=${closeCode}), attempting env reconnect`,
+    )
+    void reconnectEnvironmentWithSession().then(success => {
+      if (success) return
+      // doReconnect has four abort-check return-false sites for
+      // teardown-in-progress. Don't pollute the BQ failure signal
+      // or double-teardown when the user just quit.
+      if (pollController.signal.aborted) return
+      // doReconnect returns false (never throws) on genuine failure.
+      // The dangerous case: registerBridgeEnvironment succeeded (so
+      // environmentId now points at a fresh valid env) but
+      // createSession failed — poll loop would poll a sessionless
+      // env getting null work with no errors, never hitting any
+      // give-up path. Tear down explicitly.
+      logForDebugging(
+        '[bridge:repl] reconnectEnvironmentWithSession resolved false — tearing down',
+      )
+      logEvent('tengu_bridge_repl_reconnect_failed', {
+        close_code: closeCode,
+      })
+      onStateChange?.('failed', 'reconnection failed')
+      triggerTeardown()
+    })
+  }
+
+  // Ant-only: SIGUSR2 → force doReconnect() for manual testing. Skips the
+  // ~30s poll wait — fire-and-observe in the debug log immediately.
+  // Windows has no USR signals; `process.on` would throw there.
+  let sigusr2Handler: (() => void) | undefined
+  if (process.env.USER_TYPE === 'ant' && process.platform !== 'win32') {
+    sigusr2Handler = () => {
+      logForDebugging(
+        '[bridge:repl] SIGUSR2 received — forcing doReconnect() for testing',
+      )
+      void reconnectEnvironmentWithSession()
+    }
+    process.on('SIGUSR2', sigusr2Handler)
+  }
+
+  // Ant-only: /bridge-kick fault injection. handleTransportPermanentClose
+  // is defined below and assigned into this slot so the slash command can
+  // invoke it directly — the real setOnClose callback is buried inside
+  // wireTransport which is itself inside onWorkReceived.
+  let debugFireClose: ((code: number) => void) | null = null
+  if (process.env.USER_TYPE === 'ant') {
+    registerBridgeDebugHandle({
+      fireClose: code => {
+        if (!debugFireClose) {
+          logForDebugging('[bridge:debug] fireClose: no transport wired yet')
+          return
+        }
+        logForDebugging(`[bridge:debug] fireClose(${code}) — injecting`)
+        debugFireClose(code)
+      },
+      forceReconnect: () => {
+        logForDebugging('[bridge:debug] forceReconnect — injecting')
+        void reconnectEnvironmentWithSession()
+      },
+      injectFault: injectBridgeFault,
+      wakePollLoop,
+      describe: () =>
+        `env=${environmentId} session=${currentSessionId} transport=${transport?.getStateLabel() ?? 'null'} workId=${currentWorkId ?? 'null'}`,
+    })
+  }
+
  const pollOpts = {
    api,
    // Arrow re-reads the closure vars on every call, so doReconnect's
    // re-registered env ID/secret take effect without re-wiring the loop.
    getCredentials: () => ({ environmentId, environmentSecret }),
    signal: pollController.signal,
    getPollIntervalConfig,
    onStateChange,
    getWsState: () => transport?.getStateLabel() ?? 'null',
    // REPL bridge is single-session: having any transport == at capacity.
    // No need to check isConnectedStatus() — even while the transport is
    // auto-reconnecting internally (up to 10 min), poll is heartbeat-only.
    isAtCapacity: () => transport !== null,
    capacitySignal,
    onFatalError: triggerTeardown,
    // Null when no work item is active — both fields are work-scoped
    // (set in onWorkReceived, cleared on the teardown/fatal paths).
    getHeartbeatInfo: () => {
      if (!currentWorkId || !currentIngressToken) {
        return null
      }
      return {
        environmentId,
        workId: currentWorkId,
        sessionToken: currentIngressToken,
      }
    },
+    // Work-item JWT expired (or work gone). The transport is useless —
+    // SSE reconnects and CCR writes use the same stale token. Without
+    // this callback the poll loop would do a 10-min at-capacity backoff,
+    // during which the work lease (300s TTL) expires and the server stops
+    // forwarding prompts → ~25-min dead window observed in daemon logs.
+    // Kill the transport + work state so isAtCapacity()=false; the loop
+    // fast-polls and picks up the server's re-dispatched work in seconds.
+    onHeartbeatFatal: (err: BridgeFatalError) => {
+      logForDebugging(
+        `[bridge:repl] heartbeatWork fatal (status=${err.status}) — tearing down work item for fast re-dispatch`,
+      )
+      if (transport) {
+        const seq = transport.getLastSequenceNum()
+        if (seq > lastTransportSequenceNum) {
+          lastTransportSequenceNum = seq
+        }
+        transport.close()
+        transport = null
+      }
+      flushGate.drop()
+      // force=false → server re-queues. Likely already expired, but
+      // idempotent and makes re-dispatch immediate if not.
+      if (currentWorkId) {
+        void api
+          .stopWork(environmentId, currentWorkId, false)
+          .catch((e: unknown) => {
+            logForDebugging(
+              `[bridge:repl] stopWork after heartbeat fatal: ${errorMessage(e)}`,
+            )
+          })
+      }
+      currentWorkId = null
+      currentIngressToken = null
+      wakePollLoop()
+      onStateChange?.(
+        'reconnecting',
+        'Work item lease expired, fetching fresh token',
+      )
+    },
+    async onEnvironmentLost() {
+      const success = await reconnectEnvironmentWithSession()
+      if (!success) {
+        return null
+      }
+      return { environmentId, environmentSecret }
+    },
+    onWorkReceived: (
+      workSessionId: string,
+      ingressToken: string,
+      workId: string,
+      serverUseCcrV2: boolean,
+    ) => {
+      // When new work arrives while a transport is already open, the
+      // server has decided to re-dispatch (e.g. token rotation, server
+      // restart). Close the existing transport and reconnect — discarding
+      // the work causes a stuck 'reconnecting' state if the old WS dies
+      // shortly after (the server won't re-dispatch a work item it
+      // already delivered).
+      // ingressToken (JWT) is stored for heartbeat auth (both v1 and v2).
+      // Transport auth diverges — see the v1/v2 split below.
+      if (transport?.isConnectedStatus()) {
+        logForDebugging(
+          `[bridge:repl] Work received while transport connected, replacing with fresh token (workId=${workId})`,
+        )
+      }
+
+      logForDebugging(
+        `[bridge:repl] Work received: workId=${workId} workSessionId=${workSessionId} currentSessionId=${currentSessionId} match=${sameSessionId(workSessionId, currentSessionId)}`,
+      )
+
+      // Refresh the crash-recovery pointer's mtime. Staleness checks file
+      // mtime (not embedded timestamp) so this re-write bumps the clock —
+      // a 5h+ session that crashes still has a fresh pointer. Fires once
+      // per work dispatch (infrequent — bounded by user message rate).
+      void writeBridgePointer(dir, {
+        sessionId: currentSessionId,
+        environmentId,
+        source: 'repl',
+      })
+
+      // Reject foreign session IDs — the server shouldn't assign sessions
+      // from other environments. Since we create env+session as a pair,
+      // a mismatch indicates an unexpected server-side reassignment.
+      //
+      // Compare by underlying UUID, not by tagged-ID prefix. When CCR
+      // v2's compat layer serves the session, createBridgeSession gets
+      // session_* from the v1-facing API (compat/convert.go:41) but the
+      // infrastructure layer delivers cse_* in the work queue
+      // (container_manager.go:129). Same UUID, different tag.
+      if (!sameSessionId(workSessionId, currentSessionId)) {
+        logForDebugging(
+          `[bridge:repl] Rejecting foreign session: expected=${currentSessionId} got=${workSessionId}`,
+        )
+        return
+      }
+
+      currentWorkId = workId
+      currentIngressToken = ingressToken
+
+      // Server decides per-session (secret.use_code_sessions from the work
+      // secret, threaded through runWorkPollLoop). The env var is an ant-dev
+      // override for forcing v2 before the server flag is on for your user —
+      // requires ccr_v2_compat_enabled server-side or registerWorker 404s.
+      //
+      // Kept separate from CLAUDE_CODE_USE_CCR_V2 (the child-SDK transport
+      // selector set by sessionRunner/environment-manager) to avoid the
+      // inheritance hazard in spawn mode where the parent's orchestrator
+      // var would leak into a v1 child.
+      const useCcrV2 =
+        serverUseCcrV2 || isEnvTruthy(process.env.CLAUDE_BRIDGE_USE_CCR_V2)
+
+      // Auth is the one place v1 and v2 diverge hard:
+      //
+      // - v1 (Session-Ingress): accepts OAuth OR JWT. We prefer OAuth
+      //   because the standard OAuth refresh flow handles expiry — no
+      //   separate JWT refresh scheduler needed.
+      //
+      // - v2 (CCR /worker/*): REQUIRES the JWT. register_worker.go:32
+      //   validates the session_id claim, which OAuth tokens don't carry.
+      //   The JWT from the work secret has both that claim and the worker
+      //   role (environment_auth.py:856). JWT refresh: when it expires the
+      //   server re-dispatches work with a fresh one, and onWorkReceived
+      //   fires again. createV2ReplTransport stores it via
+      //   updateSessionIngressAuthToken() before touching the network.
+      let v1OauthToken: string | undefined
+      if (!useCcrV2) {
+        v1OauthToken = getOAuthToken()
+        if (!v1OauthToken) {
+          logForDebugging(
+            '[bridge:repl] No OAuth token available for session ingress, skipping work',
+          )
+          return
+        }
+        updateSessionIngressAuthToken(v1OauthToken)
+      }
+      logEvent('tengu_bridge_repl_work_received', {})
+
+      // Close the previous transport. Nullify BEFORE calling close() so
+      // the close callback doesn't treat the programmatic close as
+      // "session ended normally" and trigger a full teardown.
+      if (transport) {
+        const oldTransport = transport
+        transport = null
+        // Capture the SSE sequence high-water mark so the next transport
+        // resumes the stream instead of replaying from seq 0. Use max() —
+        // a transport that died early (never received any frames) would
+        // otherwise reset a non-zero mark back to 0.
+        const oldSeq = oldTransport.getLastSequenceNum()
+        if (oldSeq > lastTransportSequenceNum) {
+          lastTransportSequenceNum = oldSeq
+        }
+        oldTransport.close()
+      }
+      // Reset flush state — the old flush (if any) is no longer relevant.
+      // Preserve pending messages so they're drained after the new
+      // transport's flush completes (the hook has already advanced its
+      // lastWrittenIndex and won't re-send them).
+      flushGate.deactivate()
+
+      // Closure adapter over the shared handleServerControlRequest —
+      // captures transport/currentSessionId so the transport.setOnData
+      // callback below doesn't need to thread them through.
+      const onServerControlRequest = (request: SDKControlRequest): void =>
+        handleServerControlRequest(request, {
+          transport,
+          sessionId: currentSessionId,
+          onInterrupt,
+          onSetModel,
+          onSetMaxThinkingTokens,
+          onSetPermissionMode,
+        })
+
+      let initialFlushDone = false
+
+      // Wire callbacks onto a freshly constructed transport and connect.
+      // Extracted so the (sync) v1 and (async) v2 construction paths can
+      // share the identical callback + flush machinery.
+      const wireTransport = (newTransport: ReplBridgeTransport): void => {
+        transport = newTransport
+
+        newTransport.setOnConnect(() => {
+          // Guard: if transport was replaced by a newer onWorkReceived call
+          // while the WS was connecting, ignore this stale callback.
+          if (transport !== newTransport) return
+
+          logForDebugging('[bridge:repl] Ingress transport connected')
+          logEvent('tengu_bridge_repl_ws_connected', {})
+
+          // Update the env var with the latest OAuth token so POST writes
+          // (which read via getSessionIngressAuthToken()) use a fresh token.
+          // v2 skips this — createV2ReplTransport already stored the JWT,
+          // and overwriting it with OAuth would break subsequent /worker/*
+          // requests (session_id claim check).
+          if (!useCcrV2) {
+            const freshToken = getOAuthToken()
+            if (freshToken) {
+              updateSessionIngressAuthToken(freshToken)
+            }
+          }
+
+          // Reset teardownStarted so future teardowns are not blocked.
+          teardownStarted = false
+
+          // Flush initial messages only on first connect, not on every
+          // WS reconnection. Re-flushing would cause duplicate messages.
+          // IMPORTANT: onStateChange('connected') is deferred until the
+          // flush completes. This prevents writeMessages() from sending
+          // new messages that could arrive at the server interleaved with
+          // the historical messages, and delays the web UI from showing
+          // the session as active until history is persisted.
+          if (
+            !initialFlushDone &&
+            initialMessages &&
+            initialMessages.length > 0
+          ) {
+            initialFlushDone = true
+
+            // Cap the initial flush to the most recent N messages. The full
+            // history is UI-only (model doesn't see it) and large replays cause
+            // slow session-ingress persistence (each event is a threadstore write)
+            // plus elevated Firestore pressure. A 0 or negative cap disables it.
+            const historyCap = initialHistoryCap
+            const eligibleMessages = initialMessages.filter(
+              m =>
+                isEligibleBridgeMessage(m) &&
+                !previouslyFlushedUUIDs?.has(m.uuid),
+            )
+            const cappedMessages =
+              historyCap > 0 && eligibleMessages.length > historyCap
+                ? eligibleMessages.slice(-historyCap)
+                : eligibleMessages
+            if (cappedMessages.length < eligibleMessages.length) {
+              logForDebugging(
+                `[bridge:repl] Capped initial flush: ${eligibleMessages.length} -> ${cappedMessages.length} (cap=${historyCap})`,
+              )
+              logEvent('tengu_bridge_repl_history_capped', {
+                eligible_count: eligibleMessages.length,
+                capped_count: cappedMessages.length,
+              })
+            }
+            const sdkMessages = toSDKMessages(cappedMessages)
+            if (sdkMessages.length > 0) {
+              logForDebugging(
+                `[bridge:repl] Flushing ${sdkMessages.length} initial message(s) via transport`,
+              )
+              const events = sdkMessages.map(sdkMsg => ({
+                ...sdkMsg,
+                session_id: currentSessionId,
+              }))
+              const dropsBefore = newTransport.droppedBatchCount
+              void newTransport
+                .writeBatch(events)
+                .then(() => {
+                  // If any batch was dropped during this flush (SI down for
+                  // maxConsecutiveFailures attempts), flush() still resolved
+                  // normally but the events were NOT delivered. Don't mark
+                  // UUIDs as flushed — keep them eligible for re-send on the
+                  // next onWorkReceived (JWT refresh re-dispatch, line ~1144).
+                  if (newTransport.droppedBatchCount > dropsBefore) {
+                    logForDebugging(
+                      `[bridge:repl] Initial flush dropped ${newTransport.droppedBatchCount - dropsBefore} batch(es) — not marking ${sdkMessages.length} UUID(s) as flushed`,
+                    )
+                    return
+                  }
+                  if (previouslyFlushedUUIDs) {
+                    for (const sdkMsg of sdkMessages) {
+                      if (sdkMsg.uuid) {
+                        previouslyFlushedUUIDs.add(sdkMsg.uuid)
+                      }
+                    }
+                  }
+                })
+                .catch(e =>
+                  logForDebugging(`[bridge:repl] Initial flush failed: ${e}`),
+                )
+                .finally(() => {
+                  // Guard: if transport was replaced during the flush,
+                  // don't signal connected or drain — the new transport
+                  // owns the lifecycle now.
+                  if (transport !== newTransport) return
+                  drainFlushGate()
+                  onStateChange?.('connected')
+                })
+            } else {
+              // All initial messages were already flushed (filtered by
+              // previouslyFlushedUUIDs). No flush POST needed — clear
+              // the flag and signal connected immediately. This is the
+              // first connect for this transport (inside !initialFlushDone),
+              // so no flush POST is in-flight — the flag was set before
+              // connect() and must be cleared here.
+              drainFlushGate()
+              onStateChange?.('connected')
+            }
+          } else if (!flushGate.active) {
+            // No initial messages or already flushed on first connect.
+            // WS auto-reconnect path — only signal connected if no flush
+            // POST is in-flight. If one is, .finally() owns the lifecycle.
+            onStateChange?.('connected')
+          }
+        })
+
+        // Route every inbound frame through the shared ingress handler,
+        // which dedups echoes against recentPostedUUIDs/recentInboundUUIDs
+        // and dispatches to the message / permission / control callbacks.
+        newTransport.setOnData(data => {
+          handleIngressMessage(
+            data,
+            recentPostedUUIDs,
+            recentInboundUUIDs,
+            onInboundMessage,
+            onPermissionResponse,
+            onServerControlRequest,
+          )
+        })
+
+        // Body lives at initBridgeCore scope so /bridge-kick can call it
+        // directly via debugFireClose. All referenced closures (transport,
+        // wakePollLoop, flushGate, reconnectEnvironmentWithSession, etc.)
+        // are already at that scope. The only lexical dependency on
+        // wireTransport was `newTransport.getLastSequenceNum()` — but after
+        // the guard below passes we know transport === newTransport.
+        debugFireClose = handleTransportPermanentClose
+        newTransport.setOnClose(closeCode => {
+          // Guard: if transport was replaced, ignore stale close.
+          if (transport !== newTransport) return
+          handleTransportPermanentClose(closeCode)
+        })
+
+        // Start the flush gate before connect() to cover the WS handshake
+        // window. Between transport assignment and setOnConnect firing,
+        // writeMessages() could send messages via HTTP POST before the
+        // initial flush starts. Starting the gate here ensures those
+        // calls are queued. If there are no initial messages, the gate
+        // stays inactive.
+        if (
+          !initialFlushDone &&
+          initialMessages &&
+          initialMessages.length > 0
+        ) {
+          flushGate.start()
+        }
+
+        // connect() is the last step — all callbacks above must be wired
+        // before the socket can deliver its first event.
+        newTransport.connect()
+      } // end wireTransport
+
+      // Bump unconditionally — ANY new transport (v1 or v2) invalidates an
+      // in-flight v2 handshake. Also bumped in doReconnect().
+      v2Generation++
+
+      // Both branches end by handing a freshly built transport to
+      // wireTransport(): v2 after an async handshake (guarded against
+      // abort and stale generations), v1 synchronously.
+      if (useCcrV2) {
+        // workSessionId is the cse_* form (infrastructure-layer ID from the
+        // work queue), which is what /v1/code/sessions/{id}/worker/* wants.
+        // The session_* form (currentSessionId) is NOT usable here —
+        // handler/convert.go:30 validates TagCodeSession.
+        const sessionUrl = buildCCRv2SdkUrl(baseUrl, workSessionId)
+        const thisGen = v2Generation
+        logForDebugging(
+          `[bridge:repl] CCR v2: sessionUrl=${sessionUrl} session=${workSessionId} gen=${thisGen}`,
+        )
+        void createV2ReplTransport({
+          sessionUrl,
+          ingressToken,
+          sessionId: workSessionId,
+          initialSequenceNum: lastTransportSequenceNum,
+        }).then(
+          t => {
+            // Teardown started while registerWorker was in flight. Teardown
+            // saw transport === null and skipped close(); installing now
+            // would leak CCRClient heartbeat timers and reset
+            // teardownStarted via wireTransport's side effects.
+            if (pollController.signal.aborted) {
+              t.close()
+              return
+            }
+            // onWorkReceived may have fired again while registerWorker()
+            // was in flight (server re-dispatch with a fresh JWT). The
+            // transport !== null check alone gets the race wrong when BOTH
+            // attempts saw transport === null — it keeps the first resolver
+            // (stale epoch) and discards the second (correct epoch). The
+            // generation check catches it regardless of transport state.
+            if (thisGen !== v2Generation) {
+              logForDebugging(
+                `[bridge:repl] CCR v2: discarding stale handshake gen=${thisGen} current=${v2Generation}`,
+              )
+              t.close()
+              return
+            }
+            wireTransport(t)
+          },
+          (err: unknown) => {
+            logForDebugging(
+              `[bridge:repl] CCR v2: createV2ReplTransport failed: ${errorMessage(err)}`,
+              { level: 'error' },
+            )
+            logEvent('tengu_bridge_repl_ccr_v2_init_failed', {})
+            // If a newer attempt is in flight or already succeeded, don't
+            // touch its work item — our failure is irrelevant.
+            if (thisGen !== v2Generation) return
+            // Release the work item so the server re-dispatches immediately
+            // instead of waiting for its own timeout. currentWorkId was set
+            // above; without this, the session looks stuck to the user.
+            if (currentWorkId) {
+              void api
+                .stopWork(environmentId, currentWorkId, false)
+                .catch((e: unknown) => {
+                  logForDebugging(
+                    `[bridge:repl] stopWork after v2 init failure: ${errorMessage(e)}`,
+                  )
+                })
+              currentWorkId = null
+              currentIngressToken = null
+            }
+            wakePollLoop()
+          },
+        )
+      } else {
+        // v1: HybridTransport (WS reads + POST writes to Session-Ingress).
+        // autoReconnect is true (default) — when the WS dies, the transport
+        // reconnects automatically with exponential backoff. POST writes
+        // continue during reconnection (they use getSessionIngressAuthToken()
+        // independently of WS state). The poll loop remains as a secondary
+        // fallback if the reconnect budget is exhausted (10 min).
+        //
+        // Auth: uses OAuth tokens directly instead of the JWT from the work
+        // secret. refreshHeaders picks up the latest OAuth token on each
+        // WS reconnect attempt.
+        const wsUrl = buildSdkUrl(sessionIngressUrl, workSessionId)
+        logForDebugging(`[bridge:repl] Ingress URL: ${wsUrl}`)
+        logForDebugging(
+          `[bridge:repl] Creating HybridTransport: session=${workSessionId}`,
+        )
+        // v1OauthToken was validated non-null above (we'd have returned early).
+        const oauthToken = v1OauthToken ?? ''
+        wireTransport(
+          createV1ReplTransport(
+            new HybridTransport(
+              new URL(wsUrl),
+              {
+                Authorization: `Bearer ${oauthToken}`,
+                'anthropic-version': '2023-06-01',
+              },
+              workSessionId,
+              () => ({
+                Authorization: `Bearer ${getOAuthToken() ?? oauthToken}`,
+                'anthropic-version': '2023-06-01',
+              }),
+              // Cap retries so a persistently-failing session-ingress can't
+              // pin the uploader drain loop for the lifetime of the bridge.
+              // 50 attempts ≈ 20 min (15s POST timeout + 8s backoff + jitter
+              // per cycle at steady state). Bridge-only — 1P keeps indefinite.
+              {
+                maxConsecutiveFailures: 50,
+                isBridge: true,
+                onBatchDropped: () => {
+                  onStateChange?.(
+                    'reconnecting',
+                    'Lost sync with Remote Control — events could not be delivered',
+                  )
+                  // SI has been down ~20 min. Wake the poll loop so that when
+                  // SI recovers, next poll → onWorkReceived → fresh transport
+                  // → initial flush succeeds → onStateChange('connected') at
+                  // ~line 1420. Without this, state stays 'reconnecting' even
+                  // after SI recovers — daemon.ts:437 denies all permissions,
+                  // useReplBridge.ts:311 keeps replBridgeSessionActive=false.
+                  // If the env was archived during the outage, poll 404 →
+                  // onEnvironmentLost recovery path handles it.
+                  wakePollLoop()
+                },
+              },
+            ),
+          ),
+        )
+      }
+    },
+  }
+  void startWorkPollLoop(pollOpts)
+
+  // Perpetual mode only: refresh the crash-recovery pointer's mtime once
+  // an hour. The onWorkReceived refresh fires only per user prompt, so a
+  // daemon idle for >4h would otherwise trip readBridgePointer's TTL check
+  // on the next restart and get a fresh session. The standalone bridge
+  // (bridgeMain.ts) runs an identical hourly timer.
+  let pointerRefreshTimer: ReturnType<typeof setInterval> | null = null
+  if (perpetual) {
+    pointerRefreshTimer = setInterval(() => {
+      // doReconnect() swaps environmentId and currentSessionId with awaits
+      // in between and writes the pointer itself at the end; a write from
+      // this timer inside that window could clobber doReconnect's own
+      // pointer write with the now-archived old session. Skipping while a
+      // reconnect is in flight is free — doReconnect covers the write.
+      if (reconnectPromise) return
+      void writeBridgePointer(dir, {
+        sessionId: currentSessionId,
+        environmentId,
+        source: 'repl',
+      })
+    }, 60 * 60_000)
+    pointerRefreshTimer.unref?.()
+  }
+
+  // Silent keep_alive frames keep upstream proxies and the session-ingress
+  // layer from GC-ing an otherwise-idle remote control session. keep_alive
+  // is filtered before reaching any client UI (Query.ts drops it; web/iOS/
+  // Android never see it in their message loop). Interval comes from
+  // GrowthBook (tengu_bridge_poll_interval_config
+  // session_keepalive_interval_v2_ms, default 120s); 0 = disabled.
+  const keepAliveMs = getPollIntervalConfig().session_keepalive_interval_v2_ms
+  let keepAliveTimer: ReturnType<typeof setInterval> | null = null
+  if (keepAliveMs > 0) {
+    keepAliveTimer = setInterval(() => {
+      if (!transport) return
+      logForDebugging('[bridge:repl] keep_alive sent')
+      void transport.write({ type: 'keep_alive' }).catch((err: unknown) => {
+        logForDebugging(
+          `[bridge:repl] keep_alive write failed: ${errorMessage(err)}`,
+        )
+      })
+    }, keepAliveMs)
+    keepAliveTimer.unref?.()
+  }
+
+  // Shared teardown sequence used by both cleanup registration and
+  // the explicit teardown() method on the returned handle.
+  // Idempotent: the teardownStarted flag makes repeat calls no-ops, since
+  // both the registered cleanup and the handle's teardown() invoke this.
+  let teardownStarted = false
+  doTeardownImpl = async (): Promise<void> => {
+    if (teardownStarted) {
+      logForDebugging(
+        `[bridge:repl] Teardown already in progress, skipping duplicate call env=${environmentId} session=${currentSessionId}`,
+      )
+      return
+    }
+    teardownStarted = true
+    const teardownStart = Date.now()
+    logForDebugging(
+      `[bridge:repl] Teardown starting: env=${environmentId} session=${currentSessionId} workId=${currentWorkId ?? 'none'} transportState=${transport?.getStateLabel() ?? 'null'}`,
+    )
+
+    // Stop all periodic work first: timers, debug hooks, then the poll loop.
+    if (pointerRefreshTimer !== null) {
+      clearInterval(pointerRefreshTimer)
+    }
+    if (keepAliveTimer !== null) {
+      clearInterval(keepAliveTimer)
+    }
+    if (sigusr2Handler) {
+      process.off('SIGUSR2', sigusr2Handler)
+    }
+    if (process.env.USER_TYPE === 'ant') {
+      clearBridgeDebugHandle()
+      debugFireClose = null
+    }
+    pollController.abort()
+    logForDebugging('[bridge:repl] Teardown: poll loop aborted')
+
+    // Capture the live transport's seq BEFORE close() — close() is sync
+    // (just aborts the SSE fetch) and does NOT invoke onClose, so the
+    // setOnClose capture path never runs for explicit teardown.
+    // Without this, getSSESequenceNum() after teardown returns the stale
+    // lastTransportSequenceNum (captured at the last transport swap), and
+    // daemon callers persisting that value lose all events since then.
+    if (transport) {
+      const finalSeq = transport.getLastSequenceNum()
+      if (finalSeq > lastTransportSequenceNum) {
+        lastTransportSequenceNum = finalSeq
+      }
+    }
+
+    if (perpetual) {
+      // Perpetual teardown is LOCAL-ONLY — do not send result, do not call
+      // stopWork, do not close the transport. All of those signal the
+      // server (and any mobile/attach subscribers) that the session is
+      // ending. Instead: stop polling, let the socket die with the
+      // process; the backend times the work-item lease back to pending on
+      // its own (TTL 300s). Next daemon start reads the pointer and
+      // reconnectSession re-queues work.
+      transport = null
+      flushGate.drop()
+      // Refresh the pointer mtime so that sessions lasting longer than
+      // BRIDGE_POINTER_TTL_MS (4h) don't appear stale on next start.
+      await writeBridgePointer(dir, {
+        sessionId: currentSessionId,
+        environmentId,
+        source: 'repl',
+      })
+      logForDebugging(
+        `[bridge:repl] Teardown (perpetual): leaving env=${environmentId} session=${currentSessionId} alive on server, duration=${Date.now() - teardownStart}ms`,
+      )
+      return
+    }
+
+    // Fire the result message, then archive, THEN close. transport.write()
+    // only enqueues (SerialBatchEventUploader resolves on buffer-add); the
+    // stopWork/archive latency (~200-500ms) is the drain window for the
+    // result POST. Closing BEFORE archive meant relying on HybridTransport's
+    // void-ed 3s grace period, which nothing awaits — forceExit can kill the
+    // socket mid-POST. Same reorder as remoteBridgeCore.ts teardown (#22803).
+    const teardownTransport = transport
+    transport = null
+    flushGate.drop()
+    if (teardownTransport) {
+      void teardownTransport.write(makeResultMessage(currentSessionId))
+    }
+
+    // stopWork failures are logged and swallowed — teardown must proceed.
+    const stopWorkP = currentWorkId
+      ? api
+          .stopWork(environmentId, currentWorkId, true)
+          .then(() => {
+            logForDebugging('[bridge:repl] Teardown: stopWork completed')
+          })
+          .catch((err: unknown) => {
+            logForDebugging(
+              `[bridge:repl] Teardown stopWork failed: ${errorMessage(err)}`,
+            )
+          })
+      : Promise.resolve()
+
+    // Run stopWork and archiveSession in parallel. gracefulShutdown.ts:407
+    // races runCleanupFunctions() against 2s (NOT the 5s outer failsafe),
+    // so archive is capped at 1.5s at the injection site to stay under budget.
+    // archiveSession is contractually no-throw; the injected implementations
+    // log their own success/failure internally.
+    await Promise.all([stopWorkP, archiveSession(currentSessionId)])
+
+    teardownTransport?.close()
+    logForDebugging('[bridge:repl] Teardown: transport closed')
+
+    await api.deregisterEnvironment(environmentId).catch((err: unknown) => {
+      logForDebugging(
+        `[bridge:repl] Teardown deregister failed: ${errorMessage(err)}`,
+      )
+    })
+
+    // Clear the crash-recovery pointer — explicit disconnect or clean REPL
+    // exit means the user is done with this session. Crash/kill-9 never
+    // reaches this line, leaving the pointer for next-launch recovery.
+    await clearBridgePointer(dir)
+
+    logForDebugging(
+      `[bridge:repl] Teardown complete: env=${environmentId} duration=${Date.now() - teardownStart}ms`,
+    )
+  }
+
+  // 8. Register cleanup for graceful shutdown. `unregister` is invoked by
+  // the handle's explicit teardown() so the shutdown hook doesn't fire the
+  // (already-guarded) teardown a second time afterwards.
+  const unregister = registerCleanup(() => doTeardownImpl?.())
+
+  logForDebugging(
+    `[bridge:repl] Ready: env=${environmentId} session=${currentSessionId}`,
+  )
+  onStateChange?.('ready')
+
+  return {
+    // Getters, not snapshots: doReconnect() can replace the session and
+    // environment after this handle is created, so callers always read
+    // the live values.
+    get bridgeSessionId() {
+      return currentSessionId
+    },
+    get environmentId() {
+      return environmentId
+    },
+    getSSESequenceNum() {
+      // lastTransportSequenceNum is only refreshed when a transport is
+      // swapped out or closed; while a transport is live, its own counter
+      // keeps advancing. Report whichever is higher so callers persisting
+      // state (e.g. daemon persistState()) never regress below the true
+      // high-water mark.
+      const closedHighWater = lastTransportSequenceNum
+      const liveHighWater = transport ? transport.getLastSequenceNum() : 0
+      return liveHighWater > closedHighWater ? liveHighWater : closedHighWater
+    },
+    sessionIngressUrl,
+    writeMessages(messages) {
+      // Keep only eligible user/assistant messages that haven't been sent
+      // through either earlier channel: the session-creation payload
+      // (initialMessageUUIDs) or a recent POST (recentPostedUUIDs ring).
+      const eligible = messages.filter(m => {
+        if (!isEligibleBridgeMessage(m)) return false
+        if (initialMessageUUIDs.has(m.uuid)) return false
+        return !recentPostedUUIDs.has(m.uuid)
+      })
+      if (eligible.length === 0) return
+
+      // Title derivation runs BEFORE the flush-gate check — a prompt that
+      // queues behind the initial history flush is still title-worthy.
+      // Keep offering messages until the callback accepts one (returns
+      // true); the caller owns the acceptance policy.
+      if (!userMessageCallbackDone) {
+        for (const m of eligible) {
+          const text = extractTitleText(m)
+          if (text === undefined) continue
+          if (onUserMessage?.(text, currentSessionId)) {
+            userMessageCallbackDone = true
+            break
+          }
+        }
+      }
+
+      // While the initial history flush is in progress, park new messages
+      // in the gate so they don't interleave with history at the server.
+      if (flushGate.enqueue(...eligible)) {
+        logForDebugging(
+          `[bridge:repl] Queued ${eligible.length} message(s) during initial flush`,
+        )
+        return
+      }
+
+      if (!transport) {
+        const types = eligible.map(m => m.type).join(',')
+        logForDebugging(
+          `[bridge:repl] Transport not configured, dropping ${eligible.length} message(s) [${types}] for session=${currentSessionId}`,
+          { level: 'warn' },
+        )
+        return
+      }
+
+      // Record what we send so the server's echo on the WS is filtered
+      // and repeat calls dedup against the bounded ring buffer.
+      eligible.forEach(m => recentPostedUUIDs.add(m.uuid))
+
+      logForDebugging(
+        `[bridge:repl] Sending ${eligible.length} message(s) via transport`,
+      )
+
+      // Convert to SDK format, tag each event with the session, and send
+      // one batched HTTP POST (HybridTransport). The web UI receives the
+      // events via its subscribe WebSocket.
+      const batch = toSDKMessages(eligible).map(sdkMsg => ({
+        ...sdkMsg,
+        session_id: currentSessionId,
+      }))
+      void transport.writeBatch(batch)
+    },
+    writeSdkMessages(messages) {
+      // Daemon path: query() already yields SDKMessage, so no conversion.
+      // There is no initialMessageUUIDs filter (the daemon has no initial
+      // history) and no flush gate (the daemon never starts one) — only
+      // echo dedup, since the server bounces our writes back on the WS.
+      const fresh = messages.filter(m =>
+        m.uuid ? !recentPostedUUIDs.has(m.uuid) : true,
+      )
+      if (fresh.length === 0) return
+      if (!transport) {
+        logForDebugging(
+          `[bridge:repl] Transport not configured, dropping ${fresh.length} SDK message(s) for session=${currentSessionId}`,
+          { level: 'warn' },
+        )
+        return
+      }
+      for (const m of fresh) {
+        if (m.uuid) recentPostedUUIDs.add(m.uuid)
+      }
+      void transport.writeBatch(
+        fresh.map(m => ({ ...m, session_id: currentSessionId })),
+      )
+    },
+    sendControlRequest(request: SDKControlRequest) {
+      // Single-frame write (not batched); missing transport is a logged
+      // no-op rather than an error.
+      if (!transport) {
+        logForDebugging(
+          '[bridge:repl] Transport not configured, skipping control_request',
+        )
+        return
+      }
+      void transport.write({ ...request, session_id: currentSessionId })
+      logForDebugging(
+        `[bridge:repl] Sent control_request request_id=${request.request_id}`,
+      )
+    },
+    sendControlResponse(response: SDKControlResponse) {
+      // Single-frame write; missing transport is a logged no-op.
+      if (!transport) {
+        logForDebugging(
+          '[bridge:repl] Transport not configured, skipping control_response',
+        )
+        return
+      }
+      void transport.write({ ...response, session_id: currentSessionId })
+      logForDebugging('[bridge:repl] Sent control_response')
+    },
+    sendControlCancelRequest(requestId: string) {
+      // Builds the cancel frame inline; missing transport is a logged no-op.
+      if (!transport) {
+        logForDebugging(
+          '[bridge:repl] Transport not configured, skipping control_cancel_request',
+        )
+        return
+      }
+      void transport.write({
+        type: 'control_cancel_request' as const,
+        request_id: requestId,
+        session_id: currentSessionId,
+      })
+      logForDebugging(
+        `[bridge:repl] Sent control_cancel_request request_id=${requestId}`,
+      )
+    },
+    sendResult() {
+      // Emit the terminal result frame for the current session; without a
+      // transport this is a logged no-op.
+      if (!transport) {
+        logForDebugging(
+          `[bridge:repl] sendResult: skipping, transport not configured session=${currentSessionId}`,
+        )
+        return
+      }
+      const resultEvent = makeResultMessage(currentSessionId)
+      void transport.write(resultEvent)
+      logForDebugging(
+        `[bridge:repl] Sent result for session=${currentSessionId}`,
+      )
+    },
+    async teardown() {
+      // Drop the graceful-shutdown hook first so registered cleanup can't
+      // race this explicit call; doTeardownImpl itself is guarded by
+      // teardownStarted, so a duplicate would be a no-op anyway.
+      unregister()
+      await doTeardownImpl?.()
+      logForDebugging('[bridge:repl] Torn down')
+      logEvent('tengu_bridge_repl_teardown', {})
+    },
+  }
+}
+
+/**
+ * Persistent poll loop for work items. Runs in the background for the
+ * lifetime of the bridge connection.
+ *
+ * When a work item arrives, acknowledges it and calls onWorkReceived
+ * with the session ID and ingress token (which connects the ingress
+ * WebSocket). Then continues polling — the server will dispatch a new
+ * work item if the ingress WebSocket drops, allowing automatic
+ * reconnection without tearing down the bridge.
+ */
+async function startWorkPollLoop({
+  api,
+  getCredentials,
+  signal,
+  onStateChange,
+  onWorkReceived,
+  onEnvironmentLost,
+  getWsState,
+  isAtCapacity,
+  capacitySignal,
+  onFatalError,
+  getPollIntervalConfig = () => DEFAULT_POLL_CONFIG,
+  getHeartbeatInfo,
+  onHeartbeatFatal,
+}: {
+  api: BridgeApiClient
+  getCredentials: () => { environmentId: string; environmentSecret: string }
+  signal: AbortSignal
+  onStateChange?: (state: BridgeState, detail?: string) => void
+  onWorkReceived: (
+    sessionId: string,
+    ingressToken: string,
+    workId: string,
+    useCodeSessions: boolean,
+  ) => void
+  /** Called when the environment has been deleted. Returns new credentials or null. */
+  onEnvironmentLost?: () => Promise<{
+    environmentId: string
+    environmentSecret: string
+  } | null>
+  /** Returns the current WebSocket readyState label for diagnostic logging. */
+  getWsState?: () => string
+  /**
+   * Returns true when the caller cannot accept new work (transport already
+   * connected). When true, the loop polls at the configured at-capacity
+   * interval as a heartbeat only. Server-side BRIDGE_LAST_POLL_TTL is
+   * 4 hours — anything shorter than that is sufficient for liveness.
+   */
+  isAtCapacity?: () => boolean
+  /**
+   * Produces a signal that aborts when capacity frees up (transport lost),
+   * merged with the loop signal. Used to interrupt the at-capacity sleep
+   * so recovery polling starts immediately.
+   */
+  capacitySignal?: () => CapacitySignal
+  /** Called on unrecoverable errors (e.g. server-side expiry) to trigger full teardown. */
+  onFatalError?: () => void
+  /** Poll interval config getter — defaults to DEFAULT_POLL_CONFIG. */
+  getPollIntervalConfig?: () => PollIntervalConfig
+  /**
+   * Returns the current work ID and session ingress token for heartbeat.
+   * When null, heartbeat is not possible (no active work item).
+   */
+  getHeartbeatInfo?: () => {
+    environmentId: string
+    workId: string
+    sessionToken: string
+  } | null
+  /**
+   * Called when heartbeatWork throws BridgeFatalError (401/403/404/410 —
+   * JWT expired or work item gone). Caller should tear down the transport
+   * + work state so isAtCapacity() flips to false and the loop fast-polls
+   * for the server's re-dispatched work item. When provided, the loop
+   * SKIPS the at-capacity backoff sleep (which would otherwise cause a
+   * ~10-minute dead window before recovery). When omitted, falls back to
+   * the backoff sleep to avoid a tight poll+heartbeat loop.
+   */
+  onHeartbeatFatal?: (err: BridgeFatalError) => void
+}): Promise<void> {
+  const MAX_ENVIRONMENT_RECREATIONS = 3
+
+  logForDebugging(
+    `[bridge:repl] Starting work poll loop for env=${getCredentials().environmentId}`,
+  )
+
+  let consecutiveErrors = 0
+  let firstErrorTime: number | null = null
+  let lastPollErrorTime: number | null = null
+  let environmentRecreations = 0
+  // Set when the at-capacity sleep overruns its deadline by a large margin
+  // (process suspension). Consumed at the top of the next iteration to
+  // force one fast-poll cycle — isAtCapacity() is `transport !== null`,
+  // which stays true while the transport auto-reconnects, so the poll
+  // loop would otherwise go straight back to a 10-minute sleep on a
+  // transport that may be pointed at a dead socket.
+  let suspensionDetected = false
+
+  while (!signal.aborted) {
+    // Capture credentials outside try so the catch block can detect
+    // whether a concurrent reconnection replaced the environment.
+    const { environmentId: envId, environmentSecret: envSecret } =
+      getCredentials()
+    const pollConfig = getPollIntervalConfig()
+    try {
+      const work = await api.pollForWork(
+        envId,
+        envSecret,
+        signal,
+        pollConfig.reclaim_older_than_ms,
+      )
+
+      // A successful poll proves the env is genuinely healthy — reset the
+      // env-loss counter so events hours apart each start fresh. Outside
+      // the state-change guard below because onEnvLost's success path
+      // already emits 'ready'; emitting again here would be a duplicate.
+      // (onEnvLost returning creds does NOT reset this — that would break
+      // oscillation protection when the new env immediately dies.)
+      environmentRecreations = 0
+
+      // Reset error tracking on successful poll
+      if (consecutiveErrors > 0) {
+        logForDebugging(
+          `[bridge:repl] Poll recovered after ${consecutiveErrors} consecutive error(s)`,
+        )
+        consecutiveErrors = 0
+        firstErrorTime = null
+        lastPollErrorTime = null
+        onStateChange?.('ready')
+      }
+
+      if (!work) {
+        // Read-and-clear: after a detected suspension, skip the at-capacity
+        // branch exactly once. The pollForWork above already refreshed the
+        // server's BRIDGE_LAST_POLL_TTL; this fast cycle gives any
+        // re-dispatched work item a chance to land before we go back under.
+        const skipAtCapacityOnce = suspensionDetected
+        suspensionDetected = false
+        if (isAtCapacity?.() && capacitySignal && !skipAtCapacityOnce) {
+          const atCapMs = pollConfig.poll_interval_ms_at_capacity
+          // Heartbeat loops WITHOUT polling. When at-capacity polling is also
+          // enabled (atCapMs > 0), the loop tracks a deadline and breaks out
+          // to poll at that interval — heartbeat and poll compose instead of
+          // one suppressing the other. Breaks out when:
+          //   - Poll deadline reached (atCapMs > 0 only)
+          //   - Auth fails (JWT expired → poll refreshes tokens)
+          //   - Capacity wake fires (transport lost → poll for new work)
+          //   - Heartbeat config disabled (GrowthBook update)
+          //   - Loop aborted (shutdown)
+          if (
+            pollConfig.non_exclusive_heartbeat_interval_ms > 0 &&
+            getHeartbeatInfo
+          ) {
+            logEvent('tengu_bridge_heartbeat_mode_entered', {
+              heartbeat_interval_ms:
+                pollConfig.non_exclusive_heartbeat_interval_ms,
+            })
+            // Deadline computed once at entry — GB updates to atCapMs don't
+            // shift an in-flight deadline (next entry picks up the new value).
+            const pollDeadline = atCapMs > 0 ? Date.now() + atCapMs : null
+            let needsBackoff = false
+            let hbCycles = 0
+            while (
+              !signal.aborted &&
+              isAtCapacity() &&
+              (pollDeadline === null || Date.now() < pollDeadline)
+            ) {
+              const hbConfig = getPollIntervalConfig()
+              if (hbConfig.non_exclusive_heartbeat_interval_ms <= 0) break
+
+              const info = getHeartbeatInfo()
+              if (!info) break
+
+              // Capture capacity signal BEFORE the async heartbeat call so
+              // a transport loss during the HTTP request is caught by the
+              // subsequent sleep.
+              const cap = capacitySignal()
+
+              try {
+                await api.heartbeatWork(
+                  info.environmentId,
+                  info.workId,
+                  info.sessionToken,
+                )
+              } catch (err) {
+                logForDebugging(
+                  `[bridge:repl:heartbeat] Failed: ${errorMessage(err)}`,
+                )
+                if (err instanceof BridgeFatalError) {
+                  cap.cleanup()
+                  logEvent('tengu_bridge_heartbeat_error', {
+                    status:
+                      err.status as unknown as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+                    error_type: (err.status === 401 || err.status === 403
+                      ? 'auth_failed'
+                      : 'fatal') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+                  })
+                  // JWT expired (401/403) or work item gone (404/410).
+                  // Either way the current transport is dead — SSE
+                  // reconnects and CCR writes will fail on the same
+                  // stale token. If the caller gave us a recovery hook,
+                  // tear down work state and skip backoff: isAtCapacity()
+                  // flips to false, next outer-loop iteration fast-polls
+                  // for the server's re-dispatched work item. Without
+                  // the hook, backoff to avoid tight poll+heartbeat loop.
+                  if (onHeartbeatFatal) {
+                    onHeartbeatFatal(err)
+                    logForDebugging(
+                      `[bridge:repl:heartbeat] Fatal (status=${err.status}), work state cleared — fast-polling for re-dispatch`,
+                    )
+                  } else {
+                    needsBackoff = true
+                  }
+                  break
+                }
+              }
+
+              hbCycles++
+              await sleep(
+                hbConfig.non_exclusive_heartbeat_interval_ms,
+                cap.signal,
+              )
+              cap.cleanup()
+            }
+
+            const exitReason = needsBackoff
+              ? 'error'
+              : signal.aborted
+                ? 'shutdown'
+                : !isAtCapacity()
+                  ? 'capacity_changed'
+                  : pollDeadline !== null && Date.now() >= pollDeadline
+                    ? 'poll_due'
+                    : 'config_disabled'
+            logEvent('tengu_bridge_heartbeat_mode_exited', {
+              reason:
+                exitReason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+              heartbeat_cycles: hbCycles,
+            })
+
+            // On auth_failed or fatal, backoff before polling to avoid a
+            // tight poll+heartbeat loop. Fall through to the shared sleep
+            // below — it's the same capacitySignal-wrapped sleep the legacy
+            // path uses, and both need the suspension-overrun check.
+            if (!needsBackoff) {
+              if (exitReason === 'poll_due') {
+                // bridgeApi throttles empty-poll logs (EMPTY_POLL_LOG_INTERVAL=100)
+                // so the once-per-10min poll_due poll is invisible at counter=2.
+                // Log it here so verification runs see both endpoints in the debug log.
+                logForDebugging(
+                  `[bridge:repl] Heartbeat poll_due after ${hbCycles} cycles — falling through to pollForWork`,
+                )
+              }
+              continue
+            }
+          }
+          // At-capacity sleep — reached by both the legacy path (heartbeat
+          // disabled) and the heartbeat-backoff path (needsBackoff=true).
+          // Merged so the suspension detector covers both; previously the
+          // backoff path had no overrun check and could go straight back
+          // under for 10 min after a laptop wake. Use atCapMs when enabled,
+          // else the heartbeat interval as a floor (guaranteed > 0 on the
+          // backoff path) so heartbeat-only configs don't tight-loop.
+          const sleepMs =
+            atCapMs > 0
+              ? atCapMs
+              : pollConfig.non_exclusive_heartbeat_interval_ms
+          if (sleepMs > 0) {
+            const cap = capacitySignal()
+            const sleepStart = Date.now()
+            await sleep(sleepMs, cap.signal)
+            cap.cleanup()
+            // Process-suspension detector. A setTimeout overshooting its
+            // deadline by 60s means the process was suspended (laptop lid,
+            // SIGSTOP, VM pause) — even a pathological GC pause is seconds,
+            // not minutes. Early aborts (wakePollLoop → cap.signal) produce
+            // overrun < 0 and fall through. Note: this only catches sleeps
+            // that outlast their deadline; WebSocketTransport's ping
+            // interval (10s granularity) is the primary detector for shorter
+            // suspensions. This is the backstop for when that detector isn't
+            // running (transport mid-reconnect, interval stopped).
+            const overrun = Date.now() - sleepStart - sleepMs
+            if (overrun > 60_000) {
+              logForDebugging(
+                `[bridge:repl] At-capacity sleep overran by ${Math.round(overrun / 1000)}s — process suspension detected, forcing one fast-poll cycle`,
+              )
+              logEvent('tengu_bridge_repl_suspension_detected', {
+                overrun_ms: overrun,
+              })
+              suspensionDetected = true
+            }
+          }
+        } else {
+          await sleep(pollConfig.poll_interval_ms_not_at_capacity, signal)
+        }
+        continue
+      }
+
+      // Decode before type dispatch — need the JWT for the explicit ack.
+      let secret
+      try {
+        secret = decodeWorkSecret(work.secret)
+      } catch (err) {
+        logForDebugging(
+          `[bridge:repl] Failed to decode work secret: ${errorMessage(err)}`,
+        )
+        logEvent('tengu_bridge_repl_work_secret_failed', {})
+        // Can't ack (needs the JWT we failed to decode). stopWork uses OAuth.
+        // Prevents XAUTOCLAIM re-delivering this poisoned item every cycle.
+        await api.stopWork(envId, work.id, false).catch(() => {})
+        continue
+      }
+
+      // Explicitly acknowledge to prevent redelivery. Non-fatal on failure:
+      // server re-delivers, and the onWorkReceived callback handles dedup.
+      logForDebugging(`[bridge:repl] Acknowledging workId=${work.id}`)
+      try {
+        await api.acknowledgeWork(envId, work.id, secret.session_ingress_token)
+      } catch (err) {
+        logForDebugging(
+          `[bridge:repl] Acknowledge failed workId=${work.id}: ${errorMessage(err)}`,
+        )
+      }
+
+      if (work.data.type === 'healthcheck') {
+        logForDebugging('[bridge:repl] Healthcheck received')
+        continue
+      }
+
+      if (work.data.type === 'session') {
+        const workSessionId = work.data.id
+        try {
+          validateBridgeId(workSessionId, 'session_id')
+        } catch {
+          logForDebugging(
+            `[bridge:repl] Invalid session_id in work: ${workSessionId}`,
+          )
+          continue
+        }
+
+        onWorkReceived(
+          workSessionId,
+          secret.session_ingress_token,
+          work.id,
+          secret.use_code_sessions === true,
+        )
+        logForDebugging('[bridge:repl] Work accepted, continuing poll loop')
+      }
+    } catch (err) {
+      if (signal.aborted) break
+
+      // Detect permanent "environment deleted" error — no amount of
+      // retrying will recover. Re-register a new environment instead.
+      // Checked BEFORE the generic BridgeFatalError bail. pollForWork uses
+      // validateStatus: s => s < 500, so 404 is always wrapped into a
+      // BridgeFatalError by handleErrorStatus() — never an axios-shaped
+      // error. The poll endpoint's only path param is the env ID; 404
+      // unambiguously means env-gone (no-work is a 200 with null body).
+      // The server sends error.type='not_found_error' (standard Anthropic
+      // API shape), not a bridge-specific string — but status===404 is
+      // the real signal and survives body-shape changes.
+      if (
+        err instanceof BridgeFatalError &&
+        err.status === 404 &&
+        onEnvironmentLost
+      ) {
+        // If credentials have already been refreshed by a concurrent
+        // reconnection (e.g. WS close handler), the stale poll's error
+        // is expected — skip onEnvironmentLost and retry with fresh creds.
+        const currentEnvId = getCredentials().environmentId
+        if (envId !== currentEnvId) {
+          logForDebugging(
+            `[bridge:repl] Stale poll error for old env=${envId}, current env=${currentEnvId} — skipping onEnvironmentLost`,
+          )
+          consecutiveErrors = 0
+          firstErrorTime = null
+          continue
+        }
+
+        environmentRecreations++
+        logForDebugging(
+          `[bridge:repl] Environment deleted, attempting re-registration (attempt ${environmentRecreations}/${MAX_ENVIRONMENT_RECREATIONS})`,
+        )
+        logEvent('tengu_bridge_repl_env_lost', {
+          attempt: environmentRecreations,
+        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
+
+        if (environmentRecreations > MAX_ENVIRONMENT_RECREATIONS) {
+          logForDebugging(
+            `[bridge:repl] Environment re-registration limit reached (${MAX_ENVIRONMENT_RECREATIONS}), giving up`,
+          )
+          onStateChange?.(
+            'failed',
+            'Environment deleted and re-registration limit reached',
+          )
+          onFatalError?.()
+          break
+        }
+
+        onStateChange?.('reconnecting', 'environment lost, recreating session')
+        const newCreds = await onEnvironmentLost()
+        // doReconnect() makes several sequential network calls (1-5s).
+        // If the user triggered teardown during that window, its internal
+        // abort checks return false — but we need to re-check here to
+        // avoid emitting a spurious 'failed' + onFatalError() during
+        // graceful shutdown.
+        if (signal.aborted) break
+        if (newCreds) {
+          // Credentials are updated in the outer scope via
+          // reconnectEnvironmentWithSession — getCredentials() will
+          // return the fresh values on the next poll iteration.
+          // Do NOT reset environmentRecreations here — onEnvLost returning
+          // creds only proves we tried to fix it, not that the env is
+          // healthy. A successful poll (above) is the reset point; if the
+          // new env immediately dies again we still want the limit to fire.
+          consecutiveErrors = 0
+          firstErrorTime = null
+          onStateChange?.('ready')
+          logForDebugging(
+            `[bridge:repl] Re-registered environment: ${newCreds.environmentId}`,
+          )
+          continue
+        }
+
+        onStateChange?.(
+          'failed',
+          'Environment deleted and re-registration failed',
+        )
+        onFatalError?.()
+        break
+      }
+
+      // Fatal errors (401/403/404/410) — no point retrying
+      if (err instanceof BridgeFatalError) {
+        const isExpiry = isExpiredErrorType(err.errorType)
+        const isSuppressible = isSuppressible403(err)
+        logForDebugging(
+          `[bridge:repl] Fatal poll error: ${err.message} (status=${err.status}, type=${err.errorType ?? 'unknown'})${isSuppressible ? ' (suppressed)' : ''}`,
+        )
+        logEvent('tengu_bridge_repl_fatal_error', {
+          status: err.status,
+          error_type:
+            err.errorType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        })
+        logForDiagnosticsNoPII(
+          isExpiry ? 'info' : 'error',
+          'bridge_repl_fatal_error',
+          { status: err.status, error_type: err.errorType },
+        )
+        // Cosmetic 403 errors (e.g., external_poll_sessions scope,
+        // environments:manage permission) — suppress user-visible error
+        // but always trigger teardown so cleanup runs.
+        if (!isSuppressible) {
+          onStateChange?.(
+            'failed',
+            isExpiry
+              ? 'session expired · /remote-control to reconnect'
+              : err.message,
+          )
+        }
+        // Always trigger teardown — matches bridgeMain.ts where fatalExit=true
+        // is unconditional and post-loop cleanup always runs.
+        onFatalError?.()
+        break
+      }
+
+      const now = Date.now()
+
+      // Detect system sleep/wake: if the gap since the last poll error
+      // greatly exceeds the max backoff delay, the machine likely slept.
+      // Reset error tracking so we retry with a fresh budget instead of
+      // immediately giving up.
+      if (
+        lastPollErrorTime !== null &&
+        now - lastPollErrorTime > POLL_ERROR_MAX_DELAY_MS * 2
+      ) {
+        logForDebugging(
+          `[bridge:repl] Detected system sleep (${Math.round((now - lastPollErrorTime) / 1000)}s gap), resetting poll error budget`,
+        )
+        logForDiagnosticsNoPII('info', 'bridge_repl_poll_sleep_detected', {
+          gapMs: now - lastPollErrorTime,
+        })
+        consecutiveErrors = 0
+        firstErrorTime = null
+      }
+      lastPollErrorTime = now
+
+      consecutiveErrors++
+      if (firstErrorTime === null) {
+        firstErrorTime = now
+      }
+      const elapsed = now - firstErrorTime
+      const httpStatus = extractHttpStatus(err)
+      const errMsg = describeAxiosError(err)
+      const wsLabel = getWsState?.() ?? 'unknown'
+
+      logForDebugging(
+        `[bridge:repl] Poll error (attempt ${consecutiveErrors}, elapsed ${Math.round(elapsed / 1000)}s, ws=${wsLabel}): ${errMsg}`,
+      )
+      logEvent('tengu_bridge_repl_poll_error', {
+        status: httpStatus,
+        consecutiveErrors,
+        elapsedMs: elapsed,
+      } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
+
+      // Only transition to 'reconnecting' on the first error — stay
+      // there until a successful poll (avoid flickering the UI state).
+      if (consecutiveErrors === 1) {
+        onStateChange?.('reconnecting', errMsg)
+      }
+
+      // Give up after continuous failures
+      if (elapsed >= POLL_ERROR_GIVE_UP_MS) {
+        logForDebugging(
+          `[bridge:repl] Poll failures exceeded ${POLL_ERROR_GIVE_UP_MS / 1000}s (${consecutiveErrors} errors), giving up`,
+        )
+        logForDiagnosticsNoPII('info', 'bridge_repl_poll_give_up')
+        logEvent('tengu_bridge_repl_poll_give_up', {
+          consecutiveErrors,
+          elapsedMs: elapsed,
+          lastStatus: httpStatus,
+        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
+        onStateChange?.('failed', 'connection to server lost')
+        break
+      }
+
+      // Exponential backoff: 2s → 4s → 8s → 16s → 32s → 60s (cap)
+      const backoff = Math.min(
+        POLL_ERROR_INITIAL_DELAY_MS * 2 ** (consecutiveErrors - 1),
+        POLL_ERROR_MAX_DELAY_MS,
+      )
+      // The poll_due heartbeat-loop exit leaves a healthy lease exposed to
+      // this backoff path. Heartbeat before each sleep so /poll outages
+      // (the VerifyEnvironmentSecretAuth DB path heartbeat was introduced to
+      // avoid) don't kill the 300s lease TTL.
+      if (getPollIntervalConfig().non_exclusive_heartbeat_interval_ms > 0) {
+        const info = getHeartbeatInfo?.()
+        if (info) {
+          try {
+            await api.heartbeatWork(
+              info.environmentId,
+              info.workId,
+              info.sessionToken,
+            )
+          } catch {
+            // Best-effort — if heartbeat also fails the lease dies, same as
+            // pre-poll_due behavior (where the only heartbeat-loop exits were
+            // ones where the lease was already dying).
+          }
+        }
+      }
+      await sleep(backoff, signal)
+    }
+  }
+
+  logForDebugging(
+    `[bridge:repl] Work poll loop ended (aborted=${signal.aborted}) env=${getCredentials().environmentId}`,
+  )
+}
+
// Exported for testing only. The underscore-prefixed aliases give unit tests
// direct access to the poll loop and its backoff constants without adding
// them to the module's production API surface.
export {
  startWorkPollLoop as _startWorkPollLoopForTesting,
  POLL_ERROR_INITIAL_DELAY_MS as _POLL_ERROR_INITIAL_DELAY_MS_ForTesting,
  POLL_ERROR_MAX_DELAY_MS as _POLL_ERROR_MAX_DELAY_MS_ForTesting,
  POLL_ERROR_GIVE_UP_MS as _POLL_ERROR_GIVE_UP_MS_ForTesting,
}

+ 36 - 0
src/bridge/replBridgeHandle.ts

@@ -0,0 +1,36 @@
+import { updateSessionBridgeId } from '../utils/concurrentSessions.js'
+import type { ReplBridgeHandle } from './replBridge.js'
+import { toCompatSessionId } from './sessionIdCompat.js'
+
+/**
+ * Global pointer to the active REPL bridge handle, so callers outside
+ * useReplBridge's React tree (tools, slash commands) can invoke handle methods
+ * like subscribePR. Same one-bridge-per-process justification as bridgeDebug.ts
+ * — the handle's closure captures the sessionId and getAccessToken that created
+ * the session, and re-deriving those independently (BriefTool/upload.ts pattern)
+ * risks staging/prod token divergence.
+ *
+ * Set from useReplBridge.tsx when init completes; cleared on teardown.
+ */
+
+let handle: ReplBridgeHandle | null = null
+
+export function setReplBridgeHandle(h: ReplBridgeHandle | null): void {
+  handle = h
+  // Publish (or clear) our bridge session ID in the session record so other
+  // local peers can dedup us out of their bridge list — local is preferred.
+  void updateSessionBridgeId(getSelfBridgeCompatId() ?? null).catch(() => {})
+}
+
/** Current active REPL bridge handle, or null when no bridge is connected. */
export function getReplBridgeHandle(): ReplBridgeHandle | null {
  return handle
}
+
+/**
+ * Our own bridge session ID in the session_* compat format the API returns
+ * in /v1/sessions responses — or undefined if bridge isn't connected.
+ */
+export function getSelfBridgeCompatId(): string | undefined {
+  const h = getReplBridgeHandle()
+  return h ? toCompatSessionId(h.bridgeSessionId) : undefined
+}

+ 370 - 0
src/bridge/replBridgeTransport.ts

@@ -0,0 +1,370 @@
+import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
+import { CCRClient } from '../cli/transports/ccrClient.js'
+import type { HybridTransport } from '../cli/transports/HybridTransport.js'
+import { SSETransport } from '../cli/transports/SSETransport.js'
+import { logForDebugging } from '../utils/debug.js'
+import { errorMessage } from '../utils/errors.js'
+import { updateSessionIngressAuthToken } from '../utils/sessionIngressAuth.js'
+import type { SessionState } from '../utils/sessionState.js'
+import { registerWorker } from './workSecret.js'
+
+/**
+ * Transport abstraction for replBridge. Covers exactly the surface that
+ * replBridge.ts uses against HybridTransport so the v1/v2 choice is
+ * confined to the construction site.
+ *
+ * - v1: HybridTransport (WS reads + POST writes to Session-Ingress)
+ * - v2: SSETransport (reads) + CCRClient (writes to CCR v2 /worker/*)
+ *
+ * The v2 write path goes through CCRClient.writeEvent → SerialBatchEventUploader,
+ * NOT through SSETransport.write() — SSETransport.write() targets the
+ * Session-Ingress POST URL shape, which is wrong for CCR v2.
+ */
export type ReplBridgeTransport = {
  /** Send one outbound message to the remote session. */
  write(message: StdoutMessage): Promise<void>
  /** Send several outbound messages, preserving their order. */
  writeBatch(messages: StdoutMessage[]): Promise<void>
  /** Release the underlying connection resources. */
  close(): void
  /**
   * Whether the transport will currently accept writes. (The v2 adapter
   * reports write-readiness specifically; read-stream state is separate.)
   */
  isConnectedStatus(): boolean
  /** Human-readable connection state — consumed for debug logging only. */
  getStateLabel(): string
  /** Register the inbound-data callback (raw frames from the read stream). */
  setOnData(callback: (data: string) => void): void
  /**
   * Register the close callback. closeCode distinguishes causes; the v2
   * adapter synthesizes 409x codes for its own failure modes.
   */
  setOnClose(callback: (closeCode?: number) => void): void
  /** Register the callback fired once the transport becomes write-ready. */
  setOnConnect(callback: () => void): void
  /** Start connecting. Wire all callbacks above before calling this. */
  connect(): void
  /**
   * High-water mark of the underlying read stream's event sequence numbers.
   * replBridge reads this before swapping transports so the new one can
   * resume from where the old one left off (otherwise the server replays
   * the entire session history from seq 0).
   *
   * v1 returns 0 — Session-Ingress WS doesn't use SSE sequence numbers;
   * replay-on-reconnect is handled by the server-side message cursor.
   */
  getLastSequenceNum(): number
  /**
   * Monotonic count of batches dropped via maxConsecutiveFailures.
   * Snapshot before writeBatch() and compare after to detect silent drops
   * (writeBatch() resolves normally even when batches were dropped).
   * v2 returns 0 — the v2 write path doesn't set maxConsecutiveFailures.
   */
  readonly droppedBatchCount: number
  /**
   * PUT /worker state (v2 only; v1 is a no-op). `requires_action` tells
   * the backend a permission prompt is pending — claude.ai shows the
   * "waiting for input" indicator. REPL/daemon callers don't need this
   * (user watches the REPL locally); multi-session worker callers do.
   */
  reportState(state: SessionState): void
  /** PUT /worker external_metadata (v2 only; v1 is a no-op). */
  reportMetadata(metadata: Record<string, unknown>): void
  /**
   * POST /worker/events/{id}/delivery (v2 only; v1 is a no-op). Populates
   * CCR's processing_at/processed_at columns. `received` is auto-fired by
   * CCRClient on every SSE frame and is not exposed here.
   */
  reportDelivery(eventId: string, status: 'processing' | 'processed'): void
  /**
   * Drain the write queue before close() (v2 only; v1 resolves
   * immediately — HybridTransport POSTs are already awaited per-write).
   */
  flush(): Promise<void>
}
+
+/**
+ * v1 adapter: HybridTransport already has the full surface (it extends
+ * WebSocketTransport which has setOnConnect + getStateLabel). This is a
+ * no-op wrapper that exists only so replBridge's `transport` variable
+ * has a single type.
+ */
+export function createV1ReplTransport(
+  hybrid: HybridTransport,
+): ReplBridgeTransport {
+  return {
+    write: msg => hybrid.write(msg),
+    writeBatch: msgs => hybrid.writeBatch(msgs),
+    close: () => hybrid.close(),
+    isConnectedStatus: () => hybrid.isConnectedStatus(),
+    getStateLabel: () => hybrid.getStateLabel(),
+    setOnData: cb => hybrid.setOnData(cb),
+    setOnClose: cb => hybrid.setOnClose(cb),
+    setOnConnect: cb => hybrid.setOnConnect(cb),
+    connect: () => void hybrid.connect(),
+    // v1 Session-Ingress WS doesn't use SSE sequence numbers; replay
+    // semantics are different. Always return 0 so the seq-num carryover
+    // logic in replBridge is a no-op for v1.
+    getLastSequenceNum: () => 0,
+    get droppedBatchCount() {
+      return hybrid.droppedBatchCount
+    },
+    reportState: () => {},
+    reportMetadata: () => {},
+    reportDelivery: () => {},
+    flush: () => Promise.resolve(),
+  }
+}
+
/**
 * v2 adapter: wrap SSETransport (reads) + CCRClient (writes, heartbeat,
 * state, delivery tracking).
 *
 * Auth: v2 endpoints validate the JWT's session_id claim (register_worker.go:32)
 * and worker role (environment_auth.py:856). OAuth tokens have neither.
 * This is the inverse of the v1 replBridge path, which deliberately uses OAuth.
 * The JWT is refreshed when the poll loop re-dispatches work — the caller
 * invokes createV2ReplTransport again with the fresh token.
 *
 * Registration happens here (not in the caller) so the entire v2 handshake
 * is one async step. registerWorker failure propagates — replBridge will
 * catch it and stay on the poll loop.
 */
export async function createV2ReplTransport(opts: {
  sessionUrl: string
  ingressToken: string
  sessionId: string
  /**
   * SSE sequence-number high-water mark from the previous transport.
   * Passed to the new SSETransport so its first connect() sends
   * from_sequence_num / Last-Event-ID and the server resumes from where
   * the old stream left off. Without this, every transport swap asks the
   * server to replay the entire session history from seq 0.
   */
  initialSequenceNum?: number
  /**
   * Worker epoch from POST /bridge response. When provided, the server
   * already bumped epoch (the /bridge call IS the register — see server
   * PR #293280). When omitted (v1 CCR-v2 path via replBridge.ts poll loop),
   * call registerWorker as before.
   */
  epoch?: number
  /** CCRClient heartbeat interval. Defaults to 20s when omitted. */
  heartbeatIntervalMs?: number
  /** ±fraction per-beat jitter. Defaults to 0 (no jitter) when omitted. */
  heartbeatJitterFraction?: number
  /**
   * When true, skip opening the SSE read stream — only the CCRClient write
   * path is activated. Use for mirror-mode attachments that forward events
   * but never receive inbound prompts or control requests.
   */
  outboundOnly?: boolean
  /**
   * Per-instance auth header source. When provided, CCRClient + SSETransport
   * read auth from this closure instead of the process-wide
   * CLAUDE_CODE_SESSION_ACCESS_TOKEN env var. Required for callers managing
   * multiple concurrent sessions — the env-var path stomps across sessions.
   * When omitted, falls back to the env var (single-session callers).
   */
  getAuthToken?: () => string | undefined
}): Promise<ReplBridgeTransport> {
  const {
    sessionUrl,
    ingressToken,
    sessionId,
    initialSequenceNum,
    getAuthToken,
  } = opts

  // Auth header builder. If getAuthToken is provided, read from it
  // (per-instance, multi-session safe). Otherwise write ingressToken to
  // the process-wide env var (legacy single-session path — CCRClient's
  // default getAuthHeaders reads it via getSessionIngressAuthHeaders).
  let getAuthHeaders: (() => Record<string, string>) | undefined
  if (getAuthToken) {
    getAuthHeaders = (): Record<string, string> => {
      const token = getAuthToken()
      if (!token) return {}
      return { Authorization: `Bearer ${token}` }
    }
  } else {
    // CCRClient.request() and SSETransport.connect() both read auth via
    // getSessionIngressAuthHeaders() → this env var. Set it before either
    // touches the network.
    updateSessionIngressAuthToken(ingressToken)
  }

  const epoch = opts.epoch ?? (await registerWorker(sessionUrl, ingressToken))
  logForDebugging(
    `[bridge:repl] CCR v2: worker sessionId=${sessionId} epoch=${epoch}${opts.epoch !== undefined ? ' (from /bridge)' : ' (via registerWorker)'}`,
  )

  // Derive SSE stream URL. Same logic as transportUtils.ts:26-33 but
  // starting from an http(s) base instead of a --sdk-url that might be ws://.
  const sseUrl = new URL(sessionUrl)
  sseUrl.pathname = sseUrl.pathname.replace(/\/$/, '') + '/worker/events/stream'

  const sse = new SSETransport(
    sseUrl,
    {},
    sessionId,
    undefined,
    initialSequenceNum,
    getAuthHeaders,
  )
  // Assigned later in setOnClose(); declared before the CCRClient so the
  // onEpochMismatch handler below can notify replBridge of the teardown.
  let onCloseCb: ((closeCode?: number) => void) | undefined
  const ccr = new CCRClient(sse, new URL(sessionUrl), {
    getAuthHeaders,
    heartbeatIntervalMs: opts.heartbeatIntervalMs,
    heartbeatJitterFraction: opts.heartbeatJitterFraction,
    // Default is process.exit(1) — correct for spawn-mode children. In-process,
    // that kills the REPL. Close instead: replBridge's onClose wakes the poll
    // loop, which picks up the server's re-dispatch (with fresh epoch).
    onEpochMismatch: () => {
      logForDebugging(
        '[bridge:repl] CCR v2: epoch superseded (409) — closing for poll-loop recovery',
      )
      // Close resources in a try block so the throw always executes.
      // If ccr.close() or sse.close() throw, we still need to unwind
      // the caller (request()) — otherwise handleEpochMismatch's `never`
      // return type is violated at runtime and control falls through.
      try {
        ccr.close()
        sse.close()
        onCloseCb?.(4090)
      } catch (closeErr: unknown) {
        logForDebugging(
          `[bridge:repl] CCR v2: error during epoch-mismatch cleanup: ${errorMessage(closeErr)}`,
          { level: 'error' },
        )
      }
      // Don't return — the calling request() code continues after the 409
      // branch, so callers see the logged warning and a false return. We
      // throw to unwind; the uploaders catch it as a send failure.
      throw new Error('epoch superseded')
    },
  })

  // CCRClient's constructor wired sse.setOnEvent → reportDelivery('received').
  // remoteIO.ts additionally sends 'processing'/'processed' via
  // setCommandLifecycleListener, which the in-process query loop fires. This
  // transport's only caller (replBridge/daemonBridge) has no such wiring — the
  // daemon's agent child is a separate process (ProcessTransport), and its
  // notifyCommandLifecycle calls fire with listener=null in its own module
  // scope. So events stay at 'received' forever, and reconnectSession re-queues
  // them on every daemon restart (observed: 21→24→25 phantom prompts as
  // "user sent a new message while you were working" system-reminders).
  //
  // Fix: ACK 'processed' immediately alongside 'received'. The window between
  // SSE receipt and transcript-write is narrow (queue → SDK → child stdin →
  // model); a crash there loses one prompt vs. the observed N-prompt flood on
  // every restart. Overwrite the constructor's wiring to do both — setOnEvent
  // replaces, not appends (SSETransport.ts:658).
  sse.setOnEvent(event => {
    ccr.reportDelivery(event.event_id, 'received')
    ccr.reportDelivery(event.event_id, 'processed')
  })

  // Both sse.connect() and ccr.initialize() are deferred to connect() below.
  // replBridge's calling order is newTransport → setOnConnect → setOnData →
  // setOnClose → connect(), and both calls need those callbacks wired first:
  // sse.connect() opens the stream (events flow to onData/onClose immediately),
  // and ccr.initialize().then() fires onConnectCb.
  //
  // onConnect fires once ccr.initialize() resolves. Writes go via
  // CCRClient HTTP POST (SerialBatchEventUploader), not SSE, so the
  // write path is ready the moment workerEpoch is set. SSE.connect()
  // awaits its read loop and never resolves — don't gate on it.
  // The SSE stream opens in parallel (~30ms) and starts delivering
  // inbound events via setOnData; outbound doesn't need to wait for it.
  let onConnectCb: (() => void) | undefined
  let ccrInitialized = false
  // Flipped by close(); checked by writeBatch() so a teardown mid-batch
  // stops sending.
  let closed = false

  return {
    write(msg) {
      // Single-event write via CCRClient's upload queue.
      // NOTE(review): unlike writeBatch, this does not check `closed` —
      // presumably the uploader tolerates post-close enqueues; confirm.
      return ccr.writeEvent(msg)
    },
    async writeBatch(msgs) {
      // SerialBatchEventUploader already batches internally (maxBatchSize=100);
      // sequential enqueue preserves order and the uploader coalesces.
      // Check closed between writes to avoid sending partial batches after
      // transport teardown (epoch mismatch, SSE drop).
      for (const m of msgs) {
        if (closed) break
        await ccr.writeEvent(m)
      }
    },
    close() {
      closed = true
      ccr.close()
      sse.close()
    },
    isConnectedStatus() {
      // Write-readiness, not read-readiness — replBridge checks this
      // before calling writeBatch. SSE open state is orthogonal.
      return ccrInitialized
    },
    getStateLabel() {
      // SSETransport doesn't expose its state string; synthesize from
      // what we can observe. replBridge only uses this for debug logging.
      if (sse.isClosedStatus()) return 'closed'
      if (sse.isConnectedStatus()) return ccrInitialized ? 'connected' : 'init'
      return 'connecting'
    },
    setOnData(cb) {
      sse.setOnData(cb)
    },
    setOnClose(cb) {
      onCloseCb = cb
      // SSE reconnect-budget exhaustion fires onClose(undefined) — map to
      // 4092 so ws_closed telemetry can distinguish it from HTTP-status
      // closes (SSETransport:280 passes response.status). Stop CCRClient's
      // heartbeat timer before notifying replBridge. (sse.close() doesn't
      // invoke this, so the epoch-mismatch path above isn't double-firing.)
      sse.setOnClose(code => {
        ccr.close()
        cb(code ?? 4092)
      })
    },
    setOnConnect(cb) {
      onConnectCb = cb
    },
    getLastSequenceNum() {
      return sse.getLastSequenceNum()
    },
    // v2 write path (CCRClient) doesn't set maxConsecutiveFailures — no drops.
    droppedBatchCount: 0,
    reportState(state) {
      ccr.reportState(state)
    },
    reportMetadata(metadata) {
      ccr.reportMetadata(metadata)
    },
    reportDelivery(eventId, status) {
      ccr.reportDelivery(eventId, status)
    },
    flush() {
      return ccr.flush()
    },
    connect() {
      // Outbound-only: skip the SSE read stream entirely — no inbound
      // events to receive, no delivery ACKs to send. Only the CCRClient
      // write path (POST /worker/events) and heartbeat are needed.
      if (!opts.outboundOnly) {
        // Fire-and-forget — SSETransport.connect() awaits readStream()
        // (the read loop) and only resolves on stream close/error. The
        // spawn-mode path in remoteIO.ts does the same void discard.
        void sse.connect()
      }
      void ccr.initialize(epoch).then(
        () => {
          ccrInitialized = true
          logForDebugging(
            `[bridge:repl] v2 transport ready for writes (epoch=${epoch}, sse=${sse.isConnectedStatus() ? 'open' : 'opening'})`,
          )
          onConnectCb?.()
        },
        (err: unknown) => {
          logForDebugging(
            `[bridge:repl] CCR v2 initialize failed: ${errorMessage(err)}`,
            { level: 'error' },
          )
          // Close transport resources and notify replBridge via onClose
          // so the poll loop can retry on the next work dispatch.
          // Without this callback, replBridge never learns the transport
          // failed to initialize and sits with transport === null forever.
          ccr.close()
          sse.close()
          onCloseCb?.(4091) // 4091 = init failure, distinguishable from 4090 epoch mismatch
        },
      )
    },
  }
}

+ 57 - 0
src/bridge/sessionIdCompat.ts

@@ -0,0 +1,57 @@
+/**
+ * Session ID tag translation helpers for the CCR v2 compat layer.
+ *
+ * Lives in its own file (rather than workSecret.ts) so that sessionHandle.ts
+ * and replBridgeTransport.ts (bridge.mjs entry points) can import from
+ * workSecret.ts without pulling in these retag functions.
+ *
+ * The isCseShimEnabled kill switch is injected via setCseShimGate() to avoid
+ * a static import of bridgeEnabled.ts → growthbook.ts → config.ts — all
+ * banned from the sdk.mjs bundle (scripts/build-agent-sdk.sh). Callers that
+ * already import bridgeEnabled.ts register the gate; the SDK path never does,
+ * so the shim defaults to active (matching isCseShimEnabled()'s own default).
+ */
+
+let _isCseShimEnabled: (() => boolean) | undefined
+
+/**
+ * Register the GrowthBook gate for the cse_ shim. Called from bridge
+ * init code that already imports bridgeEnabled.ts.
+ */
+export function setCseShimGate(gate: () => boolean): void {
+  _isCseShimEnabled = gate
+}
+
+/**
+ * Re-tag a `cse_*` session ID to `session_*` for use with the v1 compat API.
+ *
+ * Worker endpoints (/v1/code/sessions/{id}/worker/*) want `cse_*`; that's
+ * what the work poll delivers. Client-facing compat endpoints
+ * (/v1/sessions/{id}, /v1/sessions/{id}/archive, /v1/sessions/{id}/events)
+ * want `session_*` — compat/convert.go:27 validates TagSession. Same UUID,
+ * different costume. No-op for IDs that aren't `cse_*`.
+ *
+ * bridgeMain holds one sessionId variable for both worker registration and
+ * session-management calls. It arrives as `cse_*` from the work poll under
+ * the compat gate, so archiveSession/fetchSessionTitle need this re-tag.
+ */
+export function toCompatSessionId(id: string): string {
+  if (!id.startsWith('cse_')) return id
+  if (_isCseShimEnabled && !_isCseShimEnabled()) return id
+  return 'session_' + id.slice('cse_'.length)
+}
+
+/**
+ * Re-tag a `session_*` session ID to `cse_*` for infrastructure-layer calls.
+ *
+ * Inverse of toCompatSessionId. POST /v1/environments/{id}/bridge/reconnect
+ * lives below the compat layer: once ccr_v2_compat_enabled is on server-side,
+ * it looks sessions up by their infra tag (`cse_*`). createBridgeSession still
+ * returns `session_*` (compat/convert.go:41) and that's what bridge-pointer
+ * stores — so perpetual reconnect passes the wrong costume and gets "Session
+ * not found" back. Same UUID, wrong tag. No-op for IDs that aren't `session_*`.
+ */
+export function toInfraSessionId(id: string): string {
+  if (!id.startsWith('session_')) return id
+  return 'cse_' + id.slice('session_'.length)
+}

+ 550 - 0
src/bridge/sessionRunner.ts

@@ -0,0 +1,550 @@
+import { type ChildProcess, spawn } from 'child_process'
+import { createWriteStream, type WriteStream } from 'fs'
+import { tmpdir } from 'os'
+import { dirname, join } from 'path'
+import { createInterface } from 'readline'
+import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
+import { debugTruncate } from './debugUtils.js'
+import type {
+  SessionActivity,
+  SessionDoneStatus,
+  SessionHandle,
+  SessionSpawner,
+  SessionSpawnOpts,
+} from './types.js'
+
/** Ring-buffer cap for recent session activities kept on the handle. */
const MAX_ACTIVITIES = 10
/** Ring-buffer cap for recent child stderr lines kept for diagnostics. */
const MAX_STDERR_LINES = 10
+
+/**
+ * Sanitize a session ID for use in file names.
+ * Strips any characters that could cause path traversal (e.g. `../`, `/`)
+ * or other filesystem issues, replacing them with underscores.
+ */
+export function safeFilenameId(id: string): string {
+  return id.replace(/[^a-zA-Z0-9_-]/g, '_')
+}
+
/**
 * A control_request emitted by the child CLI when it needs permission to
 * execute a **specific** tool invocation (not a general capability check).
 * The bridge forwards this to the server so the user can approve/deny.
 */
export type PermissionRequest = {
  type: 'control_request'
  // Correlates the eventual approve/deny response with this request.
  request_id: string
  request: {
    /** Per-invocation permission check — "may I run this tool with these inputs?" */
    subtype: 'can_use_tool'
    tool_name: string
    input: Record<string, unknown>
    tool_use_id: string
  }
}

/** Dependencies injected into createSessionSpawner (paths, env, callbacks). */
type SessionSpawnerDeps = {
  // Executable used to spawn the child CLI (claude binary, or the node runtime).
  execPath: string
  /**
   * Arguments that must precede the CLI flags when spawning. Empty for
   * compiled binaries (where execPath is the claude binary itself); contains
   * the script path (process.argv[1]) for npm installs where execPath is the
   * node runtime. Without this, node sees --sdk-url as a node option and
   * exits with "bad option: --sdk-url" (see anthropics/claude-code#28334).
   */
  scriptArgs: string[]
  // Base environment for the child; the spawner overlays session-specific vars.
  env: NodeJS.ProcessEnv
  verbose: boolean
  sandbox: boolean
  // Optional base path for per-session debug logs (session ID is suffixed).
  debugFile?: string
  // Forwarded to the child as --permission-mode when set.
  permissionMode?: string
  onDebug: (msg: string) => void
  // Fired for every activity parsed from the child's NDJSON stdout.
  onActivity?: (sessionId: string, activity: SessionActivity) => void
  // Fired when the child asks for per-invocation tool permission.
  onPermissionRequest?: (
    sessionId: string,
    request: PermissionRequest,
    accessToken: string,
  ) => void
}
+
+/** Map tool names to human-readable verbs for the status display. */
+const TOOL_VERBS: Record<string, string> = {
+  Read: 'Reading',
+  Write: 'Writing',
+  Edit: 'Editing',
+  MultiEdit: 'Editing',
+  Bash: 'Running',
+  Glob: 'Searching',
+  Grep: 'Searching',
+  WebFetch: 'Fetching',
+  WebSearch: 'Searching',
+  Task: 'Running task',
+  FileReadTool: 'Reading',
+  FileWriteTool: 'Writing',
+  FileEditTool: 'Editing',
+  GlobTool: 'Searching',
+  GrepTool: 'Searching',
+  BashTool: 'Running',
+  NotebookEditTool: 'Editing notebook',
+  LSP: 'LSP',
+}
+
+function toolSummary(name: string, input: Record<string, unknown>): string {
+  const verb = TOOL_VERBS[name] ?? name
+  const target =
+    (input.file_path as string) ??
+    (input.filePath as string) ??
+    (input.pattern as string) ??
+    (input.command as string | undefined)?.slice(0, 60) ??
+    (input.url as string) ??
+    (input.query as string) ??
+    ''
+  if (target) {
+    return `${verb} ${target}`
+  }
+  return verb
+}
+
/**
 * Parse one NDJSON line from the child CLI's stdout into zero or more
 * SessionActivity entries for the status display.
 *
 * Only 'assistant' messages (tool_use and non-empty text blocks) and
 * 'result' messages produce activities; anything unparseable or of another
 * type yields an empty array. Every emitted activity is mirrored to onDebug.
 */
function extractActivities(
  line: string,
  sessionId: string,
  onDebug: (msg: string) => void,
): SessionActivity[] {
  let parsed: unknown
  try {
    parsed = jsonParse(line)
  } catch {
    // Non-JSON lines are expected on a mixed stdout stream — skip silently.
    return []
  }

  if (!parsed || typeof parsed !== 'object') {
    return []
  }

  const msg = parsed as Record<string, unknown>
  const activities: SessionActivity[] = []
  // One timestamp for every activity extracted from this line.
  const now = Date.now()

  switch (msg.type) {
    case 'assistant': {
      const message = msg.message as Record<string, unknown> | undefined
      if (!message) break
      const content = message.content
      if (!Array.isArray(content)) break

      for (const block of content) {
        if (!block || typeof block !== 'object') continue
        const b = block as Record<string, unknown>

        if (b.type === 'tool_use') {
          const name = (b.name as string) ?? 'Tool'
          const input = (b.input as Record<string, unknown>) ?? {}
          const summary = toolSummary(name, input)
          activities.push({
            type: 'tool_start',
            summary,
            timestamp: now,
          })
          onDebug(
            `[bridge:activity] sessionId=${sessionId} tool_use name=${name} ${inputPreview(input)}`,
          )
        } else if (b.type === 'text') {
          const text = (b.text as string) ?? ''
          if (text.length > 0) {
            activities.push({
              type: 'text',
              // Display summary is capped at 80 chars; debug log gets 100.
              summary: text.slice(0, 80),
              timestamp: now,
            })
            onDebug(
              `[bridge:activity] sessionId=${sessionId} text "${text.slice(0, 100)}"`,
            )
          }
        }
      }
      break
    }
    case 'result': {
      const subtype = msg.subtype as string | undefined
      if (subtype === 'success') {
        activities.push({
          type: 'result',
          summary: 'Session completed',
          timestamp: now,
        })
        onDebug(
          `[bridge:activity] sessionId=${sessionId} result subtype=success`,
        )
      } else if (subtype) {
        // Any non-success subtype is surfaced as an error activity; prefer
        // the first server-provided error string over the generic subtype.
        const errors = msg.errors as string[] | undefined
        const errorSummary = errors?.[0] ?? `Error: ${subtype}`
        activities.push({
          type: 'error',
          summary: errorSummary,
          timestamp: now,
        })
        onDebug(
          `[bridge:activity] sessionId=${sessionId} result subtype=${subtype} error="${errorSummary}"`,
        )
      } else {
        // Missing subtype: log for diagnosis but emit no activity.
        onDebug(
          `[bridge:activity] sessionId=${sessionId} result subtype=undefined`,
        )
      }
      break
    }
    default:
      break
  }

  return activities
}
+
+/**
+ * Extract plain text from a replayed SDKUserMessage NDJSON line. Returns the
+ * trimmed text if this looks like a real human-authored message, otherwise
+ * undefined so the caller keeps waiting for the first real message.
+ */
+function extractUserMessageText(
+  msg: Record<string, unknown>,
+): string | undefined {
+  // Skip tool-result user messages (wrapped subagent results) and synthetic
+  // caveat messages — neither is human-authored.
+  if (msg.parent_tool_use_id != null || msg.isSynthetic || msg.isReplay)
+    return undefined
+
+  const message = msg.message as Record<string, unknown> | undefined
+  const content = message?.content
+  let text: string | undefined
+  if (typeof content === 'string') {
+    text = content
+  } else if (Array.isArray(content)) {
+    for (const block of content) {
+      if (
+        block &&
+        typeof block === 'object' &&
+        (block as Record<string, unknown>).type === 'text'
+      ) {
+        text = (block as Record<string, unknown>).text as string | undefined
+        break
+      }
+    }
+  }
+  text = text?.trim()
+  return text ? text : undefined
+}
+
+/** Build a short preview of tool input for debug logging. */
+function inputPreview(input: Record<string, unknown>): string {
+  const parts: string[] = []
+  for (const [key, val] of Object.entries(input)) {
+    if (typeof val === 'string') {
+      parts.push(`${key}="${val.slice(0, 100)}"`)
+    }
+    if (parts.length >= 3) break
+  }
+  return parts.join(' ')
+}
+
/**
 * Create a SessionSpawner that launches one child CC CLI process per session.
 *
 * The child speaks NDJSON over stdio: stdin carries control writes (including
 * update_environment_variables for token refresh), stdout carries stream-json
 * output parsed here for activity tracking, permission requests, and replayed
 * user messages; stderr is ring-buffered for post-mortem diagnostics. The
 * returned handle exposes kill/forceKill, stdin writes, and a `done` promise
 * that resolves with the session's terminal status.
 */
export function createSessionSpawner(deps: SessionSpawnerDeps): SessionSpawner {
  return {
    spawn(opts: SessionSpawnOpts, dir: string): SessionHandle {
      // Debug file resolution:
      // 1. If deps.debugFile is provided, use it with session ID suffix for uniqueness
      // 2. If verbose or ant build, auto-generate a temp file path
      // 3. Otherwise, no debug file
      const safeId = safeFilenameId(opts.sessionId)
      let debugFile: string | undefined
      if (deps.debugFile) {
        // NOTE(review): lastIndexOf('.') would also match a dot inside a
        // directory name (e.g. /tmp/foo.bar/debug) — presumably debugFile is
        // always a dotted file name; confirm with callers.
        const ext = deps.debugFile.lastIndexOf('.')
        if (ext > 0) {
          debugFile = `${deps.debugFile.slice(0, ext)}-${safeId}${deps.debugFile.slice(ext)}`
        } else {
          debugFile = `${deps.debugFile}-${safeId}`
        }
      } else if (deps.verbose || process.env.USER_TYPE === 'ant') {
        debugFile = join(tmpdir(), 'claude', `bridge-session-${safeId}.log`)
      }

      // Transcript file: write raw NDJSON lines for post-hoc analysis.
      // Placed alongside the debug file when one is configured.
      let transcriptStream: WriteStream | null = null
      let transcriptPath: string | undefined
      if (deps.debugFile) {
        transcriptPath = join(
          dirname(deps.debugFile),
          `bridge-transcript-${safeId}.jsonl`,
        )
        transcriptStream = createWriteStream(transcriptPath, { flags: 'a' })
        transcriptStream.on('error', err => {
          // Disable transcript writes after the first error — best-effort log.
          deps.onDebug(
            `[bridge:session] Transcript write error: ${err.message}`,
          )
          transcriptStream = null
        })
        deps.onDebug(`[bridge:session] Transcript log: ${transcriptPath}`)
      }

      const args = [
        ...deps.scriptArgs,
        '--print',
        '--sdk-url',
        opts.sdkUrl,
        '--session-id',
        opts.sessionId,
        '--input-format',
        'stream-json',
        '--output-format',
        'stream-json',
        '--replay-user-messages',
        ...(deps.verbose ? ['--verbose'] : []),
        ...(debugFile ? ['--debug-file', debugFile] : []),
        ...(deps.permissionMode
          ? ['--permission-mode', deps.permissionMode]
          : []),
      ]

      const env: NodeJS.ProcessEnv = {
        ...deps.env,
        // Strip the bridge's OAuth token so the child CC process uses
        // the session access token for inference instead.
        CLAUDE_CODE_OAUTH_TOKEN: undefined,
        CLAUDE_CODE_ENVIRONMENT_KIND: 'bridge',
        ...(deps.sandbox && { CLAUDE_CODE_FORCE_SANDBOX: '1' }),
        CLAUDE_CODE_SESSION_ACCESS_TOKEN: opts.accessToken,
        // v1: HybridTransport (WS reads + POST writes) to Session-Ingress.
        // Harmless in v2 mode — transportUtils checks CLAUDE_CODE_USE_CCR_V2 first.
        CLAUDE_CODE_POST_FOR_SESSION_INGRESS_V2: '1',
        // v2: SSETransport + CCRClient to CCR's /v1/code/sessions/* endpoints.
        // Same env vars environment-manager sets in the container path.
        ...(opts.useCcrV2 && {
          CLAUDE_CODE_USE_CCR_V2: '1',
          CLAUDE_CODE_WORKER_EPOCH: String(opts.workerEpoch),
        }),
      }

      deps.onDebug(
        `[bridge:session] Spawning sessionId=${opts.sessionId} sdkUrl=${opts.sdkUrl} accessToken=${opts.accessToken ? 'present' : 'MISSING'}`,
      )
      deps.onDebug(`[bridge:session] Child args: ${args.join(' ')}`)
      if (debugFile) {
        deps.onDebug(`[bridge:session] Debug log: ${debugFile}`)
      }

      // Pipe all three streams: stdin for control, stdout for NDJSON parsing,
      // stderr for error capture and diagnostics.
      const child: ChildProcess = spawn(deps.execPath, args, {
        cwd: dir,
        stdio: ['pipe', 'pipe', 'pipe'],
        env,
        windowsHide: true,
      })

      deps.onDebug(
        `[bridge:session] sessionId=${opts.sessionId} pid=${child.pid}`,
      )

      const activities: SessionActivity[] = []
      let currentActivity: SessionActivity | null = null
      const lastStderr: string[] = []
      let sigkillSent = false
      let firstUserMessageSeen = false

      // Buffer stderr for error diagnostics
      if (child.stderr) {
        const stderrRl = createInterface({ input: child.stderr })
        stderrRl.on('line', line => {
          // Forward stderr to bridge's stderr in verbose mode
          if (deps.verbose) {
            process.stderr.write(line + '\n')
          }
          // Ring buffer of last N lines
          if (lastStderr.length >= MAX_STDERR_LINES) {
            lastStderr.shift()
          }
          lastStderr.push(line)
        })
      }

      // Parse NDJSON from child stdout
      if (child.stdout) {
        const rl = createInterface({ input: child.stdout })
        rl.on('line', line => {
          // Write raw NDJSON to transcript file
          if (transcriptStream) {
            transcriptStream.write(line + '\n')
          }

          // Log all messages flowing from the child CLI to the bridge
          deps.onDebug(
            `[bridge:ws] sessionId=${opts.sessionId} <<< ${debugTruncate(line)}`,
          )

          // In verbose mode, forward raw output to stderr
          if (deps.verbose) {
            process.stderr.write(line + '\n')
          }

          const extracted = extractActivities(
            line,
            opts.sessionId,
            deps.onDebug,
          )
          for (const activity of extracted) {
            // Maintain ring buffer
            if (activities.length >= MAX_ACTIVITIES) {
              activities.shift()
            }
            activities.push(activity)
            currentActivity = activity

            deps.onActivity?.(opts.sessionId, activity)
          }

          // Detect control_request and replayed user messages.
          // extractActivities parses the same line but swallows parse errors
          // and skips 'user' type — re-parse here is cheap (NDJSON lines are
          // small) and keeps each path self-contained.
          {
            let parsed: unknown
            try {
              parsed = jsonParse(line)
            } catch {
              // Non-JSON line, skip detection
            }
            if (parsed && typeof parsed === 'object') {
              const msg = parsed as Record<string, unknown>

              if (msg.type === 'control_request') {
                const request = msg.request as
                  | Record<string, unknown>
                  | undefined
                if (
                  request?.subtype === 'can_use_tool' &&
                  deps.onPermissionRequest
                ) {
                  deps.onPermissionRequest(
                    opts.sessionId,
                    parsed as PermissionRequest,
                    opts.accessToken,
                  )
                }
                // interrupt is turn-level; the child handles it internally (print.ts)
              } else if (
                msg.type === 'user' &&
                !firstUserMessageSeen &&
                opts.onFirstUserMessage
              ) {
                const text = extractUserMessageText(msg)
                if (text) {
                  firstUserMessageSeen = true
                  opts.onFirstUserMessage(text)
                }
              }
            }
          }
        })
      }

      // Resolves exactly once with the session's terminal status; spawn
      // errors and process exit both funnel into the same promise.
      const done = new Promise<SessionDoneStatus>(resolve => {
        child.on('close', (code, signal) => {
          // Close transcript stream on exit
          if (transcriptStream) {
            transcriptStream.end()
            transcriptStream = null
          }

          if (signal === 'SIGTERM' || signal === 'SIGINT') {
            deps.onDebug(
              `[bridge:session] sessionId=${opts.sessionId} interrupted signal=${signal} pid=${child.pid}`,
            )
            resolve('interrupted')
          } else if (code === 0) {
            deps.onDebug(
              `[bridge:session] sessionId=${opts.sessionId} completed exit_code=0 pid=${child.pid}`,
            )
            resolve('completed')
          } else {
            deps.onDebug(
              `[bridge:session] sessionId=${opts.sessionId} failed exit_code=${code} pid=${child.pid}`,
            )
            resolve('failed')
          }
        })

        child.on('error', err => {
          deps.onDebug(
            `[bridge:session] sessionId=${opts.sessionId} spawn error: ${err.message}`,
          )
          resolve('failed')
        })
      })

      const handle: SessionHandle = {
        sessionId: opts.sessionId,
        done,
        activities,
        accessToken: opts.accessToken,
        lastStderr,
        get currentActivity(): SessionActivity | null {
          return currentActivity
        },
        kill(): void {
          if (!child.killed) {
            deps.onDebug(
              `[bridge:session] Sending SIGTERM to sessionId=${opts.sessionId} pid=${child.pid}`,
            )
            // On Windows, child.kill('SIGTERM') throws; use default signal.
            if (process.platform === 'win32') {
              child.kill()
            } else {
              child.kill('SIGTERM')
            }
          }
        },
        forceKill(): void {
          // Use separate flag because child.killed is set when kill() is called,
          // not when the process exits. We need to send SIGKILL even after SIGTERM.
          if (!sigkillSent && child.pid) {
            sigkillSent = true
            deps.onDebug(
              `[bridge:session] Sending SIGKILL to sessionId=${opts.sessionId} pid=${child.pid}`,
            )
            if (process.platform === 'win32') {
              child.kill()
            } else {
              child.kill('SIGKILL')
            }
          }
        },
        writeStdin(data: string): void {
          // Silently drop writes after stdin is gone — the child has exited.
          if (child.stdin && !child.stdin.destroyed) {
            deps.onDebug(
              `[bridge:ws] sessionId=${opts.sessionId} >>> ${debugTruncate(data)}`,
            )
            child.stdin.write(data)
          }
        },
        updateAccessToken(token: string): void {
          handle.accessToken = token
          // Send the fresh token to the child process via stdin. The child's
          // StructuredIO handles update_environment_variables messages by
          // setting process.env directly, so getSessionIngressAuthToken()
          // picks up the new token on the next refreshHeaders call.
          handle.writeStdin(
            jsonStringify({
              type: 'update_environment_variables',
              variables: { CLAUDE_CODE_SESSION_ACCESS_TOKEN: token },
            }) + '\n',
          )
          deps.onDebug(
            `[bridge:session] Sent token refresh via stdin for sessionId=${opts.sessionId}`,
          )
        },
      }

      return handle
    },
  }
}
+
// Test-only alias so unit tests can exercise the NDJSON activity parser
// without widening the module's public surface.
export { extractActivities as _extractActivitiesForTesting }

+ 210 - 0
src/bridge/trustedDevice.ts

@@ -0,0 +1,210 @@
+import axios from 'axios'
+import memoize from 'lodash-es/memoize.js'
+import { hostname } from 'os'
+import { getOauthConfig } from '../constants/oauth.js'
+import {
+  checkGate_CACHED_OR_BLOCKING,
+  getFeatureValue_CACHED_MAY_BE_STALE,
+} from '../services/analytics/growthbook.js'
+import { logForDebugging } from '../utils/debug.js'
+import { errorMessage } from '../utils/errors.js'
+import { isEssentialTrafficOnly } from '../utils/privacyLevel.js'
+import { getSecureStorage } from '../utils/secureStorage/index.js'
+import { jsonStringify } from '../utils/slowOperations.js'
+
+/**
+ * Trusted device token source for bridge (remote-control) sessions.
+ *
+ * Bridge sessions have SecurityTier=ELEVATED on the server (CCR v2).
+ * The server gates ConnectBridgeWorker on its own flag
+ * (sessions_elevated_auth_enforcement in Anthropic Main); this CLI-side
+ * flag controls whether the CLI sends X-Trusted-Device-Token at all.
+ * Two flags so rollout can be staged: flip CLI-side first (headers
+ * start flowing, server still no-ops), then flip server-side.
+ *
+ * Enrollment (POST /auth/trusted_devices) is gated server-side by
+ * account_session.created_at < 10min, so it must happen during /login.
+ * Token is persistent (90d rolling expiry) and stored in keychain.
+ *
+ * See anthropics/anthropic#274559 (spec), #310375 (B1b tenant RPCs),
+ * #295987 (B2 Python routes), #307150 (C1' CCR v2 gate).
+ */
+
+const TRUSTED_DEVICE_GATE = 'tengu_sessions_elevated_auth_enforcement'
+
+function isGateEnabled(): boolean {
+  return getFeatureValue_CACHED_MAY_BE_STALE(TRUSTED_DEVICE_GATE, false)
+}
+
+// Memoized — secureStorage.read() spawns a macOS `security` subprocess (~40ms).
+// bridgeApi.ts calls this from getHeaders() on every poll/heartbeat/ack.
+// Cache cleared after enrollment (below) and on logout (clearAuthRelatedCaches).
+//
+// Only the storage read is memoized — the GrowthBook gate is checked live so
+// that a gate flip after GrowthBook refresh takes effect without a restart.
+const readStoredToken = memoize((): string | undefined => {
+  // Env var takes precedence for testing/canary.
+  const envToken = process.env.CLAUDE_TRUSTED_DEVICE_TOKEN
+  if (envToken) {
+    return envToken
+  }
+  return getSecureStorage().read()?.trustedDeviceToken
+})
+
+export function getTrustedDeviceToken(): string | undefined {
+  if (!isGateEnabled()) {
+    return undefined
+  }
+  return readStoredToken()
+}
+
+export function clearTrustedDeviceTokenCache(): void {
+  readStoredToken.cache?.clear?.()
+}
+
+/**
+ * Clear the stored trusted device token from secure storage and the memo cache.
+ * Called before enrollTrustedDevice() during /login so a stale token from the
+ * previous account isn't sent as X-Trusted-Device-Token while enrollment is
+ * in-flight (enrollTrustedDevice is async — bridge API calls between login and
+ * enrollment completion would otherwise still read the old cached token).
+ */
+export function clearTrustedDeviceToken(): void {
+  if (!isGateEnabled()) {
+    return
+  }
+  const secureStorage = getSecureStorage()
+  try {
+    const data = secureStorage.read()
+    if (data?.trustedDeviceToken) {
+      delete data.trustedDeviceToken
+      secureStorage.update(data)
+    }
+  } catch {
+    // Best-effort — don't block login if storage is inaccessible
+  }
+  readStoredToken.cache?.clear?.()
+}
+
/**
 * Enroll this device via POST /auth/trusted_devices and persist the token
 * to keychain. Best-effort — logs and returns on failure so callers
 * (post-login hooks) don't block the login flow.
 *
 * The server gates enrollment on account_session.created_at < 10min, so
 * this must be called immediately after a fresh /login. Calling it later
 * (e.g. lazy enrollment on /bridge 403) will fail with 403 stale_session.
 */
export async function enrollTrustedDevice(): Promise<void> {
  try {
    // checkGate_CACHED_OR_BLOCKING awaits any in-flight GrowthBook re-init
    // (triggered by refreshGrowthBookAfterAuthChange in login.tsx) before
    // reading the gate, so we get the post-refresh value.
    if (!(await checkGate_CACHED_OR_BLOCKING(TRUSTED_DEVICE_GATE))) {
      logForDebugging(
        `[trusted-device] Gate ${TRUSTED_DEVICE_GATE} is off, skipping enrollment`,
      )
      return
    }
    // If CLAUDE_TRUSTED_DEVICE_TOKEN is set (e.g. by an enterprise wrapper),
    // skip enrollment — the env var takes precedence in readStoredToken() so
    // any enrolled token would be shadowed and never used.
    if (process.env.CLAUDE_TRUSTED_DEVICE_TOKEN) {
      logForDebugging(
        '[trusted-device] CLAUDE_TRUSTED_DEVICE_TOKEN env var is set, skipping enrollment (env var takes precedence)',
      )
      return
    }
    // Lazy require — utils/auth.ts transitively pulls ~1300 modules
    // (config → file → permissions → sessionStorage → commands). Daemon callers
    // of getTrustedDeviceToken() don't need this; only /login does.
    /* eslint-disable @typescript-eslint/no-require-imports */
    const { getClaudeAIOAuthTokens } =
      require('../utils/auth.js') as typeof import('../utils/auth.js')
    /* eslint-enable @typescript-eslint/no-require-imports */
    const accessToken = getClaudeAIOAuthTokens()?.accessToken
    if (!accessToken) {
      logForDebugging('[trusted-device] No OAuth token, skipping enrollment')
      return
    }
    // Always re-enroll on /login — the existing token may belong to a
    // different account (account-switch without /logout). Skipping enrollment
    // would send the old account's token on the new account's bridge calls.
    const secureStorage = getSecureStorage()

    if (isEssentialTrafficOnly()) {
      logForDebugging(
        '[trusted-device] Essential traffic only, skipping enrollment',
      )
      return
    }

    const baseUrl = getOauthConfig().BASE_API_URL
    let response
    try {
      // validateStatus < 500: 4xx responses resolve and are handled by the
      // status check below; only 5xx/network errors land in this catch.
      response = await axios.post<{
        device_token?: string
        device_id?: string
      }>(
        `${baseUrl}/api/auth/trusted_devices`,
        { display_name: `Claude Code on ${hostname()} · ${process.platform}` },
        {
          headers: {
            Authorization: `Bearer ${accessToken}`,
            'Content-Type': 'application/json',
          },
          timeout: 10_000,
          validateStatus: s => s < 500,
        },
      )
    } catch (err: unknown) {
      logForDebugging(
        `[trusted-device] Enrollment request failed: ${errorMessage(err)}`,
      )
      return
    }

    if (response.status !== 200 && response.status !== 201) {
      logForDebugging(
        `[trusted-device] Enrollment failed ${response.status}: ${jsonStringify(response.data).slice(0, 200)}`,
      )
      return
    }

    const token = response.data?.device_token
    if (!token || typeof token !== 'string') {
      logForDebugging(
        '[trusted-device] Enrollment response missing device_token field',
      )
      return
    }

    try {
      const storageData = secureStorage.read()
      if (!storageData) {
        logForDebugging(
          '[trusted-device] Cannot read storage, skipping token persist',
        )
        return
      }
      storageData.trustedDeviceToken = token
      const result = secureStorage.update(storageData)
      if (!result.success) {
        logForDebugging(
          `[trusted-device] Failed to persist token: ${result.warning ?? 'unknown'}`,
        )
        return
      }
      // Invalidate the memoized read so the freshly-enrolled token is
      // picked up on the next getTrustedDeviceToken() call.
      readStoredToken.cache?.clear?.()
      logForDebugging(
        `[trusted-device] Enrolled device_id=${response.data.device_id ?? 'unknown'}`,
      )
    } catch (err: unknown) {
      logForDebugging(
        `[trusted-device] Storage write failed: ${errorMessage(err)}`,
      )
    }
  } catch (err: unknown) {
    logForDebugging(`[trusted-device] Enrollment error: ${errorMessage(err)}`)
  }
}

+ 262 - 0
src/bridge/types.ts

@@ -0,0 +1,262 @@
/**
 * Default per-session timeout (24 hours). Per-bridge overrides are carried in
 * BridgeConfig.sessionTimeoutMs (below).
 */
export const DEFAULT_SESSION_TIMEOUT_MS = 24 * 60 * 60 * 1000

/** Reusable login guidance appended to bridge auth errors. */
export const BRIDGE_LOGIN_INSTRUCTION =
  'Remote Control is only available with claude.ai subscriptions. Please use `/login` to sign in with your claude.ai account.'

/** Full error printed when `claude remote-control` is run without auth. */
export const BRIDGE_LOGIN_ERROR =
  'Error: You must be logged in to use Remote Control.\n\n' +
  BRIDGE_LOGIN_INSTRUCTION

/** Shown when the user disconnects Remote Control (via /remote-control or ultraplan launch). */
export const REMOTE_CONTROL_DISCONNECTED_MSG = 'Remote Control disconnected.'
+
// --- Protocol types for the environments API ---

/** Payload of a polled work item: what kind of work, and which entity it targets. */
export type WorkData = {
  type: 'session' | 'healthcheck'
  id: string
}

/** One work item as returned by the environments work-poll endpoint. */
export type WorkResponse = {
  id: string
  type: 'work'
  environment_id: string
  state: string
  data: WorkData
  secret: string // base64url-encoded JSON
  created_at: string
}

/**
 * Decoded contents of WorkResponse.secret. decodeWorkSecret (workSecret.ts)
 * accepts only version === 1 and requires session_ingress_token and
 * api_base_url to be non-empty strings; other fields are not validated there.
 */
export type WorkSecret = {
  version: number
  session_ingress_token: string
  api_base_url: string
  sources: Array<{
    type: string
    git_info?: { type: string; repo: string; ref?: string; token?: string }
  }>
  auth: Array<{ type: string; token: string }>
  claude_code_args?: Record<string, string> | null
  mcp_config?: unknown | null
  environment_variables?: Record<string, string> | null
  /**
   * Server-driven CCR v2 selector. Set by prepare_work_secret() when the
   * session was created via the v2 compat layer (ccr_v2_compat_enabled).
   * Same field the BYOC runner reads at environment-runner/sessionExecutor.ts.
   */
  use_code_sessions?: boolean
}

/** Terminal states a session can end in. */
export type SessionDoneStatus = 'completed' | 'failed' | 'interrupted'

export type SessionActivityType = 'tool_start' | 'text' | 'result' | 'error'

/** A recent-activity entry, rendered by BridgeLogger's status display. */
export type SessionActivity = {
  type: SessionActivityType
  summary: string // e.g. "Editing src/foo.ts", "Reading package.json"
  timestamp: number // NOTE(review): presumably epoch millis — confirm against producers
}
+
/**
 * How `claude remote-control` chooses session working directories.
 * - `single-session`: one session in cwd, bridge tears down when it ends
 * - `worktree`: persistent server, every session gets an isolated git worktree
 * - `same-dir`: persistent server, every session shares cwd (can stomp each other)
 */
export type SpawnMode = 'single-session' | 'worktree' | 'same-dir'

/**
 * Well-known worker_type values THIS codebase produces. Sent as
 * `metadata.worker_type` at environment registration so claude.ai can filter
 * the session picker by origin (e.g. assistant tab only shows assistant
 * workers). The backend treats this as an opaque string — desktop cowork
 * sends `"cowork"`, which isn't in this union. REPL code uses this narrow
 * type for its own exhaustiveness; wire-level fields accept any string.
 */
export type BridgeWorkerType = 'claude_code' | 'claude_code_assistant'

/** Static configuration for one bridge instance, fixed at startup. */
export type BridgeConfig = {
  /** Directory the bridge operates in (sessions run here or in worktrees under it — confirm). */
  dir: string
  /** Display name for this machine (shown to remote clients — confirm). */
  machineName: string
  /** Git branch of `dir` (see also gitRepoUrl). */
  branch: string
  /** Remote URL of the git repo, or null when `dir` isn't a repo with a remote. */
  gitRepoUrl: string | null
  /** Maximum number of concurrently running sessions. */
  maxSessions: number
  spawnMode: SpawnMode
  /** Verbose logging toggle (BridgeLogger.logVerbose). */
  verbose: boolean
  /** Whether sessions run sandboxed. */
  sandbox: boolean
  /** Client-generated UUID identifying this bridge instance. */
  bridgeId: string
  /**
   * Sent as metadata.worker_type so web clients can filter by origin.
   * Backend treats this as opaque — any string, not just BridgeWorkerType.
   */
  workerType: string
  /** Client-generated UUID for idempotent environment registration. */
  environmentId: string
  /**
   * Backend-issued environment_id to reuse on re-register. When set, the
   * backend treats registration as a reconnect to the existing environment
   * instead of creating a new one. Used by `claude remote-control
   * --session-id` resume. Must be a backend-format ID — client UUIDs are
   * rejected with 400.
   */
  reuseEnvironmentId?: string
  /** API base URL the bridge is connected to (used for polling). */
  apiBaseUrl: string
  /** Session ingress base URL for WebSocket connections (may differ from apiBaseUrl locally). */
  sessionIngressUrl: string
  /** Debug file path passed via --debug-file. */
  debugFile?: string
  /** Per-session timeout in milliseconds. Sessions exceeding this are killed. */
  sessionTimeoutMs?: number
}
+
// --- Dependency interfaces (for testability) ---

/**
 * A control_response event sent back to a session (e.g. a permission decision).
 * The `subtype` is `'success'` per the SDK protocol; the inner `response`
 * carries the permission decision payload (e.g. `{ behavior: 'allow' }`).
 */
export type PermissionResponseEvent = {
  type: 'control_response'
  response: {
    subtype: 'success'
    request_id: string
    response: Record<string, unknown>
  }
}

/**
 * The backend API surface the bridge depends on. Kept as a narrow interface
 * (per the header above) so tests can substitute a stub implementation.
 */
export type BridgeApiClient = {
  /** Register (or reconnect, via reuseEnvironmentId) this bridge; returns the backend IDs/secret later calls need. */
  registerBridgeEnvironment(config: BridgeConfig): Promise<{
    environment_id: string
    environment_secret: string
  }>
  /**
   * Poll for the next work item; resolves null when none is available.
   * `reclaimOlderThanMs` presumably reclaims stale work leases — confirm
   * against the server contract.
   */
  pollForWork(
    environmentId: string,
    environmentSecret: string,
    signal?: AbortSignal,
    reclaimOlderThanMs?: number,
  ): Promise<WorkResponse | null>
  /** Acknowledge receipt of a work item so the server stops re-offering it. */
  acknowledgeWork(
    environmentId: string,
    workId: string,
    sessionToken: string,
  ): Promise<void>
  /** Stop a work item via the environments API. */
  stopWork(environmentId: string, workId: string, force: boolean): Promise<void>
  /** Deregister/delete the bridge environment on graceful shutdown. */
  deregisterEnvironment(environmentId: string): Promise<void>
  /** Send a permission response (control_response) to a session via the session events API. */
  sendPermissionResponseEvent(
    sessionId: string,
    event: PermissionResponseEvent,
    sessionToken: string,
  ): Promise<void>
  /** Archive a session so it no longer appears as active on the server. */
  archiveSession(sessionId: string): Promise<void>
  /**
   * Force-stop stale worker instances and re-queue a session on an environment.
   * Used by `--session-id` to resume a session after the original bridge died.
   */
  reconnectSession(environmentId: string, sessionId: string): Promise<void>
  /**
   * Send a lightweight heartbeat for an active work item, extending its lease.
   * Uses SessionIngressAuth (JWT, no DB hit) instead of EnvironmentSecretAuth.
   * Returns the server's response with lease status.
   */
  heartbeatWork(
    environmentId: string,
    workId: string,
    sessionToken: string,
  ): Promise<{ lease_extended: boolean; state: string }>
}
+
/** Live handle to one spawned child session process. */
export type SessionHandle = {
  sessionId: string
  /** Resolves once with the session's terminal status. */
  done: Promise<SessionDoneStatus>
  kill(): void
  forceKill(): void
  activities: SessionActivity[] // ring buffer of recent activities (last ~10)
  currentActivity: SessionActivity | null // most recent
  accessToken: string // session_ingress_token for API calls
  lastStderr: string[] // ring buffer of last stderr lines
  writeStdin(data: string): void // write directly to child stdin
  /** Update the access token for a running session (e.g. after token refresh). */
  updateAccessToken(token: string): void
}

/** Options for spawning one child session. */
export type SessionSpawnOpts = {
  sessionId: string
  /** Endpoint the child connects to: ws(s):// (v1) or http(s):// (CCR v2). */
  sdkUrl: string
  accessToken: string
  /** When true, spawn the child with CCR v2 env vars (SSE transport + CCRClient). */
  useCcrV2?: boolean
  /** Required when useCcrV2 is true. Obtained from POST /worker/register. */
  workerEpoch?: number
  /**
   * Fires once with the text of the first real user message seen on the
   * child's stdout (via --replay-user-messages). Lets the caller derive a
   * session title when none exists yet. Tool-result and synthetic user
   * messages are skipped.
   */
  onFirstUserMessage?: (text: string) => void
}

/** Factory for session child processes (injectable for tests — see header above types in this section). */
export type SessionSpawner = {
  spawn(opts: SessionSpawnOpts, dir: string): SessionHandle
}

/** Terminal/status-line rendering surface used by the bridge. */
export type BridgeLogger = {
  printBanner(config: BridgeConfig, environmentId: string): void
  logSessionStart(sessionId: string, prompt: string): void
  logSessionComplete(sessionId: string, durationMs: number): void
  logSessionFailed(sessionId: string, error: string): void
  logStatus(message: string): void
  logVerbose(message: string): void
  logError(message: string): void
  /** Log a reconnection success event after recovering from connection errors. */
  logReconnected(disconnectedMs: number): void
  /** Show idle status with repo/branch info and shimmer animation. */
  updateIdleStatus(): void
  /** Show reconnecting status in the live display. */
  updateReconnectingStatus(delayStr: string, elapsedStr: string): void
  updateSessionStatus(
    sessionId: string,
    elapsed: string,
    activity: SessionActivity,
    trail: string[],
  ): void
  clearStatus(): void
  /** Set repository info for status line display. */
  setRepoInfo(repoName: string, branch: string): void
  /** Set debug log glob shown above the status line (ant users). */
  setDebugLogPath(path: string): void
  /** Transition to "Attached" state when a session starts. */
  setAttached(sessionId: string): void
  /** Show failed status in the live display. */
  updateFailedStatus(error: string): void
  /** Toggle QR code visibility. */
  toggleQr(): void
  /** Update the "<n> of <m> sessions" indicator and spawn mode hint. */
  updateSessionCount(active: number, max: number, mode: SpawnMode): void
  /** Update the spawn mode shown in the session-count line. Pass null to hide (single-session or toggle unavailable). */
  setSpawnModeDisplay(mode: 'same-dir' | 'worktree' | null): void
  /** Register a new session for multi-session display (called after spawn succeeds). */
  addSession(sessionId: string, url: string): void
  /** Update the per-session activity summary (tool being run) in the multi-session list. */
  updateSessionActivity(sessionId: string, activity: SessionActivity): void
  /**
   * Set a session's display title. In multi-session mode, updates the bullet list
   * entry. In single-session mode, also shows the title in the main status line.
   * Triggers a render (guarded against reconnecting/failed states).
   */
  setSessionTitle(sessionId: string, title: string): void
  /** Remove a session from the multi-session display when it ends. */
  removeSession(sessionId: string): void
  /** Force a re-render of the status display (for multi-session activity refresh). */
  refreshDisplay(): void
}

+ 127 - 0
src/bridge/workSecret.ts

@@ -0,0 +1,127 @@
+import axios from 'axios'
+import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
+import type { WorkSecret } from './types.js'
+
+/** Decode a base64url-encoded work secret and validate its version. */
+export function decodeWorkSecret(secret: string): WorkSecret {
+  const json = Buffer.from(secret, 'base64url').toString('utf-8')
+  const parsed: unknown = jsonParse(json)
+  if (
+    !parsed ||
+    typeof parsed !== 'object' ||
+    !('version' in parsed) ||
+    parsed.version !== 1
+  ) {
+    throw new Error(
+      `Unsupported work secret version: ${parsed && typeof parsed === 'object' && 'version' in parsed ? parsed.version : 'unknown'}`,
+    )
+  }
+  const obj = parsed as Record<string, unknown>
+  if (
+    typeof obj.session_ingress_token !== 'string' ||
+    obj.session_ingress_token.length === 0
+  ) {
+    throw new Error(
+      'Invalid work secret: missing or empty session_ingress_token',
+    )
+  }
+  if (typeof obj.api_base_url !== 'string') {
+    throw new Error('Invalid work secret: missing api_base_url')
+  }
+  return parsed as WorkSecret
+}
+
+/**
+ * Build a WebSocket SDK URL from the API base URL and session ID.
+ * Strips the HTTP(S) protocol and constructs a ws(s):// ingress URL.
+ *
+ * Uses /v2/ for localhost (direct to session-ingress, no Envoy rewrite)
+ * and /v1/ for production (Envoy rewrites /v1/ → /v2/).
+ */
+export function buildSdkUrl(apiBaseUrl: string, sessionId: string): string {
+  const isLocalhost =
+    apiBaseUrl.includes('localhost') || apiBaseUrl.includes('127.0.0.1')
+  const protocol = isLocalhost ? 'ws' : 'wss'
+  const version = isLocalhost ? 'v2' : 'v1'
+  const host = apiBaseUrl.replace(/^https?:\/\//, '').replace(/\/+$/, '')
+  return `${protocol}://${host}/${version}/session_ingress/ws/${sessionId}`
+}
+
+/**
+ * Compare two session IDs regardless of their tagged-ID prefix.
+ *
+ * Tagged IDs have the form {tag}_{body} or {tag}_staging_{body}, where the
+ * body encodes a UUID. CCR v2's compat layer returns `session_*` to v1 API
+ * clients (compat/convert.go:41) but the infrastructure layer (sandbox-gateway
+ * work queue, work poll response) uses `cse_*` (compat/CLAUDE.md:13). Both
+ * have the same underlying UUID.
+ *
+ * Without this, replBridge rejects its own session as "foreign" at the
+ * work-received check when the ccr_v2_compat_enabled gate is on.
+ */
+export function sameSessionId(a: string, b: string): boolean {
+  if (a === b) return true
+  // The body is everything after the last underscore — this handles both
+  // `{tag}_{body}` and `{tag}_staging_{body}`.
+  const aBody = a.slice(a.lastIndexOf('_') + 1)
+  const bBody = b.slice(b.lastIndexOf('_') + 1)
+  // Guard against IDs with no underscore (bare UUIDs): lastIndexOf returns -1,
+  // slice(0) returns the whole string, and we already checked a === b above.
+  // Require a minimum length to avoid accidental matches on short suffixes
+  // (e.g. single-char tag remnants from malformed IDs).
+  return aBody.length >= 4 && aBody === bBody
+}
+
+/**
+ * Build a CCR v2 session URL from the API base URL and session ID.
+ * Unlike buildSdkUrl, this returns an HTTP(S) URL (not ws://) and points at
+ * /v1/code/sessions/{id} — the child CC will derive the SSE stream path
+ * and worker endpoints from this base.
+ */
+export function buildCCRv2SdkUrl(
+  apiBaseUrl: string,
+  sessionId: string,
+): string {
+  const base = apiBaseUrl.replace(/\/+$/, '')
+  return `${base}/v1/code/sessions/${sessionId}`
+}
+
+/**
+ * Register this bridge as the worker for a CCR v2 session.
+ * Returns the worker_epoch, which must be passed to the child CC process
+ * so its CCRClient can include it in every heartbeat/state/event request.
+ *
+ * Mirrors what environment-manager does in the container path
+ * (api-go/environment-manager/cmd/cmd_task_run.go RegisterWorker).
+ */
+export async function registerWorker(
+  sessionUrl: string,
+  accessToken: string,
+): Promise<number> {
+  const response = await axios.post(
+    `${sessionUrl}/worker/register`,
+    {},
+    {
+      headers: {
+        Authorization: `Bearer ${accessToken}`,
+        'Content-Type': 'application/json',
+        'anthropic-version': '2023-06-01',
+      },
+      timeout: 10_000,
+    },
+  )
+  // protojson serializes int64 as a string to avoid JS number precision loss;
+  // the Go side may also return a number depending on encoder settings.
+  const raw = response.data?.worker_epoch
+  const epoch = typeof raw === 'string' ? Number(raw) : raw
+  if (
+    typeof epoch !== 'number' ||
+    !Number.isFinite(epoch) ||
+    !Number.isSafeInteger(epoch)
+  ) {
+    throw new Error(
+      `registerWorker: invalid worker_epoch in response: ${jsonStringify(response.data)}`,
+    )
+  }
+  return epoch
+}

Різницю між файлами не показано, бо вона завелика
+ 370 - 0
src/buddy/CompanionSprite.tsx


+ 133 - 0
src/buddy/companion.ts

@@ -0,0 +1,133 @@
+import { getGlobalConfig } from '../utils/config.js'
+import {
+  type Companion,
+  type CompanionBones,
+  EYES,
+  HATS,
+  RARITIES,
+  RARITY_WEIGHTS,
+  type Rarity,
+  SPECIES,
+  STAT_NAMES,
+  type StatName,
+} from './types.js'
+
+// Mulberry32 — tiny seeded PRNG, good enough for picking ducks
+function mulberry32(seed: number): () => number {
+  let a = seed >>> 0
+  return function () {
+    a |= 0
+    a = (a + 0x6d2b79f5) | 0
+    let t = Math.imul(a ^ (a >>> 15), 1 | a)
+    t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t
+    return ((t ^ (t >>> 14)) >>> 0) / 4294967296
+  }
+}
+
+function hashString(s: string): number {
+  if (typeof Bun !== 'undefined') {
+    return Number(BigInt(Bun.hash(s)) & 0xffffffffn)
+  }
+  let h = 2166136261
+  for (let i = 0; i < s.length; i++) {
+    h ^= s.charCodeAt(i)
+    h = Math.imul(h, 16777619)
+  }
+  return h >>> 0
+}
+
+function pick<T>(rng: () => number, arr: readonly T[]): T {
+  return arr[Math.floor(rng() * arr.length)]!
+}
+
+function rollRarity(rng: () => number): Rarity {
+  const total = Object.values(RARITY_WEIGHTS).reduce((a, b) => a + b, 0)
+  let roll = rng() * total
+  for (const rarity of RARITIES) {
+    roll -= RARITY_WEIGHTS[rarity]
+    if (roll < 0) return rarity
+  }
+  return 'common'
+}
+
// Baseline stat value per rarity tier; rollStats offsets every stat from
// this floor, so rarer companions roll uniformly higher numbers.
const RARITY_FLOOR: Record<Rarity, number> = {
  common: 5,
  uncommon: 15,
  rare: 25,
  epic: 35,
  legendary: 50,
}
+
+// One peak stat, one dump stat, rest scattered. Rarity bumps the floor.
+function rollStats(
+  rng: () => number,
+  rarity: Rarity,
+): Record<StatName, number> {
+  const floor = RARITY_FLOOR[rarity]
+  const peak = pick(rng, STAT_NAMES)
+  let dump = pick(rng, STAT_NAMES)
+  while (dump === peak) dump = pick(rng, STAT_NAMES)
+
+  const stats = {} as Record<StatName, number>
+  for (const name of STAT_NAMES) {
+    if (name === peak) {
+      stats[name] = Math.min(100, floor + 50 + Math.floor(rng() * 30))
+    } else if (name === dump) {
+      stats[name] = Math.max(1, floor - 10 + Math.floor(rng() * 15))
+    } else {
+      stats[name] = floor + Math.floor(rng() * 40)
+    }
+  }
+  return stats
+}
+
// Hash salt mixed into the userId before rolling. Presumably versioned so a
// bump would re-roll every companion — confirm before changing.
const SALT = 'friend-2026-401'

/** One deterministic roll: the companion's bones plus a secondary seed
 *  drawn from the same rng stream (consumer not visible in this file). */
export type Roll = {
  bones: CompanionBones
  inspirationSeed: number
}
+
+function rollFrom(rng: () => number): Roll {
+  const rarity = rollRarity(rng)
+  const bones: CompanionBones = {
+    rarity,
+    species: pick(rng, SPECIES),
+    eye: pick(rng, EYES),
+    hat: rarity === 'common' ? 'none' : pick(rng, HATS),
+    shiny: rng() < 0.01,
+    stats: rollStats(rng, rarity),
+  }
+  return { bones, inspirationSeed: Math.floor(rng() * 1e9) }
+}
+
+// Called from three hot paths (500ms sprite tick, per-keystroke PromptInput,
+// per-turn observer) with the same userId → cache the deterministic result.
+let rollCache: { key: string; value: Roll } | undefined
+export function roll(userId: string): Roll {
+  const key = userId + SALT
+  if (rollCache?.key === key) return rollCache.value
+  const value = rollFrom(mulberry32(hashString(key)))
+  rollCache = { key, value }
+  return value
+}
+
+export function rollWithSeed(seed: string): Roll {
+  return rollFrom(mulberry32(hashString(seed)))
+}
+
+export function companionUserId(): string {
+  const config = getGlobalConfig()
+  return config.oauthAccount?.accountUuid ?? config.userID ?? 'anon'
+}
+
+// Regenerate bones from userId, merge with stored soul. Bones never persist
+// so species renames and SPECIES-array edits can't break stored companions,
+// and editing config.companion can't fake a rarity.
+export function getCompanion(): Companion | undefined {
+  const stored = getGlobalConfig().companion
+  if (!stored) return undefined
+  const { bones } = roll(companionUserId())
+  // bones last so stale bones fields in old-format configs get overridden
+  return { ...stored, ...bones }
+}

+ 36 - 0
src/buddy/prompt.ts

@@ -0,0 +1,36 @@
+import { feature } from 'bun:bundle'
+import type { Message } from '../types/message.js'
+import type { Attachment } from '../utils/attachments.js'
+import { getGlobalConfig } from '../utils/config.js'
+import { getCompanion } from './companion.js'
+
/**
 * System-prompt section introducing the on-screen companion to the model:
 * explains that the companion is a separate watcher and tells the model to
 * stay brief when the user addresses the companion by name.
 *
 * NOTE: the returned markdown is prompt text sent to the model — treat every
 * character as behavior, not copy.
 */
export function companionIntroText(name: string, species: string): string {
  return `# Companion

A small ${species} named ${name} sits beside the user's input box and occasionally comments in a speech bubble. You're not ${name} — it's a separate watcher.

When the user addresses ${name} directly (by name), its bubble will answer. Your job in that moment is to stay out of the way: respond in ONE line or less, or just answer any part of the message meant for you. Don't explain that you're not ${name} — they know. Don't narrate what ${name} might say — the bubble handles that.`
}
+
+export function getCompanionIntroAttachment(
+  messages: Message[] | undefined,
+): Attachment[] {
+  if (!feature('BUDDY')) return []
+  const companion = getCompanion()
+  if (!companion || getGlobalConfig().companionMuted) return []
+
+  // Skip if already announced for this companion.
+  for (const msg of messages ?? []) {
+    if (msg.type !== 'attachment') continue
+    if (msg.attachment.type !== 'companion_intro') continue
+    if (msg.attachment.name === companion.name) return []
+  }
+
+  return [
+    {
+      type: 'companion_intro',
+      name: companion.name,
+      species: companion.species,
+    },
+  ]
+}

+ 514 - 0
src/buddy/sprites.ts

@@ -0,0 +1,514 @@
+import type { CompanionBones, Eye, Hat, Species } from './types.js'
+import {
+  axolotl,
+  blob,
+  cactus,
+  capybara,
+  cat,
+  chonk,
+  dragon,
+  duck,
+  ghost,
+  goose,
+  mushroom,
+  octopus,
+  owl,
+  penguin,
+  rabbit,
+  robot,
+  snail,
+  turtle,
+} from './types.js'
+
// Each sprite is 5 lines tall, 12 wide (after {E}→1char substitution).
// Multiple frames per species for idle fidget animation.
// Line 0 is the hat slot — must be blank in frames 0-1; frame 2 may use it.
//
// NOTE: interior spacing is significant. renderSprite substitutes '{E}' with
// a single eye character and swaps HAT_LINES rows into line 0 verbatim, so
// every string must stay exactly 12 display columns wide post-substitution.
const BODIES: Record<Species, string[][]> = {
  [duck]: [
    [
      '            ',
      '    __      ',
      '  <({E} )___  ',
      '   (  ._>   ',
      '    `--´    ',
    ],
    [
      '            ',
      '    __      ',
      '  <({E} )___  ',
      '   (  ._>   ',
      '    `--´~   ',
    ],
    [
      '            ',
      '    __      ',
      '  <({E} )___  ',
      '   (  .__>  ',
      '    `--´    ',
    ],
  ],
  [goose]: [
    [
      '            ',
      '     ({E}>    ',
      '     ||     ',
      '   _(__)_   ',
      '    ^^^^    ',
    ],
    [
      '            ',
      '    ({E}>     ',
      '     ||     ',
      '   _(__)_   ',
      '    ^^^^    ',
    ],
    [
      '            ',
      '     ({E}>>   ',
      '     ||     ',
      '   _(__)_   ',
      '    ^^^^    ',
    ],
  ],
  [blob]: [
    [
      '            ',
      '   .----.   ',
      '  ( {E}  {E} )  ',
      '  (      )  ',
      '   `----´   ',
    ],
    [
      '            ',
      '  .------.  ',
      ' (  {E}  {E}  ) ',
      ' (        ) ',
      '  `------´  ',
    ],
    [
      '            ',
      '    .--.    ',
      '   ({E}  {E})   ',
      '   (    )   ',
      '    `--´    ',
    ],
  ],
  [cat]: [
    [
      '            ',
      '   /\\_/\\    ',
      '  ( {E}   {E})  ',
      '  (  ω  )   ',
      '  (")_(")   ',
    ],
    [
      '            ',
      '   /\\_/\\    ',
      '  ( {E}   {E})  ',
      '  (  ω  )   ',
      '  (")_(")~  ',
    ],
    [
      '            ',
      '   /\\-/\\    ',
      '  ( {E}   {E})  ',
      '  (  ω  )   ',
      '  (")_(")   ',
    ],
  ],
  [dragon]: [
    [
      '            ',
      '  /^\\  /^\\  ',
      ' <  {E}  {E}  > ',
      ' (   ~~   ) ',
      '  `-vvvv-´  ',
    ],
    [
      '            ',
      '  /^\\  /^\\  ',
      ' <  {E}  {E}  > ',
      ' (        ) ',
      '  `-vvvv-´  ',
    ],
    [
      '   ~    ~   ',
      '  /^\\  /^\\  ',
      ' <  {E}  {E}  > ',
      ' (   ~~   ) ',
      '  `-vvvv-´  ',
    ],
  ],
  [octopus]: [
    [
      '            ',
      '   .----.   ',
      '  ( {E}  {E} )  ',
      '  (______)  ',
      '  /\\/\\/\\/\\  ',
    ],
    [
      '            ',
      '   .----.   ',
      '  ( {E}  {E} )  ',
      '  (______)  ',
      '  \\/\\/\\/\\/  ',
    ],
    [
      '     o      ',
      '   .----.   ',
      '  ( {E}  {E} )  ',
      '  (______)  ',
      '  /\\/\\/\\/\\  ',
    ],
  ],
  [owl]: [
    [
      '            ',
      '   /\\  /\\   ',
      '  (({E})({E}))  ',
      '  (  ><  )  ',
      '   `----´   ',
    ],
    [
      '            ',
      '   /\\  /\\   ',
      '  (({E})({E}))  ',
      '  (  ><  )  ',
      '   .----.   ',
    ],
    [
      '            ',
      '   /\\  /\\   ',
      '  (({E})(-))  ',
      '  (  ><  )  ',
      '   `----´   ',
    ],
  ],
  [penguin]: [
    [
      '            ',
      '  .---.     ',
      '  ({E}>{E})     ',
      ' /(   )\\    ',
      '  `---´     ',
    ],
    [
      '            ',
      '  .---.     ',
      '  ({E}>{E})     ',
      ' |(   )|    ',
      '  `---´     ',
    ],
    [
      '  .---.     ',
      '  ({E}>{E})     ',
      ' /(   )\\    ',
      '  `---´     ',
      '   ~ ~      ',
    ],
  ],
  [turtle]: [
    [
      '            ',
      '   _,--._   ',
      '  ( {E}  {E} )  ',
      ' /[______]\\ ',
      '  ``    ``  ',
    ],
    [
      '            ',
      '   _,--._   ',
      '  ( {E}  {E} )  ',
      ' /[______]\\ ',
      '   ``  ``   ',
    ],
    [
      '            ',
      '   _,--._   ',
      '  ( {E}  {E} )  ',
      ' /[======]\\ ',
      '  ``    ``  ',
    ],
  ],
  [snail]: [
    [
      '            ',
      ' {E}    .--.  ',
      '  \\  ( @ )  ',
      '   \\_`--´   ',
      '  ~~~~~~~   ',
    ],
    [
      '            ',
      '  {E}   .--.  ',
      '  |  ( @ )  ',
      '   \\_`--´   ',
      '  ~~~~~~~   ',
    ],
    [
      '            ',
      ' {E}    .--.  ',
      '  \\  ( @  ) ',
      '   \\_`--´   ',
      '   ~~~~~~   ',
    ],
  ],
  [ghost]: [
    [
      '            ',
      '   .----.   ',
      '  / {E}  {E} \\  ',
      '  |      |  ',
      '  ~`~``~`~  ',
    ],
    [
      '            ',
      '   .----.   ',
      '  / {E}  {E} \\  ',
      '  |      |  ',
      '  `~`~~`~`  ',
    ],
    [
      '    ~  ~    ',
      '   .----.   ',
      '  / {E}  {E} \\  ',
      '  |      |  ',
      '  ~~`~~`~~  ',
    ],
  ],
  [axolotl]: [
    [
      '            ',
      '}~(______)~{',
      '}~({E} .. {E})~{',
      '  ( .--. )  ',
      '  (_/  \\_)  ',
    ],
    [
      '            ',
      '~}(______){~',
      '~}({E} .. {E}){~',
      '  ( .--. )  ',
      '  (_/  \\_)  ',
    ],
    [
      '            ',
      '}~(______)~{',
      '}~({E} .. {E})~{',
      '  (  --  )  ',
      '  ~_/  \\_~  ',
    ],
  ],
  [capybara]: [
    [
      '            ',
      '  n______n  ',
      ' ( {E}    {E} ) ',
      ' (   oo   ) ',
      '  `------´  ',
    ],
    [
      '            ',
      '  n______n  ',
      ' ( {E}    {E} ) ',
      ' (   Oo   ) ',
      '  `------´  ',
    ],
    [
      '    ~  ~    ',
      '  u______n  ',
      ' ( {E}    {E} ) ',
      ' (   oo   ) ',
      '  `------´  ',
    ],
  ],
  [cactus]: [
    [
      '            ',
      ' n  ____  n ',
      ' | |{E}  {E}| | ',
      ' |_|    |_| ',
      '   |    |   ',
    ],
    [
      '            ',
      '    ____    ',
      ' n |{E}  {E}| n ',
      ' |_|    |_| ',
      '   |    |   ',
    ],
    [
      ' n        n ',
      ' |  ____  | ',
      ' | |{E}  {E}| | ',
      ' |_|    |_| ',
      '   |    |   ',
    ],
  ],
  [robot]: [
    [
      '            ',
      '   .[||].   ',
      '  [ {E}  {E} ]  ',
      '  [ ==== ]  ',
      '  `------´  ',
    ],
    [
      '            ',
      '   .[||].   ',
      '  [ {E}  {E} ]  ',
      '  [ -==- ]  ',
      '  `------´  ',
    ],
    [
      '     *      ',
      '   .[||].   ',
      '  [ {E}  {E} ]  ',
      '  [ ==== ]  ',
      '  `------´  ',
    ],
  ],
  [rabbit]: [
    [
      '            ',
      '   (\\__/)   ',
      '  ( {E}  {E} )  ',
      ' =(  ..  )= ',
      '  (")__(")  ',
    ],
    [
      '            ',
      '   (|__/)   ',
      '  ( {E}  {E} )  ',
      ' =(  ..  )= ',
      '  (")__(")  ',
    ],
    [
      '            ',
      '   (\\__/)   ',
      '  ( {E}  {E} )  ',
      ' =( .  . )= ',
      '  (")__(")  ',
    ],
  ],
  [mushroom]: [
    [
      '            ',
      ' .-o-OO-o-. ',
      '(__________)',
      '   |{E}  {E}|   ',
      '   |____|   ',
    ],
    [
      '            ',
      ' .-O-oo-O-. ',
      '(__________)',
      '   |{E}  {E}|   ',
      '   |____|   ',
    ],
    [
      '   . o  .   ',
      ' .-o-OO-o-. ',
      '(__________)',
      '   |{E}  {E}|   ',
      '   |____|   ',
    ],
  ],
  [chonk]: [
    [
      '            ',
      '  /\\    /\\  ',
      ' ( {E}    {E} ) ',
      ' (   ..   ) ',
      '  `------´  ',
    ],
    [
      '            ',
      '  /\\    /|  ',
      ' ( {E}    {E} ) ',
      ' (   ..   ) ',
      '  `------´  ',
    ],
    [
      '            ',
      '  /\\    /\\  ',
      ' ( {E}    {E} ) ',
      ' (   ..   ) ',
      '  `------´~ ',
    ],
  ],
}
+
// 12-char-wide rows swapped verbatim into the sprite's hat slot (line 0) by
// renderSprite. Keep widths in sync with BODIES.
const HAT_LINES: Record<Hat, string> = {
  none: '', // sentinel — renderSprite skips the hat swap entirely for 'none'
  crown: '   \\^^^/    ',
  tophat: '   [___]    ',
  propeller: '    -+-     ',
  halo: '   (   )    ',
  wizard: '    /^\\     ',
  beanie: '   (___)    ',
  tinyduck: '    ,>      ',
}
+
+export function renderSprite(bones: CompanionBones, frame = 0): string[] {
+  const frames = BODIES[bones.species]
+  const body = frames[frame % frames.length]!.map(line =>
+    line.replaceAll('{E}', bones.eye),
+  )
+  const lines = [...body]
+  // Only replace with hat if line 0 is empty (some fidget frames use it for smoke etc)
+  if (bones.hat !== 'none' && !lines[0]!.trim()) {
+    lines[0] = HAT_LINES[bones.hat]
+  }
+  // Drop blank hat slot — wastes a row in the Card and ambient sprite when
+  // there's no hat and the frame isn't using it for smoke/antenna/etc.
+  // Only safe when ALL frames have blank line 0; otherwise heights oscillate.
+  if (!lines[0]!.trim() && frames.every(f => !f[0]!.trim())) lines.shift()
+  return lines
+}
+
+export function spriteFrameCount(species: Species): number {
+  return BODIES[species].length
+}
+
+export function renderFace(bones: CompanionBones): string {
+  const eye: Eye = bones.eye
+  switch (bones.species) {
+    case duck:
+    case goose:
+      return `(${eye}>`
+    case blob:
+      return `(${eye}${eye})`
+    case cat:
+      return `=${eye}ω${eye}=`
+    case dragon:
+      return `<${eye}~${eye}>`
+    case octopus:
+      return `~(${eye}${eye})~`
+    case owl:
+      return `(${eye})(${eye})`
+    case penguin:
+      return `(${eye}>)`
+    case turtle:
+      return `[${eye}_${eye}]`
+    case snail:
+      return `${eye}(@)`
+    case ghost:
+      return `/${eye}${eye}\\`
+    case axolotl:
+      return `}${eye}.${eye}{`
+    case capybara:
+      return `(${eye}oo${eye})`
+    case cactus:
+      return `|${eye}  ${eye}|`
+    case robot:
+      return `[${eye}${eye}]`
+    case rabbit:
+      return `(${eye}..${eye})`
+    case mushroom:
+      return `|${eye}  ${eye}|`
+    case chonk:
+      return `(${eye}.${eye})`
+  }
+}

+ 148 - 0
src/buddy/types.ts

@@ -0,0 +1,148 @@
+// Rarity tiers in ascending order of scarcity; draw weights live in
+// RARITY_WEIGHTS further down this file.
+export const RARITIES = [
+  'common',
+  'uncommon',
+  'rare',
+  'epic',
+  'legendary',
+] as const
+export type Rarity = (typeof RARITIES)[number]
+
+// One species name collides with a model-codename canary in excluded-strings.txt.
+// The check greps build output (not source), so runtime-constructing the value keeps
+// the literal out of the bundle while the check stays armed for the actual codename.
+// All species encoded uniformly; `as` casts are type-position only (erased pre-bundle).
+const c = String.fromCharCode
+// biome-ignore format: keep the species list compact
+// NOTE(review): the blank line below may detach the format suppression from
+// the `duck` export it presumably targets — verify Biome still honors it.
+
+export const duck = c(0x64,0x75,0x63,0x6b) as 'duck'
+export const goose = c(0x67, 0x6f, 0x6f, 0x73, 0x65) as 'goose'
+export const blob = c(0x62, 0x6c, 0x6f, 0x62) as 'blob'
+export const cat = c(0x63, 0x61, 0x74) as 'cat'
+export const dragon = c(0x64, 0x72, 0x61, 0x67, 0x6f, 0x6e) as 'dragon'
+export const octopus = c(0x6f, 0x63, 0x74, 0x6f, 0x70, 0x75, 0x73) as 'octopus'
+export const owl = c(0x6f, 0x77, 0x6c) as 'owl'
+export const penguin = c(0x70, 0x65, 0x6e, 0x67, 0x75, 0x69, 0x6e) as 'penguin'
+export const turtle = c(0x74, 0x75, 0x72, 0x74, 0x6c, 0x65) as 'turtle'
+export const snail = c(0x73, 0x6e, 0x61, 0x69, 0x6c) as 'snail'
+export const ghost = c(0x67, 0x68, 0x6f, 0x73, 0x74) as 'ghost'
+export const axolotl = c(0x61, 0x78, 0x6f, 0x6c, 0x6f, 0x74, 0x6c) as 'axolotl'
+export const capybara = c(
+  0x63,
+  0x61,
+  0x70,
+  0x79,
+  0x62,
+  0x61,
+  0x72,
+  0x61,
+) as 'capybara'
+export const cactus = c(0x63, 0x61, 0x63, 0x74, 0x75, 0x73) as 'cactus'
+export const robot = c(0x72, 0x6f, 0x62, 0x6f, 0x74) as 'robot'
+export const rabbit = c(0x72, 0x61, 0x62, 0x62, 0x69, 0x74) as 'rabbit'
+export const mushroom = c(
+  0x6d,
+  0x75,
+  0x73,
+  0x68,
+  0x72,
+  0x6f,
+  0x6f,
+  0x6d,
+) as 'mushroom'
+export const chonk = c(0x63, 0x68, 0x6f, 0x6e, 0x6b) as 'chonk'
+
+// All companion species, in canonical order.
+// NOTE(review): species appear to be assigned from hash(userId) (see the
+// CompanionBones comment below), so reordering this list would likely reroll
+// existing companions — confirm before editing.
+export const SPECIES = [
+  duck,
+  goose,
+  blob,
+  cat,
+  dragon,
+  octopus,
+  owl,
+  penguin,
+  turtle,
+  snail,
+  ghost,
+  axolotl,
+  capybara,
+  cactus,
+  robot,
+  rabbit,
+  mushroom,
+  chonk,
+] as const
+export type Species = (typeof SPECIES)[number] // biome-ignore format: keep compact
+
+// Eye glyphs; sprites substitute these for the '{E}' placeholder in frames.
+export const EYES = ['·', '✦', '×', '◉', '@', '°'] as const
+export type Eye = (typeof EYES)[number]
+
+// Hats; 'none' is the no-hat sentinel checked by renderSprite.
+export const HATS = [
+  'none',
+  'crown',
+  'tophat',
+  'propeller',
+  'halo',
+  'wizard',
+  'beanie',
+  'tinyduck',
+] as const
+export type Hat = (typeof HATS)[number]
+
+// Display names for companion stats (flavor values, per CompanionBones.stats).
+export const STAT_NAMES = [
+  'DEBUGGING',
+  'PATIENCE',
+  'CHAOS',
+  'WISDOM',
+  'SNARK',
+] as const
+export type StatName = (typeof STAT_NAMES)[number]
+
+// Deterministic parts — derived from hash(userId)
+export type CompanionBones = {
+  rarity: Rarity
+  species: Species
+  eye: Eye
+  hat: Hat
+  shiny: boolean
+  stats: Record<StatName, number>
+}
+
+// Model-generated soul — stored in config after first hatch
+export type CompanionSoul = {
+  name: string
+  personality: string
+}
+
+// Full companion = regenerated bones + persisted soul + hatch timestamp.
+export type Companion = CompanionBones &
+  CompanionSoul & {
+    hatchedAt: number
+  }
+
+// What actually persists in config. Bones are regenerated from hash(userId)
+// on every read so species renames don't break stored companions and users
+// can't edit their way to a legendary.
+export type StoredCompanion = CompanionSoul & { hatchedAt: number }
+
+// Draw weights per rarity tier; values sum to 100 so each reads as a percent.
+export const RARITY_WEIGHTS = {
+  common: 60,
+  uncommon: 25,
+  rare: 10,
+  epic: 4,
+  legendary: 1,
+} as const satisfies Record<Rarity, number>
+
+// Star strings for display; star count tracks the tier's position in RARITIES.
+export const RARITY_STARS = {
+  common: '★',
+  uncommon: '★★',
+  rare: '★★★',
+  epic: '★★★★',
+  legendary: '★★★★★',
+} as const satisfies Record<Rarity, string>
+
+// Maps each rarity to a theme color key (values must exist on Theme —
+// enforced by the satisfies clause against the imported Theme type).
+export const RARITY_COLORS = {
+  common: 'inactive',
+  uncommon: 'success',
+  rare: 'permission',
+  epic: 'autoAccept',
+  legendary: 'warning',
+} as const satisfies Record<Rarity, keyof import('../utils/theme.js').Theme>

Різницю між файлами не показано, бо вона завелика
+ 97 - 0
src/buddy/useBuddyNotification.tsx


+ 31 - 0
src/cli/exit.ts

@@ -0,0 +1,31 @@
+/**
+ * CLI exit helpers for subcommand handlers.
+ *
+ * Consolidates the 4-5 line "print + lint-suppress + exit" block that was
+ * copy-pasted ~60 times across `claude mcp *` / `claude plugin *` handlers.
+ * The `: never` return type lets TypeScript narrow control flow at call sites
+ * without a trailing `return`.
+ */
+/* eslint-disable custom-rules/no-process-exit -- centralized CLI exit point */
+
+// `return undefined as never` (not a post-exit throw) — tests spy on
+// process.exit and let it return. Call sites write `return cliError(...)`
+// where subsequent code would dereference narrowed-away values under mock.
+// cliError uses console.error (tests spy on console.error); cliOk uses
+// process.stdout.write (tests spy on process.stdout.write — Bun's console.log
+// doesn't route through a spied process.stdout.write).
+
+/** Write an error message to stderr (if given) and exit with code 1. */
+export function cliError(msg?: string): never {
+  // biome-ignore lint/suspicious/noConsole: centralized CLI error output
+  if (msg) console.error(msg)
+  process.exit(1)
+  // Unreachable in production; reached under test when process.exit is
+  // spied and allowed to return (see module header). Keeps the `: never`
+  // return type honest without a post-exit throw.
+  return undefined as never
+}
+
+/** Write a message to stdout (if given) and exit with code 0. */
+export function cliOk(msg?: string): never {
+  // stdout.write (not console.log) so tests can spy process.stdout.write —
+  // see module header.
+  if (msg) process.stdout.write(msg + '\n')
+  process.exit(0)
+  // Unreachable in production; reached under test when process.exit is mocked.
+  return undefined as never
+}

+ 70 - 0
src/cli/handlers/agents.ts

@@ -0,0 +1,70 @@
+/**
+ * Agents subcommand handler — prints the list of configured agents.
+ * Dynamically imported only when `claude agents` runs.
+ */
+
+import {
+  AGENT_SOURCE_GROUPS,
+  compareAgentsByName,
+  getOverrideSourceLabel,
+  type ResolvedAgent,
+  resolveAgentModelDisplay,
+  resolveAgentOverrides,
+} from '../../tools/AgentTool/agentDisplay.js'
+import {
+  getActiveAgentsFromList,
+  getAgentDefinitionsWithOverrides,
+} from '../../tools/AgentTool/loadAgentsDir.js'
+import { getCwd } from '../../utils/cwd.js'
+
+/** One display line for an agent: "type · model · memory", omitting blank parts. */
+function formatAgent(agent: ResolvedAgent): string {
+  const segments: string[] = [agent.agentType]
+  const modelDisplay = resolveAgentModelDisplay(agent)
+  if (modelDisplay) segments.push(modelDisplay)
+  if (agent.memory) segments.push(`${agent.memory} memory`)
+  return segments.join(' · ')
+}
+
+/**
+ * Print all configured agents, grouped by source, marking entries shadowed
+ * by a higher-precedence source. Prints "No agents found." when no group
+ * has any agents at all.
+ */
+export async function agentsHandler(): Promise<void> {
+  const cwd = getCwd()
+  const { allAgents } = await getAgentDefinitionsWithOverrides(cwd)
+  const activeAgents = getActiveAgentsFromList(allAgents)
+  const resolvedAgents = resolveAgentOverrides(allAgents, activeAgents)
+
+  const lines: string[] = []
+  // Counts only non-shadowed agents; shadowed entries are listed but inactive.
+  let totalActive = 0
+
+  for (const { label, source } of AGENT_SOURCE_GROUPS) {
+    const groupAgents = resolvedAgents
+      .filter(a => a.source === source)
+      .sort(compareAgentsByName)
+
+    if (groupAgents.length === 0) continue
+
+    lines.push(`${label}:`)
+    for (const agent of groupAgents) {
+      if (agent.overriddenBy) {
+        const winnerSource = getOverrideSourceLabel(agent.overriddenBy)
+        lines.push(`  (shadowed by ${winnerSource}) ${formatAgent(agent)}`)
+      } else {
+        lines.push(`  ${formatAgent(agent)}`)
+        totalActive++
+      }
+    }
+    lines.push('')
+  }
+
+  if (lines.length === 0) {
+    // biome-ignore lint/suspicious/noConsole: intentional console output
+    console.log('No agents found.')
+  } else {
+    // biome-ignore lint/suspicious/noConsole: intentional console output
+    console.log(`${totalActive} active agents\n`)
+    // biome-ignore lint/suspicious/noConsole: intentional console output
+    console.log(lines.join('\n').trimEnd())
+  }
+}

+ 330 - 0
src/cli/handlers/auth.ts

@@ -0,0 +1,330 @@
+/* eslint-disable custom-rules/no-process-exit -- CLI subcommand handler intentionally exits */
+
+import {
+  clearAuthRelatedCaches,
+  performLogout,
+} from '../../commands/logout/logout.js'
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  logEvent,
+} from '../../services/analytics/index.js'
+import { getSSLErrorHint } from '../../services/api/errorUtils.js'
+import { fetchAndStoreClaudeCodeFirstTokenDate } from '../../services/api/firstTokenDate.js'
+import {
+  createAndStoreApiKey,
+  fetchAndStoreUserRoles,
+  refreshOAuthToken,
+  shouldUseClaudeAIAuth,
+  storeOAuthAccountInfo,
+} from '../../services/oauth/client.js'
+import { getOauthProfileFromOauthToken } from '../../services/oauth/getOauthProfile.js'
+import { OAuthService } from '../../services/oauth/index.js'
+import type { OAuthTokens } from '../../services/oauth/types.js'
+import {
+  clearOAuthTokenCache,
+  getAnthropicApiKeyWithSource,
+  getAuthTokenSource,
+  getOauthAccountInfo,
+  getSubscriptionType,
+  isUsing3PServices,
+  saveOAuthTokensIfNeeded,
+  validateForceLoginOrg,
+} from '../../utils/auth.js'
+import { saveGlobalConfig } from '../../utils/config.js'
+import { logForDebugging } from '../../utils/debug.js'
+import { isRunningOnHomespace } from '../../utils/envUtils.js'
+import { errorMessage } from '../../utils/errors.js'
+import { logError } from '../../utils/log.js'
+import { getAPIProvider } from '../../utils/model/providers.js'
+import { getInitialSettings } from '../../utils/settings/settings.js'
+import { jsonStringify } from '../../utils/slowOperations.js'
+import {
+  buildAccountProperties,
+  buildAPIProviderProperties,
+} from '../../utils/status.js'
+
+/**
+ * Shared post-token-acquisition logic. Saves tokens, fetches profile/roles,
+ * and sets up the local auth state.
+ *
+ * Order matters: old credentials are cleared first, account info is stored
+ * before tokens are persisted, and caches are cleared last.
+ *
+ * @throws for Console (non-claude.ai) tokens when API key creation fails —
+ *   that path has no usable auth state without a key.
+ */
+export async function installOAuthTokens(tokens: OAuthTokens): Promise<void> {
+  // Clear old state before saving new credentials
+  await performLogout({ clearOnboarding: false })
+
+  // Reuse pre-fetched profile if available, otherwise fetch fresh
+  const profile =
+    tokens.profile ?? (await getOauthProfileFromOauthToken(tokens.accessToken))
+  if (profile) {
+    storeOAuthAccountInfo({
+      accountUuid: profile.account.uuid,
+      emailAddress: profile.account.email,
+      organizationUuid: profile.organization.uuid,
+      displayName: profile.account.display_name || undefined,
+      hasExtraUsageEnabled:
+        profile.organization.has_extra_usage_enabled ?? undefined,
+      billingType: profile.organization.billing_type ?? undefined,
+      subscriptionCreatedAt:
+        profile.organization.subscription_created_at ?? undefined,
+      accountCreatedAt: profile.account.created_at,
+    })
+  } else if (tokens.tokenAccount) {
+    // Fallback to token exchange account data when profile endpoint fails
+    storeOAuthAccountInfo({
+      accountUuid: tokens.tokenAccount.uuid,
+      emailAddress: tokens.tokenAccount.emailAddress,
+      organizationUuid: tokens.tokenAccount.organizationUuid,
+    })
+  }
+
+  const storageResult = saveOAuthTokensIfNeeded(tokens)
+  clearOAuthTokenCache()
+
+  if (storageResult.warning) {
+    logEvent('tengu_oauth_storage_warning', {
+      warning:
+        storageResult.warning as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    })
+  }
+
+  // Roles and first-token-date may fail for limited-scope tokens (e.g.
+  // inference-only from setup-token). They're not required for core auth.
+  await fetchAndStoreUserRoles(tokens.accessToken).catch(err =>
+    logForDebugging(String(err), { level: 'error' }),
+  )
+
+  if (shouldUseClaudeAIAuth(tokens.scopes)) {
+    await fetchAndStoreClaudeCodeFirstTokenDate().catch(err =>
+      logForDebugging(String(err), { level: 'error' }),
+    )
+  } else {
+    // API key creation is critical for Console users — let it throw.
+    const apiKey = await createAndStoreApiKey(tokens.accessToken)
+    if (!apiKey) {
+      throw new Error(
+        'Unable to create API key. The server accepted the request but did not return a key.',
+      )
+    }
+  }
+
+  await clearAuthRelatedCaches()
+}
+
+/**
+ * `claude auth login` handler. Validates flag combinations, honors the
+ * enterprise forceLoginMethod setting, and runs either the env-var refresh
+ * token fast path or the interactive browser OAuth flow. Always terminates
+ * the process (exit 0 on success, 1 on any failure).
+ */
+export async function authLogin({
+  email,
+  sso,
+  console: useConsole,
+  claudeai,
+}: {
+  email?: string
+  sso?: boolean
+  console?: boolean
+  claudeai?: boolean
+}): Promise<void> {
+  if (useConsole && claudeai) {
+    process.stderr.write(
+      'Error: --console and --claudeai cannot be used together.\n',
+    )
+    process.exit(1)
+  }
+
+  const settings = getInitialSettings()
+  // forceLoginMethod is a hard constraint (enterprise setting) — matches ConsoleOAuthFlow behavior.
+  // Without it, --console selects Console; --claudeai (or no flag) selects claude.ai.
+  const loginWithClaudeAi = settings.forceLoginMethod
+    ? settings.forceLoginMethod === 'claudeai'
+    : !useConsole
+  const orgUUID = settings.forceLoginOrgUUID
+
+  // Fast path: if a refresh token is provided via env var, skip the browser
+  // OAuth flow and exchange it directly for tokens.
+  const envRefreshToken = process.env.CLAUDE_CODE_OAUTH_REFRESH_TOKEN
+  if (envRefreshToken) {
+    const envScopes = process.env.CLAUDE_CODE_OAUTH_SCOPES
+    if (!envScopes) {
+      process.stderr.write(
+        'CLAUDE_CODE_OAUTH_SCOPES is required when using CLAUDE_CODE_OAUTH_REFRESH_TOKEN.\n' +
+          'Set it to the space-separated scopes the refresh token was issued with\n' +
+          '(e.g. "user:inference" or "user:profile user:inference user:sessions:claude_code user:mcp_servers").\n',
+      )
+      process.exit(1)
+    }
+
+    const scopes = envScopes.split(/\s+/).filter(Boolean)
+
+    try {
+      logEvent('tengu_login_from_refresh_token', {})
+
+      const tokens = await refreshOAuthToken(envRefreshToken, { scopes })
+      await installOAuthTokens(tokens)
+
+      const orgResult = await validateForceLoginOrg()
+      if (!orgResult.valid) {
+        process.stderr.write(orgResult.message + '\n')
+        process.exit(1)
+      }
+
+      // Mark onboarding complete — interactive paths handle this via
+      // the Onboarding component, but the env var path skips it.
+      saveGlobalConfig(current => {
+        if (current.hasCompletedOnboarding) return current
+        return { ...current, hasCompletedOnboarding: true }
+      })
+
+      logEvent('tengu_oauth_success', {
+        loginWithClaudeAi: shouldUseClaudeAIAuth(tokens.scopes),
+      })
+      process.stdout.write('Login successful.\n')
+      process.exit(0)
+    } catch (err) {
+      logError(err)
+      const sslHint = getSSLErrorHint(err)
+      process.stderr.write(
+        `Login failed: ${errorMessage(err)}\n${sslHint ? sslHint + '\n' : ''}`,
+      )
+      process.exit(1)
+    }
+  }
+
+  const resolvedLoginMethod = sso ? 'sso' : undefined
+
+  const oauthService = new OAuthService()
+
+  try {
+    logEvent('tengu_oauth_flow_start', { loginWithClaudeAi })
+
+    const result = await oauthService.startOAuthFlow(
+      async url => {
+        process.stdout.write('Opening browser to sign in…\n')
+        process.stdout.write(`If the browser didn't open, visit: ${url}\n`)
+      },
+      {
+        loginWithClaudeAi,
+        loginHint: email,
+        loginMethod: resolvedLoginMethod,
+        orgUUID,
+      },
+    )
+
+    await installOAuthTokens(result)
+
+    const orgResult = await validateForceLoginOrg()
+    if (!orgResult.valid) {
+      process.stderr.write(orgResult.message + '\n')
+      process.exit(1)
+    }
+
+    logEvent('tengu_oauth_success', { loginWithClaudeAi })
+
+    process.stdout.write('Login successful.\n')
+    process.exit(0)
+  } catch (err) {
+    logError(err)
+    const sslHint = getSSLErrorHint(err)
+    process.stderr.write(
+      `Login failed: ${errorMessage(err)}\n${sslHint ? sslHint + '\n' : ''}`,
+    )
+    process.exit(1)
+  } finally {
+    // NOTE(review): process.exit in the try block normally prevents finally
+    // from running, so cleanup only executes when exit is mocked (tests) or
+    // an exception path reaches here — confirm this is intended.
+    oauthService.cleanup()
+  }
+}
+
+/**
+ * `claude auth status` handler. Prints auth state as JSON (default) or as
+ * labelled text lines with --text, then exits 0 when logged in, 1 otherwise.
+ * NOTE(review): `opts.json` is accepted but never read — JSON is simply the
+ * non---text branch; confirm the flag is meant to be a no-op default.
+ */
+export async function authStatus(opts: {
+  json?: boolean
+  text?: boolean
+}): Promise<void> {
+  const { source: authTokenSource, hasToken } = getAuthTokenSource()
+  const { source: apiKeySource } = getAnthropicApiKeyWithSource()
+  const hasApiKeyEnvVar =
+    !!process.env.ANTHROPIC_API_KEY && !isRunningOnHomespace()
+  const oauthAccount = getOauthAccountInfo()
+  const subscriptionType = getSubscriptionType()
+  const using3P = isUsing3PServices()
+  // Logged in if ANY credential source is present (token, key, env, 3P).
+  const loggedIn =
+    hasToken || apiKeySource !== 'none' || hasApiKeyEnvVar || using3P
+
+  // Determine auth method — first match wins, most specific sources first.
+  let authMethod: string = 'none'
+  if (using3P) {
+    authMethod = 'third_party'
+  } else if (authTokenSource === 'claude.ai') {
+    authMethod = 'claude.ai'
+  } else if (authTokenSource === 'apiKeyHelper') {
+    authMethod = 'api_key_helper'
+  } else if (authTokenSource !== 'none') {
+    authMethod = 'oauth_token'
+  } else if (apiKeySource === 'ANTHROPIC_API_KEY' || hasApiKeyEnvVar) {
+    authMethod = 'api_key'
+  } else if (apiKeySource === '/login managed key') {
+    authMethod = 'claude.ai'
+  }
+
+  if (opts.text) {
+    const properties = [
+      ...buildAccountProperties(),
+      ...buildAPIProviderProperties(),
+    ]
+    let hasAuthProperty = false
+    for (const prop of properties) {
+      // Properties may carry string or string[] values; skip anything else.
+      const value =
+        typeof prop.value === 'string'
+          ? prop.value
+          : Array.isArray(prop.value)
+            ? prop.value.join(', ')
+            : null
+      if (value === null || value === 'none') {
+        continue
+      }
+      hasAuthProperty = true
+      if (prop.label) {
+        process.stdout.write(`${prop.label}: ${value}\n`)
+      } else {
+        process.stdout.write(`${value}\n`)
+      }
+    }
+    if (!hasAuthProperty && hasApiKeyEnvVar) {
+      process.stdout.write('API key: ANTHROPIC_API_KEY\n')
+    }
+    if (!loggedIn) {
+      process.stdout.write(
+        'Not logged in. Run claude auth login to authenticate.\n',
+      )
+    }
+  } else {
+    const apiProvider = getAPIProvider()
+    const resolvedApiKeySource =
+      apiKeySource !== 'none'
+        ? apiKeySource
+        : hasApiKeyEnvVar
+          ? 'ANTHROPIC_API_KEY'
+          : null
+    const output: Record<string, string | boolean | null> = {
+      loggedIn,
+      authMethod,
+      apiProvider,
+    }
+    if (resolvedApiKeySource) {
+      output.apiKeySource = resolvedApiKeySource
+    }
+    // Account fields are only meaningful for claude.ai-auth'd sessions.
+    if (authMethod === 'claude.ai') {
+      output.email = oauthAccount?.emailAddress ?? null
+      output.orgId = oauthAccount?.organizationUuid ?? null
+      output.orgName = oauthAccount?.organizationName ?? null
+      output.subscriptionType = subscriptionType ?? null
+    }
+
+    process.stdout.write(jsonStringify(output, null, 2) + '\n')
+  }
+  process.exit(loggedIn ? 0 : 1)
+}
+
+/**
+ * `claude auth logout` handler. Clears credentials (onboarding state is
+ * preserved) and exits 0 on success, 1 on failure.
+ */
+export async function authLogout(): Promise<void> {
+  try {
+    await performLogout({ clearOnboarding: false })
+  } catch {
+    process.stderr.write('Failed to log out.\n')
+    process.exit(1)
+  }
+  process.stdout.write('Successfully logged out from your Anthropic account.\n')
+  process.exit(0)
+}

+ 170 - 0
src/cli/handlers/autoMode.ts

@@ -0,0 +1,170 @@
+/**
+ * Auto mode subcommand handlers — dump default/merged classifier rules and
+ * critique user-written rules. Dynamically imported when `claude auto-mode ...` runs.
+ */
+
+import { errorMessage } from '../../utils/errors.js'
+import {
+  getMainLoopModel,
+  parseUserSpecifiedModel,
+} from '../../utils/model/model.js'
+import {
+  type AutoModeRules,
+  buildDefaultExternalSystemPrompt,
+  getDefaultExternalAutoModeRules,
+} from '../../utils/permissions/yoloClassifier.js'
+import { getAutoModeConfig } from '../../utils/settings/settings.js'
+import { sideQuery } from '../../utils/sideQuery.js'
+import { jsonStringify } from '../../utils/slowOperations.js'
+
+/** Pretty-print a rule set to stdout as 2-space-indented JSON. */
+function writeRules(rules: AutoModeRules): void {
+  process.stdout.write(jsonStringify(rules, null, 2) + '\n')
+}
+
+/** `claude auto-mode defaults` — dump the built-in external classifier rules. */
+export function autoModeDefaultsHandler(): void {
+  writeRules(getDefaultExternalAutoModeRules())
+}
+
+/**
+ * Dump the effective auto mode config: user settings where provided, external
+ * defaults otherwise. Per-section REPLACE semantics — matches how
+ * buildYoloSystemPrompt resolves the external template (a non-empty user
+ * section replaces that section's defaults entirely; an empty/absent section
+ * falls through to defaults).
+ */
+export function autoModeConfigHandler(): void {
+  const config = getAutoModeConfig()
+  const defaults = getDefaultExternalAutoModeRules()
+  // `?.length` treats both an absent and an empty section as "use defaults".
+  writeRules({
+    allow: config?.allow?.length ? config.allow : defaults.allow,
+    soft_deny: config?.soft_deny?.length
+      ? config.soft_deny
+      : defaults.soft_deny,
+    environment: config?.environment?.length
+      ? config.environment
+      : defaults.environment,
+  })
+}
+
+// System prompt for the rules-critique side query below. Runtime string —
+// edits here change model behavior, not just docs.
+const CRITIQUE_SYSTEM_PROMPT =
+  'You are an expert reviewer of auto mode classifier rules for Claude Code.\n' +
+  '\n' +
+  'Claude Code has an "auto mode" that uses an AI classifier to decide whether ' +
+  'tool calls should be auto-approved or require user confirmation. Users can ' +
+  'write custom rules in three categories:\n' +
+  '\n' +
+  '- **allow**: Actions the classifier should auto-approve\n' +
+  '- **soft_deny**: Actions the classifier should block (require user confirmation)\n' +
+  "- **environment**: Context about the user's setup that helps the classifier make decisions\n" +
+  '\n' +
+  "Your job is to critique the user's custom rules for clarity, completeness, " +
+  'and potential issues. The classifier is an LLM that reads these rules as ' +
+  'part of its system prompt.\n' +
+  '\n' +
+  'For each rule, evaluate:\n' +
+  '1. **Clarity**: Is the rule unambiguous? Could the classifier misinterpret it?\n' +
+  "2. **Completeness**: Are there gaps or edge cases the rule doesn't cover?\n" +
+  '3. **Conflicts**: Do any of the rules conflict with each other?\n' +
+  '4. **Actionability**: Is the rule specific enough for the classifier to act on?\n' +
+  '\n' +
+  'Be concise and constructive. Only comment on rules that could be improved. ' +
+  'If all rules look good, say so.'
+
+/**
+ * `claude auto-mode critique` — send the user's custom classifier rules to a
+ * model for review. No-ops with guidance when no custom rules exist. Sets
+ * process.exitCode = 1 (rather than exiting) when the side query fails.
+ *
+ * @param options.model optional model override; falls back to the main loop model
+ */
+export async function autoModeCritiqueHandler(options: {
+  model?: string
+}): Promise<void> {
+  const config = getAutoModeConfig()
+  const hasCustomRules =
+    (config?.allow?.length ?? 0) > 0 ||
+    (config?.soft_deny?.length ?? 0) > 0 ||
+    (config?.environment?.length ?? 0) > 0
+
+  if (!hasCustomRules) {
+    process.stdout.write(
+      'No custom auto mode rules found.\n\n' +
+        'Add rules to your settings file under autoMode.{allow, soft_deny, environment}.\n' +
+        'Run `claude auto-mode defaults` to see the default rules for reference.\n',
+    )
+    return
+  }
+
+  const model = options.model
+    ? parseUserSpecifiedModel(options.model)
+    : getMainLoopModel()
+
+  const defaults = getDefaultExternalAutoModeRules()
+  const classifierPrompt = buildDefaultExternalSystemPrompt()
+
+  // Only sections with custom rules are included (formatRulesForCritique
+  // returns '' for empty sections).
+  const userRulesSummary =
+    formatRulesForCritique('allow', config?.allow ?? [], defaults.allow) +
+    formatRulesForCritique(
+      'soft_deny',
+      config?.soft_deny ?? [],
+      defaults.soft_deny,
+    ) +
+    formatRulesForCritique(
+      'environment',
+      config?.environment ?? [],
+      defaults.environment,
+    )
+
+  process.stdout.write('Analyzing your auto mode rules…\n\n')
+
+  let response
+  try {
+    response = await sideQuery({
+      querySource: 'auto_mode_critique',
+      model,
+      system: CRITIQUE_SYSTEM_PROMPT,
+      skipSystemPromptPrefix: true,
+      max_tokens: 4096,
+      messages: [
+        {
+          role: 'user',
+          content:
+            'Here is the full classifier system prompt that the auto mode classifier receives:\n\n' +
+            '<classifier_system_prompt>\n' +
+            classifierPrompt +
+            '\n</classifier_system_prompt>\n\n' +
+            "Here are the user's custom rules that REPLACE the corresponding default sections:\n\n" +
+            userRulesSummary +
+            '\nPlease critique these custom rules.',
+        },
+      ],
+    })
+  } catch (error) {
+    process.stderr.write(
+      'Failed to analyze rules: ' + errorMessage(error) + '\n',
+    )
+    process.exitCode = 1
+    return
+  }
+
+  // Print the first text block of the response; tool/other blocks are ignored.
+  const textBlock = response.content.find(block => block.type === 'text')
+  if (textBlock?.type === 'text') {
+    process.stdout.write(textBlock.text + '\n')
+  } else {
+    process.stdout.write('No critique was generated. Please try again.\n')
+  }
+}
+
+/**
+ * Format one rules section for the critique prompt: the user's custom rules
+ * followed by the defaults they replace. Returns '' when the user wrote no
+ * custom rules for this section (nothing to critique there).
+ */
+function formatRulesForCritique(
+  section: string,
+  userRules: string[],
+  defaultRules: string[],
+): string {
+  if (userRules.length === 0) return ''
+  const bullets = (rules: string[]) => rules.map(r => '- ' + r).join('\n')
+  return (
+    `## ${section} (custom rules replacing defaults)\n` +
+    `Custom:\n${bullets(userRules)}\n\n` +
+    `Defaults being replaced:\n${bullets(defaultRules)}\n\n`
+  )
+}

Різницю між файлами не показано, бо вона завелика
+ 361 - 0
src/cli/handlers/mcp.tsx


+ 878 - 0
src/cli/handlers/plugins.ts

@@ -0,0 +1,878 @@
+/**
+ * Plugin and marketplace subcommand handlers — extracted from main.tsx for lazy loading.
+ * These are dynamically imported only when `claude plugin *` or `claude plugin marketplace *` runs.
+ */
+/* eslint-disable custom-rules/no-process-exit -- CLI subcommand handlers intentionally exit */
+import figures from 'figures'
+import { basename, dirname } from 'path'
+import { setUseCoworkPlugins } from '../../bootstrap/state.js'
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+  logEvent,
+} from '../../services/analytics/index.js'
+import {
+  disableAllPlugins,
+  disablePlugin,
+  enablePlugin,
+  installPlugin,
+  uninstallPlugin,
+  updatePluginCli,
+  VALID_INSTALLABLE_SCOPES,
+  VALID_UPDATE_SCOPES,
+} from '../../services/plugins/pluginCliCommands.js'
+import { getPluginErrorMessage } from '../../types/plugin.js'
+import { errorMessage } from '../../utils/errors.js'
+import { logError } from '../../utils/log.js'
+import { clearAllCaches } from '../../utils/plugins/cacheUtils.js'
+import { getInstallCounts } from '../../utils/plugins/installCounts.js'
+import {
+  isPluginInstalled,
+  loadInstalledPluginsV2,
+} from '../../utils/plugins/installedPluginsManager.js'
+import {
+  createPluginId,
+  loadMarketplacesWithGracefulDegradation,
+} from '../../utils/plugins/marketplaceHelpers.js'
+import {
+  addMarketplaceSource,
+  loadKnownMarketplacesConfig,
+  refreshAllMarketplaces,
+  refreshMarketplace,
+  removeMarketplaceSource,
+  saveMarketplaceToSettings,
+} from '../../utils/plugins/marketplaceManager.js'
+import { loadPluginMcpServers } from '../../utils/plugins/mcpPluginIntegration.js'
+import { parseMarketplaceInput } from '../../utils/plugins/parseMarketplaceInput.js'
+import {
+  parsePluginIdentifier,
+  scopeToSettingSource,
+} from '../../utils/plugins/pluginIdentifier.js'
+import { loadAllPlugins } from '../../utils/plugins/pluginLoader.js'
+import type { PluginSource } from '../../utils/plugins/schemas.js'
+import {
+  type ValidationResult,
+  validateManifest,
+  validatePluginContents,
+} from '../../utils/plugins/validatePlugin.js'
+import { jsonStringify } from '../../utils/slowOperations.js'
+import { plural } from '../../utils/stringUtils.js'
+import { cliError, cliOk } from '../exit.js'
+
+// Re-export for main.tsx to reference in option definitions
+export { VALID_INSTALLABLE_SCOPES, VALID_UPDATE_SCOPES }
+
+/**
+ * Handle marketplace command errors consistently: log the error, print a
+ * "Failed to <action>" line, and exit with code 1 (via cliError — never
+ * returns).
+ */
+export function handleMarketplaceError(error: unknown, action: string): never {
+  logError(error)
+  cliError(`${figures.cross} Failed to ${action}: ${errorMessage(error)}`)
+}
+
+/** Print a validation result's errors then warnings; silent when both are empty. */
+function printValidationResult(result: ValidationResult): void {
+  if (result.errors.length > 0) {
+    // biome-ignore lint/suspicious/noConsole: intentional console output
+    console.log(
+      `${figures.cross} Found ${result.errors.length} ${plural(result.errors.length, 'error')}:\n`,
+    )
+    result.errors.forEach(error => {
+      // biome-ignore lint/suspicious/noConsole: intentional console output
+      console.log(`  ${figures.pointer} ${error.path}: ${error.message}`)
+    })
+    // biome-ignore lint/suspicious/noConsole: intentional console output
+    console.log('')
+  }
+  if (result.warnings.length > 0) {
+    // biome-ignore lint/suspicious/noConsole: intentional console output
+    console.log(
+      `${figures.warning} Found ${result.warnings.length} ${plural(result.warnings.length, 'warning')}:\n`,
+    )
+    result.warnings.forEach(warning => {
+      // biome-ignore lint/suspicious/noConsole: intentional console output
+      console.log(`  ${figures.pointer} ${warning.path}: ${warning.message}`)
+    })
+    // biome-ignore lint/suspicious/noConsole: intentional console output
+    console.log('')
+  }
+}
+
+// plugin validate
+/**
+ * `claude plugin validate <path>` — validate a manifest and, for plugin
+ * manifests inside a `.claude-plugin` directory, the plugin's content files.
+ * Exit codes: 0 success (warnings allowed), 1 validation failure,
+ * 2 unexpected error.
+ */
+export async function pluginValidateHandler(
+  manifestPath: string,
+  options: { cowork?: boolean },
+): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  try {
+    const result = await validateManifest(manifestPath)
+
+    // biome-ignore lint/suspicious/noConsole: intentional console output
+    console.log(`Validating ${result.fileType} manifest: ${result.filePath}\n`)
+    printValidationResult(result)
+
+    // If this is a plugin manifest located inside a .claude-plugin directory,
+    // also validate the plugin's content files (skills, agents, commands,
+    // hooks). Works whether the user passed a directory or the plugin.json
+    // path directly.
+    let contentResults: ValidationResult[] = []
+    if (result.fileType === 'plugin') {
+      const manifestDir = dirname(result.filePath)
+      if (basename(manifestDir) === '.claude-plugin') {
+        contentResults = await validatePluginContents(dirname(manifestDir))
+        for (const r of contentResults) {
+          // biome-ignore lint/suspicious/noConsole: intentional console output
+          console.log(`Validating ${r.fileType}: ${r.filePath}\n`)
+          printValidationResult(r)
+        }
+      }
+    }
+
+    const allSuccess = result.success && contentResults.every(r => r.success)
+    const hasWarnings =
+      result.warnings.length > 0 ||
+      contentResults.some(r => r.warnings.length > 0)
+
+    if (allSuccess) {
+      cliOk(
+        hasWarnings
+          ? `${figures.tick} Validation passed with warnings`
+          : `${figures.tick} Validation passed`,
+      )
+    } else {
+      // biome-ignore lint/suspicious/noConsole: intentional console output
+      console.log(`${figures.cross} Validation failed`)
+      process.exit(1)
+    }
+  } catch (error) {
+    logError(error)
+    // Exit 2 distinguishes harness failures from validation failures (exit 1).
+    // biome-ignore lint/suspicious/noConsole: intentional console output
+    console.error(
+      `${figures.cross} Unexpected error during validation: ${errorMessage(error)}`,
+    )
+    process.exit(2)
+  }
+}
+
+// plugin list (lines 5217–5416)
+export async function pluginListHandler(options: {
+  json?: boolean
+  available?: boolean
+  cowork?: boolean
+}): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  logEvent('tengu_plugin_list_command', {})
+
+  const installedData = loadInstalledPluginsV2()
+  const { getPluginEditableScopes } = await import(
+    '../../utils/plugins/pluginStartupCheck.js'
+  )
+  const enabledPlugins = getPluginEditableScopes()
+
+  const pluginIds = Object.keys(installedData.plugins)
+
+  // Load all plugins once. The JSON and human paths both need:
+  //  - loadErrors (to show load failures per plugin)
+  //  - inline plugins (session-only via --plugin-dir, source='name@inline')
+  //    which are NOT in installedData.plugins (V2 bookkeeping) — they must
+  //    be surfaced separately or `plugin list` silently ignores --plugin-dir.
+  const {
+    enabled: loadedEnabled,
+    disabled: loadedDisabled,
+    errors: loadErrors,
+  } = await loadAllPlugins()
+  const allLoadedPlugins = [...loadedEnabled, ...loadedDisabled]
+  const inlinePlugins = allLoadedPlugins.filter(p =>
+    p.source.endsWith('@inline'),
+  )
+  // Path-level inline failures (dir doesn't exist, parse error before
+  // manifest is read) use source='inline[N]'. Plugin-level errors after
+  // manifest read use source='name@inline'. Collect both for the session
+  // section — these are otherwise invisible since they have no pluginId.
+  const inlineLoadErrors = loadErrors.filter(
+    e => e.source.endsWith('@inline') || e.source.startsWith('inline['),
+  )
+
+  if (options.json) {
+    // Create a map of plugin source to loaded plugin for quick lookup
+    const loadedPluginMap = new Map(allLoadedPlugins.map(p => [p.source, p]))
+
+    const plugins: Array<{
+      id: string
+      version: string
+      scope: string
+      enabled: boolean
+      installPath: string
+      installedAt?: string
+      lastUpdated?: string
+      projectPath?: string
+      mcpServers?: Record<string, unknown>
+      errors?: string[]
+    }> = []
+
+    for (const pluginId of pluginIds.sort()) {
+      const installations = installedData.plugins[pluginId]
+      if (!installations || installations.length === 0) continue
+
+      // Find loading errors for this plugin
+      const pluginName = parsePluginIdentifier(pluginId).name
+      const pluginErrors = loadErrors
+        .filter(
+          e =>
+            e.source === pluginId || ('plugin' in e && e.plugin === pluginName),
+        )
+        .map(getPluginErrorMessage)
+
+      for (const installation of installations) {
+        // Try to find the loaded plugin to get MCP servers
+        const loadedPlugin = loadedPluginMap.get(pluginId)
+        let mcpServers: Record<string, unknown> | undefined
+
+        if (loadedPlugin) {
+          // Load MCP servers if not already cached
+          const servers =
+            loadedPlugin.mcpServers ||
+            (await loadPluginMcpServers(loadedPlugin))
+          if (servers && Object.keys(servers).length > 0) {
+            mcpServers = servers
+          }
+        }
+
+        plugins.push({
+          id: pluginId,
+          version: installation.version || 'unknown',
+          scope: installation.scope,
+          enabled: enabledPlugins.has(pluginId),
+          installPath: installation.installPath,
+          installedAt: installation.installedAt,
+          lastUpdated: installation.lastUpdated,
+          projectPath: installation.projectPath,
+          mcpServers,
+          errors: pluginErrors.length > 0 ? pluginErrors : undefined,
+        })
+      }
+    }
+
+    // Session-only plugins: scope='session', no install metadata.
+    // Filter from inlineLoadErrors (not loadErrors) so an installed plugin
+    // with the same manifest name doesn't cross-contaminate via e.plugin.
+    // The e.plugin fallback catches the dirName≠manifestName case:
+    // createPluginFromPath tags errors with `${dirName}@inline` but
+    // plugin.source is reassigned to `${manifest.name}@inline` afterward
+    // (pluginLoader.ts loadInlinePlugins), so e.source !== p.source when
+    // a dev checkout dir like ~/code/my-fork/ has manifest name 'cool-plugin'.
+    for (const p of inlinePlugins) {
+      const servers = p.mcpServers || (await loadPluginMcpServers(p))
+      const pErrors = inlineLoadErrors
+        .filter(
+          e => e.source === p.source || ('plugin' in e && e.plugin === p.name),
+        )
+        .map(getPluginErrorMessage)
+      plugins.push({
+        id: p.source,
+        version: p.manifest.version ?? 'unknown',
+        scope: 'session',
+        enabled: p.enabled !== false,
+        installPath: p.path,
+        mcpServers:
+          servers && Object.keys(servers).length > 0 ? servers : undefined,
+        errors: pErrors.length > 0 ? pErrors : undefined,
+      })
+    }
+    // Path-level inline failures (--plugin-dir /nonexistent): no LoadedPlugin
+    // exists so the loop above can't surface them. Mirror the human-path
+    // handling so JSON consumers see the failure instead of silent omission.
+    for (const e of inlineLoadErrors.filter(e =>
+      e.source.startsWith('inline['),
+    )) {
+      plugins.push({
+        id: e.source,
+        version: 'unknown',
+        scope: 'session',
+        enabled: false,
+        installPath: 'path' in e ? e.path : '',
+        errors: [getPluginErrorMessage(e)],
+      })
+    }
+
+    // If --available is set, also load available plugins from marketplaces
+    if (options.available) {
+      const available: Array<{
+        pluginId: string
+        name: string
+        description?: string
+        marketplaceName: string
+        version?: string
+        source: PluginSource
+        installCount?: number
+      }> = []
+
+      try {
+        const [config, installCounts] = await Promise.all([
+          loadKnownMarketplacesConfig(),
+          getInstallCounts(),
+        ])
+        const { marketplaces } =
+          await loadMarketplacesWithGracefulDegradation(config)
+
+        for (const {
+          name: marketplaceName,
+          data: marketplace,
+        } of marketplaces) {
+          if (marketplace) {
+            for (const entry of marketplace.plugins) {
+              const pluginId = createPluginId(entry.name, marketplaceName)
+              // Only include plugins that are not already installed
+              if (!isPluginInstalled(pluginId)) {
+                available.push({
+                  pluginId,
+                  name: entry.name,
+                  description: entry.description,
+                  marketplaceName,
+                  version: entry.version,
+                  source: entry.source,
+                  installCount: installCounts?.get(pluginId),
+                })
+              }
+            }
+          }
+        }
+      } catch {
+        // Silently ignore marketplace loading errors
+      }
+
+      cliOk(jsonStringify({ installed: plugins, available }, null, 2))
+    } else {
+      cliOk(jsonStringify(plugins, null, 2))
+    }
+  }
+
+  if (pluginIds.length === 0 && inlinePlugins.length === 0) {
+    // inlineLoadErrors can exist with zero inline plugins (e.g. --plugin-dir
+    // points at a nonexistent path). Don't early-exit over them — fall
+    // through to the session section so the failure is visible.
+    if (inlineLoadErrors.length === 0) {
+      cliOk(
+        'No plugins installed. Use `claude plugin install` to install a plugin.',
+      )
+    }
+  }
+
+  if (pluginIds.length > 0) {
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.log('Installed plugins:\n')
+  }
+
+  for (const pluginId of pluginIds.sort()) {
+    const installations = installedData.plugins[pluginId]
+    if (!installations || installations.length === 0) continue
+
+    // Find loading errors for this plugin
+    const pluginName = parsePluginIdentifier(pluginId).name
+    const pluginErrors = loadErrors.filter(
+      e => e.source === pluginId || ('plugin' in e && e.plugin === pluginName),
+    )
+
+    for (const installation of installations) {
+      const isEnabled = enabledPlugins.has(pluginId)
+      const status =
+        pluginErrors.length > 0
+          ? `${figures.cross} failed to load`
+          : isEnabled
+            ? `${figures.tick} enabled`
+            : `${figures.cross} disabled`
+      const version = installation.version || 'unknown'
+      const scope = installation.scope
+
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`  ${figures.pointer} ${pluginId}`)
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`    Version: ${version}`)
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`    Scope: ${scope}`)
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`    Status: ${status}`)
+      for (const error of pluginErrors) {
+        // biome-ignore lint/suspicious/noConsole:: intentional console output
+        console.log(`    Error: ${getPluginErrorMessage(error)}`)
+      }
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log('')
+    }
+  }
+
+  if (inlinePlugins.length > 0 || inlineLoadErrors.length > 0) {
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.log('Session-only plugins (--plugin-dir):\n')
+    for (const p of inlinePlugins) {
+      // Same dirName≠manifestName fallback as the JSON path above — error
+      // sources use the dir basename but p.source uses the manifest name.
+      const pErrors = inlineLoadErrors.filter(
+        e => e.source === p.source || ('plugin' in e && e.plugin === p.name),
+      )
+      const status =
+        pErrors.length > 0
+          ? `${figures.cross} loaded with errors`
+          : `${figures.tick} loaded`
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`  ${figures.pointer} ${p.source}`)
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`    Version: ${p.manifest.version ?? 'unknown'}`)
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`    Path: ${p.path}`)
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`    Status: ${status}`)
+      for (const e of pErrors) {
+        // biome-ignore lint/suspicious/noConsole:: intentional console output
+        console.log(`    Error: ${getPluginErrorMessage(e)}`)
+      }
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log('')
+    }
+    // Path-level failures: no LoadedPlugin object exists. Show them so
+    // `--plugin-dir /typo` doesn't just silently produce nothing.
+    for (const e of inlineLoadErrors.filter(e =>
+      e.source.startsWith('inline['),
+    )) {
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(
+        `  ${figures.pointer} ${e.source}: ${figures.cross} ${getPluginErrorMessage(e)}\n`,
+      )
+    }
+  }
+
+  cliOk()
+}
+
+// marketplace add (lines 5433–5487)
+export async function marketplaceAddHandler(
+  source: string,
+  options: { cowork?: boolean; sparse?: string[]; scope?: string },
+): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  try {
+    const parsed = await parseMarketplaceInput(source)
+
+    if (!parsed) {
+      cliError(
+        `${figures.cross} Invalid marketplace source format. Try: owner/repo, https://..., or ./path`,
+      )
+    }
+
+    if ('error' in parsed) {
+      cliError(`${figures.cross} ${parsed.error}`)
+    }
+
+    // Validate scope
+    const scope = options.scope ?? 'user'
+    if (scope !== 'user' && scope !== 'project' && scope !== 'local') {
+      cliError(
+        `${figures.cross} Invalid scope '${scope}'. Use: user, project, or local`,
+      )
+    }
+    const settingSource = scopeToSettingSource(scope)
+
+    let marketplaceSource = parsed
+
+    if (options.sparse && options.sparse.length > 0) {
+      if (
+        marketplaceSource.source === 'github' ||
+        marketplaceSource.source === 'git'
+      ) {
+        marketplaceSource = {
+          ...marketplaceSource,
+          sparsePaths: options.sparse,
+        }
+      } else {
+        cliError(
+          `${figures.cross} --sparse is only supported for github and git marketplace sources (got: ${marketplaceSource.source})`,
+        )
+      }
+    }
+
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.log('Adding marketplace...')
+
+    const { name, alreadyMaterialized, resolvedSource } =
+      await addMarketplaceSource(marketplaceSource, message => {
+        // biome-ignore lint/suspicious/noConsole:: intentional console output
+        console.log(message)
+      })
+
+    // Write intent to settings at the requested scope
+    saveMarketplaceToSettings(name, { source: resolvedSource }, settingSource)
+
+    clearAllCaches()
+
+    let sourceType = marketplaceSource.source
+    if (marketplaceSource.source === 'github') {
+      sourceType =
+        marketplaceSource.repo as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
+    }
+    logEvent('tengu_marketplace_added', {
+      source_type:
+        sourceType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    })
+
+    cliOk(
+      alreadyMaterialized
+        ? `${figures.tick} Marketplace '${name}' already on disk — declared in ${scope} settings`
+        : `${figures.tick} Successfully added marketplace: ${name} (declared in ${scope} settings)`,
+    )
+  } catch (error) {
+    handleMarketplaceError(error, 'add marketplace')
+  }
+}
+
+// marketplace list (lines 5497–5565)
+export async function marketplaceListHandler(options: {
+  json?: boolean
+  cowork?: boolean
+}): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  try {
+    const config = await loadKnownMarketplacesConfig()
+    const names = Object.keys(config)
+
+    if (options.json) {
+      const marketplaces = names.sort().map(name => {
+        const marketplace = config[name]
+        const source = marketplace?.source
+        return {
+          name,
+          source: source?.source,
+          ...(source?.source === 'github' && { repo: source.repo }),
+          ...(source?.source === 'git' && { url: source.url }),
+          ...(source?.source === 'url' && { url: source.url }),
+          ...(source?.source === 'directory' && { path: source.path }),
+          ...(source?.source === 'file' && { path: source.path }),
+          installLocation: marketplace?.installLocation,
+        }
+      })
+      cliOk(jsonStringify(marketplaces, null, 2))
+    }
+
+    if (names.length === 0) {
+      cliOk('No marketplaces configured')
+    }
+
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.log('Configured marketplaces:\n')
+    names.forEach(name => {
+      const marketplace = config[name]
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`  ${figures.pointer} ${name}`)
+
+      if (marketplace?.source) {
+        const src = marketplace.source
+        if (src.source === 'github') {
+          // biome-ignore lint/suspicious/noConsole:: intentional console output
+          console.log(`    Source: GitHub (${src.repo})`)
+        } else if (src.source === 'git') {
+          // biome-ignore lint/suspicious/noConsole:: intentional console output
+          console.log(`    Source: Git (${src.url})`)
+        } else if (src.source === 'url') {
+          // biome-ignore lint/suspicious/noConsole:: intentional console output
+          console.log(`    Source: URL (${src.url})`)
+        } else if (src.source === 'directory') {
+          // biome-ignore lint/suspicious/noConsole:: intentional console output
+          console.log(`    Source: Directory (${src.path})`)
+        } else if (src.source === 'file') {
+          // biome-ignore lint/suspicious/noConsole:: intentional console output
+          console.log(`    Source: File (${src.path})`)
+        }
+      }
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log('')
+    })
+
+    cliOk()
+  } catch (error) {
+    handleMarketplaceError(error, 'list marketplaces')
+  }
+}
+
+// marketplace remove (lines 5576–5598)
+export async function marketplaceRemoveHandler(
+  name: string,
+  options: { cowork?: boolean },
+): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  try {
+    await removeMarketplaceSource(name)
+    clearAllCaches()
+
+    logEvent('tengu_marketplace_removed', {
+      marketplace_name:
+        name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    })
+
+    cliOk(`${figures.tick} Successfully removed marketplace: ${name}`)
+  } catch (error) {
+    handleMarketplaceError(error, 'remove marketplace')
+  }
+}
+
+// marketplace update (lines 5609–5672)
+export async function marketplaceUpdateHandler(
+  name: string | undefined,
+  options: { cowork?: boolean },
+): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  try {
+    if (name) {
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`Updating marketplace: ${name}...`)
+
+      await refreshMarketplace(name, message => {
+        // biome-ignore lint/suspicious/noConsole:: intentional console output
+        console.log(message)
+      })
+
+      clearAllCaches()
+
+      logEvent('tengu_marketplace_updated', {
+        marketplace_name:
+          name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+      })
+
+      cliOk(`${figures.tick} Successfully updated marketplace: ${name}`)
+    } else {
+      const config = await loadKnownMarketplacesConfig()
+      const marketplaceNames = Object.keys(config)
+
+      if (marketplaceNames.length === 0) {
+        cliOk('No marketplaces configured')
+      }
+
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`Updating ${marketplaceNames.length} marketplace(s)...`)
+
+      await refreshAllMarketplaces()
+      clearAllCaches()
+
+      logEvent('tengu_marketplace_updated_all', {
+        count:
+          marketplaceNames.length as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+      })
+
+      cliOk(
+        `${figures.tick} Successfully updated ${marketplaceNames.length} marketplace(s)`,
+      )
+    }
+  } catch (error) {
+    handleMarketplaceError(error, 'update marketplace(s)')
+  }
+}
+
+// plugin install (lines 5690–5721)
+export async function pluginInstallHandler(
+  plugin: string,
+  options: { scope?: string; cowork?: boolean },
+): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  const scope = options.scope || 'user'
+  if (options.cowork && scope !== 'user') {
+    cliError('--cowork can only be used with user scope')
+  }
+  if (
+    !VALID_INSTALLABLE_SCOPES.includes(
+      scope as (typeof VALID_INSTALLABLE_SCOPES)[number],
+    )
+  ) {
+    cliError(
+      `Invalid scope: ${scope}. Must be one of: ${VALID_INSTALLABLE_SCOPES.join(', ')}.`,
+    )
+  }
+  // _PROTO_* routes to PII-tagged plugin_name/marketplace_name BQ columns.
+  // Unredacted plugin arg was previously logged to general-access
+  // additional_metadata for all users — dropped in favor of the privileged
+  // column route. marketplace may be undefined (fires before resolution).
+  const { name, marketplace } = parsePluginIdentifier(plugin)
+  logEvent('tengu_plugin_install_command', {
+    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    ...(marketplace && {
+      _PROTO_marketplace_name:
+        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    }),
+    scope: scope as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  })
+
+  await installPlugin(plugin, scope as 'user' | 'project' | 'local')
+}
+
+// plugin uninstall (lines 5738–5769)
+export async function pluginUninstallHandler(
+  plugin: string,
+  options: { scope?: string; cowork?: boolean; keepData?: boolean },
+): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  const scope = options.scope || 'user'
+  if (options.cowork && scope !== 'user') {
+    cliError('--cowork can only be used with user scope')
+  }
+  if (
+    !VALID_INSTALLABLE_SCOPES.includes(
+      scope as (typeof VALID_INSTALLABLE_SCOPES)[number],
+    )
+  ) {
+    cliError(
+      `Invalid scope: ${scope}. Must be one of: ${VALID_INSTALLABLE_SCOPES.join(', ')}.`,
+    )
+  }
+  const { name, marketplace } = parsePluginIdentifier(plugin)
+  logEvent('tengu_plugin_uninstall_command', {
+    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    ...(marketplace && {
+      _PROTO_marketplace_name:
+        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    }),
+    scope: scope as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  })
+
+  await uninstallPlugin(
+    plugin,
+    scope as 'user' | 'project' | 'local',
+    options.keepData,
+  )
+}
+
+// plugin enable (lines 5783–5818)
+export async function pluginEnableHandler(
+  plugin: string,
+  options: { scope?: string; cowork?: boolean },
+): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  let scope: (typeof VALID_INSTALLABLE_SCOPES)[number] | undefined
+  if (options.scope) {
+    if (
+      !VALID_INSTALLABLE_SCOPES.includes(
+        options.scope as (typeof VALID_INSTALLABLE_SCOPES)[number],
+      )
+    ) {
+      cliError(
+        `Invalid scope "${options.scope}". Valid scopes: ${VALID_INSTALLABLE_SCOPES.join(', ')}`,
+      )
+    }
+    scope = options.scope as (typeof VALID_INSTALLABLE_SCOPES)[number]
+  }
+  if (options.cowork && scope !== undefined && scope !== 'user') {
+    cliError('--cowork can only be used with user scope')
+  }
+
+  // --cowork always operates at user scope
+  if (options.cowork && scope === undefined) {
+    scope = 'user'
+  }
+
+  const { name, marketplace } = parsePluginIdentifier(plugin)
+  logEvent('tengu_plugin_enable_command', {
+    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    ...(marketplace && {
+      _PROTO_marketplace_name:
+        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    }),
+    scope: (scope ??
+      'auto') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  })
+
+  await enablePlugin(plugin, scope)
+}
+
+// plugin disable (lines 5833–5902)
+export async function pluginDisableHandler(
+  plugin: string | undefined,
+  options: { scope?: string; cowork?: boolean; all?: boolean },
+): Promise<void> {
+  if (options.all && plugin) {
+    cliError('Cannot use --all with a specific plugin')
+  }
+
+  if (!options.all && !plugin) {
+    cliError('Please specify a plugin name or use --all to disable all plugins')
+  }
+
+  if (options.cowork) setUseCoworkPlugins(true)
+
+  if (options.all) {
+    if (options.scope) {
+      cliError('Cannot use --scope with --all')
+    }
+
+    // No _PROTO_plugin_name here — --all disables all plugins.
+    // Distinguishable from the specific-plugin branch by plugin_name IS NULL.
+    logEvent('tengu_plugin_disable_command', {})
+
+    await disableAllPlugins()
+    return
+  }
+
+  let scope: (typeof VALID_INSTALLABLE_SCOPES)[number] | undefined
+  if (options.scope) {
+    if (
+      !VALID_INSTALLABLE_SCOPES.includes(
+        options.scope as (typeof VALID_INSTALLABLE_SCOPES)[number],
+      )
+    ) {
+      cliError(
+        `Invalid scope "${options.scope}". Valid scopes: ${VALID_INSTALLABLE_SCOPES.join(', ')}`,
+      )
+    }
+    scope = options.scope as (typeof VALID_INSTALLABLE_SCOPES)[number]
+  }
+  if (options.cowork && scope !== undefined && scope !== 'user') {
+    cliError('--cowork can only be used with user scope')
+  }
+
+  // --cowork always operates at user scope
+  if (options.cowork && scope === undefined) {
+    scope = 'user'
+  }
+
+  const { name, marketplace } = parsePluginIdentifier(plugin!)
+  logEvent('tengu_plugin_disable_command', {
+    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    ...(marketplace && {
+      _PROTO_marketplace_name:
+        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    }),
+    scope: (scope ??
+      'auto') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  })
+
+  await disablePlugin(plugin!, scope)
+}
+
+// plugin update (lines 5918–5948)
+export async function pluginUpdateHandler(
+  plugin: string,
+  options: { scope?: string; cowork?: boolean },
+): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  const { name, marketplace } = parsePluginIdentifier(plugin)
+  logEvent('tengu_plugin_update_command', {
+    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    ...(marketplace && {
+      _PROTO_marketplace_name:
+        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    }),
+  })
+
+  let scope: (typeof VALID_UPDATE_SCOPES)[number] = 'user'
+  if (options.scope) {
+    if (
+      !VALID_UPDATE_SCOPES.includes(
+        options.scope as (typeof VALID_UPDATE_SCOPES)[number],
+      )
+    ) {
+      cliError(
+        `Invalid scope "${options.scope}". Valid scopes: ${VALID_UPDATE_SCOPES.join(', ')}`,
+      )
+    }
+    scope = options.scope as (typeof VALID_UPDATE_SCOPES)[number]
+  }
+  if (options.cowork && scope !== 'user') {
+    cliError('--cowork can only be used with user scope')
+  }
+
+  await updatePluginCli(plugin, scope)
+}

Різницю між файлами не показано, бо вона завелика
+ 109 - 0
src/cli/handlers/util.tsx


+ 32 - 0
src/cli/ndjsonSafeStringify.ts

@@ -0,0 +1,32 @@
+import { jsonStringify } from '../utils/slowOperations.js'
+
+// JSON.stringify emits U+2028/U+2029 raw (valid per ECMA-404). When the
+// output is a single NDJSON line, any receiver that uses JavaScript
+// line-terminator semantics (ECMA-262 §11.3 — \n \r U+2028 U+2029) to
+// split the stream will cut the JSON mid-string. ProcessTransport now
+// silently skips non-JSON lines rather than crashing (gh-28405), but
+// the truncated fragment is still lost — the message is silently dropped.
+//
+// The \uXXXX form is equivalent JSON (parses to the same string) but
+// can never be mistaken for a line terminator by ANY receiver. This is
+// what ES2019's "Subsume JSON" proposal and Node's util.inspect do.
+//
+// Single regex with alternation: the callback's one dispatch per match
+// is cheaper than two full-string scans.
+const JS_LINE_TERMINATORS = /\u2028|\u2029/g
+
+function escapeJsLineTerminators(json: string): string {
+  return json.replace(JS_LINE_TERMINATORS, c =>
+    c === '\u2028' ? '\\u2028' : '\\u2029',
+  )
+}
+
+/**
+ * JSON.stringify for one-message-per-line transports. Escapes U+2028
+ * LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR so the serialized output
+ * cannot be broken by a line-splitting receiver. Output is still valid
+ * JSON and parses to the same value.
+ */
+export function ndjsonSafeStringify(value: unknown): string {
+  return escapeJsLineTerminators(jsonStringify(value))
+}

+ 5594 - 0
src/cli/print.ts

@@ -0,0 +1,5594 @@
+// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
+import { feature } from 'bun:bundle'
+import { readFile, stat } from 'fs/promises'
+import { dirname } from 'path'
+import {
+  downloadUserSettings,
+  redownloadUserSettings,
+} from 'src/services/settingsSync/index.js'
+import { waitForRemoteManagedSettingsToLoad } from 'src/services/remoteManagedSettings/index.js'
+import { StructuredIO } from 'src/cli/structuredIO.js'
+import { RemoteIO } from 'src/cli/remoteIO.js'
+import {
+  type Command,
+  formatDescriptionWithSource,
+  getCommandName,
+} from 'src/commands.js'
+import { createStreamlinedTransformer } from 'src/utils/streamlinedTransform.js'
+import { installStreamJsonStdoutGuard } from 'src/utils/streamJsonStdoutGuard.js'
+import type { ToolPermissionContext } from 'src/Tool.js'
+import type { ThinkingConfig } from 'src/utils/thinking.js'
+import { assembleToolPool, filterToolsByDenyRules } from 'src/tools.js'
+import uniqBy from 'lodash-es/uniqBy.js'
+import { uniq } from 'src/utils/array.js'
+import { mergeAndFilterTools } from 'src/utils/toolPool.js'
+import {
+  logEvent,
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+} from 'src/services/analytics/index.js'
+import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js'
+import { logForDebugging } from 'src/utils/debug.js'
+import {
+  logForDiagnosticsNoPII,
+  withDiagnosticsTiming,
+} from 'src/utils/diagLogs.js'
+import { toolMatchesName, type Tool, type Tools } from 'src/Tool.js'
+import {
+  type AgentDefinition,
+  isBuiltInAgent,
+  parseAgentsFromJson,
+} from 'src/tools/AgentTool/loadAgentsDir.js'
+import type { Message, NormalizedUserMessage } from 'src/types/message.js'
+import type { QueuedCommand } from 'src/types/textInputTypes.js'
+import {
+  dequeue,
+  dequeueAllMatching,
+  enqueue,
+  hasCommandsInQueue,
+  peek,
+  subscribeToCommandQueue,
+  getCommandsByMaxPriority,
+} from 'src/utils/messageQueueManager.js'
+import { notifyCommandLifecycle } from 'src/utils/commandLifecycle.js'
+import {
+  getSessionState,
+  notifySessionStateChanged,
+  notifySessionMetadataChanged,
+  setPermissionModeChangedListener,
+  type RequiresActionDetails,
+  type SessionExternalMetadata,
+} from 'src/utils/sessionState.js'
+import { externalMetadataToAppState } from 'src/state/onChangeAppState.js'
+import { getInMemoryErrors, logError, logMCPDebug } from 'src/utils/log.js'
+import {
+  writeToStdout,
+  registerProcessOutputErrorHandlers,
+} from 'src/utils/process.js'
+import type { Stream } from 'src/utils/stream.js'
+import { EMPTY_USAGE } from 'src/services/api/logging.js'
+import {
+  loadConversationForResume,
+  type TurnInterruptionState,
+} from 'src/utils/conversationRecovery.js'
+import type {
+  MCPServerConnection,
+  McpSdkServerConfig,
+  ScopedMcpServerConfig,
+} from 'src/services/mcp/types.js'
+import {
+  ChannelMessageNotificationSchema,
+  gateChannelServer,
+  wrapChannelMessage,
+  findChannelEntry,
+} from 'src/services/mcp/channelNotification.js'
+import {
+  isChannelAllowlisted,
+  isChannelsEnabled,
+} from 'src/services/mcp/channelAllowlist.js'
+import { parsePluginIdentifier } from 'src/utils/plugins/pluginIdentifier.js'
+import { validateUuid } from 'src/utils/uuid.js'
+import { fromArray } from 'src/utils/generators.js'
+import { ask } from 'src/QueryEngine.js'
+import type { PermissionPromptTool } from 'src/utils/queryHelpers.js'
+import {
+  createFileStateCacheWithSizeLimit,
+  mergeFileStateCaches,
+  READ_FILE_STATE_CACHE_SIZE,
+} from 'src/utils/fileStateCache.js'
+import { expandPath } from 'src/utils/path.js'
+import { extractReadFilesFromMessages } from 'src/utils/queryHelpers.js'
+import { registerHookEventHandler } from 'src/utils/hooks/hookEvents.js'
+import { executeFilePersistence } from 'src/utils/filePersistence/filePersistence.js'
+import { finalizePendingAsyncHooks } from 'src/utils/hooks/AsyncHookRegistry.js'
+import {
+  gracefulShutdown,
+  gracefulShutdownSync,
+  isShuttingDown,
+} from 'src/utils/gracefulShutdown.js'
+import { registerCleanup } from 'src/utils/cleanupRegistry.js'
+import { createIdleTimeoutManager } from 'src/utils/idleTimeout.js'
+import type {
+  SDKStatus,
+  ModelInfo,
+  SDKMessage,
+  SDKUserMessage,
+  SDKUserMessageReplay,
+  PermissionResult,
+  McpServerConfigForProcessTransport,
+  McpServerStatus,
+  RewindFilesResult,
+} from 'src/entrypoints/agentSdkTypes.js'
+import type {
+  StdoutMessage,
+  SDKControlInitializeRequest,
+  SDKControlInitializeResponse,
+  SDKControlRequest,
+  SDKControlResponse,
+  SDKControlMcpSetServersResponse,
+  SDKControlReloadPluginsResponse,
+} from 'src/entrypoints/sdk/controlTypes.js'
+import type { PermissionMode } from '@anthropic-ai/claude-agent-sdk'
+import type { PermissionMode as InternalPermissionMode } from 'src/types/permissions.js'
+import { cwd } from 'process'
+import { getCwd } from 'src/utils/cwd.js'
+import omit from 'lodash-es/omit.js'
+import reject from 'lodash-es/reject.js'
+import { isPolicyAllowed } from 'src/services/policyLimits/index.js'
+import type { ReplBridgeHandle } from 'src/bridge/replBridge.js'
+import { getRemoteSessionUrl } from 'src/constants/product.js'
+import { buildBridgeConnectUrl } from 'src/bridge/bridgeStatusUtil.js'
+import { extractInboundMessageFields } from 'src/bridge/inboundMessages.js'
+import { resolveAndPrepend } from 'src/bridge/inboundAttachments.js'
+import type { CanUseToolFn } from 'src/hooks/useCanUseTool.js'
+import { hasPermissionsToUseTool } from 'src/utils/permissions/permissions.js'
+import { safeParseJSON } from 'src/utils/json.js'
+import {
+  outputSchema as permissionToolOutputSchema,
+  permissionPromptToolResultToPermissionDecision,
+} from 'src/utils/permissions/PermissionPromptToolResultSchema.js'
+import { createAbortController } from 'src/utils/abortController.js'
+import { createCombinedAbortSignal } from 'src/utils/combinedAbortSignal.js'
+import { generateSessionTitle } from 'src/utils/sessionTitle.js'
+import { buildSideQuestionFallbackParams } from 'src/utils/queryContext.js'
+import { runSideQuestion } from 'src/utils/sideQuestion.js'
+import {
+  processSessionStartHooks,
+  processSetupHooks,
+  takeInitialUserMessage,
+} from 'src/utils/sessionStart.js'
+import {
+  DEFAULT_OUTPUT_STYLE_NAME,
+  getAllOutputStyles,
+} from 'src/constants/outputStyles.js'
+import { TEAMMATE_MESSAGE_TAG, TICK_TAG } from 'src/constants/xml.js'
+import {
+  getSettings_DEPRECATED,
+  getSettingsWithSources,
+} from 'src/utils/settings/settings.js'
+import { settingsChangeDetector } from 'src/utils/settings/changeDetector.js'
+import { applySettingsChange } from 'src/utils/settings/applySettingsChange.js'
+import {
+  isFastModeAvailable,
+  isFastModeEnabled,
+  isFastModeSupportedByModel,
+  getFastModeState,
+} from 'src/utils/fastMode.js'
+import {
+  isAutoModeGateEnabled,
+  getAutoModeUnavailableNotification,
+  getAutoModeUnavailableReason,
+  isBypassPermissionsModeDisabled,
+  transitionPermissionMode,
+} from 'src/utils/permissions/permissionSetup.js'
+import {
+  tryGenerateSuggestion,
+  logSuggestionOutcome,
+  logSuggestionSuppressed,
+  type PromptVariant,
+} from 'src/services/PromptSuggestion/promptSuggestion.js'
+import { getLastCacheSafeParams } from 'src/utils/forkedAgent.js'
+import { getAccountInformation } from 'src/utils/auth.js'
+import { OAuthService } from 'src/services/oauth/index.js'
+import { installOAuthTokens } from 'src/cli/handlers/auth.js'
+import { getAPIProvider } from 'src/utils/model/providers.js'
+import type { HookCallbackMatcher } from 'src/types/hooks.js'
+import { AwsAuthStatusManager } from 'src/utils/awsAuthStatusManager.js'
+import type { HookEvent } from 'src/entrypoints/agentSdkTypes.js'
+import {
+  registerHookCallbacks,
+  setInitJsonSchema,
+  getInitJsonSchema,
+  setSdkAgentProgressSummariesEnabled,
+} from 'src/bootstrap/state.js'
+import { createSyntheticOutputTool } from 'src/tools/SyntheticOutputTool/SyntheticOutputTool.js'
+import { parseSessionIdentifier } from 'src/utils/sessionUrl.js'
+import {
+  hydrateRemoteSession,
+  hydrateFromCCRv2InternalEvents,
+  resetSessionFilePointer,
+  doesMessageExistInSession,
+  findUnresolvedToolUse,
+  recordAttributionSnapshot,
+  saveAgentSetting,
+  saveMode,
+  saveAiGeneratedTitle,
+  restoreSessionMetadata,
+} from 'src/utils/sessionStorage.js'
+import { incrementPromptCount } from 'src/utils/commitAttribution.js'
+import {
+  setupSdkMcpClients,
+  connectToServer,
+  clearServerCache,
+  fetchToolsForClient,
+  areMcpConfigsEqual,
+  reconnectMcpServerImpl,
+} from 'src/services/mcp/client.js'
+import {
+  filterMcpServersByPolicy,
+  getMcpConfigByName,
+  isMcpServerDisabled,
+  setMcpServerEnabled,
+} from 'src/services/mcp/config.js'
+import {
+  performMCPOAuthFlow,
+  revokeServerTokens,
+} from 'src/services/mcp/auth.js'
+import {
+  runElicitationHooks,
+  runElicitationResultHooks,
+} from 'src/services/mcp/elicitationHandler.js'
+import { executeNotificationHooks } from 'src/utils/hooks.js'
+import {
+  ElicitRequestSchema,
+  ElicitationCompleteNotificationSchema,
+} from '@modelcontextprotocol/sdk/types.js'
+import { getMcpPrefix } from 'src/services/mcp/mcpStringUtils.js'
+import {
+  commandBelongsToServer,
+  filterToolsByServer,
+} from 'src/services/mcp/utils.js'
+import { setupVscodeSdkMcp } from 'src/services/mcp/vscodeSdkMcp.js'
+import { getAllMcpConfigs } from 'src/services/mcp/config.js'
+import {
+  isQualifiedForGrove,
+  checkGroveForNonInteractive,
+} from 'src/services/api/grove.js'
+import {
+  toInternalMessages,
+  toSDKRateLimitInfo,
+} from 'src/utils/messages/mappers.js'
+import { createModelSwitchBreadcrumbs } from 'src/utils/messages.js'
+import { collectContextData } from 'src/commands/context/context-noninteractive.js'
+import { LOCAL_COMMAND_STDOUT_TAG } from 'src/constants/xml.js'
+import {
+  statusListeners,
+  type ClaudeAILimits,
+} from 'src/services/claudeAiLimits.js'
+import {
+  getDefaultMainLoopModel,
+  getMainLoopModel,
+  modelDisplayString,
+  parseUserSpecifiedModel,
+} from 'src/utils/model/model.js'
+import { getModelOptions } from 'src/utils/model/modelOptions.js'
+import {
+  modelSupportsEffort,
+  modelSupportsMaxEffort,
+  EFFORT_LEVELS,
+  resolveAppliedEffort,
+} from 'src/utils/effort.js'
+import { modelSupportsAdaptiveThinking } from 'src/utils/thinking.js'
+import { modelSupportsAutoMode } from 'src/utils/betas.js'
+import { ensureModelStringsInitialized } from 'src/utils/model/modelStrings.js'
+import {
+  getSessionId,
+  setMainLoopModelOverride,
+  setMainThreadAgentType,
+  switchSession,
+  isSessionPersistenceDisabled,
+  getIsRemoteMode,
+  getFlagSettingsInline,
+  setFlagSettingsInline,
+  getMainThreadAgentType,
+  getAllowedChannels,
+  setAllowedChannels,
+  type ChannelEntry,
+} from 'src/bootstrap/state.js'
+import { runWithWorkload, WORKLOAD_CRON } from 'src/utils/workloadContext.js'
+import type { UUID } from 'crypto'
+import { randomUUID } from 'crypto'
+import type { ContentBlockParam } from '@anthropic-ai/sdk/resources/messages.mjs'
+import type { AppState } from 'src/state/AppStateStore.js'
+import {
+  fileHistoryRewind,
+  fileHistoryCanRestore,
+  fileHistoryEnabled,
+  fileHistoryGetDiffStats,
+} from 'src/utils/fileHistory.js'
+import {
+  restoreAgentFromSession,
+  restoreSessionStateFromLog,
+} from 'src/utils/sessionRestore.js'
+import { SandboxManager } from 'src/utils/sandbox/sandbox-adapter.js'
+import {
+  headlessProfilerStartTurn,
+  headlessProfilerCheckpoint,
+  logHeadlessProfilerTurn,
+} from 'src/utils/headlessProfiler.js'
+import {
+  startQueryProfile,
+  logQueryProfileReport,
+} from 'src/utils/queryProfiler.js'
+import { asSessionId } from 'src/types/ids.js'
+import { jsonStringify } from '../utils/slowOperations.js'
+import { skillChangeDetector } from '../utils/skills/skillChangeDetector.js'
+import { getCommands, clearCommandsCache } from '../commands.js'
+import {
+  isBareMode,
+  isEnvTruthy,
+  isEnvDefinedFalsy,
+} from '../utils/envUtils.js'
+import { installPluginsForHeadless } from '../utils/plugins/headlessPluginInstall.js'
+import { refreshActivePlugins } from '../utils/plugins/refresh.js'
+import { loadAllPluginsCacheOnly } from '../utils/plugins/pluginLoader.js'
+import {
+  isTeamLead,
+  hasActiveInProcessTeammates,
+  hasWorkingInProcessTeammates,
+  waitForTeammatesToBecomeIdle,
+} from '../utils/teammate.js'
+import {
+  readUnreadMessages,
+  markMessagesAsRead,
+  isShutdownApproved,
+} from '../utils/teammateMailbox.js'
+import { removeTeammateFromTeamFile } from '../utils/swarm/teamHelpers.js'
+import { unassignTeammateTasks } from '../utils/tasks.js'
+import { getRunningTasks } from '../utils/task/framework.js'
+import { isBackgroundTask } from '../tasks/types.js'
+import { stopTask } from '../tasks/stopTask.js'
+import { drainSdkEvents } from '../utils/sdkEventQueue.js'
+import { initializeGrowthBook } from '../services/analytics/growthbook.js'
+import { errorMessage, toError } from '../utils/errors.js'
+import { sleep } from '../utils/sleep.js'
+import { isExtractModeActive } from '../memdir/paths.js'
+
// Dead code elimination: conditional imports
//
// Each optional subsystem is loaded with a synchronous `require` guarded by
// a compile-time feature() check so that builds with the flag off can strip
// both the call and the module. Keep the `feature(...) ? require(...) : null`
// literal shape intact — the `as typeof import(...)` cast preserves static
// typing without introducing a real import the bundler would always retain.
/* eslint-disable @typescript-eslint/no-require-imports */
const coordinatorModeModule = feature('COORDINATOR_MODE')
  ? (require('../coordinator/coordinatorMode.js') as typeof import('../coordinator/coordinatorMode.js'))
  : null
// Proactive behavior is available behind either the PROACTIVE or KAIROS flag.
const proactiveModule =
  feature('PROACTIVE') || feature('KAIROS')
    ? (require('../proactive/index.js') as typeof import('../proactive/index.js'))
    : null
// The next three modules are all gated by the same AGENT_TRIGGERS flag:
// cron scheduling, its jitter configuration, and the ScheduleCron prompt.
const cronSchedulerModule = feature('AGENT_TRIGGERS')
  ? (require('../utils/cronScheduler.js') as typeof import('../utils/cronScheduler.js'))
  : null
const cronJitterConfigModule = feature('AGENT_TRIGGERS')
  ? (require('../utils/cronJitterConfig.js') as typeof import('../utils/cronJitterConfig.js'))
  : null
const cronGate = feature('AGENT_TRIGGERS')
  ? (require('../tools/ScheduleCronTool/prompt.js') as typeof import('../tools/ScheduleCronTool/prompt.js'))
  : null
const extractMemoriesModule = feature('EXTRACT_MEMORIES')
  ? (require('../services/extractMemories/extractMemories.js') as typeof import('../services/extractMemories/extractMemories.js'))
  : null
/* eslint-enable @typescript-eslint/no-require-imports */
+
// Prompt for a non-interactive team lead that still has a live team: the
// embedded <system-reminder> instructs the model to run the requestShutdown →
// approval → cleanup sequence before producing its final response.
// NOTE(review): injection point is later in this file — presumably on the
// lead's final turn; confirm at the usage site.
const SHUTDOWN_TEAM_PROMPT = `<system-reminder>
You are running in non-interactive mode and cannot return a response to the user until your team is shut down.

You MUST shut down your team before preparing your final response:
1. Use requestShutdown to ask each team member to shut down gracefully
2. Wait for shutdown approvals
3. Use the cleanup operation to clean up the team
4. Only then provide your final response to the user

The user cannot receive your response until the team is completely shut down.
</system-reminder>

Shut down your team and prepare your final response for the user.`
+
+// Track message UUIDs received during the current session runtime
+const MAX_RECEIVED_UUIDS = 10_000
+const receivedMessageUuids = new Set<UUID>()
+const receivedMessageUuidsOrder: UUID[] = []
+
+function trackReceivedMessageUuid(uuid: UUID): boolean {
+  if (receivedMessageUuids.has(uuid)) {
+    return false // duplicate
+  }
+  receivedMessageUuids.add(uuid)
+  receivedMessageUuidsOrder.push(uuid)
+  // Evict oldest entries when at capacity
+  if (receivedMessageUuidsOrder.length > MAX_RECEIVED_UUIDS) {
+    const toEvict = receivedMessageUuidsOrder.splice(
+      0,
+      receivedMessageUuidsOrder.length - MAX_RECEIVED_UUIDS,
+    )
+    for (const old of toEvict) {
+      receivedMessageUuids.delete(old)
+    }
+  }
+  return true // new UUID
+}
+
+type PromptValue = string | ContentBlockParam[]
+
+function toBlocks(v: PromptValue): ContentBlockParam[] {
+  return typeof v === 'string' ? [{ type: 'text', text: v }] : v
+}
+
+/**
+ * Join prompt values from multiple queued commands into one. Strings are
+ * newline-joined; if any value is a block array, all values are normalized
+ * to blocks and concatenated.
+ */
+export function joinPromptValues(values: PromptValue[]): PromptValue {
+  if (values.length === 1) return values[0]!
+  if (values.every(v => typeof v === 'string')) {
+    return values.join('\n')
+  }
+  return values.flatMap(toBlocks)
+}
+
+/**
+ * Whether `next` can be batched into the same ask() call as `head`. Only
+ * prompt-mode commands batch, and only when the workload tag matches (so the
+ * combined turn is attributed correctly) and the isMeta flag matches (so a
+ * proactive tick can't merge into a user prompt and lose its hidden-in-
+ * transcript marking when the head is spread over the merged command).
+ */
+export function canBatchWith(
+  head: QueuedCommand,
+  next: QueuedCommand | undefined,
+): boolean {
+  return (
+    next !== undefined &&
+    next.mode === 'prompt' &&
+    next.workload === head.workload &&
+    next.isMeta === head.isMeta
+  )
+}
+
+export async function runHeadless(
+  inputPrompt: string | AsyncIterable<string>,
+  getAppState: () => AppState,
+  setAppState: (f: (prev: AppState) => AppState) => void,
+  commands: Command[],
+  tools: Tools,
+  sdkMcpConfigs: Record<string, McpSdkServerConfig>,
+  agents: AgentDefinition[],
+  options: {
+    continue: boolean | undefined
+    resume: string | boolean | undefined
+    resumeSessionAt: string | undefined
+    verbose: boolean | undefined
+    outputFormat: string | undefined
+    jsonSchema: Record<string, unknown> | undefined
+    permissionPromptToolName: string | undefined
+    allowedTools: string[] | undefined
+    thinkingConfig: ThinkingConfig | undefined
+    maxTurns: number | undefined
+    maxBudgetUsd: number | undefined
+    taskBudget: { total: number } | undefined
+    systemPrompt: string | undefined
+    appendSystemPrompt: string | undefined
+    userSpecifiedModel: string | undefined
+    fallbackModel: string | undefined
+    teleport: string | true | null | undefined
+    sdkUrl: string | undefined
+    replayUserMessages: boolean | undefined
+    includePartialMessages: boolean | undefined
+    forkSession: boolean | undefined
+    rewindFiles: string | undefined
+    enableAuthStatus: boolean | undefined
+    agent: string | undefined
+    workload: string | undefined
+    setupTrigger?: 'init' | 'maintenance' | undefined
+    sessionStartHooksPromise?: ReturnType<typeof processSessionStartHooks>
+    setSDKStatus?: (status: SDKStatus) => void
+  },
+): Promise<void> {
+  if (
+    process.env.USER_TYPE === 'ant' &&
+    isEnvTruthy(process.env.CLAUDE_CODE_EXIT_AFTER_FIRST_RENDER)
+  ) {
+    process.stderr.write(
+      `\nStartup time: ${Math.round(process.uptime() * 1000)}ms\n`,
+    )
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(0)
+  }
+
+  // Fire user settings download now so it overlaps with the MCP/tool setup
+  // below. Managed settings already started in main.tsx preAction; this gives
+  // user settings a similar head start. The cached promise is joined in
+  // installPluginsAndApplyMcpInBackground before plugin install reads
+  // enabledPlugins.
+  if (
+    feature('DOWNLOAD_USER_SETTINGS') &&
+    (isEnvTruthy(process.env.CLAUDE_CODE_REMOTE) || getIsRemoteMode())
+  ) {
+    void downloadUserSettings()
+  }
+
+  // In headless mode there is no React tree, so the useSettingsChange hook
+  // never runs. Subscribe directly so that settings changes (including
+  // managed-settings / policy updates) are fully applied.
+  settingsChangeDetector.subscribe(source => {
+    applySettingsChange(source, setAppState)
+
+    // In headless mode, also sync the denormalized fastMode field from
+    // settings. The TUI manages fastMode via the UI so it skips this.
+    if (isFastModeEnabled()) {
+      setAppState(prev => {
+        const s = prev.settings as Record<string, unknown>
+        const fastMode = s.fastMode === true && !s.fastModePerSessionOptIn
+        return { ...prev, fastMode }
+      })
+    }
+  })
+
+  // Proactive activation is now handled in main.tsx before getTools() so
+  // SleepTool passes isEnabled() filtering. This fallback covers the case
+  // where CLAUDE_CODE_PROACTIVE is set but main.tsx's check didn't fire
+  // (e.g. env was injected by the SDK transport after argv parsing).
+  if (
+    (feature('PROACTIVE') || feature('KAIROS')) &&
+    proactiveModule &&
+    !proactiveModule.isProactiveActive() &&
+    isEnvTruthy(process.env.CLAUDE_CODE_PROACTIVE)
+  ) {
+    proactiveModule.activateProactive('command')
+  }
+
+  // Periodically force a full GC to keep memory usage in check
+  if (typeof Bun !== 'undefined') {
+    const gcTimer = setInterval(Bun.gc, 1000)
+    gcTimer.unref()
+  }
+
+  // Start headless profiler for first turn
+  headlessProfilerStartTurn()
+  headlessProfilerCheckpoint('runHeadless_entry')
+
+  // Check Grove requirements for non-interactive consumer subscribers
+  if (await isQualifiedForGrove()) {
+    await checkGroveForNonInteractive()
+  }
+  headlessProfilerCheckpoint('after_grove_check')
+
+  // Initialize GrowthBook so feature flags take effect in headless mode.
+  // Without this, the disk cache is empty and all flags fall back to defaults.
+  void initializeGrowthBook()
+
+  if (options.resumeSessionAt && !options.resume) {
+    process.stderr.write(`Error: --resume-session-at requires --resume\n`)
+    gracefulShutdownSync(1)
+    return
+  }
+
+  if (options.rewindFiles && !options.resume) {
+    process.stderr.write(`Error: --rewind-files requires --resume\n`)
+    gracefulShutdownSync(1)
+    return
+  }
+
+  if (options.rewindFiles && inputPrompt) {
+    process.stderr.write(
+      `Error: --rewind-files is a standalone operation and cannot be used with a prompt\n`,
+    )
+    gracefulShutdownSync(1)
+    return
+  }
+
+  const structuredIO = getStructuredIO(inputPrompt, options)
+
+  // When emitting NDJSON for SDK clients, any stray write to stdout (debug
+  // prints, dependency console.log, library banners) breaks the client's
+  // line-by-line JSON parser. Install a guard that diverts non-JSON lines to
+  // stderr so the stream stays clean. Must run before the first
+  // structuredIO.write below.
+  if (options.outputFormat === 'stream-json') {
+    installStreamJsonStdoutGuard()
+  }
+
+  // #34044: if user explicitly set sandbox.enabled=true but deps are missing,
+  // isSandboxingEnabled() returns false silently. Surface the reason so users
+  // know their security config isn't being enforced.
+  const sandboxUnavailableReason = SandboxManager.getSandboxUnavailableReason()
+  if (sandboxUnavailableReason) {
+    if (SandboxManager.isSandboxRequired()) {
+      process.stderr.write(
+        `\nError: sandbox required but unavailable: ${sandboxUnavailableReason}\n` +
+          `  sandbox.failIfUnavailable is set — refusing to start without a working sandbox.\n\n`,
+      )
+      gracefulShutdownSync(1)
+      return
+    }
+    process.stderr.write(
+      `\n⚠ Sandbox disabled: ${sandboxUnavailableReason}\n` +
+        `  Commands will run WITHOUT sandboxing. Network and filesystem restrictions will NOT be enforced.\n\n`,
+    )
+  } else if (SandboxManager.isSandboxingEnabled()) {
+    // Initialize sandbox with a callback that forwards network permission
+    // requests to the SDK host via the can_use_tool control_request protocol.
+    // This must happen after structuredIO is created so we can send requests.
+    try {
+      await SandboxManager.initialize(structuredIO.createSandboxAskCallback())
+    } catch (err) {
+      process.stderr.write(`\n❌ Sandbox Error: ${errorMessage(err)}\n`)
+      gracefulShutdownSync(1, 'other')
+      return
+    }
+  }
+
+  if (options.outputFormat === 'stream-json' && options.verbose) {
+    registerHookEventHandler(event => {
+      const message: StdoutMessage = (() => {
+        switch (event.type) {
+          case 'started':
+            return {
+              type: 'system' as const,
+              subtype: 'hook_started' as const,
+              hook_id: event.hookId,
+              hook_name: event.hookName,
+              hook_event: event.hookEvent,
+              uuid: randomUUID(),
+              session_id: getSessionId(),
+            }
+          case 'progress':
+            return {
+              type: 'system' as const,
+              subtype: 'hook_progress' as const,
+              hook_id: event.hookId,
+              hook_name: event.hookName,
+              hook_event: event.hookEvent,
+              stdout: event.stdout,
+              stderr: event.stderr,
+              output: event.output,
+              uuid: randomUUID(),
+              session_id: getSessionId(),
+            }
+          case 'response':
+            return {
+              type: 'system' as const,
+              subtype: 'hook_response' as const,
+              hook_id: event.hookId,
+              hook_name: event.hookName,
+              hook_event: event.hookEvent,
+              output: event.output,
+              stdout: event.stdout,
+              stderr: event.stderr,
+              exit_code: event.exitCode,
+              outcome: event.outcome,
+              uuid: randomUUID(),
+              session_id: getSessionId(),
+            }
+        }
+      })()
+      void structuredIO.write(message)
+    })
+  }
+
+  if (options.setupTrigger) {
+    await processSetupHooks(options.setupTrigger)
+  }
+
+  headlessProfilerCheckpoint('before_loadInitialMessages')
+  const appState = getAppState()
+  const {
+    messages: initialMessages,
+    turnInterruptionState,
+    agentSetting: resumedAgentSetting,
+  } = await loadInitialMessages(setAppState, {
+    continue: options.continue,
+    teleport: options.teleport,
+    resume: options.resume,
+    resumeSessionAt: options.resumeSessionAt,
+    forkSession: options.forkSession,
+    outputFormat: options.outputFormat,
+    sessionStartHooksPromise: options.sessionStartHooksPromise,
+    restoredWorkerState: structuredIO.restoredWorkerState,
+  })
+
+  // SessionStart hooks can emit initialUserMessage — the first user turn for
+  // headless orchestrator sessions where stdin is empty and additionalContext
+  // alone (an attachment, not a turn) would leave the REPL with nothing to
+  // respond to. The hook promise is awaited inside loadInitialMessages, so the
+  // module-level pending value is set by the time we get here.
+  const hookInitialUserMessage = takeInitialUserMessage()
+  if (hookInitialUserMessage) {
+    structuredIO.prependUserMessage(hookInitialUserMessage)
+  }
+
+  // Restore agent setting from the resumed session (if not overridden by current --agent flag
+  // or settings-based agent, which would already have set mainThreadAgentType in main.tsx)
+  if (!options.agent && !getMainThreadAgentType() && resumedAgentSetting) {
+    const { agentDefinition: restoredAgent } = restoreAgentFromSession(
+      resumedAgentSetting,
+      undefined,
+      { activeAgents: agents, allAgents: agents },
+    )
+    if (restoredAgent) {
+      setAppState(prev => ({ ...prev, agent: restoredAgent.agentType }))
+      // Apply the agent's system prompt for non-built-in agents (mirrors main.tsx initial --agent path)
+      if (!options.systemPrompt && !isBuiltInAgent(restoredAgent)) {
+        const agentSystemPrompt = restoredAgent.getSystemPrompt()
+        if (agentSystemPrompt) {
+          options.systemPrompt = agentSystemPrompt
+        }
+      }
+      // Re-persist agent setting so future resumes maintain the agent
+      saveAgentSetting(restoredAgent.agentType)
+    }
+  }
+
+  // gracefulShutdownSync schedules an async shutdown and sets process.exitCode.
+  // If a loadInitialMessages error path triggered it, bail early to avoid
+  // unnecessary work while the process winds down.
+  if (initialMessages.length === 0 && process.exitCode !== undefined) {
+    return
+  }
+
+  // Handle --rewind-files: restore filesystem and exit immediately
+  if (options.rewindFiles) {
+    // File history snapshots are only created for user messages,
+    // so we require the target to be a user message
+    const targetMessage = initialMessages.find(
+      m => m.uuid === options.rewindFiles,
+    )
+
+    if (!targetMessage || targetMessage.type !== 'user') {
+      process.stderr.write(
+        `Error: --rewind-files requires a user message UUID, but ${options.rewindFiles} is not a user message in this session\n`,
+      )
+      gracefulShutdownSync(1)
+      return
+    }
+
+    const currentAppState = getAppState()
+    const result = await handleRewindFiles(
+      options.rewindFiles as UUID,
+      currentAppState,
+      setAppState,
+      false,
+    )
+    if (!result.canRewind) {
+      process.stderr.write(`Error: ${result.error || 'Unexpected error'}\n`)
+      gracefulShutdownSync(1)
+      return
+    }
+
+    // Rewind complete - exit successfully
+    process.stdout.write(
+      `Files rewound to state at message ${options.rewindFiles}\n`,
+    )
+    gracefulShutdownSync(0)
+    return
+  }
+
+  // Check if we need input prompt - skip if we're resuming with a valid session ID/JSONL file or using SDK URL
+  const hasValidResumeSessionId =
+    typeof options.resume === 'string' &&
+    (Boolean(validateUuid(options.resume)) || options.resume.endsWith('.jsonl'))
+  const isUsingSdkUrl = Boolean(options.sdkUrl)
+
+  if (!inputPrompt && !hasValidResumeSessionId && !isUsingSdkUrl) {
+    process.stderr.write(
+      `Error: Input must be provided either through stdin or as a prompt argument when using --print\n`,
+    )
+    gracefulShutdownSync(1)
+    return
+  }
+
+  if (options.outputFormat === 'stream-json' && !options.verbose) {
+    process.stderr.write(
+      'Error: When using --print, --output-format=stream-json requires --verbose\n',
+    )
+    gracefulShutdownSync(1)
+    return
+  }
+
+  // Filter out MCP tools that are in the deny list
+  const allowedMcpTools = filterToolsByDenyRules(
+    appState.mcp.tools,
+    appState.toolPermissionContext,
+  )
+  let filteredTools = [...tools, ...allowedMcpTools]
+
+  // When using SDK URL, always use stdio permission prompting to delegate to the SDK
+  const effectivePermissionPromptToolName = options.sdkUrl
+    ? 'stdio'
+    : options.permissionPromptToolName
+
+  // Callback for when a permission prompt is shown
+  const onPermissionPrompt = (details: RequiresActionDetails) => {
+    if (feature('COMMIT_ATTRIBUTION')) {
+      setAppState(prev => ({
+        ...prev,
+        attribution: {
+          ...prev.attribution,
+          permissionPromptCount: prev.attribution.permissionPromptCount + 1,
+        },
+      }))
+    }
+    notifySessionStateChanged('requires_action', details)
+  }
+
+  const canUseTool = getCanUseToolFn(
+    effectivePermissionPromptToolName,
+    structuredIO,
+    () => getAppState().mcp.tools,
+    onPermissionPrompt,
+  )
+  if (options.permissionPromptToolName) {
+    // Remove the permission prompt tool from the list of available tools.
+    filteredTools = filteredTools.filter(
+      tool => !toolMatchesName(tool, options.permissionPromptToolName!),
+    )
+  }
+
+  // Install errors handlers to gracefully handle broken pipes (e.g., when parent process dies)
+  registerProcessOutputErrorHandlers()
+
+  headlessProfilerCheckpoint('after_loadInitialMessages')
+
+  // Ensure model strings are initialized before generating model options.
+  // For Bedrock users, this waits for the profile fetch to get correct region strings.
+  await ensureModelStringsInitialized()
+  headlessProfilerCheckpoint('after_modelStrings')
+
+  // UDS inbox store registration is deferred until after `run` is defined
+  // so we can pass `run` as the onEnqueue callback (see below).
+
+  // Only `json` + `verbose` needs the full array (jsonStringify(messages) below).
+  // For stream-json (SDK/CCR) and default text output, only the last message is
+  // read for the exit code / final result. Avoid accumulating every message in
+  // memory for the entire session.
+  const needsFullArray = options.outputFormat === 'json' && options.verbose
+  const messages: SDKMessage[] = []
+  let lastMessage: SDKMessage | undefined
+  // Streamlined mode transforms messages when CLAUDE_CODE_STREAMLINED_OUTPUT=true and using stream-json
+  // Build flag gates this out of external builds; env var is the runtime opt-in for ant builds
+  const transformToStreamlined =
+    feature('STREAMLINED_OUTPUT') &&
+    isEnvTruthy(process.env.CLAUDE_CODE_STREAMLINED_OUTPUT) &&
+    options.outputFormat === 'stream-json'
+      ? createStreamlinedTransformer()
+      : null
+
+  headlessProfilerCheckpoint('before_runHeadlessStreaming')
+  for await (const message of runHeadlessStreaming(
+    structuredIO,
+    appState.mcp.clients,
+    [...commands, ...appState.mcp.commands],
+    filteredTools,
+    initialMessages,
+    canUseTool,
+    sdkMcpConfigs,
+    getAppState,
+    setAppState,
+    agents,
+    options,
+    turnInterruptionState,
+  )) {
+    if (transformToStreamlined) {
+      // Streamlined mode: transform messages and stream immediately
+      const transformed = transformToStreamlined(message)
+      if (transformed) {
+        await structuredIO.write(transformed)
+      }
+    } else if (options.outputFormat === 'stream-json' && options.verbose) {
+      await structuredIO.write(message)
+    }
+    // Should not be getting control messages or stream events in non-stream mode.
+    // Also filter out streamlined types since they're only produced by the transformer.
+    // SDK-only system events are excluded so lastMessage stays at the result
+    // (session_state_changed(idle) and any late task_notification drain after
+    // result in the finally block).
+    if (
+      message.type !== 'control_response' &&
+      message.type !== 'control_request' &&
+      message.type !== 'control_cancel_request' &&
+      !(
+        message.type === 'system' &&
+        (message.subtype === 'session_state_changed' ||
+          message.subtype === 'task_notification' ||
+          message.subtype === 'task_started' ||
+          message.subtype === 'task_progress' ||
+          message.subtype === 'post_turn_summary')
+      ) &&
+      message.type !== 'stream_event' &&
+      message.type !== 'keep_alive' &&
+      message.type !== 'streamlined_text' &&
+      message.type !== 'streamlined_tool_use_summary' &&
+      message.type !== 'prompt_suggestion'
+    ) {
+      if (needsFullArray) {
+        messages.push(message)
+      }
+      lastMessage = message
+    }
+  }
+
+  switch (options.outputFormat) {
+    case 'json':
+      if (!lastMessage || lastMessage.type !== 'result') {
+        throw new Error('No messages returned')
+      }
+      if (options.verbose) {
+        writeToStdout(jsonStringify(messages) + '\n')
+        break
+      }
+      writeToStdout(jsonStringify(lastMessage) + '\n')
+      break
+    case 'stream-json':
+      // already logged above
+      break
+    default:
+      if (!lastMessage || lastMessage.type !== 'result') {
+        throw new Error('No messages returned')
+      }
+      switch (lastMessage.subtype) {
+        case 'success':
+          writeToStdout(
+            lastMessage.result.endsWith('\n')
+              ? lastMessage.result
+              : lastMessage.result + '\n',
+          )
+          break
+        case 'error_during_execution':
+          writeToStdout(`Execution error`)
+          break
+        case 'error_max_turns':
+          writeToStdout(`Error: Reached max turns (${options.maxTurns})`)
+          break
+        case 'error_max_budget_usd':
+          writeToStdout(`Error: Exceeded USD budget (${options.maxBudgetUsd})`)
+          break
+        case 'error_max_structured_output_retries':
+          writeToStdout(
+            `Error: Failed to provide valid structured output after maximum retries`,
+          )
+      }
+  }
+
+  // Log headless latency metrics for the final turn
+  logHeadlessProfilerTurn()
+
+  // Drain any in-flight memory extraction before shutdown. The response is
+  // already flushed above, so this adds no user-visible latency — it just
+  // delays process exit so gracefulShutdownSync's 5s failsafe doesn't kill
+  // the forked agent mid-flight. Gated by isExtractModeActive so the
+  // tengu_slate_thimble flag controls non-interactive extraction end-to-end.
+  if (feature('EXTRACT_MEMORIES') && isExtractModeActive()) {
+    await extractMemoriesModule!.drainPendingExtraction()
+  }
+
+  gracefulShutdownSync(
+    lastMessage?.type === 'result' && lastMessage?.is_error ? 1 : 0,
+  )
+}
+
+function runHeadlessStreaming(
+  structuredIO: StructuredIO,
+  mcpClients: MCPServerConnection[],
+  commands: Command[],
+  tools: Tools,
+  initialMessages: Message[],
+  canUseTool: CanUseToolFn,
+  sdkMcpConfigs: Record<string, McpSdkServerConfig>,
+  getAppState: () => AppState,
+  setAppState: (f: (prev: AppState) => AppState) => void,
+  agents: AgentDefinition[],
+  options: {
+    verbose: boolean | undefined
+    jsonSchema: Record<string, unknown> | undefined
+    permissionPromptToolName: string | undefined
+    allowedTools: string[] | undefined
+    thinkingConfig: ThinkingConfig | undefined
+    maxTurns: number | undefined
+    maxBudgetUsd: number | undefined
+    taskBudget: { total: number } | undefined
+    systemPrompt: string | undefined
+    appendSystemPrompt: string | undefined
+    userSpecifiedModel: string | undefined
+    fallbackModel: string | undefined
+    replayUserMessages?: boolean | undefined
+    includePartialMessages?: boolean | undefined
+    enableAuthStatus?: boolean | undefined
+    agent?: string | undefined
+    setSDKStatus?: (status: SDKStatus) => void
+    promptSuggestions?: boolean | undefined
+    workload?: string | undefined
+  },
+  turnInterruptionState?: TurnInterruptionState,
+): AsyncIterable<StdoutMessage> {
+  let running = false
+  let runPhase:
+    | 'draining_commands'
+    | 'waiting_for_agents'
+    | 'finally_flush'
+    | 'finally_post_flush'
+    | undefined
+  let inputClosed = false
+  let shutdownPromptInjected = false
+  let heldBackResult: StdoutMessage | null = null
+  let abortController: AbortController | undefined
+  // Same queue sendRequest() enqueues to — one FIFO for everything.
+  const output = structuredIO.outbound
+
+  // Ctrl+C in -p mode: abort the in-flight query, then shut down gracefully.
+  // gracefulShutdown persists session state and flushes analytics, with a
+  // failsafe timer that force-exits if cleanup hangs.
+  const sigintHandler = () => {
+    logForDiagnosticsNoPII('info', 'shutdown_signal', { signal: 'SIGINT' })
+    // Abort at most once; a repeated SIGINT skips the abort and falls
+    // through to gracefulShutdown again (presumably idempotent — TODO confirm).
+    if (abortController && !abortController.signal.aborted) {
+      abortController.abort()
+    }
+    // Fire-and-forget with exit code 0: user-initiated cancel is not an error.
+    void gracefulShutdown(0)
+  }
+  process.on('SIGINT', sigintHandler)
+
+  // Dump run()'s state at SIGTERM so a stuck session's healthsweep can name
+  // the do/while(waitingForAgents) poll without reading the transcript.
+  registerCleanup(async () => {
+    const bg: Record<string, number> = {}
+    for (const t of getRunningTasks(getAppState())) {
+      if (isBackgroundTask(t)) bg[t.type] = (bg[t.type] ?? 0) + 1
+    }
+    logForDiagnosticsNoPII('info', 'run_state_at_shutdown', {
+      run_active: running,
+      run_phase: runPhase,
+      worker_status: getSessionState(),
+      internal_events_pending: structuredIO.internalEventsPending,
+      bg_tasks: bg,
+    })
+  })
+
+  // Wire the central onChangeAppState mode-diff hook to the SDK output stream.
+  // This fires whenever ANY code path mutates toolPermissionContext.mode —
+  // Shift+Tab, ExitPlanMode dialog, /plan slash command, rewind, bridge
+  // set_permission_mode, the query loop, stop_task — rather than the two
+  // paths that previously went through a bespoke wrapper.
+  // The wrapper's body was fully redundant (it enqueued here AND called
+  // notifySessionMetadataChanged, both of which onChangeAppState now covers);
+  // keeping it would double-emit status messages.
+  setPermissionModeChangedListener(newMode => {
+    // Only emit for SDK-exposed modes.
+    if (
+      newMode === 'default' ||
+      newMode === 'acceptEdits' ||
+      newMode === 'bypassPermissions' ||
+      newMode === 'plan' ||
+      newMode === (feature('TRANSCRIPT_CLASSIFIER') && 'auto') ||
+      newMode === 'dontAsk'
+    ) {
+      output.enqueue({
+        type: 'system',
+        subtype: 'status',
+        status: null,
+        permissionMode: newMode as PermissionMode,
+        uuid: randomUUID(),
+        session_id: getSessionId(),
+      })
+    }
+  })
+
+  // Prompt suggestion tracking (push model)
+  const suggestionState: {
+    abortController: AbortController | null
+    inflightPromise: Promise<void> | null
+    lastEmitted: {
+      text: string
+      emittedAt: number
+      promptId: PromptVariant
+      generationRequestId: string | null
+    } | null
+    pendingSuggestion: {
+      type: 'prompt_suggestion'
+      suggestion: string
+      uuid: UUID
+      session_id: string
+    } | null
+    pendingLastEmittedEntry: {
+      text: string
+      promptId: PromptVariant
+      generationRequestId: string | null
+    } | null
+  } = {
+    abortController: null,
+    inflightPromise: null,
+    lastEmitted: null,
+    pendingSuggestion: null,
+    pendingLastEmittedEntry: null,
+  }
+
+  // Set up AWS auth status listener if enabled
+  let unsubscribeAuthStatus: (() => void) | undefined
+  if (options.enableAuthStatus) {
+    const authStatusManager = AwsAuthStatusManager.getInstance()
+    unsubscribeAuthStatus = authStatusManager.subscribe(status => {
+      output.enqueue({
+        type: 'auth_status',
+        isAuthenticating: status.isAuthenticating,
+        output: status.output,
+        error: status.error,
+        uuid: randomUUID(),
+        session_id: getSessionId(),
+      })
+    })
+  }
+
+  // Set up rate limit status listener to emit SDKRateLimitEvent for all status changes.
+  // Emitting for all statuses (including 'allowed') ensures consumers can clear warnings
+  // when rate limits reset. The upstream emitStatusChange already deduplicates via isEqual.
+  const rateLimitListener = (limits: ClaudeAILimits) => {
+    const rateLimitInfo = toSDKRateLimitInfo(limits)
+    if (rateLimitInfo) {
+      output.enqueue({
+        type: 'rate_limit_event',
+        rate_limit_info: rateLimitInfo,
+        uuid: randomUUID(),
+        session_id: getSessionId(),
+      })
+    }
+  }
+  statusListeners.add(rateLimitListener)
+
+  // Messages for internal tracking, directly mutated by ask(). These messages
+  // include Assistant, User, Attachment, and Progress messages.
+  // TODO: Clean up this code to avoid passing around a mutable array.
+  const mutableMessages: Message[] = initialMessages
+
+  // Seed the readFileState cache from the transcript (content the model saw,
+  // with message timestamps) so getChangedFiles can detect external edits.
+  // This cache instance must persist across ask() calls, since the edit tool
+  // relies on this as a global state.
+  let readFileState = extractReadFilesFromMessages(
+    initialMessages,
+    cwd(),
+    READ_FILE_STATE_CACHE_SIZE,
+  )
+
+  // Client-supplied readFileState seeds (via seed_read_state control request).
+  // The stdin IIFE runs concurrently with ask() — a seed arriving mid-turn
+  // would be lost to ask()'s clone-then-replace (QueryEngine.ts finally block)
+  // if written directly into readFileState. Instead, seeds land here, merge
+  // into getReadFileCache's view (readFileState-wins-ties: seeds fill gaps),
+  // and are re-applied then CLEARED in setReadFileCache. One-shot: each seed
+  // survives exactly one clone-replace cycle, then becomes a regular
+  // readFileState entry subject to compact's clear like everything else.
+  const pendingSeeds = createFileStateCacheWithSizeLimit(
+    READ_FILE_STATE_CACHE_SIZE,
+  )
+
+  // Auto-resume interrupted turns on restart so CC continues from where it
+  // left off without requiring the SDK to re-send the prompt.
+  const resumeInterruptedTurnEnv =
+    process.env.CLAUDE_CODE_RESUME_INTERRUPTED_TURN
+  if (
+    turnInterruptionState &&
+    turnInterruptionState.kind !== 'none' &&
+    resumeInterruptedTurnEnv
+  ) {
+    logForDebugging(
+      `[print.ts] Auto-resuming interrupted turn (kind: ${turnInterruptionState.kind})`,
+    )
+
+    // Remove the interrupted message and its sentinel, then re-enqueue so
+    // the model sees it exactly once. For mid-turn interruptions, the
+    // deserialization layer transforms them into interrupted_prompt by
+    // appending a synthetic "Continue from where you left off." message.
+    removeInterruptedMessage(mutableMessages, turnInterruptionState.message)
+    enqueue({
+      mode: 'prompt',
+      value: turnInterruptionState.message.message.content,
+      uuid: randomUUID(),
+    })
+  }
+
+  const modelOptions = getModelOptions()
+  const modelInfos = modelOptions.map(option => {
+    const modelId = option.value === null ? 'default' : option.value
+    const resolvedModel =
+      modelId === 'default'
+        ? getDefaultMainLoopModel()
+        : parseUserSpecifiedModel(modelId)
+    const hasEffort = modelSupportsEffort(resolvedModel)
+    const hasAdaptiveThinking = modelSupportsAdaptiveThinking(resolvedModel)
+    const hasFastMode = isFastModeSupportedByModel(option.value)
+    const hasAutoMode = modelSupportsAutoMode(resolvedModel)
+    return {
+      value: modelId,
+      displayName: option.label,
+      description: option.description,
+      ...(hasEffort && {
+        supportsEffort: true,
+        supportedEffortLevels: modelSupportsMaxEffort(resolvedModel)
+          ? [...EFFORT_LEVELS]
+          : EFFORT_LEVELS.filter(l => l !== 'max'),
+      }),
+      ...(hasAdaptiveThinking && { supportsAdaptiveThinking: true }),
+      ...(hasFastMode && { supportsFastMode: true }),
+      ...(hasAutoMode && { supportsAutoMode: true }),
+    }
+  })
+  let activeUserSpecifiedModel = options.userSpecifiedModel
+
+  // Append model-switch breadcrumb messages to the in-memory transcript
+  // (mutableMessages) and replay any breadcrumb carrying local command
+  // stdout to the SDK output stream so stream consumers see the switch.
+  function injectModelSwitchBreadcrumbs(
+    modelArg: string,
+    resolvedModel: string,
+  ): void {
+    const breadcrumbs = createModelSwitchBreadcrumbs(
+      modelArg,
+      modelDisplayString(resolvedModel),
+    )
+    mutableMessages.push(...breadcrumbs)
+    for (const crumb of breadcrumbs) {
+      // Only string-content crumbs containing the local-command-stdout tag
+      // are replayed; all other breadcrumbs stay transcript-only.
+      if (
+        typeof crumb.message.content === 'string' &&
+        crumb.message.content.includes(`<${LOCAL_COMMAND_STDOUT_TAG}>`)
+      ) {
+        output.enqueue({
+          type: 'user',
+          message: crumb.message,
+          session_id: getSessionId(),
+          parent_tool_use_id: null,
+          uuid: crumb.uuid,
+          timestamp: crumb.timestamp,
+          isReplay: true,
+        } satisfies SDKUserMessageReplay)
+      }
+    }
+  }
+
+  // Cache SDK MCP clients to avoid reconnecting on each run
+  let sdkClients: MCPServerConnection[] = []
+  let sdkTools: Tools = []
+
+  // Track which MCP clients have had elicitation handlers registered
+  const elicitationRegistered = new Set<string>()
+
+  /**
+   * Register elicitation request/completion handlers on connected MCP clients
+   * that haven't been registered yet. SDK MCP servers are excluded because they
+   * route through SdkControlClientTransport. Hooks run first (matching REPL
+   * behavior); if no hook responds, the request is forwarded to the SDK
+   * consumer via the control protocol. Idempotent per server name — the
+   * elicitationRegistered set prevents double registration across calls.
+   */
+  function registerElicitationHandlers(clients: MCPServerConnection[]): void {
+    for (const connection of clients) {
+      if (
+        connection.type !== 'connected' ||
+        elicitationRegistered.has(connection.name)
+      ) {
+        continue
+      }
+      // Skip SDK MCP servers — elicitation flows through SdkControlClientTransport
+      if (connection.config.type === 'sdk') {
+        continue
+      }
+      const serverName = connection.name
+
+      // Wrapped in try/catch because setRequestHandler throws if the client wasn't
+      // created with elicitation capability declared (e.g., SDK-created clients).
+      try {
+        connection.client.setRequestHandler(
+          ElicitRequestSchema,
+          async (request, extra) => {
+            logMCPDebug(
+              serverName,
+              `Elicitation request received in print mode: ${jsonStringify(request)}`,
+            )
+
+            // Anything other than an explicit 'url' mode is treated as a form.
+            const mode = request.params.mode === 'url' ? 'url' : 'form'
+
+            logEvent('tengu_mcp_elicitation_shown', {
+              mode: mode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+            })
+
+            // Run elicitation hooks first — they can provide a response programmatically
+            const hookResponse = await runElicitationHooks(
+              serverName,
+              request.params,
+              extra.signal,
+            )
+            if (hookResponse) {
+              logMCPDebug(
+                serverName,
+                `Elicitation resolved by hook: ${jsonStringify(hookResponse)}`,
+              )
+              logEvent('tengu_mcp_elicitation_response', {
+                mode: mode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+                action:
+                  hookResponse.action as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+              })
+              return hookResponse
+            }
+
+            // Delegate to SDK consumer via control protocol.
+            // Params are loosely typed upstream, so probe optional fields
+            // with 'in' before reading them.
+            const url =
+              'url' in request.params
+                ? (request.params.url as string)
+                : undefined
+            const requestedSchema =
+              'requestedSchema' in request.params
+                ? (request.params.requestedSchema as
+                    | Record<string, unknown>
+                    | undefined)
+                : undefined
+
+            const elicitationId =
+              'elicitationId' in request.params
+                ? (request.params.elicitationId as string | undefined)
+                : undefined
+
+            const rawResult = await structuredIO.handleElicitation(
+              serverName,
+              request.params.message,
+              requestedSchema,
+              extra.signal,
+              mode,
+              url,
+              elicitationId,
+            )
+
+            // Result hooks get the final say before the response is returned
+            // to the MCP server.
+            const result = await runElicitationResultHooks(
+              serverName,
+              rawResult,
+              extra.signal,
+              mode,
+              elicitationId,
+            )
+
+            logEvent('tengu_mcp_elicitation_response', {
+              mode: mode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+              action:
+                result.action as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+            })
+            return result
+          },
+        )
+
+        // Surface completion notifications to SDK consumers (URL mode)
+        connection.client.setNotificationHandler(
+          ElicitationCompleteNotificationSchema,
+          notification => {
+            const { elicitationId } = notification.params
+            logMCPDebug(
+              serverName,
+              `Elicitation completion notification: ${elicitationId}`,
+            )
+            void executeNotificationHooks({
+              message: `MCP server "${serverName}" confirmed elicitation ${elicitationId} complete`,
+              notificationType: 'elicitation_complete',
+            })
+            output.enqueue({
+              type: 'system',
+              subtype: 'elicitation_complete',
+              mcp_server_name: serverName,
+              elicitation_id: elicitationId,
+              uuid: randomUUID(),
+              session_id: getSessionId(),
+            })
+          },
+        )
+
+        // Only mark registered once both handlers were installed successfully.
+        elicitationRegistered.add(serverName)
+      } catch {
+        // setRequestHandler throws if the client wasn't created with
+        // elicitation capability — skip silently
+      }
+    }
+  }
+
+  // Reconcile SDK MCP connections against the current sdkMcpConfigs:
+  // re-initializes all SDK clients when servers were added/removed, or when
+  // any existing client is still 'pending' or has 'failed', then mirrors the
+  // resulting tools into appState so subagents can see them.
+  async function updateSdkMcp() {
+    // Check if SDK MCP servers need to be updated (new servers added or removed)
+    const currentServerNames = new Set(Object.keys(sdkMcpConfigs))
+    const connectedServerNames = new Set(sdkClients.map(c => c.name))
+
+    // Check if there are any differences (additions or removals)
+    const hasNewServers = Array.from(currentServerNames).some(
+      name => !connectedServerNames.has(name),
+    )
+    const hasRemovedServers = Array.from(connectedServerNames).some(
+      name => !currentServerNames.has(name),
+    )
+    // Check if any SDK clients are pending and need to be upgraded
+    const hasPendingSdkClients = sdkClients.some(c => c.type === 'pending')
+    // Check if any SDK clients failed their handshake and need to be retried.
+    // Without this, a client that lands in 'failed' (e.g. handshake timeout on
+    // a WS reconnect race) stays failed forever — its name satisfies the
+    // connectedServerNames diff but it contributes zero tools.
+    const hasFailedSdkClients = sdkClients.some(c => c.type === 'failed')
+
+    const haveServersChanged =
+      hasNewServers ||
+      hasRemovedServers ||
+      hasPendingSdkClients ||
+      hasFailedSdkClients
+
+    if (haveServersChanged) {
+      // Clean up removed servers (only connected ones expose cleanup()).
+      for (const client of sdkClients) {
+        if (!currentServerNames.has(client.name)) {
+          if (client.type === 'connected') {
+            await client.cleanup()
+          }
+        }
+      }
+
+      // Re-initialize all SDK MCP servers with current config
+      const sdkSetup = await setupSdkMcpClients(
+        sdkMcpConfigs,
+        (serverName, message) =>
+          structuredIO.sendMcpMessage(serverName, message),
+      )
+      sdkClients = sdkSetup.clients
+      sdkTools = sdkSetup.tools
+
+      // Store SDK MCP tools in appState so subagents can access them via
+      // assembleToolPool. Only tools are stored here — SDK clients are already
+      // merged separately in the query loop (allMcpClients) and mcp_status handler.
+      // Use both old (connectedServerNames) and new (currentServerNames) to remove
+      // stale SDK tools when servers are added or removed.
+      const allSdkNames = uniq([...connectedServerNames, ...currentServerNames])
+      setAppState(prev => ({
+        ...prev,
+        mcp: {
+          ...prev.mcp,
+          tools: [
+            ...prev.mcp.tools.filter(
+              t =>
+                !allSdkNames.some(name =>
+                  t.name.startsWith(getMcpPrefix(name)),
+                ),
+            ),
+            ...sdkTools,
+          ],
+        },
+      }))
+
+      // Set up the special internal VSCode MCP server if necessary.
+      setupVscodeSdkMcp(sdkClients)
+    }
+  }
+
+  void updateSdkMcp()
+
+  // State for dynamically added MCP servers (via mcp_set_servers control message)
+  // These are separate from SDK MCP servers and support all transport types
+  let dynamicMcpState: DynamicMcpState = {
+    clients: [],
+    tools: [],
+    configs: {},
+  }
+
+  // Shared tool assembly for ask() and the get_context_usage control request.
+  // Closes over the mutable sdkTools/dynamicMcpState bindings so both call
+  // sites see late-connecting servers.
+  const buildAllTools = (appState: AppState): Tools => {
+    const assembledTools = assembleToolPool(
+      appState.toolPermissionContext,
+      appState.mcp.tools,
+    )
+    // Merge built-ins, SDK tools, and dynamic MCP tools against the
+    // permission-filtered pool for the current mode, then dedupe by name.
+    let allTools = uniqBy(
+      mergeAndFilterTools(
+        [...tools, ...sdkTools, ...dynamicMcpState.tools],
+        assembledTools,
+        appState.toolPermissionContext.mode,
+      ),
+      'name',
+    )
+    // The permission prompt tool is SDK plumbing, never model-callable.
+    if (options.permissionPromptToolName) {
+      allTools = allTools.filter(
+        tool => !toolMatchesName(tool, options.permissionPromptToolName!),
+      )
+    }
+    const initJsonSchema = getInitJsonSchema()
+    // An init-provided JSON schema (when no explicit jsonSchema option was
+    // passed) is exposed to the model as a synthetic structured-output tool.
+    if (initJsonSchema && !options.jsonSchema) {
+      const syntheticOutputResult = createSyntheticOutputTool(initJsonSchema)
+      if ('tool' in syntheticOutputResult) {
+        allTools = [...allTools, syntheticOutputResult.tool]
+      }
+    }
+    return allTools
+  }
+
+  // Bridge handle for remote-control (SDK control message).
+  // Mirrors the REPL's useReplBridge hook: the handle is created when
+  // `remote_control` is enabled and torn down when disabled.
+  let bridgeHandle: ReplBridgeHandle | null = null
+  // Cursor into mutableMessages — tracks how far we've forwarded.
+  // Same index-based diff as useReplBridge's lastWrittenIndexRef.
+  let bridgeLastForwardedIndex = 0
+
+  // Forward new messages from mutableMessages to the bridge.
+  // Called incrementally during each turn (so claude.ai sees progress
+  // and stays alive during permission waits) and again after the turn.
+  //
+  // writeMessages has its own UUID-based dedup (initialMessageUUIDs,
+  // recentPostedUUIDs) — the index cursor here is a pre-filter to avoid
+  // O(n) re-scanning of already-sent messages on every call.
+  function forwardMessagesToBridge(): void {
+    if (!bridgeHandle) return
+    // Guard against mutableMessages shrinking (compaction truncates it).
+    const startIndex = Math.min(
+      bridgeLastForwardedIndex,
+      mutableMessages.length,
+    )
+    // Only user/assistant messages cross the bridge; everything else is
+    // skipped but still advances the cursor below.
+    const newMessages = mutableMessages
+      .slice(startIndex)
+      .filter(m => m.type === 'user' || m.type === 'assistant')
+    bridgeLastForwardedIndex = mutableMessages.length
+    if (newMessages.length > 0) {
+      bridgeHandle.writeMessages(newMessages)
+    }
+  }
+
+  // Helper to apply MCP server changes - used by both mcp_set_servers control message
+  // and background plugin installation.
+  // NOTE: Nested function required - mutates closure state (sdkMcpConfigs, sdkClients, etc.)
+  let mcpChangesPromise: Promise<{
+    response: SDKControlMcpSetServersResponse
+    sdkServersChanged: boolean
+  }> = Promise.resolve({
+    response: {
+      added: [] as string[],
+      removed: [] as string[],
+      errors: {} as Record<string, string>,
+    },
+    sdkServersChanged: false,
+  })
+
+  /**
+   * Apply a new full set of MCP server configs (SDK + dynamic transports).
+   * Used by both the mcp_set_servers control message and background plugin
+   * installation. Calls are serialized through mcpChangesPromise so two
+   * concurrent callers cannot interleave their mutations of the shared
+   * closure state (sdkMcpConfigs, sdkClients, sdkTools, dynamicMcpState).
+   */
+  function applyMcpServerChanges(
+    servers: Record<string, McpServerConfigForProcessTransport>,
+  ): Promise<{
+    response: SDKControlMcpSetServersResponse
+    sdkServersChanged: boolean
+  }> {
+    // Serialize calls to prevent race conditions between concurrent callers
+    // (background plugin install and mcp_set_servers control messages)
+    const doWork = async (): Promise<{
+      response: SDKControlMcpSetServersResponse
+      sdkServersChanged: boolean
+    }> => {
+      const oldSdkClientNames = new Set(sdkClients.map(c => c.name))
+
+      const result = await handleMcpSetServers(
+        servers,
+        { configs: sdkMcpConfigs, clients: sdkClients, tools: sdkTools },
+        dynamicMcpState,
+        setAppState,
+      )
+
+      // Update SDK state (need to mutate sdkMcpConfigs since it's shared)
+      for (const key of Object.keys(sdkMcpConfigs)) {
+        delete sdkMcpConfigs[key]
+      }
+      Object.assign(sdkMcpConfigs, result.newSdkState.configs)
+      sdkClients = result.newSdkState.clients
+      sdkTools = result.newSdkState.tools
+      dynamicMcpState = result.newDynamicState
+
+      // Keep appState.mcp.tools in sync so subagents can see SDK MCP tools.
+      // Use both old and new SDK client names to remove stale tools.
+      if (result.sdkServersChanged) {
+        const newSdkClientNames = new Set(sdkClients.map(c => c.name))
+        const allSdkNames = uniq([...oldSdkClientNames, ...newSdkClientNames])
+        setAppState(prev => ({
+          ...prev,
+          mcp: {
+            ...prev.mcp,
+            tools: [
+              ...prev.mcp.tools.filter(
+                t =>
+                  !allSdkNames.some(name =>
+                    t.name.startsWith(getMcpPrefix(name)),
+                  ),
+              ),
+              ...sdkTools,
+            ],
+          },
+        }))
+      }
+
+      return {
+        response: result.response,
+        sdkServersChanged: result.sdkServersChanged,
+      }
+    }
+
+    // Chain onto the previous apply whether it fulfilled or rejected so one
+    // failure can't wedge the queue; each caller still observes its own
+    // call's rejection via the promise returned here.
+    mcpChangesPromise = mcpChangesPromise.then(doWork, doWork)
+    return mcpChangesPromise
+  }
+
+  // Build McpServerStatus[] for control responses. Shared by mcp_status and
+  // reload_plugins handlers. Reads closure state: sdkClients, dynamicMcpState.
+  // Dynamic clients are appended only when their name doesn't collide with an
+  // appState or SDK client of the same name.
+  function buildMcpServerStatuses(): McpServerStatus[] {
+    const currentAppState = getAppState()
+    const currentMcpClients = currentAppState.mcp.clients
+    const allMcpTools = uniqBy(
+      [...currentAppState.mcp.tools, ...dynamicMcpState.tools],
+      'name',
+    )
+    const existingNames = new Set([
+      ...currentMcpClients.map(c => c.name),
+      ...sdkClients.map(c => c.name),
+    ])
+    return [
+      ...currentMcpClients,
+      ...sdkClients,
+      ...dynamicMcpState.clients.filter(c => !existingNames.has(c.name)),
+    ].map(connection => {
+      // Stays undefined for transport types not enumerated below (e.g. 'sdk').
+      let config
+      if (
+        connection.config.type === 'sse' ||
+        connection.config.type === 'http'
+      ) {
+        config = {
+          type: connection.config.type,
+          url: connection.config.url,
+          headers: connection.config.headers,
+          oauth: connection.config.oauth,
+        }
+      } else if (connection.config.type === 'claudeai-proxy') {
+        config = {
+          type: 'claudeai-proxy' as const,
+          url: connection.config.url,
+          id: connection.config.id,
+        }
+      } else if (
+        connection.config.type === 'stdio' ||
+        connection.config.type === undefined
+      ) {
+        config = {
+          type: 'stdio' as const,
+          command: connection.config.command,
+          args: connection.config.args,
+        }
+      }
+      // Tool annotations are only reported for connected servers; falsy
+      // annotation values collapse to undefined so they're omitted from JSON.
+      const serverTools =
+        connection.type === 'connected'
+          ? filterToolsByServer(allMcpTools, connection.name).map(tool => ({
+              name: tool.mcpInfo?.toolName ?? tool.name,
+              annotations: {
+                readOnly: tool.isReadOnly({}) || undefined,
+                destructive: tool.isDestructive?.({}) || undefined,
+                openWorld: tool.isOpenWorld?.({}) || undefined,
+              },
+            }))
+          : undefined
+      // Capabilities passthrough with allowlist pre-filter. The IDE reads
+      // experimental['claude/channel'] to decide whether to show the
+      // Enable-channel prompt — only echo it if channel_enable would
+      // actually pass the allowlist. Not a security boundary (the
+      // handler re-runs the full gate); just avoids dead buttons.
+      let capabilities: { experimental?: Record<string, unknown> } | undefined
+      if (
+        (feature('KAIROS') || feature('KAIROS_CHANNELS')) &&
+        connection.type === 'connected' &&
+        connection.capabilities.experimental
+      ) {
+        const exp = { ...connection.capabilities.experimental }
+        if (
+          exp['claude/channel'] &&
+          (!isChannelsEnabled() ||
+            !isChannelAllowlisted(connection.config.pluginSource))
+        ) {
+          delete exp['claude/channel']
+        }
+        if (Object.keys(exp).length > 0) {
+          capabilities = { experimental: exp }
+        }
+      }
+      return {
+        name: connection.name,
+        status: connection.type,
+        serverInfo:
+          connection.type === 'connected' ? connection.serverInfo : undefined,
+        error: connection.type === 'failed' ? connection.error : undefined,
+        config,
+        scope: connection.config.scope,
+        tools: serverTools,
+        capabilities,
+      }
+    })
+  }
+
+  // NOTE: Nested function required - needs closure access to applyMcpServerChanges and updateSdkMcp
+  // Best-effort background task: waits for user settings (when downloading is
+  // enabled and running remotely) and managed settings to load, installs any
+  // missing headless plugins, then applies the resulting plugin MCP diff.
+  // All failures are logged and swallowed — plugin install must never take
+  // down a headless session.
+  async function installPluginsAndApplyMcpInBackground(): Promise<void> {
+    try {
+      // Join point for user settings (fired at runHeadless entry) and managed
+      // settings (fired in main.tsx preAction). downloadUserSettings() caches
+      // its promise so this awaits the same in-flight request.
+      await Promise.all([
+        feature('DOWNLOAD_USER_SETTINGS') &&
+        (isEnvTruthy(process.env.CLAUDE_CODE_REMOTE) || getIsRemoteMode())
+          ? withDiagnosticsTiming('headless_user_settings_download', () =>
+              downloadUserSettings(),
+            )
+          : Promise.resolve(),
+        withDiagnosticsTiming('headless_managed_settings_wait', () =>
+          waitForRemoteManagedSettingsToLoad(),
+        ),
+      ])
+
+      const pluginsInstalled = await installPluginsForHeadless()
+
+      // Only reconcile MCP config when something was actually installed.
+      if (pluginsInstalled) {
+        await applyPluginMcpDiff()
+      }
+    } catch (error) {
+      logError(error)
+    }
+  }
+
+  // Background plugin installation for all headless users
+  // Installs marketplaces from extraKnownMarketplaces and missing enabled plugins
+  // CLAUDE_CODE_SYNC_PLUGIN_INSTALL=true: resolved in run() before the first
+  // query so plugins are guaranteed available on the first ask().
+  // Non-null only in sync-install mode; run() awaits it once, then nulls it.
+  let pluginInstallPromise: Promise<void> | null = null
+  // --bare / SIMPLE: skip plugin install. Scripted calls don't add plugins
+  // mid-session; the next interactive run reconciles.
+  if (!isBareMode()) {
+    if (isEnvTruthy(process.env.CLAUDE_CODE_SYNC_PLUGIN_INSTALL)) {
+      // Sync mode: start eagerly so install overlaps the rest of init.
+      pluginInstallPromise = installPluginsAndApplyMcpInBackground()
+    } else {
+      // Background mode: fire-and-forget; errors are handled inside the function.
+      void installPluginsAndApplyMcpInBackground()
+    }
+  }
+
+  // Idle timeout management
+  // The predicate gates the timeout callback: it only counts as idle while
+  // no run() is in flight.
+  const idleTimeout = createIdleTimeoutManager(() => !running)
+
+  // Mutable commands and agents for hot reloading
+  // Reassigned by refreshPluginState() and the skill-change subscription;
+  // the query loop reads them fresh on every turn.
+  let currentCommands = commands
+  let currentAgents = agents
+
+  // Clear all plugin-related caches, reload commands/agents/hooks.
+  // Called after CLAUDE_CODE_SYNC_PLUGIN_INSTALL completes (before first query)
+  // and after non-sync background install finishes.
+  // refreshActivePlugins calls clearAllCaches() which is required because
+  // loadAllPlugins() may have run during main.tsx startup BEFORE managed
+  // settings were fetched. Without clearing, getCommands() would rebuild
+  // from a stale plugin list.
+  async function refreshPluginState(): Promise<void> {
+    // refreshActivePlugins handles the full cache sweep (clearAllCaches),
+    // reloads all plugin component loaders, writes AppState.plugins +
+    // AppState.agentDefinitions, registers hooks, and bumps mcp.pluginReconnectKey.
+    const { agentDefinitions: freshAgentDefs } =
+      await refreshActivePlugins(setAppState)
+
+    // Headless-specific: currentCommands/currentAgents are local mutable refs
+    // captured by the query loop (REPL uses AppState instead). getCommands is
+    // fresh because refreshActivePlugins cleared its cache.
+    currentCommands = await getCommands(cwd())
+
+    // Preserve SDK-provided agents (--agents CLI flag or SDK initialize
+    // control_request) — both inject via parseAgentsFromJson with
+    // source='flagSettings'. loadMarkdownFilesForSubdir never assigns this
+    // source, so it cleanly discriminates "injected, not disk-loadable".
+    //
+    // The previous filter used a negative set-diff (!freshAgentTypes.has(a))
+    // which also matched plugin agents that were in the poisoned initial
+    // currentAgents but correctly excluded from freshAgentDefs after managed
+    // settings applied — leaking policy-blocked agents into the init message.
+    // See gh-23085: isBridgeEnabled() at Commander-definition time poisoned
+    // the settings cache before setEligibility(true) ran.
+    const sdkAgents = currentAgents.filter(a => a.source === 'flagSettings')
+    // freshAgentDefs.allAgents is the post-refresh set; SDK-injected agents
+    // are appended back since the refresh cannot reload them from disk.
+    currentAgents = [...freshAgentDefs.allAgents, ...sdkAgents]
+  }
+
+  // Re-diff MCP configs after plugin state changes. Filters to
+  // process-transport-supported types and carries SDK-mode servers through
+  // so applyMcpServerChanges' diff doesn't close their transports.
+  // Nested: needs closure access to sdkMcpConfigs, applyMcpServerChanges,
+  // updateSdkMcp.
+  async function applyPluginMcpDiff(): Promise<void> {
+    const { servers: newConfigs } = await getAllMcpConfigs()
+    const supportedConfigs: Record<string, McpServerConfigForProcessTransport> =
+      {}
+    // Keep only configs assignable to McpServerConfigForProcessTransport
+    // (undefined type defaults to stdio); anything else is dropped.
+    for (const [name, config] of Object.entries(newConfigs)) {
+      const type = config.type
+      if (
+        type === undefined ||
+        type === 'stdio' ||
+        type === 'sse' ||
+        type === 'http' ||
+        type === 'sdk'
+      ) {
+        supportedConfigs[name] = config
+      }
+    }
+    // Carry through SDK-mode servers absent from the fresh config set so the
+    // diff below doesn't treat them as removed.
+    for (const [name, config] of Object.entries(sdkMcpConfigs)) {
+      if (config.type === 'sdk' && !(name in supportedConfigs)) {
+        supportedConfigs[name] = config
+      }
+    }
+    const { response, sdkServersChanged } =
+      await applyMcpServerChanges(supportedConfigs)
+    if (sdkServersChanged) {
+      void updateSdkMcp()
+    }
+    logForDebugging(
+      `Headless MCP refresh: added=${response.added.length}, removed=${response.removed.length}`,
+    )
+  }
+
+  // Subscribe to skill changes for hot reloading
+  // On change: invalidate the commands cache and asynchronously swap in the
+  // rebuilt command list; the next turn picks up currentCommands.
+  const unsubscribeSkillChanges = skillChangeDetector.subscribe(() => {
+    clearCommandsCache()
+    void getCommands(cwd()).then(newCommands => {
+      currentCommands = newCommands
+    })
+  })
+
+  // Proactive mode: schedule a tick to keep the model looping autonomously.
+  // setTimeout(0) yields to the event loop so pending stdin messages
+  // (interrupts, user messages) are processed before the tick fires.
+  // undefined when neither PROACTIVE nor KAIROS is enabled; callers guard
+  // with the same feature check before invoking.
+  const scheduleProactiveTick =
+    feature('PROACTIVE') || feature('KAIROS')
+      ? () => {
+          setTimeout(() => {
+            // Skip if proactive mode was deactivated/paused or input closed
+            // between scheduling and firing.
+            if (
+              !proactiveModule?.isProactiveActive() ||
+              proactiveModule.isProactivePaused() ||
+              inputClosed
+            ) {
+              return
+            }
+            // Tick is a meta prompt at 'later' priority carrying a
+            // timestamped <TICK_TAG> payload.
+            const tickContent = `<${TICK_TAG}>${new Date().toLocaleTimeString()}</${TICK_TAG}>`
+            enqueue({
+              mode: 'prompt' as const,
+              value: tickContent,
+              uuid: randomUUID(),
+              priority: 'later',
+              isMeta: true,
+            })
+            void run()
+          }, 0)
+        }
+      : undefined
+
+  // Abort the current operation when a 'now' priority message arrives.
+  // abortController is assigned per-turn inside run() and is null between
+  // turns, so this is a no-op while idle.
+  subscribeToCommandQueue(() => {
+    if (abortController && getCommandsByMaxPriority('now').length > 0) {
+      abortController.abort('interrupt')
+    }
+  })
+
+  const run = async () => {
+    if (running) {
+      return
+    }
+
+    running = true
+    runPhase = undefined
+    notifySessionStateChanged('running')
+    idleTimeout.stop()
+
+    headlessProfilerCheckpoint('run_entry')
+    // TODO(custom-tool-refactor): Should move to the init message, like browser
+
+    await updateSdkMcp()
+    headlessProfilerCheckpoint('after_updateSdkMcp')
+
+    // Resolve deferred plugin installation (CLAUDE_CODE_SYNC_PLUGIN_INSTALL).
+    // The promise was started eagerly so installation overlaps with other init.
+    // Awaiting here guarantees plugins are available before the first ask().
+    // If CLAUDE_CODE_SYNC_PLUGIN_INSTALL_TIMEOUT_MS is set, races against that
+    // deadline and proceeds without plugins on timeout (logging an error).
+    if (pluginInstallPromise) {
+      const timeoutMs = parseInt(
+        process.env.CLAUDE_CODE_SYNC_PLUGIN_INSTALL_TIMEOUT_MS || '',
+        10,
+      )
+      if (timeoutMs > 0) {
+        const timeout = sleep(timeoutMs).then(() => 'timeout' as const)
+        const result = await Promise.race([pluginInstallPromise, timeout])
+        if (result === 'timeout') {
+          logError(
+            new Error(
+              `CLAUDE_CODE_SYNC_PLUGIN_INSTALL: plugin installation timed out after ${timeoutMs}ms`,
+            ),
+          )
+          logEvent('tengu_sync_plugin_install_timeout', {
+            timeout_ms: timeoutMs,
+          })
+        }
+      } else {
+        await pluginInstallPromise
+      }
+      pluginInstallPromise = null
+
+      // Refresh commands, agents, and hooks now that plugins are installed
+      await refreshPluginState()
+
+      // Set up hot-reload for plugin hooks now that the initial install is done.
+      // In sync-install mode, setup.ts skips this to avoid racing with the install.
+      const { setupPluginHookHotReload } = await import(
+        '../utils/plugins/loadPluginHooks.js'
+      )
+      setupPluginHookHotReload()
+    }
+
+    // Only main-thread commands (agentId===undefined) — subagent
+    // notifications are drained by the subagent's mid-turn gate in query.ts.
+    // Defined outside the try block so it's accessible in the post-finally
+    // queue re-checks at the bottom of run().
+    const isMainThread = (cmd: QueuedCommand) => cmd.agentId === undefined
+
+    try {
+      let command: QueuedCommand | undefined
+      let waitingForAgents = false
+
+      // Extract command processing into a named function for the do-while pattern.
+      // Drains the queue, batching consecutive prompt-mode commands into one
+      // ask() call so messages that queued up during a long turn coalesce
+      // into a single follow-up turn instead of N separate turns.
+      // Loops until dequeue() returns undefined; each iteration runs one
+      // full ask() turn (plus per-turn bridge forwarding, persistence, and
+      // suggestion generation).
+      const drainCommandQueue = async () => {
+        while ((command = dequeue(isMainThread))) {
+          if (
+            command.mode !== 'prompt' &&
+            command.mode !== 'orphaned-permission' &&
+            command.mode !== 'task-notification'
+          ) {
+            throw new Error(
+              'only prompt commands are supported in streaming mode',
+            )
+          }
+
+          // Non-prompt commands (task-notification, orphaned-permission) carry
+          // side effects or orphanedPermission state, so they process singly.
+          // Prompt commands greedily collect followers with matching workload.
+          const batch: QueuedCommand[] = [command]
+          if (command.mode === 'prompt') {
+            while (canBatchWith(command, peek(isMainThread))) {
+              batch.push(dequeue(isMainThread)!)
+            }
+            if (batch.length > 1) {
+              // Merge the batch into one command: joined prompt text, keeping
+              // the last defined uuid so the QueryEngine ack matches the
+              // newest message.
+              command = {
+                ...command,
+                value: joinPromptValues(batch.map(c => c.value)),
+                uuid: batch.findLast(c => c.uuid)?.uuid ?? command.uuid,
+              }
+            }
+          }
+          const batchUuids = batch.map(c => c.uuid).filter(u => u !== undefined)
+
+          // QueryEngine will emit a replay for command.uuid (the last uuid in
+          // the batch) via its messagesToAck path. Emit replays here for the
+          // rest so consumers that track per-uuid delivery (clank's
+          // asyncMessages footer, CCR) see an ack for every message they sent,
+          // not just the one that survived the merge.
+          if (options.replayUserMessages && batch.length > 1) {
+            for (const c of batch) {
+              if (c.uuid && c.uuid !== command.uuid) {
+                output.enqueue({
+                  type: 'user',
+                  message: { role: 'user', content: c.value },
+                  session_id: getSessionId(),
+                  parent_tool_use_id: null,
+                  uuid: c.uuid,
+                  isReplay: true,
+                } satisfies SDKUserMessageReplay)
+              }
+            }
+          }
+
+          // Combine all MCP clients. appState.mcp is populated incrementally
+          // per-server by main.tsx (mirrors useManageMCPConnections). Reading
+          // fresh per-command means late-connecting servers are visible on the
+          // next turn. registerElicitationHandlers is idempotent (tracking set).
+          const appState = getAppState()
+          const allMcpClients = [
+            ...appState.mcp.clients,
+            ...sdkClients,
+            ...dynamicMcpState.clients,
+          ]
+          registerElicitationHandlers(allMcpClients)
+          // Channel handlers for servers allowlisted via --channels at
+          // construction time (or enableChannel() mid-session). Runs every
+          // turn like registerElicitationHandlers — idempotent per-client
+          // (setNotificationHandler replaces, not stacks) and no-ops for
+          // non-allowlisted servers (one feature-flag check).
+          for (const client of allMcpClients) {
+            reregisterChannelHandlerAfterReconnect(client)
+          }
+
+          const allTools = buildAllTools(appState)
+
+          // Lifecycle 'started' for every message in the batch, not just the
+          // merged survivor.
+          for (const uuid of batchUuids) {
+            notifyCommandLifecycle(uuid, 'started')
+          }
+
+          // Task notifications arrive when background agents complete.
+          // Emit an SDK system event for SDK consumers, then fall through
+          // to ask() so the model sees the agent result and can act on it.
+          // This matches TUI behavior where useQueueProcessor always feeds
+          // notifications to the model regardless of coordinator mode.
+          if (command.mode === 'task-notification') {
+            const notificationText =
+              typeof command.value === 'string' ? command.value : ''
+            // Parse the XML-formatted notification
+            const taskIdMatch = notificationText.match(
+              /<task-id>([^<]+)<\/task-id>/,
+            )
+            const toolUseIdMatch = notificationText.match(
+              /<tool-use-id>([^<]+)<\/tool-use-id>/,
+            )
+            const outputFileMatch = notificationText.match(
+              /<output-file>([^<]+)<\/output-file>/,
+            )
+            const statusMatch = notificationText.match(
+              /<status>([^<]+)<\/status>/,
+            )
+            const summaryMatch = notificationText.match(
+              /<summary>([^<]+)<\/summary>/,
+            )
+
+            // Narrowing guard for the <status> payload; 'killed' is mapped to
+            // 'stopped' below, anything unrecognized falls back to 'completed'.
+            const isValidStatus = (
+              s: string | undefined,
+            ): s is 'completed' | 'failed' | 'stopped' | 'killed' =>
+              s === 'completed' ||
+              s === 'failed' ||
+              s === 'stopped' ||
+              s === 'killed'
+            const rawStatus = statusMatch?.[1]
+            const status = isValidStatus(rawStatus)
+              ? rawStatus === 'killed'
+                ? 'stopped'
+                : rawStatus
+              : 'completed'
+
+            const usageMatch = notificationText.match(
+              /<usage>([\s\S]*?)<\/usage>/,
+            )
+            const usageContent = usageMatch?.[1] ?? ''
+            const totalTokensMatch = usageContent.match(
+              /<total_tokens>(\d+)<\/total_tokens>/,
+            )
+            const toolUsesMatch = usageContent.match(
+              /<tool_uses>(\d+)<\/tool_uses>/,
+            )
+            const durationMsMatch = usageContent.match(
+              /<duration_ms>(\d+)<\/duration_ms>/,
+            )
+
+            // Only emit a task_notification SDK event when a <status> tag is
+            // present — that means this is a terminal notification (completed/
+            // failed/stopped). Stream events from enqueueStreamEvent carry no
+            // <status> (they're progress pings); emitting them here would
+            // default to 'completed' and falsely close the task for SDK
+            // consumers. Terminal bookends are now emitted directly via
+            // emitTaskTerminatedSdk, so skipping statusless events is safe.
+            if (statusMatch) {
+              output.enqueue({
+                type: 'system',
+                subtype: 'task_notification',
+                task_id: taskIdMatch?.[1] ?? '',
+                tool_use_id: toolUseIdMatch?.[1],
+                status,
+                output_file: outputFileMatch?.[1] ?? '',
+                summary: summaryMatch?.[1] ?? '',
+                usage:
+                  totalTokensMatch && toolUsesMatch
+                    ? {
+                        total_tokens: parseInt(totalTokensMatch[1]!, 10),
+                        tool_uses: parseInt(toolUsesMatch[1]!, 10),
+                        duration_ms: durationMsMatch
+                          ? parseInt(durationMsMatch[1]!, 10)
+                          : 0,
+                      }
+                    : undefined,
+                session_id: getSessionId(),
+                uuid: randomUUID(),
+              })
+            }
+            // No continue -- fall through to ask() so the model processes the result
+          }
+
+          const input = command.value
+
+          if (structuredIO instanceof RemoteIO && command.mode === 'prompt') {
+            logEvent('tengu_bridge_message_received', {
+              is_repl: false,
+            })
+          }
+
+          // Abort any in-flight suggestion generation and track acceptance
+          suggestionState.abortController?.abort()
+          suggestionState.abortController = null
+          suggestionState.pendingSuggestion = null
+          suggestionState.pendingLastEmittedEntry = null
+          if (suggestionState.lastEmitted) {
+            if (command.mode === 'prompt') {
+              // SDK user messages enqueue ContentBlockParam[], not a plain string
+              const inputText =
+                typeof input === 'string'
+                  ? input
+                  : (
+                      input.find(b => b.type === 'text') as
+                        | { type: 'text'; text: string }
+                        | undefined
+                    )?.text
+              if (typeof inputText === 'string') {
+                logSuggestionOutcome(
+                  suggestionState.lastEmitted.text,
+                  inputText,
+                  suggestionState.lastEmitted.emittedAt,
+                  suggestionState.lastEmitted.promptId,
+                  suggestionState.lastEmitted.generationRequestId,
+                )
+              }
+              suggestionState.lastEmitted = null
+            }
+          }
+
+          // Fresh controller per turn; 'now'-priority arrivals abort it via
+          // the subscribeToCommandQueue handler.
+          abortController = createAbortController()
+          // Set only when FILE_PERSISTENCE is on; gates executeFilePersistence below.
+          const turnStartTime = feature('FILE_PERSISTENCE')
+            ? Date.now()
+            : undefined
+
+          headlessProfilerCheckpoint('before_ask')
+          startQueryProfile()
+          // Per-iteration ALS context so bg agents spawned inside ask()
+          // inherit workload across their detached awaits. In-process cron
+          // stamps cmd.workload; the SDK --workload flag is options.workload.
+          // const-capture: TS loses `while ((command = dequeue()))` narrowing
+          // inside the closure.
+          const cmd = command
+          await runWithWorkload(cmd.workload ?? options.workload, async () => {
+            for await (const message of ask({
+              commands: uniqBy(
+                [...currentCommands, ...appState.mcp.commands],
+                'name',
+              ),
+              prompt: input,
+              promptUuid: cmd.uuid,
+              isMeta: cmd.isMeta,
+              cwd: cwd(),
+              tools: allTools,
+              verbose: options.verbose,
+              mcpClients: allMcpClients,
+              thinkingConfig: options.thinkingConfig,
+              maxTurns: options.maxTurns,
+              maxBudgetUsd: options.maxBudgetUsd,
+              taskBudget: options.taskBudget,
+              canUseTool,
+              userSpecifiedModel: activeUserSpecifiedModel,
+              fallbackModel: options.fallbackModel,
+              jsonSchema: getInitJsonSchema() ?? options.jsonSchema,
+              mutableMessages,
+              getReadFileCache: () =>
+                pendingSeeds.size === 0
+                  ? readFileState
+                  : mergeFileStateCaches(readFileState, pendingSeeds),
+              setReadFileCache: cache => {
+                readFileState = cache
+                for (const [path, seed] of pendingSeeds.entries()) {
+                  const existing = readFileState.get(path)
+                  if (!existing || seed.timestamp > existing.timestamp) {
+                    readFileState.set(path, seed)
+                  }
+                }
+                pendingSeeds.clear()
+              },
+              customSystemPrompt: options.systemPrompt,
+              appendSystemPrompt: options.appendSystemPrompt,
+              getAppState,
+              setAppState,
+              abortController,
+              replayUserMessages: options.replayUserMessages,
+              includePartialMessages: options.includePartialMessages,
+              handleElicitation: (serverName, params, elicitSignal) =>
+                structuredIO.handleElicitation(
+                  serverName,
+                  params.message,
+                  undefined,
+                  elicitSignal,
+                  params.mode,
+                  params.url,
+                  'elicitationId' in params ? params.elicitationId : undefined,
+                ),
+              agents: currentAgents,
+              orphanedPermission: cmd.orphanedPermission,
+              setSDKStatus: status => {
+                output.enqueue({
+                  type: 'system',
+                  subtype: 'status',
+                  status,
+                  session_id: getSessionId(),
+                  uuid: randomUUID(),
+                })
+              },
+            })) {
+              // Forward messages to bridge incrementally (mid-turn) so
+              // claude.ai sees progress and the connection stays alive
+              // while blocked on permission requests.
+              forwardMessagesToBridge()
+
+              if (message.type === 'result') {
+                // Flush pending SDK events so they appear before result on the stream.
+                for (const event of drainSdkEvents()) {
+                  output.enqueue(event)
+                }
+
+                // Hold-back: don't emit result while background agents are running
+                const currentState = getAppState()
+                if (
+                  getRunningTasks(currentState).some(
+                    t =>
+                      (t.type === 'local_agent' ||
+                        t.type === 'local_workflow') &&
+                      isBackgroundTask(t),
+                  )
+                ) {
+                  heldBackResult = message
+                } else {
+                  heldBackResult = null
+                  output.enqueue(message)
+                }
+              } else {
+                // Flush SDK events (task_started, task_progress) so background
+                // agent progress is streamed in real-time, not batched until result.
+                for (const event of drainSdkEvents()) {
+                  output.enqueue(event)
+                }
+                output.enqueue(message)
+              }
+            }
+          }) // end runWithWorkload
+
+          for (const uuid of batchUuids) {
+            notifyCommandLifecycle(uuid, 'completed')
+          }
+
+          // Forward messages to bridge after each turn
+          forwardMessagesToBridge()
+          bridgeHandle?.sendResult()
+
+          // Fire-and-forget persistence scan for the turn's window; results
+          // surface as a files_persisted system event.
+          if (feature('FILE_PERSISTENCE') && turnStartTime !== undefined) {
+            void executeFilePersistence(
+              turnStartTime,
+              abortController.signal,
+              result => {
+                output.enqueue({
+                  type: 'system' as const,
+                  subtype: 'files_persisted' as const,
+                  files: result.files,
+                  failed: result.failed,
+                  processed_at: new Date().toISOString(),
+                  uuid: randomUUID(),
+                  session_id: getSessionId(),
+                })
+              },
+            )
+          }
+
+          // Generate and emit prompt suggestion for SDK consumers
+          if (
+            options.promptSuggestions &&
+            !isEnvDefinedFalsy(process.env.CLAUDE_CODE_ENABLE_PROMPT_SUGGESTION)
+          ) {
+            // TS narrows suggestionState to never in the while loop body;
+            // cast via unknown to reset narrowing.
+            const state = suggestionState as unknown as typeof suggestionState
+            state.abortController?.abort()
+            const localAbort = new AbortController()
+            suggestionState.abortController = localAbort
+
+            const cacheSafeParams = getLastCacheSafeParams()
+            if (!cacheSafeParams) {
+              logSuggestionSuppressed(
+                'sdk_no_params',
+                undefined,
+                undefined,
+                'sdk',
+              )
+            } else {
+              // Use a ref object so the IIFE's finally can compare against its own
+              // promise without a self-reference (which upsets TypeScript's flow analysis).
+              const ref: { promise: Promise<void> | null } = { promise: null }
+              ref.promise = (async () => {
+                try {
+                  const result = await tryGenerateSuggestion(
+                    localAbort,
+                    mutableMessages,
+                    getAppState,
+                    cacheSafeParams,
+                    'sdk',
+                  )
+                  if (!result || localAbort.signal.aborted) return
+                  const suggestionMsg = {
+                    type: 'prompt_suggestion' as const,
+                    suggestion: result.suggestion,
+                    uuid: randomUUID(),
+                    session_id: getSessionId(),
+                  }
+                  const lastEmittedEntry = {
+                    text: result.suggestion,
+                    emittedAt: Date.now(),
+                    promptId: result.promptId,
+                    generationRequestId: result.generationRequestId,
+                  }
+                  // Defer emission if the result is being held for background agents,
+                  // so that prompt_suggestion always arrives after result.
+                  // Only set lastEmitted when the suggestion is actually delivered
+                  // to the consumer; deferred suggestions may be discarded before
+                  // delivery if a new command arrives first.
+                  if (heldBackResult) {
+                    suggestionState.pendingSuggestion = suggestionMsg
+                    suggestionState.pendingLastEmittedEntry = {
+                      text: lastEmittedEntry.text,
+                      promptId: lastEmittedEntry.promptId,
+                      generationRequestId: lastEmittedEntry.generationRequestId,
+                    }
+                  } else {
+                    suggestionState.lastEmitted = lastEmittedEntry
+                    output.enqueue(suggestionMsg)
+                  }
+                } catch (error) {
+                  // Aborts are expected (new command arrived); log suppression,
+                  // everything else is a real error.
+                  if (
+                    error instanceof Error &&
+                    (error.name === 'AbortError' ||
+                      error.name === 'APIUserAbortError')
+                  ) {
+                    logSuggestionSuppressed(
+                      'aborted',
+                      undefined,
+                      undefined,
+                      'sdk',
+                    )
+                    return
+                  }
+                  logError(toError(error))
+                } finally {
+                  if (suggestionState.inflightPromise === ref.promise) {
+                    suggestionState.inflightPromise = null
+                  }
+                }
+              })()
+              suggestionState.inflightPromise = ref.promise
+            }
+          }
+
+          // Log headless profiler metrics for this turn and start next turn
+          logHeadlessProfilerTurn()
+          logQueryProfileReport()
+          headlessProfilerStartTurn()
+        }
+      }
+
+      // Use a do-while loop to drain commands and then wait for any
+      // background agents that are still running. When agents complete,
+      // their notifications are enqueued and the loop re-drains.
+      do {
+        // Drain SDK events (task_started, task_progress) before command queue
+        // so progress events precede task_notification on the stream.
+        for (const event of drainSdkEvents()) {
+          output.enqueue(event)
+        }
+
+        runPhase = 'draining_commands'
+        await drainCommandQueue()
+
+        // Check for running background tasks before exiting.
+        // Exclude in_process_teammate — teammates are long-lived by design
+        // (status: 'running' for their whole lifetime, cleaned up by the
+        // shutdown protocol, not by transitioning to 'completed'). Waiting
+        // on them here loops forever (gh-30008). Same exclusion already
+        // exists at useBackgroundTaskNavigation.ts:55 for the same reason;
+        // L1839 above is already narrower (type === 'local_agent') so it
+        // doesn't hit this.
+        waitingForAgents = false
+        {
+          const state = getAppState()
+          const hasRunningBg = getRunningTasks(state).some(
+            t => isBackgroundTask(t) && t.type !== 'in_process_teammate',
+          )
+          const hasMainThreadQueued = peek(isMainThread) !== undefined
+          if (hasRunningBg || hasMainThreadQueued) {
+            waitingForAgents = true
+            if (!hasMainThreadQueued) {
+              runPhase = 'waiting_for_agents'
+              // No commands ready yet, wait for tasks to complete
+              await sleep(100)
+            }
+            // Loop back to drain any newly queued commands
+          }
+        }
+      } while (waitingForAgents)
+
+      if (heldBackResult) {
+        output.enqueue(heldBackResult)
+        heldBackResult = null
+        if (suggestionState.pendingSuggestion) {
+          output.enqueue(suggestionState.pendingSuggestion)
+          // Now that the suggestion is actually delivered, record it for acceptance tracking
+          if (suggestionState.pendingLastEmittedEntry) {
+            suggestionState.lastEmitted = {
+              ...suggestionState.pendingLastEmittedEntry,
+              emittedAt: Date.now(),
+            }
+            suggestionState.pendingLastEmittedEntry = null
+          }
+          suggestionState.pendingSuggestion = null
+        }
+      }
+    } catch (error) {
+      // Emit error result message before shutting down
+      // Write directly to structuredIO to ensure immediate delivery
+      try {
+        await structuredIO.write({
+          type: 'result',
+          subtype: 'error_during_execution',
+          duration_ms: 0,
+          duration_api_ms: 0,
+          is_error: true,
+          num_turns: 0,
+          stop_reason: null,
+          session_id: getSessionId(),
+          total_cost_usd: 0,
+          usage: EMPTY_USAGE,
+          modelUsage: {},
+          permission_denials: [],
+          uuid: randomUUID(),
+          errors: [
+            errorMessage(error),
+            ...getInMemoryErrors().map(_ => _.error),
+          ],
+        })
+      } catch {
+        // If we can't emit the error result, continue with shutdown anyway
+      }
+      suggestionState.abortController?.abort()
+      gracefulShutdownSync(1)
+      return
+    } finally {
+      runPhase = 'finally_flush'
+      // Flush pending internal events before going idle
+      await structuredIO.flushInternalEvents()
+      runPhase = 'finally_post_flush'
+      if (!isShuttingDown()) {
+        notifySessionStateChanged('idle')
+        // Drain so the idle session_state_changed SDK event (plus any
+        // terminal task_notification bookends emitted during bg-agent
+        // teardown) reach the output stream before we block on the next
+        // command. The do-while drain above only runs while
+        // waitingForAgents; once we're here the next drain would be the
+        // top of the next run(), which won't come if input is idle.
+        for (const event of drainSdkEvents()) {
+          output.enqueue(event)
+        }
+      }
+      running = false
+      // Start idle timer when we finish processing and are waiting for input
+      idleTimeout.start()
+    }
+
+    // Proactive tick: if proactive is active and queue is empty, inject a tick
+    if (
+      (feature('PROACTIVE') || feature('KAIROS')) &&
+      proactiveModule?.isProactiveActive() &&
+      !proactiveModule.isProactivePaused()
+    ) {
+      if (peek(isMainThread) === undefined && !inputClosed) {
+        scheduleProactiveTick!()
+        return
+      }
+    }
+
+    // Re-check the queue after releasing the mutex. A message may have
+    // arrived (and called run()) between the last dequeue() returning
+    // undefined and `running = false` above. In that case the caller
+    // saw `running === true` and returned immediately, leaving the
+    // message stranded in the queue with no one to process it.
+    if (peek(isMainThread) !== undefined) {
+      void run()
+      return
+    }
+
+    // Check for unread teammate messages and process them
+    // This mirrors what useInboxPoller does in interactive REPL mode
+    // Poll until no more messages (teammates may still be working)
+    {
+      const currentAppState = getAppState()
+      const teamContext = currentAppState.teamContext
+
+      if (teamContext && isTeamLead(teamContext)) {
+        const agentName = 'team-lead'
+
+        // Poll for messages while teammates are active
+        // This is needed because teammates may send messages while we're waiting
+        // Keep polling until the team is shut down
+        const POLL_INTERVAL_MS = 500
+
+        while (true) {
+          // Check if teammates are still active
+          const refreshedState = getAppState()
+          const hasActiveTeammates =
+            hasActiveInProcessTeammates(refreshedState) ||
+            (refreshedState.teamContext &&
+              Object.keys(refreshedState.teamContext.teammates).length > 0)
+
+          if (!hasActiveTeammates) {
+            logForDebugging(
+              '[print.ts] No more active teammates, stopping poll',
+            )
+            break
+          }
+
+          const unread = await readUnreadMessages(
+            agentName,
+            refreshedState.teamContext?.teamName,
+          )
+
+          if (unread.length > 0) {
+            logForDebugging(
+              `[print.ts] Team-lead found ${unread.length} unread messages`,
+            )
+
+            // Mark as read immediately to avoid duplicate processing
+            await markMessagesAsRead(
+              agentName,
+              refreshedState.teamContext?.teamName,
+            )
+
+            // Process shutdown_approved messages - remove teammates from team file
+            // This mirrors what useInboxPoller does in interactive mode (lines 546-606)
+            const teamName = refreshedState.teamContext?.teamName
+            for (const m of unread) {
+              const shutdownApproval = isShutdownApproved(m.text)
+              if (shutdownApproval && teamName) {
+                const teammateToRemove = shutdownApproval.from
+                logForDebugging(
+                  `[print.ts] Processing shutdown_approved from ${teammateToRemove}`,
+                )
+
+                // Find the teammate ID by name
+                const teammateId = refreshedState.teamContext?.teammates
+                  ? Object.entries(refreshedState.teamContext.teammates).find(
+                      ([, t]) => t.name === teammateToRemove,
+                    )?.[0]
+                  : undefined
+
+                if (teammateId) {
+                  // Remove from team file
+                  removeTeammateFromTeamFile(teamName, {
+                    agentId: teammateId,
+                    name: teammateToRemove,
+                  })
+                  logForDebugging(
+                    `[print.ts] Removed ${teammateToRemove} from team file`,
+                  )
+
+                  // Unassign tasks owned by this teammate
+                  await unassignTeammateTasks(
+                    teamName,
+                    teammateId,
+                    teammateToRemove,
+                    'shutdown',
+                  )
+
+                  // Remove from teamContext in AppState
+                  setAppState(prev => {
+                    if (!prev.teamContext?.teammates) return prev
+                    if (!(teammateId in prev.teamContext.teammates)) return prev
+                    const { [teammateId]: _, ...remainingTeammates } =
+                      prev.teamContext.teammates
+                    return {
+                      ...prev,
+                      teamContext: {
+                        ...prev.teamContext,
+                        teammates: remainingTeammates,
+                      },
+                    }
+                  })
+                }
+              }
+            }
+
+            // Format messages same as useInboxPoller
+            const formatted = unread
+              .map(
+                (m: { from: string; text: string; color?: string }) =>
+                  `<${TEAMMATE_MESSAGE_TAG} teammate_id="${m.from}"${m.color ? ` color="${m.color}"` : ''}>\n${m.text}\n</${TEAMMATE_MESSAGE_TAG}>`,
+              )
+              .join('\n\n')
+
+            // Enqueue and process
+            enqueue({
+              mode: 'prompt',
+              value: formatted,
+              uuid: randomUUID(),
+            })
+            void run()
+            return // run() will come back here after processing
+          }
+
+          // No messages - check if we need to prompt for shutdown
+          // If input is closed and teammates are active, inject shutdown prompt once
+          if (inputClosed && !shutdownPromptInjected) {
+            shutdownPromptInjected = true
+            logForDebugging(
+              '[print.ts] Input closed with active teammates, injecting shutdown prompt',
+            )
+            enqueue({
+              mode: 'prompt',
+              value: SHUTDOWN_TEAM_PROMPT,
+              uuid: randomUUID(),
+            })
+            void run()
+            return // run() will come back here after processing
+          }
+
+          // Wait and check again
+          await sleep(POLL_INTERVAL_MS)
+        }
+      }
+    }
+
+    if (inputClosed) {
+      // Check for active swarm that needs shutdown
+      const hasActiveSwarm = await (async () => {
+        // Wait for any working in-process team members to finish
+        const currentAppState = getAppState()
+        if (hasWorkingInProcessTeammates(currentAppState)) {
+          await waitForTeammatesToBecomeIdle(setAppState, currentAppState)
+        }
+
+        // Re-fetch state after potential wait
+        const refreshedAppState = getAppState()
+        const refreshedTeamContext = refreshedAppState.teamContext
+        const hasTeamMembersNotCleanedUp =
+          refreshedTeamContext &&
+          Object.keys(refreshedTeamContext.teammates).length > 0
+
+        return (
+          hasTeamMembersNotCleanedUp ||
+          hasActiveInProcessTeammates(refreshedAppState)
+        )
+      })()
+
+      if (hasActiveSwarm) {
+        // Team members are idle or pane-based - inject prompt to shut down team
+        enqueue({
+          mode: 'prompt',
+          value: SHUTDOWN_TEAM_PROMPT,
+          uuid: randomUUID(),
+        })
+        void run()
+      } else {
+        // Wait for any in-flight push suggestion before closing the output stream.
+        if (suggestionState.inflightPromise) {
+          await Promise.race([suggestionState.inflightPromise, sleep(5000)])
+        }
+        suggestionState.abortController?.abort()
+        suggestionState.abortController = null
+        await finalizePendingAsyncHooks()
+        unsubscribeSkillChanges()
+        unsubscribeAuthStatus?.()
+        statusListeners.delete(rateLimitListener)
+        output.done()
+      }
+    }
+  }
+
+  // Set up UDS inbox callback so the query loop is kicked off
+  // when a message arrives via the UDS socket in headless mode.
+  if (feature('UDS_INBOX')) {
+    /* eslint-disable @typescript-eslint/no-require-imports */
+    // NOTE(review): lazy require() rather than a top-level import —
+    // presumably to keep the UDS module off the startup path when the
+    // feature flag is disabled; confirm.
+    const { setOnEnqueue } = require('../utils/udsMessaging.js')
+    /* eslint-enable @typescript-eslint/no-require-imports */
+    setOnEnqueue(() => {
+      // Skip once stdin has closed; otherwise kick run(). Per the cron
+      // comment below, run() is mutex-guarded, so calling it mid-turn
+      // no-ops and the queued message is picked up by the post-run recheck.
+      if (!inputClosed) {
+        void run()
+      }
+    })
+  }
+
+  // Cron scheduler: runs scheduled_tasks.json tasks in SDK/-p mode.
+  // Mirrors REPL's useScheduledTasks hook. Fired prompts enqueue + kick
+  // off run() directly — unlike REPL, there's no queue subscriber here
+  // that drains on enqueue while idle. The run() mutex makes this safe
+  // during an active turn: the call no-ops and the post-run recheck at
+  // the end of run() picks up the queued command.
+  // NOTE(review): cronScheduler is not read again in this chunk —
+  // presumably a shutdown path elsewhere stops it; verify.
+  let cronScheduler: import('../utils/cronScheduler.js').CronScheduler | null =
+    null
+  // All three gates must hold: feature flag, module loaded, kairos cron on.
+  if (
+    feature('AGENT_TRIGGERS') &&
+    cronSchedulerModule &&
+    cronGate?.isKairosCronEnabled()
+  ) {
+    cronScheduler = cronSchedulerModule.createCronScheduler({
+      onFire: prompt => {
+        // Drop fires after stdin closes — the session is winding down.
+        if (inputClosed) return
+        enqueue({
+          mode: 'prompt',
+          value: prompt,
+          uuid: randomUUID(),
+          priority: 'later',
+          // System-generated — matches useScheduledTasks.ts REPL equivalent.
+          // Without this, messages.ts metaProp eval is {} → prompt leaks
+          // into visible transcript when cron fires mid-turn in -p mode.
+          isMeta: true,
+          // Threaded to cc_workload= in the billing-header attribution block
+          // so the API can serve cron requests at lower QoS. drainCommandQueue
+          // reads this per-iteration and hoists it into bootstrap state for
+          // the ask() call.
+          workload: WORKLOAD_CRON,
+        })
+        void run()
+      },
+      // NOTE(review): presumably the scheduler defers firing while this is
+      // true (turn in flight, or input closed) — confirm in cronScheduler.js.
+      isLoading: () => running || inputClosed,
+      getJitterConfig: cronJitterConfigModule?.getCronJitterConfig,
+      // Re-evaluated by the scheduler so turning kairos cron off remotely
+      // stops future fires — NOTE(review): confirm polling semantics.
+      isKilled: () => !cronGate?.isKairosCronEnabled(),
+    })
+    cronScheduler.start()
+  }
+
+  // Emit a success control_response for the given control_request, echoing
+  // its request_id so the SDK client can correlate the reply. `response`
+  // optionally carries a subtype-specific payload (e.g. mcp_status data).
+  const sendControlResponseSuccess = function (
+    message: SDKControlRequest,
+    response?: Record<string, unknown>,
+  ) {
+    output.enqueue({
+      type: 'control_response',
+      response: {
+        subtype: 'success',
+        request_id: message.request_id,
+        response: response,
+      },
+    })
+  }
+
+  // Emit an error control_response for the given control_request, echoing
+  // its request_id and carrying a human-readable error string.
+  const sendControlResponseError = function (
+    message: SDKControlRequest,
+    errorMessage: string,
+  ) {
+    output.enqueue({
+      type: 'control_response',
+      response: {
+        subtype: 'error',
+        request_id: message.request_id,
+        error: errorMessage,
+      },
+    })
+  }
+
+  // Handle unexpected permission responses by looking up the unresolved tool
+  // call in the transcript and executing it
+  // Dedupe set: ensures each orphaned tool_use id is handled at most once
+  // even if duplicate permission responses arrive.
+  const handledOrphanedToolUseIds = new Set<string>()
+  structuredIO.setUnexpectedResponseCallback(async message => {
+    await handleOrphanedPermissionResponse({
+      message,
+      setAppState,
+      handledToolUseIds: handledOrphanedToolUseIds,
+      onEnqueued: () => {
+        // The first message of a session might be the orphaned permission
+        // check rather than a user prompt, so kick off the loop.
+        void run()
+      },
+    })
+  })
+
+  // Track active OAuth flows per server so we can abort a previous flow
+  // when a new mcp_authenticate request arrives for the same server.
+  const activeOAuthFlows = new Map<string, AbortController>()
+  // Track manual callback URL submit functions for active OAuth flows.
+  // Used when localhost is not reachable (e.g., browser-based IDEs).
+  // NOTE(review): presumably keyed by server name like activeOAuthFlows —
+  // confirm against the mcp_authenticate handler.
+  const oauthCallbackSubmitters = new Map<
+    string,
+    (callbackUrl: string) => void
+  >()
+  // Track servers where the manual callback was actually invoked (so the
+  // automatic reconnect path knows to skip — the extension will reconnect).
+  const oauthManualCallbackUsed = new Set<string>()
+  // Track OAuth auth-only promises so mcp_oauth_callback_url can await
+  // token exchange completion. Reconnect is handled separately by the
+  // extension via handleAuthDone → mcp_reconnect.
+  const oauthAuthPromises = new Map<string, Promise<void>>()
+
+  // In-flight Anthropic OAuth flow (claude_authenticate). Single-slot: a
+  // second authenticate request cleans up the first. The service holds the
+  // PKCE verifier + localhost listener; the promise settles after
+  // installOAuthTokens — after it resolves, the in-process memoized token
+  // cache is already cleared and the next API call picks up the new creds.
+  let claudeOAuth: {
+    service: OAuthService
+    flow: Promise<void>
+  } | null = null
+
+  // This is essentially spawning a parallel async task — we have two
+  // running in parallel: one reading from stdin and adding to the
+  // queue to be processed, and another reading from the queue,
+  // processing it, and returning the result of the generation.
+  // The process is complete when the input stream completes and
+  // the last generation of the queue has completed.
+  void (async () => {
+    let initialized = false
+    logForDiagnosticsNoPII('info', 'cli_message_loop_started')
+    for await (const message of structuredIO.structuredInput) {
+      // Non-user events are handled inline (no queue). started→completed in
+      // the same tick carries no information, so only fire completed.
+      // control_response is reported by StructuredIO.processLine (which also
+      // sees orphans that never yield here).
+      const eventId = 'uuid' in message ? message.uuid : undefined
+      if (
+        eventId &&
+        message.type !== 'user' &&
+        message.type !== 'control_response'
+      ) {
+        notifyCommandLifecycle(eventId, 'completed')
+      }
+
+      if (message.type === 'control_request') {
+        if (message.request.subtype === 'interrupt') {
+          // Track escapes for attribution (ant-only feature)
+          if (feature('COMMIT_ATTRIBUTION')) {
+            setAppState(prev => ({
+              ...prev,
+              attribution: {
+                ...prev.attribution,
+                escapeCount: prev.attribution.escapeCount + 1,
+              },
+            }))
+          }
+          if (abortController) {
+            abortController.abort()
+          }
+          suggestionState.abortController?.abort()
+          suggestionState.abortController = null
+          suggestionState.lastEmitted = null
+          suggestionState.pendingSuggestion = null
+          sendControlResponseSuccess(message)
+        } else if (message.request.subtype === 'end_session') {
+          logForDebugging(
+            `[print.ts] end_session received, reason=${message.request.reason ?? 'unspecified'}`,
+          )
+          if (abortController) {
+            abortController.abort()
+          }
+          suggestionState.abortController?.abort()
+          suggestionState.abortController = null
+          suggestionState.lastEmitted = null
+          suggestionState.pendingSuggestion = null
+          sendControlResponseSuccess(message)
+          break // exits for-await → falls through to inputClosed=true drain below
+        } else if (message.request.subtype === 'initialize') {
+          // SDK MCP server names from the initialize message
+          // Populated by both browser and ProcessTransport sessions
+          if (
+            message.request.sdkMcpServers &&
+            message.request.sdkMcpServers.length > 0
+          ) {
+            for (const serverName of message.request.sdkMcpServers) {
+              // Create placeholder config for SDK MCP servers
+              // The actual server connection is managed by the SDK Query class
+              sdkMcpConfigs[serverName] = {
+                type: 'sdk',
+                name: serverName,
+              }
+            }
+          }
+
+          await handleInitializeRequest(
+            message.request,
+            message.request_id,
+            initialized,
+            output,
+            commands,
+            modelInfos,
+            structuredIO,
+            !!options.enableAuthStatus,
+            options,
+            agents,
+            getAppState,
+          )
+
+          // Enable prompt suggestions in AppState when SDK consumer opts in.
+          // shouldEnablePromptSuggestion() returns false for non-interactive
+          // sessions, but the SDK consumer explicitly requested suggestions.
+          if (message.request.promptSuggestions) {
+            setAppState(prev => {
+              if (prev.promptSuggestionEnabled) return prev
+              return { ...prev, promptSuggestionEnabled: true }
+            })
+          }
+
+          if (
+            message.request.agentProgressSummaries &&
+            getFeatureValue_CACHED_MAY_BE_STALE('tengu_slate_prism', true)
+          ) {
+            setSdkAgentProgressSummariesEnabled(true)
+          }
+
+          initialized = true
+
+          // If the auto-resume logic pre-enqueued a command, drain it now
+          // that initialize has set up systemPrompt, agents, hooks, etc.
+          if (hasCommandsInQueue()) {
+            void run()
+          }
+        } else if (message.request.subtype === 'set_permission_mode') {
+          const m = message.request // for typescript (TODO: use readonly types to avoid this)
+          setAppState(prev => ({
+            ...prev,
+            toolPermissionContext: handleSetPermissionMode(
+              m,
+              message.request_id,
+              prev.toolPermissionContext,
+              output,
+            ),
+            isUltraplanMode: m.ultraplan ?? prev.isUltraplanMode,
+          }))
+          // handleSetPermissionMode sends the control_response; the
+          // notifySessionMetadataChanged that used to follow here is
+          // now fired by onChangeAppState (with externalized mode name).
+        } else if (message.request.subtype === 'set_model') {
+          const requestedModel = message.request.model ?? 'default'
+          const model =
+            requestedModel === 'default'
+              ? getDefaultMainLoopModel()
+              : requestedModel
+          activeUserSpecifiedModel = model
+          setMainLoopModelOverride(model)
+          notifySessionMetadataChanged({ model })
+          injectModelSwitchBreadcrumbs(requestedModel, model)
+
+          sendControlResponseSuccess(message)
+        } else if (message.request.subtype === 'set_max_thinking_tokens') {
+          if (message.request.max_thinking_tokens === null) {
+            options.thinkingConfig = undefined
+          } else if (message.request.max_thinking_tokens === 0) {
+            options.thinkingConfig = { type: 'disabled' }
+          } else {
+            options.thinkingConfig = {
+              type: 'enabled',
+              budgetTokens: message.request.max_thinking_tokens,
+            }
+          }
+          sendControlResponseSuccess(message)
+        } else if (message.request.subtype === 'mcp_status') {
+          sendControlResponseSuccess(message, {
+            mcpServers: buildMcpServerStatuses(),
+          })
+        } else if (message.request.subtype === 'get_context_usage') {
+          try {
+            const appState = getAppState()
+            const data = await collectContextData({
+              messages: mutableMessages,
+              getAppState,
+              options: {
+                mainLoopModel: getMainLoopModel(),
+                tools: buildAllTools(appState),
+                agentDefinitions: appState.agentDefinitions,
+                customSystemPrompt: options.systemPrompt,
+                appendSystemPrompt: options.appendSystemPrompt,
+              },
+            })
+            sendControlResponseSuccess(message, { ...data })
+          } catch (error) {
+            sendControlResponseError(message, errorMessage(error))
+          }
+        } else if (message.request.subtype === 'mcp_message') {
+          // Handle MCP notifications from SDK servers
+          const mcpRequest = message.request
+          const sdkClient = sdkClients.find(
+            client => client.name === mcpRequest.server_name,
+          )
+          // Check client exists - dynamically added SDK servers may have
+          // placeholder clients with null client until updateSdkMcp() runs
+          if (
+            sdkClient &&
+            sdkClient.type === 'connected' &&
+            sdkClient.client?.transport?.onmessage
+          ) {
+            sdkClient.client.transport.onmessage(mcpRequest.message)
+          }
+          sendControlResponseSuccess(message)
+        } else if (message.request.subtype === 'rewind_files') {
+          const appState = getAppState()
+          const result = await handleRewindFiles(
+            message.request.user_message_id as UUID,
+            appState,
+            setAppState,
+            message.request.dry_run ?? false,
+          )
+          if (result.canRewind || message.request.dry_run) {
+            sendControlResponseSuccess(message, result)
+          } else {
+            sendControlResponseError(
+              message,
+              result.error ?? 'Unexpected error',
+            )
+          }
+        } else if (message.request.subtype === 'cancel_async_message') {
+          const targetUuid = message.request.message_uuid
+          const removed = dequeueAllMatching(cmd => cmd.uuid === targetUuid)
+          sendControlResponseSuccess(message, {
+            cancelled: removed.length > 0,
+          })
+        } else if (message.request.subtype === 'seed_read_state') {
+          // Client observed a Read that was later removed from context (e.g.
+          // by snip), so transcript-based seeding missed it. Queued into
+          // pendingSeeds; applied at the next clone-replace boundary.
+          try {
+            // expandPath: all other readFileState writers normalize (~, relative,
+            // session cwd vs process cwd). FileEditTool looks up by expandPath'd
+            // key — a verbatim client path would miss.
+            const normalizedPath = expandPath(message.request.path)
+            // Check disk mtime before reading content. If the file changed
+            // since the client's observation, readFile would return C_current
+            // but we'd store it with the client's M_observed — getChangedFiles
+            // then sees disk > cache.timestamp, re-reads, diffs C_current vs
+            // C_current = empty, emits no attachment, and the model is never
+            // told about the C_observed → C_current change. Skipping the seed
+            // makes Edit fail "file not read yet" → forces a fresh Read.
+            // Math.floor matches FileReadTool and getFileModificationTime.
+            const diskMtime = Math.floor((await stat(normalizedPath)).mtimeMs)
+            if (diskMtime <= message.request.mtime) {
+              const raw = await readFile(normalizedPath, 'utf-8')
+              // Strip BOM + normalize CRLF→LF to match readFileInRange and
+              // readFileSyncWithMetadata. FileEditTool's content-compare
+              // fallback (for Windows mtime bumps without content change)
+              // compares against LF-normalized disk reads.
+              const content = (
+                raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw
+              ).replaceAll('\r\n', '\n')
+              pendingSeeds.set(normalizedPath, {
+                content,
+                timestamp: diskMtime,
+                offset: undefined,
+                limit: undefined,
+              })
+            }
+          } catch {
+            // ENOENT etc — skip seeding but still succeed
+          }
+          sendControlResponseSuccess(message)
+        } else if (message.request.subtype === 'mcp_set_servers') {
+          const { response, sdkServersChanged } = await applyMcpServerChanges(
+            message.request.servers,
+          )
+          sendControlResponseSuccess(message, response)
+
+          // Connect SDK servers AFTER response to avoid deadlock
+          if (sdkServersChanged) {
+            void updateSdkMcp()
+          }
+        } else if (message.request.subtype === 'reload_plugins') {
+          try {
+            if (
+              feature('DOWNLOAD_USER_SETTINGS') &&
+              (isEnvTruthy(process.env.CLAUDE_CODE_REMOTE) || getIsRemoteMode())
+            ) {
+              // Re-pull user settings so enabledPlugins pushed from the
+              // user's local CLI take effect before the cache sweep.
+              const applied = await redownloadUserSettings()
+              if (applied) {
+                settingsChangeDetector.notifyChange('userSettings')
+              }
+            }
+
+            const r = await refreshActivePlugins(setAppState)
+
+            const sdkAgents = currentAgents.filter(
+              a => a.source === 'flagSettings',
+            )
+            currentAgents = [...r.agentDefinitions.allAgents, ...sdkAgents]
+
+            // Reload succeeded — gather response data best-effort so a
+            // read failure doesn't mask the successful state change.
+            // allSettled so one failure doesn't discard the others.
+            let plugins: SDKControlReloadPluginsResponse['plugins'] = []
+            const [cmdsR, mcpR, pluginsR] = await Promise.allSettled([
+              getCommands(cwd()),
+              applyPluginMcpDiff(),
+              loadAllPluginsCacheOnly(),
+            ])
+            if (cmdsR.status === 'fulfilled') {
+              currentCommands = cmdsR.value
+            } else {
+              logError(cmdsR.reason)
+            }
+            if (mcpR.status === 'rejected') {
+              logError(mcpR.reason)
+            }
+            if (pluginsR.status === 'fulfilled') {
+              plugins = pluginsR.value.enabled.map(p => ({
+                name: p.name,
+                path: p.path,
+                source: p.source,
+              }))
+            } else {
+              logError(pluginsR.reason)
+            }
+
+            sendControlResponseSuccess(message, {
+              commands: currentCommands
+                .filter(cmd => cmd.userInvocable !== false)
+                .map(cmd => ({
+                  name: getCommandName(cmd),
+                  description: formatDescriptionWithSource(cmd),
+                  argumentHint: cmd.argumentHint || '',
+                })),
+              agents: currentAgents.map(a => ({
+                name: a.agentType,
+                description: a.whenToUse,
+                model: a.model === 'inherit' ? undefined : a.model,
+              })),
+              plugins,
+              mcpServers: buildMcpServerStatuses(),
+              error_count: r.error_count,
+            } satisfies SDKControlReloadPluginsResponse)
+          } catch (error) {
+            sendControlResponseError(message, errorMessage(error))
+          }
+        } else if (message.request.subtype === 'mcp_reconnect') {
+          const currentAppState = getAppState()
+          const { serverName } = message.request
+          elicitationRegistered.delete(serverName)
+          // Config-existence gate must cover the SAME sources as the
+          // operations below. SDK-injected servers (query({mcpServers:{...}}))
+          // and dynamically-added servers were missing here, so
+          // toggleMcpServer/reconnect returned "Server not found" even though
+          // the disconnect/reconnect would have worked (gh-31339 / CC-314).
+          const config =
+            getMcpConfigByName(serverName) ??
+            mcpClients.find(c => c.name === serverName)?.config ??
+            sdkClients.find(c => c.name === serverName)?.config ??
+            dynamicMcpState.clients.find(c => c.name === serverName)?.config ??
+            currentAppState.mcp.clients.find(c => c.name === serverName)
+              ?.config ??
+            null
+          if (!config) {
+            sendControlResponseError(message, `Server not found: ${serverName}`)
+          } else {
+            const result = await reconnectMcpServerImpl(serverName, config)
+            // Update appState.mcp with the new client, tools, commands, and resources
+            const prefix = getMcpPrefix(serverName)
+            setAppState(prev => ({
+              ...prev,
+              mcp: {
+                ...prev.mcp,
+                clients: prev.mcp.clients.map(c =>
+                  c.name === serverName ? result.client : c,
+                ),
+                tools: [
+                  ...reject(prev.mcp.tools, t => t.name?.startsWith(prefix)),
+                  ...result.tools,
+                ],
+                commands: [
+                  ...reject(prev.mcp.commands, c =>
+                    commandBelongsToServer(c, serverName),
+                  ),
+                  ...result.commands,
+                ],
+                resources:
+                  result.resources && result.resources.length > 0
+                    ? { ...prev.mcp.resources, [serverName]: result.resources }
+                    : omit(prev.mcp.resources, serverName),
+              },
+            }))
+            // Also update dynamicMcpState so run() picks up the new tools
+            // on the next turn (run() reads dynamicMcpState, not appState)
+            dynamicMcpState = {
+              ...dynamicMcpState,
+              clients: [
+                ...dynamicMcpState.clients.filter(c => c.name !== serverName),
+                result.client,
+              ],
+              tools: [
+                ...dynamicMcpState.tools.filter(
+                  t => !t.name?.startsWith(prefix),
+                ),
+                ...result.tools,
+              ],
+            }
+            if (result.client.type === 'connected') {
+              registerElicitationHandlers([result.client])
+              reregisterChannelHandlerAfterReconnect(result.client)
+              sendControlResponseSuccess(message)
+            } else {
+              const errorMessage =
+                result.client.type === 'failed'
+                  ? (result.client.error ?? 'Connection failed')
+                  : `Server status: ${result.client.type}`
+              sendControlResponseError(message, errorMessage)
+            }
+          }
+        } else if (message.request.subtype === 'mcp_toggle') {
+          const currentAppState = getAppState()
+          const { serverName, enabled } = message.request
+          elicitationRegistered.delete(serverName)
+          // Gate must match the client-lookup spread below (which
+          // includes sdkClients and dynamicMcpState.clients). Same fix as
+          // mcp_reconnect above (gh-31339 / CC-314).
+          const config =
+            getMcpConfigByName(serverName) ??
+            mcpClients.find(c => c.name === serverName)?.config ??
+            sdkClients.find(c => c.name === serverName)?.config ??
+            dynamicMcpState.clients.find(c => c.name === serverName)?.config ??
+            currentAppState.mcp.clients.find(c => c.name === serverName)
+              ?.config ??
+            null
+
+          if (!config) {
+            sendControlResponseError(message, `Server not found: ${serverName}`)
+          } else if (!enabled) {
+            // Disabling: persist + disconnect (matches TUI toggleMcpServer behavior)
+            setMcpServerEnabled(serverName, false)
+            const client = [
+              ...mcpClients,
+              ...sdkClients,
+              ...dynamicMcpState.clients,
+              ...currentAppState.mcp.clients,
+            ].find(c => c.name === serverName)
+            if (client && client.type === 'connected') {
+              await clearServerCache(serverName, config)
+            }
+            // Update appState.mcp to reflect disabled status and remove tools/commands/resources
+            const prefix = getMcpPrefix(serverName)
+            setAppState(prev => ({
+              ...prev,
+              mcp: {
+                ...prev.mcp,
+                clients: prev.mcp.clients.map(c =>
+                  c.name === serverName
+                    ? { name: serverName, type: 'disabled' as const, config }
+                    : c,
+                ),
+                tools: reject(prev.mcp.tools, t => t.name?.startsWith(prefix)),
+                commands: reject(prev.mcp.commands, c =>
+                  commandBelongsToServer(c, serverName),
+                ),
+                resources: omit(prev.mcp.resources, serverName),
+              },
+            }))
+            sendControlResponseSuccess(message)
+          } else {
+            // Enabling: persist + reconnect
+            setMcpServerEnabled(serverName, true)
+            const result = await reconnectMcpServerImpl(serverName, config)
+            // Update appState.mcp with the new client, tools, commands, and resources
+            // This ensures the LLM sees updated tools after enabling the server
+            const prefix = getMcpPrefix(serverName)
+            setAppState(prev => ({
+              ...prev,
+              mcp: {
+                ...prev.mcp,
+                clients: prev.mcp.clients.map(c =>
+                  c.name === serverName ? result.client : c,
+                ),
+                tools: [
+                  ...reject(prev.mcp.tools, t => t.name?.startsWith(prefix)),
+                  ...result.tools,
+                ],
+                commands: [
+                  ...reject(prev.mcp.commands, c =>
+                    commandBelongsToServer(c, serverName),
+                  ),
+                  ...result.commands,
+                ],
+                resources:
+                  result.resources && result.resources.length > 0
+                    ? { ...prev.mcp.resources, [serverName]: result.resources }
+                    : omit(prev.mcp.resources, serverName),
+              },
+            }))
+            if (result.client.type === 'connected') {
+              registerElicitationHandlers([result.client])
+              reregisterChannelHandlerAfterReconnect(result.client)
+              sendControlResponseSuccess(message)
+            } else {
+              const errorMessage =
+                result.client.type === 'failed'
+                  ? (result.client.error ?? 'Connection failed')
+                  : `Server status: ${result.client.type}`
+              sendControlResponseError(message, errorMessage)
+            }
+          }
+        } else if (message.request.subtype === 'channel_enable') {
+          const currentAppState = getAppState()
+          handleChannelEnable(
+            message.request_id,
+            message.request.serverName,
+            // Pool spread matches mcp_status — all three client sources.
+            [
+              ...currentAppState.mcp.clients,
+              ...sdkClients,
+              ...dynamicMcpState.clients,
+            ],
+            output,
+          )
+        } else if (message.request.subtype === 'mcp_authenticate') {
+          const { serverName } = message.request
+          const currentAppState = getAppState()
+          const config =
+            getMcpConfigByName(serverName) ??
+            mcpClients.find(c => c.name === serverName)?.config ??
+            currentAppState.mcp.clients.find(c => c.name === serverName)
+              ?.config ??
+            null
+          if (!config) {
+            sendControlResponseError(message, `Server not found: ${serverName}`)
+          } else if (config.type !== 'sse' && config.type !== 'http') {
+            sendControlResponseError(
+              message,
+              `Server type "${config.type}" does not support OAuth authentication`,
+            )
+          } else {
+            try {
+              // Abort any previous in-flight OAuth flow for this server
+              activeOAuthFlows.get(serverName)?.abort()
+              const controller = new AbortController()
+              activeOAuthFlows.set(serverName, controller)
+
+              // Capture the auth URL from the callback
+              let resolveAuthUrl: (url: string) => void
+              const authUrlPromise = new Promise<string>(resolve => {
+                resolveAuthUrl = resolve
+              })
+
+              // Start the OAuth flow in the background
+              const oauthPromise = performMCPOAuthFlow(
+                serverName,
+                config,
+                url => resolveAuthUrl!(url),
+                controller.signal,
+                {
+                  skipBrowserOpen: true,
+                  onWaitingForCallback: submit => {
+                    oauthCallbackSubmitters.set(serverName, submit)
+                  },
+                },
+              )
+
+              // Wait for the auth URL (or the flow to complete without needing redirect)
+              const authUrl = await Promise.race([
+                authUrlPromise,
+                oauthPromise.then(() => null as string | null),
+              ])
+
+              if (authUrl) {
+                sendControlResponseSuccess(message, {
+                  authUrl,
+                  requiresUserAction: true,
+                })
+              } else {
+                sendControlResponseSuccess(message, {
+                  requiresUserAction: false,
+                })
+              }
+
+              // Store auth-only promise for mcp_oauth_callback_url handler.
+              // Don't swallow errors — the callback handler needs to detect
+              // auth failures and report them to the caller.
+              oauthAuthPromises.set(serverName, oauthPromise)
+
+              // Handle background completion — reconnect after auth.
+              // When manual callback is used, skip the reconnect here;
+              // the extension's handleAuthDone → mcp_reconnect handles it
+              // (which also updates dynamicMcpState for tool registration).
+              const fullFlowPromise = oauthPromise
+                .then(async () => {
+                  // Don't reconnect if the server was disabled during the OAuth flow
+                  if (isMcpServerDisabled(serverName)) {
+                    return
+                  }
+                  // Skip reconnect if the manual callback path was used —
+                  // handleAuthDone will do it via mcp_reconnect (which
+                  // updates dynamicMcpState for tool registration).
+                  if (oauthManualCallbackUsed.has(serverName)) {
+                    return
+                  }
+                  // Reconnect the server after successful auth
+                  const result = await reconnectMcpServerImpl(
+                    serverName,
+                    config,
+                  )
+                  const prefix = getMcpPrefix(serverName)
+                  setAppState(prev => ({
+                    ...prev,
+                    mcp: {
+                      ...prev.mcp,
+                      clients: prev.mcp.clients.map(c =>
+                        c.name === serverName ? result.client : c,
+                      ),
+                      tools: [
+                        ...reject(prev.mcp.tools, t =>
+                          t.name?.startsWith(prefix),
+                        ),
+                        ...result.tools,
+                      ],
+                      commands: [
+                        ...reject(prev.mcp.commands, c =>
+                          commandBelongsToServer(c, serverName),
+                        ),
+                        ...result.commands,
+                      ],
+                      resources:
+                        result.resources && result.resources.length > 0
+                          ? {
+                              ...prev.mcp.resources,
+                              [serverName]: result.resources,
+                            }
+                          : omit(prev.mcp.resources, serverName),
+                    },
+                  }))
+                  // Also update dynamicMcpState so run() picks up the new tools
+                  // on the next turn (run() reads dynamicMcpState, not appState)
+                  dynamicMcpState = {
+                    ...dynamicMcpState,
+                    clients: [
+                      ...dynamicMcpState.clients.filter(
+                        c => c.name !== serverName,
+                      ),
+                      result.client,
+                    ],
+                    tools: [
+                      ...dynamicMcpState.tools.filter(
+                        t => !t.name?.startsWith(prefix),
+                      ),
+                      ...result.tools,
+                    ],
+                  }
+                })
+                .catch(error => {
+                  logForDebugging(
+                    `MCP OAuth failed for ${serverName}: ${error}`,
+                    { level: 'error' },
+                  )
+                })
+                .finally(() => {
+                  // Clean up only if this is still the active flow
+                  if (activeOAuthFlows.get(serverName) === controller) {
+                    activeOAuthFlows.delete(serverName)
+                    oauthCallbackSubmitters.delete(serverName)
+                    oauthManualCallbackUsed.delete(serverName)
+                    oauthAuthPromises.delete(serverName)
+                  }
+                })
+              void fullFlowPromise
+            } catch (error) {
+              sendControlResponseError(message, errorMessage(error))
+            }
+          }
+        } else if (message.request.subtype === 'mcp_oauth_callback_url') {
+          const { serverName, callbackUrl } = message.request
+          const submit = oauthCallbackSubmitters.get(serverName)
+          if (submit) {
+            // Validate the callback URL before submitting. The submit
+            // callback in auth.ts silently ignores URLs missing a code
+            // param, which would leave the auth promise unresolved and
+            // block the control message loop until timeout.
+            let hasCodeOrError = false
+            try {
+              const parsed = new URL(callbackUrl)
+              hasCodeOrError =
+                parsed.searchParams.has('code') ||
+                parsed.searchParams.has('error')
+            } catch {
+              // Invalid URL
+            }
+            if (!hasCodeOrError) {
+              sendControlResponseError(
+                message,
+                'Invalid callback URL: missing authorization code. Please paste the full redirect URL including the code parameter.',
+              )
+            } else {
+              oauthManualCallbackUsed.add(serverName)
+              submit(callbackUrl)
+              // Wait for auth (token exchange) to complete before responding.
+              // Reconnect is handled by the extension via handleAuthDone →
+              // mcp_reconnect (which updates dynamicMcpState for tools).
+              const authPromise = oauthAuthPromises.get(serverName)
+              if (authPromise) {
+                try {
+                  await authPromise
+                  sendControlResponseSuccess(message)
+                } catch (error) {
+                  sendControlResponseError(
+                    message,
+                    error instanceof Error
+                      ? error.message
+                      : 'OAuth authentication failed',
+                  )
+                }
+              } else {
+                sendControlResponseSuccess(message)
+              }
+            }
+          } else {
+            sendControlResponseError(
+              message,
+              `No active OAuth flow for server: ${serverName}`,
+            )
+          }
+        } else if (message.request.subtype === 'claude_authenticate') {
+          // Anthropic OAuth over the control channel. The SDK client owns
+          // the user's browser (we're headless in -p mode); we hand back
+          // both URLs and wait. Automatic URL → localhost listener catches
+          // the redirect if the browser is on this host; manual URL → the
+          // success page shows "code#state" for claude_oauth_callback.
+          const { loginWithClaudeAi } = message.request
+
+          // Clean up any prior flow. cleanup() closes the localhost listener
+          // and nulls the manual resolver. The prior `flow` promise is left
+          // pending (AuthCodeListener.close() does not reject) but its object
+          // graph becomes unreachable once the server handle is released and
+          // is GC'd — no fd or port is held.
+          claudeOAuth?.service.cleanup()
+
+          logEvent('tengu_oauth_flow_start', {
+            loginWithClaudeAi: loginWithClaudeAi ?? true,
+          })
+
+          const service = new OAuthService()
+          let urlResolver!: (urls: {
+            manualUrl: string
+            automaticUrl: string
+          }) => void
+          const urlPromise = new Promise<{
+            manualUrl: string
+            automaticUrl: string
+          }>(resolve => {
+            urlResolver = resolve
+          })
+
+          const flow = service
+            .startOAuthFlow(
+              async (manualUrl, automaticUrl) => {
+                // automaticUrl is always defined when skipBrowserOpen is set;
+                // the signature is optional only for the existing single-arg callers.
+                urlResolver({ manualUrl, automaticUrl: automaticUrl! })
+              },
+              {
+                loginWithClaudeAi: loginWithClaudeAi ?? true,
+                skipBrowserOpen: true,
+              },
+            )
+            .then(async tokens => {
+              // installOAuthTokens: performLogout (clear stale state) →
+              // store profile → saveOAuthTokensIfNeeded → clearOAuthTokenCache
+              // → clearAuthRelatedCaches. After this resolves, the memoized
+              // getClaudeAIOAuthTokens in this process is invalidated; the
+              // next API call re-reads keychain/file and works. No respawn.
+              await installOAuthTokens(tokens)
+              logEvent('tengu_oauth_success', {
+                loginWithClaudeAi: loginWithClaudeAi ?? true,
+              })
+            })
+            .finally(() => {
+              service.cleanup()
+              if (claudeOAuth?.service === service) {
+                claudeOAuth = null
+              }
+            })
+
+          claudeOAuth = { service, flow }
+
+          // Attach the rejection handler before awaiting so a synchronous
+          // startOAuthFlow failure doesn't surface as an unhandled rejection.
+          // The claude_oauth_callback handler re-awaits flow for the manual
+          // path and surfaces the real error to the client.
+          void flow.catch(err =>
+            logForDebugging(`claude_authenticate flow ended: ${err}`, {
+              level: 'info',
+            }),
+          )
+
+          try {
+            // Race against flow: if startOAuthFlow rejects before calling
+            // the authURLHandler (e.g. AuthCodeListener.start() fails with
+            // EACCES or fd exhaustion), urlPromise would pend forever and
+            // wedge the stdin loop. flow resolving first is unreachable in
+            // practice (it's suspended on the same urls we're waiting for).
+            const { manualUrl, automaticUrl } = await Promise.race([
+              urlPromise,
+              flow.then(() => {
+                throw new Error(
+                  'OAuth flow completed without producing auth URLs',
+                )
+              }),
+            ])
+            sendControlResponseSuccess(message, {
+              manualUrl,
+              automaticUrl,
+            })
+          } catch (error) {
+            sendControlResponseError(message, errorMessage(error))
+          }
+        } else if (
+          message.request.subtype === 'claude_oauth_callback' ||
+          message.request.subtype === 'claude_oauth_wait_for_completion'
+        ) {
+          if (!claudeOAuth) {
+            sendControlResponseError(
+              message,
+              'No active claude_authenticate flow',
+            )
+          } else {
+            // Inject the manual code synchronously — must happen in stdin
+            // message order so a subsequent claude_authenticate doesn't
+            // replace the service before this code lands.
+            if (message.request.subtype === 'claude_oauth_callback') {
+              claudeOAuth.service.handleManualAuthCodeInput({
+                authorizationCode: message.request.authorizationCode,
+                state: message.request.state,
+              })
+            }
+            // Detach the await — the stdin reader is serial and blocking
+            // here deadlocks claude_oauth_wait_for_completion: flow may
+            // only resolve via a future claude_oauth_callback on stdin,
+            // which can't be read while we're parked. Capture the binding;
+            // claudeOAuth is nulled in flow's own .finally.
+            const { flow } = claudeOAuth
+            void flow.then(
+              () => {
+                const accountInfo = getAccountInformation()
+                sendControlResponseSuccess(message, {
+                  account: {
+                    email: accountInfo?.email,
+                    organization: accountInfo?.organization,
+                    subscriptionType: accountInfo?.subscription,
+                    tokenSource: accountInfo?.tokenSource,
+                    apiKeySource: accountInfo?.apiKeySource,
+                    apiProvider: getAPIProvider(),
+                  },
+                })
+              },
+              (error: unknown) =>
+                sendControlResponseError(message, errorMessage(error)),
+            )
+          }
+        } else if (message.request.subtype === 'mcp_clear_auth') {
+          const { serverName } = message.request
+          const currentAppState = getAppState()
+          const config =
+            getMcpConfigByName(serverName) ??
+            mcpClients.find(c => c.name === serverName)?.config ??
+            currentAppState.mcp.clients.find(c => c.name === serverName)
+              ?.config ??
+            null
+          if (!config) {
+            sendControlResponseError(message, `Server not found: ${serverName}`)
+          } else if (config.type !== 'sse' && config.type !== 'http') {
+            sendControlResponseError(
+              message,
+              `Cannot clear auth for server type "${config.type}"`,
+            )
+          } else {
+            await revokeServerTokens(serverName, config)
+            const result = await reconnectMcpServerImpl(serverName, config)
+            const prefix = getMcpPrefix(serverName)
+            setAppState(prev => ({
+              ...prev,
+              mcp: {
+                ...prev.mcp,
+                clients: prev.mcp.clients.map(c =>
+                  c.name === serverName ? result.client : c,
+                ),
+                tools: [
+                  ...reject(prev.mcp.tools, t => t.name?.startsWith(prefix)),
+                  ...result.tools,
+                ],
+                commands: [
+                  ...reject(prev.mcp.commands, c =>
+                    commandBelongsToServer(c, serverName),
+                  ),
+                  ...result.commands,
+                ],
+                resources:
+                  result.resources && result.resources.length > 0
+                    ? {
+                        ...prev.mcp.resources,
+                        [serverName]: result.resources,
+                      }
+                    : omit(prev.mcp.resources, serverName),
+              },
+            }))
+            sendControlResponseSuccess(message, {})
+          }
+        } else if (message.request.subtype === 'apply_flag_settings') {
+          // Snapshot the current model before applying — we need to detect
+          // model switches so we can inject breadcrumbs and notify listeners.
+          const prevModel = getMainLoopModel()
+
+          // Merge the provided settings into the in-memory flag settings
+          const existing = getFlagSettingsInline() ?? {}
+          const incoming = message.request.settings
+          // Shallow-merge top-level keys; getSettingsForSource handles
+          // the deep merge with file-based flag settings via mergeWith.
+          // JSON serialization drops `undefined`, so callers use `null`
+          // to signal "clear this key". Convert nulls to deletions so
+          // SettingsSchema().safeParse() doesn't reject the whole object
+          // (z.string().optional() accepts string | undefined, not null).
+          const merged = { ...existing, ...incoming }
+          for (const key of Object.keys(merged)) {
+            if (merged[key as keyof typeof merged] === null) {
+              delete merged[key as keyof typeof merged]
+            }
+          }
+          setFlagSettingsInline(merged)
+          // Route through notifyChange so fanOut() resets the settings cache
+          // before listeners run. The subscriber at :392 calls
+          // applySettingsChange for us. Pre-#20625 this was a direct
+          // applySettingsChange() call that relied on its own internal reset —
+          // now that the reset is centralized in fanOut, a direct call here
+          // would read stale cached settings and silently drop the update.
+          // Bonus: going through notifyChange also tells the other subscribers
+          // (loadPluginHooks, sandbox-adapter) about the change, which the
+          // previous direct call skipped.
+          settingsChangeDetector.notifyChange('flagSettings')
+
+          // If the incoming settings include a model change, update the
+          // override so getMainLoopModel() reflects it. The override has
+          // higher priority than the settings cascade in
+          // getUserSpecifiedModelSetting(), so without this update,
+          // getMainLoopModel() returns the stale override and the model
+          // change is silently ignored (matching set_model at :2811).
+          if ('model' in incoming) {
+            if (incoming.model != null) {
+              setMainLoopModelOverride(String(incoming.model))
+            } else {
+              setMainLoopModelOverride(undefined)
+            }
+          }
+
+          // If the model changed, inject breadcrumbs so the model sees the
+          // mid-conversation switch, and notify metadata listeners (CCR).
+          const newModel = getMainLoopModel()
+          if (newModel !== prevModel) {
+            activeUserSpecifiedModel = newModel
+            const modelArg = incoming.model ? String(incoming.model) : 'default'
+            notifySessionMetadataChanged({ model: newModel })
+            injectModelSwitchBreadcrumbs(modelArg, newModel)
+          }
+
+          sendControlResponseSuccess(message)
+        } else if (message.request.subtype === 'get_settings') {
+          const currentAppState = getAppState()
+          const model = getMainLoopModel()
+          // modelSupportsEffort gate matches claude.ts — applied.effort must
+          // mirror what actually goes to the API, not just what's configured.
+          const effort = modelSupportsEffort(model)
+            ? resolveAppliedEffort(model, currentAppState.effortValue)
+            : undefined
+          sendControlResponseSuccess(message, {
+            ...getSettingsWithSources(),
+            applied: {
+              model,
+              // Numeric effort (ant-only) → null; SDK schema is string-level only.
+              effort: typeof effort === 'string' ? effort : null,
+            },
+          })
+        } else if (message.request.subtype === 'stop_task') {
+          const { task_id: taskId } = message.request
+          try {
+            await stopTask(taskId, {
+              getAppState,
+              setAppState,
+            })
+            sendControlResponseSuccess(message, {})
+          } catch (error) {
+            sendControlResponseError(message, errorMessage(error))
+          }
+        } else if (message.request.subtype === 'generate_session_title') {
+          // Fire-and-forget so the Haiku call does not block the stdin loop
+          // (which would delay processing of subsequent user messages /
+          // interrupts for the duration of the API roundtrip).
+          const { description, persist } = message.request
+          // Reuse the live controller only if it has not already been aborted
+          // (e.g. by interrupt()); an aborted signal would cause queryHaiku to
+          // immediately throw APIUserAbortError → {title: null}.
+          const titleSignal = (
+            abortController && !abortController.signal.aborted
+              ? abortController
+              : createAbortController()
+          ).signal
+          void (async () => {
+            try {
+              const title = await generateSessionTitle(description, titleSignal)
+              if (title && persist) {
+                try {
+                  saveAiGeneratedTitle(getSessionId() as UUID, title)
+                } catch (e) {
+                  logError(e)
+                }
+              }
+              sendControlResponseSuccess(message, { title })
+            } catch (e) {
+              // Unreachable in practice — generateSessionTitle wraps its
+              // own body and returns null, saveAiGeneratedTitle is wrapped
+              // above. Propagate (not swallow) so unexpected failures are
+              // visible to the SDK caller (hostComms.ts catches and logs).
+              sendControlResponseError(message, errorMessage(e))
+            }
+          })()
+        } else if (message.request.subtype === 'side_question') {
+          // Same fire-and-forget pattern as generate_session_title above —
+          // the forked agent's API roundtrip must not block the stdin loop.
+          //
+          // The snapshot captured by stopHooks (for querySource === 'sdk')
+          // holds the exact systemPrompt/userContext/systemContext/messages
+          // sent on the last main-thread turn. Reusing them gives a byte-
+          // identical prefix → prompt cache hit.
+          //
+          // Fallback (resume before first turn completes — no snapshot yet):
+          // rebuild from scratch. buildSideQuestionFallbackParams mirrors
+          // QueryEngine.ts:ask()'s system prompt assembly (including
+          // --system-prompt / --append-system-prompt) so the rebuilt prefix
+          // matches in the common case. May still miss the cache for
+          // coordinator mode or memory-mechanics extras — acceptable, the
+          // alternative is the side question failing entirely.
+          const { question } = message.request
+          void (async () => {
+            try {
+              const saved = getLastCacheSafeParams()
+              const cacheSafeParams = saved
+                ? {
+                    ...saved,
+                    // If the last turn was interrupted, the snapshot holds an
+                    // already-aborted controller; createChildAbortController in
+                    // createSubagentContext would propagate it and the fork
+                    // would die before sending a request. The controller is
+                    // not part of the cache key — swapping in a fresh one is
+                    // safe. Same guard as generate_session_title above.
+                    toolUseContext: {
+                      ...saved.toolUseContext,
+                      abortController: createAbortController(),
+                    },
+                  }
+                : await buildSideQuestionFallbackParams({
+                    tools: buildAllTools(getAppState()),
+                    commands: currentCommands,
+                    mcpClients: [
+                      ...getAppState().mcp.clients,
+                      ...sdkClients,
+                      ...dynamicMcpState.clients,
+                    ],
+                    messages: mutableMessages,
+                    readFileState,
+                    getAppState,
+                    setAppState,
+                    customSystemPrompt: options.systemPrompt,
+                    appendSystemPrompt: options.appendSystemPrompt,
+                    thinkingConfig: options.thinkingConfig,
+                    agents: currentAgents,
+                  })
+              const result = await runSideQuestion({
+                question,
+                cacheSafeParams,
+              })
+              sendControlResponseSuccess(message, { response: result.response })
+            } catch (e) {
+              sendControlResponseError(message, errorMessage(e))
+            }
+          })()
+        } else if (
+          (feature('PROACTIVE') || feature('KAIROS')) &&
+          (message.request as { subtype: string }).subtype === 'set_proactive'
+        ) {
+          const req = message.request as unknown as {
+            subtype: string
+            enabled: boolean
+          }
+          if (req.enabled) {
+            if (!proactiveModule!.isProactiveActive()) {
+              proactiveModule!.activateProactive('command')
+              scheduleProactiveTick!()
+            }
+          } else {
+            proactiveModule!.deactivateProactive()
+          }
+          sendControlResponseSuccess(message)
+        } else if (message.request.subtype === 'remote_control') {
+          if (message.request.enabled) {
+            if (bridgeHandle) {
+              // Already connected
+              sendControlResponseSuccess(message, {
+                session_url: getRemoteSessionUrl(
+                  bridgeHandle.bridgeSessionId,
+                  bridgeHandle.sessionIngressUrl,
+                ),
+                connect_url: buildBridgeConnectUrl(
+                  bridgeHandle.environmentId,
+                  bridgeHandle.sessionIngressUrl,
+                ),
+                environment_id: bridgeHandle.environmentId,
+              })
+            } else {
+              // initReplBridge surfaces gate-failure reasons via
+              // onStateChange('failed', detail) before returning null.
+              // Capture so the control-response error is actionable
+              // ("/login", "disabled by your organization's policy", etc.)
+              // instead of a generic "initialization failed".
+              let bridgeFailureDetail: string | undefined
+              try {
+                const { initReplBridge } = await import(
+                  'src/bridge/initReplBridge.js'
+                )
+                const handle = await initReplBridge({
+                  onInboundMessage(msg) {
+                    const fields = extractInboundMessageFields(msg)
+                    if (!fields) return
+                    const { content, uuid } = fields
+                    enqueue({
+                      value: content,
+                      mode: 'prompt' as const,
+                      uuid,
+                      skipSlashCommands: true,
+                    })
+                    void run()
+                  },
+                  onPermissionResponse(response) {
+                    // Forward bridge permission responses into the
+                    // stdin processing loop so they resolve pending
+                    // permission requests from the SDK consumer.
+                    structuredIO.injectControlResponse(response)
+                  },
+                  onInterrupt() {
+                    abortController?.abort()
+                  },
+                  onSetModel(model) {
+                    const resolved =
+                      model === 'default' ? getDefaultMainLoopModel() : model
+                    activeUserSpecifiedModel = resolved
+                    setMainLoopModelOverride(resolved)
+                  },
+                  onSetMaxThinkingTokens(maxTokens) {
+                    if (maxTokens === null) {
+                      options.thinkingConfig = undefined
+                    } else if (maxTokens === 0) {
+                      options.thinkingConfig = { type: 'disabled' }
+                    } else {
+                      options.thinkingConfig = {
+                        type: 'enabled',
+                        budgetTokens: maxTokens,
+                      }
+                    }
+                  },
+                  onStateChange(state, detail) {
+                    if (state === 'failed') {
+                      bridgeFailureDetail = detail
+                    }
+                    logForDebugging(
+                      `[bridge:sdk] State change: ${state}${detail ? ` — ${detail}` : ''}`,
+                    )
+                    output.enqueue({
+                      type: 'system' as StdoutMessage['type'],
+                      subtype: 'bridge_state' as string,
+                      state,
+                      detail,
+                      uuid: randomUUID(),
+                      session_id: getSessionId(),
+                    } as StdoutMessage)
+                  },
+                  initialMessages:
+                    mutableMessages.length > 0 ? mutableMessages : undefined,
+                })
+                if (!handle) {
+                  sendControlResponseError(
+                    message,
+                    bridgeFailureDetail ??
+                      'Remote Control initialization failed',
+                  )
+                } else {
+                  bridgeHandle = handle
+                  bridgeLastForwardedIndex = mutableMessages.length
+                  // Forward permission requests to the bridge
+                  structuredIO.setOnControlRequestSent(request => {
+                    handle.sendControlRequest(request)
+                  })
+                  // Cancel stale bridge permission prompts when the SDK
+                  // consumer resolves a can_use_tool request first.
+                  structuredIO.setOnControlRequestResolved(requestId => {
+                    handle.sendControlCancelRequest(requestId)
+                  })
+                  sendControlResponseSuccess(message, {
+                    session_url: getRemoteSessionUrl(
+                      handle.bridgeSessionId,
+                      handle.sessionIngressUrl,
+                    ),
+                    connect_url: buildBridgeConnectUrl(
+                      handle.environmentId,
+                      handle.sessionIngressUrl,
+                    ),
+                    environment_id: handle.environmentId,
+                  })
+                }
+              } catch (err) {
+                sendControlResponseError(message, errorMessage(err))
+              }
+            }
+          } else {
+            // Disable
+            if (bridgeHandle) {
+              structuredIO.setOnControlRequestSent(undefined)
+              structuredIO.setOnControlRequestResolved(undefined)
+              await bridgeHandle.teardown()
+              bridgeHandle = null
+            }
+            sendControlResponseSuccess(message)
+          }
+        } else {
+          // Unknown control request subtype — send an error response so
+          // the caller doesn't hang waiting for a reply that never comes.
+          sendControlResponseError(
+            message,
+            `Unsupported control request subtype: ${(message.request as { subtype: string }).subtype}`,
+          )
+        }
+        continue
+      } else if (message.type === 'control_response') {
+        // Replay control_response messages when replay mode is enabled
+        if (options.replayUserMessages) {
+          output.enqueue(message)
+        }
+        continue
+      } else if (message.type === 'keep_alive') {
+        // Silently ignore keep-alive messages
+        continue
+      } else if (message.type === 'update_environment_variables') {
+        // Handled in structuredIO.ts, but TypeScript needs the type guard
+        continue
+      } else if (message.type === 'assistant' || message.type === 'system') {
+        // History replay from bridge: inject into mutableMessages as
+        // conversation context so the model sees prior turns.
+        const internalMsgs = toInternalMessages([message])
+        mutableMessages.push(...internalMsgs)
+        // Echo assistant messages back so CCR displays them
+        if (message.type === 'assistant' && options.replayUserMessages) {
+          output.enqueue(message)
+        }
+        continue
+      }
+      // After handling control, keep-alive, env-var, assistant, and system
+      // messages above, only user messages should remain.
+      if (message.type !== 'user') {
+        continue
+      }
+
+      // First prompt message implicitly initializes if not already done.
+      initialized = true
+
+      // Check for duplicate user message - skip if already processed
+      if (message.uuid) {
+        const sessionId = getSessionId() as UUID
+        const existsInSession = await doesMessageExistInSession(
+          sessionId,
+          message.uuid,
+        )
+
+        // Check both historical duplicates (from file) and runtime duplicates (this session)
+        if (existsInSession || receivedMessageUuids.has(message.uuid)) {
+          logForDebugging(`Skipping duplicate user message: ${message.uuid}`)
+          // Send acknowledgment for duplicate message if replay mode is enabled
+          if (options.replayUserMessages) {
+            logForDebugging(
+              `Sending acknowledgment for duplicate user message: ${message.uuid}`,
+            )
+            output.enqueue({
+              type: 'user',
+              message: message.message,
+              session_id: sessionId,
+              parent_tool_use_id: null,
+              uuid: message.uuid,
+              timestamp: message.timestamp,
+              isReplay: true,
+            } as SDKUserMessageReplay)
+          }
+          // Historical dup = transcript already has this turn's output, so it
+          // ran but its lifecycle was never closed (interrupted before ack).
+          // Runtime dups don't need this — the original enqueue path closes them.
+          if (existsInSession) {
+            notifyCommandLifecycle(message.uuid, 'completed')
+          }
+          // Don't enqueue duplicate messages for execution
+          continue
+        }
+
+        // Track this UUID to prevent runtime duplicates
+        trackReceivedMessageUuid(message.uuid)
+      }
+
+      enqueue({
+        mode: 'prompt' as const,
+        // file_attachments rides the protobuf catchall from the web composer.
+        // Same-ref no-op when absent (no 'file_attachments' key).
+        value: await resolveAndPrepend(message, message.message.content),
+        uuid: message.uuid,
+        priority: message.priority,
+      })
+      // Increment prompt count for attribution tracking and save snapshot
+      // The snapshot persists promptCount so it survives compaction
+      if (feature('COMMIT_ATTRIBUTION')) {
+        setAppState(prev => ({
+          ...prev,
+          attribution: incrementPromptCount(prev.attribution, snapshot => {
+            void recordAttributionSnapshot(snapshot).catch(error => {
+              logForDebugging(`Attribution: Failed to save snapshot: ${error}`)
+            })
+          }),
+        }))
+      }
+      void run()
+    }
+    inputClosed = true
+    cronScheduler?.stop()
+    if (!running) {
+      // If a push-suggestion is in-flight, wait for it to emit before closing
+      // the output stream (5 s safety timeout to prevent hanging).
+      if (suggestionState.inflightPromise) {
+        await Promise.race([suggestionState.inflightPromise, sleep(5000)])
+      }
+      suggestionState.abortController?.abort()
+      suggestionState.abortController = null
+      await finalizePendingAsyncHooks()
+      unsubscribeSkillChanges()
+      unsubscribeAuthStatus?.()
+      statusListeners.delete(rateLimitListener)
+      output.done()
+    }
+  })()
+
+  return output
+}
+
+/**
+ * Creates a CanUseToolFn that incorporates a custom permission prompt tool.
+ * This function converts the permissionPromptTool into a CanUseToolFn that can be used in ask.tsx
+ */
+export function createCanUseToolWithPermissionPrompt(
+  permissionPromptTool: PermissionPromptTool,
+): CanUseToolFn {
+  const canUseTool: CanUseToolFn = async (
+    tool,
+    input,
+    toolUseContext,
+    assistantMessage,
+    toolUseId,
+    forceDecision,
+  ) => {
+    const mainPermissionResult =
+      forceDecision ??
+      (await hasPermissionsToUseTool(
+        tool,
+        input,
+        toolUseContext,
+        assistantMessage,
+        toolUseId,
+      ))
+
+    // If the tool is allowed or denied, return the result
+    if (
+      mainPermissionResult.behavior === 'allow' ||
+      mainPermissionResult.behavior === 'deny'
+    ) {
+      return mainPermissionResult
+    }
+
+    // Race the permission prompt tool against the abort signal.
+    //
+    // Why we need this: The permission prompt tool may block indefinitely waiting
+    // for user input (e.g., via stdin or a UI dialog). If the user triggers an
+    // interrupt (Ctrl+C), we need to detect it even while the tool is blocked.
+    // Without this race, the abort check would only run AFTER the tool completes,
+    // which may never happen if the tool is waiting for input that will never come.
+    //
+    // The second check (combinedSignal.aborted) handles a race condition where
+    // abort fires after Promise.race resolves but before we reach this check.
+    const { signal: combinedSignal, cleanup: cleanupAbortListener } =
+      createCombinedAbortSignal(toolUseContext.abortController.signal)
+
+    // Check if already aborted before starting the race
+    if (combinedSignal.aborted) {
+      cleanupAbortListener()
+      return {
+        behavior: 'deny',
+        message: 'Permission prompt was aborted.',
+        decisionReason: {
+          type: 'permissionPromptTool' as const,
+          permissionPromptToolName: tool.name,
+          toolResult: undefined,
+        },
+      }
+    }
+
+    const abortPromise = new Promise<'aborted'>(resolve => {
+      combinedSignal.addEventListener('abort', () => resolve('aborted'), {
+        once: true,
+      })
+    })
+
+    const toolCallPromise = permissionPromptTool.call(
+      {
+        tool_name: tool.name,
+        input,
+        tool_use_id: toolUseId,
+      },
+      toolUseContext,
+      canUseTool,
+      assistantMessage,
+    )
+
+    const raceResult = await Promise.race([toolCallPromise, abortPromise])
+    cleanupAbortListener()
+
+    if (raceResult === 'aborted' || combinedSignal.aborted) {
+      return {
+        behavior: 'deny',
+        message: 'Permission prompt was aborted.',
+        decisionReason: {
+          type: 'permissionPromptTool' as const,
+          permissionPromptToolName: tool.name,
+          toolResult: undefined,
+        },
+      }
+    }
+
+    // TypeScript narrowing: after the abort check, raceResult must be ToolResult
+    const result = raceResult as Awaited<typeof toolCallPromise>
+
+    const permissionToolResultBlockParam =
+      permissionPromptTool.mapToolResultToToolResultBlockParam(result.data, '1')
+    if (
+      !permissionToolResultBlockParam.content ||
+      !Array.isArray(permissionToolResultBlockParam.content) ||
+      !permissionToolResultBlockParam.content[0] ||
+      permissionToolResultBlockParam.content[0].type !== 'text' ||
+      typeof permissionToolResultBlockParam.content[0].text !== 'string'
+    ) {
+      throw new Error(
+        'Permission prompt tool returned an invalid result. Expected a single text block param with type="text" and a string text value.',
+      )
+    }
+    return permissionPromptToolResultToPermissionDecision(
+      permissionToolOutputSchema().parse(
+        safeParseJSON(permissionToolResultBlockParam.content[0].text),
+      ),
+      permissionPromptTool,
+      input,
+      toolUseContext,
+    )
+  }
+  return canUseTool
+}
+
+// Exported for testing — regression: this used to crash at construction when
+// getMcpTools() was empty (before per-server connects populated appState).
+export function getCanUseToolFn(
+  permissionPromptToolName: string | undefined,
+  structuredIO: StructuredIO,
+  getMcpTools: () => Tool[],
+  onPermissionPrompt?: (details: RequiresActionDetails) => void,
+): CanUseToolFn {
+  if (permissionPromptToolName === 'stdio') {
+    return structuredIO.createCanUseTool(onPermissionPrompt)
+  }
+  if (!permissionPromptToolName) {
+    return async (
+      tool,
+      input,
+      toolUseContext,
+      assistantMessage,
+      toolUseId,
+      forceDecision,
+    ) =>
+      forceDecision ??
+      (await hasPermissionsToUseTool(
+        tool,
+        input,
+        toolUseContext,
+        assistantMessage,
+        toolUseId,
+      ))
+  }
+  // Lazy lookup: MCP connects are per-server incremental in print mode, so
+  // the tool may not be in appState yet at init time. Resolve on first call
+  // (first permission prompt), by which point connects have had time to finish.
+  let resolved: CanUseToolFn | null = null
+  return async (
+    tool,
+    input,
+    toolUseContext,
+    assistantMessage,
+    toolUseId,
+    forceDecision,
+  ) => {
+    if (!resolved) {
+      const mcpTools = getMcpTools()
+      const permissionPromptTool = mcpTools.find(t =>
+        toolMatchesName(t, permissionPromptToolName),
+      ) as PermissionPromptTool | undefined
+      if (!permissionPromptTool) {
+        const error = `Error: MCP tool ${permissionPromptToolName} (passed via --permission-prompt-tool) not found. Available MCP tools: ${mcpTools.map(t => t.name).join(', ') || 'none'}`
+        process.stderr.write(`${error}\n`)
+        gracefulShutdownSync(1)
+        throw new Error(error)
+      }
+      if (!permissionPromptTool.inputJSONSchema) {
+        const error = `Error: tool ${permissionPromptToolName} (passed via --permission-prompt-tool) must be an MCP tool`
+        process.stderr.write(`${error}\n`)
+        gracefulShutdownSync(1)
+        throw new Error(error)
+      }
+      resolved = createCanUseToolWithPermissionPrompt(permissionPromptTool)
+    }
+    return resolved(
+      tool,
+      input,
+      toolUseContext,
+      assistantMessage,
+      toolUseId,
+      forceDecision,
+    )
+  }
+}
+
/**
 * Handles the SDK `initialize` control request.
 *
 * Applies request-supplied overrides (system prompt, agents, hooks, JSON
 * schema) to the mutable session state, then replies with a
 * control_response describing the session's capabilities (commands, agents,
 * models, account info).
 *
 * @param request - The initialize payload read from stdin.
 * @param requestId - Echoed back in the control_response for correlation.
 * @param initialized - Whether a prior initialize (or first prompt) already
 *   ran; a repeat initialize is rejected with an error response.
 * @param output - Stream carrying control_response / auth_status messages
 *   back to the SDK consumer.
 * @param commands - Slash commands advertised in the response
 *   (non-user-invocable ones are filtered out).
 * @param modelInfos - Models advertised in the response.
 * @param structuredIO - Stdin/stdout bridge; used for hook callbacks,
 *   pending permission requests, and prepending agent initial prompts.
 * @param enableAuthStatus - When true, emits the current auth status right
 *   after the response.
 * @param options - Mutable session options; systemPrompt /
 *   appendSystemPrompt / promptSuggestions may be overwritten from the
 *   request (sent via stdin to avoid ARG_MAX limits).
 * @param agents - Mutable agent list; request-supplied agents are appended.
 * @param getAppState - Lazily reads app state (only needed for fast mode).
 */
async function handleInitializeRequest(
  request: SDKControlInitializeRequest,
  requestId: string,
  initialized: boolean,
  output: Stream<StdoutMessage>,
  commands: Command[],
  modelInfos: ModelInfo[],
  structuredIO: StructuredIO,
  enableAuthStatus: boolean,
  options: {
    systemPrompt: string | undefined
    appendSystemPrompt: string | undefined
    agent?: string | undefined
    userSpecifiedModel?: string | undefined
    [key: string]: unknown
  },
  agents: AgentDefinition[],
  getAppState: () => AppState,
): Promise<void> {
  // Reject a repeat initialize: reply with an error (plus the currently
  // pending permission requests) instead of re-applying overrides.
  if (initialized) {
    output.enqueue({
      type: 'control_response',
      response: {
        subtype: 'error',
        error: 'Already initialized',
        request_id: requestId,
        pending_permission_requests:
          structuredIO.getPendingPermissionRequests(),
      },
    })
    return
  }

  // Apply systemPrompt/appendSystemPrompt from stdin to avoid ARG_MAX limits
  if (request.systemPrompt !== undefined) {
    options.systemPrompt = request.systemPrompt
  }
  if (request.appendSystemPrompt !== undefined) {
    options.appendSystemPrompt = request.appendSystemPrompt
  }
  if (request.promptSuggestions !== undefined) {
    options.promptSuggestions = request.promptSuggestions
  }

  // Merge agents from stdin to avoid ARG_MAX limits
  if (request.agents) {
    const stdinAgents = parseAgentsFromJson(request.agents, 'flagSettings')
    agents.push(...stdinAgents)
  }

  // Re-evaluate main thread agent after SDK agents are merged
  // This allows --agent to reference agents defined via SDK
  if (options.agent) {
    // If main.tsx already found this agent (filesystem-defined), it already
    // applied systemPrompt/model/initialPrompt. Skip to avoid double-apply.
    const alreadyResolved = getMainThreadAgentType() === options.agent
    const mainThreadAgent = agents.find(a => a.agentType === options.agent)
    if (mainThreadAgent && !alreadyResolved) {
      // Update the main thread agent type in bootstrap state
      setMainThreadAgentType(mainThreadAgent.agentType)

      // Apply the agent's system prompt if user hasn't specified a custom one
      // SDK agents are always custom agents (not built-in), so getSystemPrompt() takes no args
      if (!options.systemPrompt && !isBuiltInAgent(mainThreadAgent)) {
        const agentSystemPrompt = mainThreadAgent.getSystemPrompt()
        if (agentSystemPrompt) {
          options.systemPrompt = agentSystemPrompt
        }
      }

      // Apply the agent's model if user didn't specify one and agent has a model
      if (
        !options.userSpecifiedModel &&
        mainThreadAgent.model &&
        mainThreadAgent.model !== 'inherit'
      ) {
        const agentModel = parseUserSpecifiedModel(mainThreadAgent.model)
        setMainLoopModelOverride(agentModel)
      }

      // SDK-defined agents arrive via init, so main.tsx's lookup missed them.
      if (mainThreadAgent.initialPrompt) {
        structuredIO.prependUserMessage(mainThreadAgent.initialPrompt)
      }
    } else if (mainThreadAgent?.initialPrompt) {
      // Filesystem-defined agent (alreadyResolved by main.tsx). main.tsx
      // handles initialPrompt for the string inputPrompt case, but when
      // inputPrompt is an AsyncIterable (SDK stream-json), it can't
      // concatenate — fall back to prependUserMessage here.
      structuredIO.prependUserMessage(mainThreadAgent.initialPrompt)
    }
  }

  const settings = getSettings_DEPRECATED()
  const outputStyle = settings?.outputStyle || DEFAULT_OUTPUT_STYLE_NAME
  const availableOutputStyles = await getAllOutputStyles(getCwd())

  // Get account information
  const accountInfo = getAccountInformation()
  // Hook matchers arrive as callback IDs; wrap each in a StructuredIO
  // callback that round-trips hook invocations over the control channel.
  if (request.hooks) {
    const hooks: Partial<Record<HookEvent, HookCallbackMatcher[]>> = {}
    for (const [event, matchers] of Object.entries(request.hooks)) {
      hooks[event as HookEvent] = matchers.map(matcher => {
        const callbacks = matcher.hookCallbackIds.map(callbackId => {
          return structuredIO.createHookCallback(callbackId, matcher.timeout)
        })
        return {
          matcher: matcher.matcher,
          hooks: callbacks,
        }
      })
    }
    registerHookCallbacks(hooks)
  }
  if (request.jsonSchema) {
    setInitJsonSchema(request.jsonSchema)
  }
  const initResponse: SDKControlInitializeResponse = {
    commands: commands
      .filter(cmd => cmd.userInvocable !== false)
      .map(cmd => ({
        name: getCommandName(cmd),
        description: formatDescriptionWithSource(cmd),
        argumentHint: cmd.argumentHint || '',
      })),
    agents: agents.map(agent => ({
      name: agent.agentType,
      description: agent.whenToUse,
      // 'inherit' is an internal sentinel; normalize to undefined for the public API
      model: agent.model === 'inherit' ? undefined : agent.model,
    })),
    output_style: outputStyle,
    available_output_styles: Object.keys(availableOutputStyles),
    models: modelInfos,
    account: {
      email: accountInfo?.email,
      organization: accountInfo?.organization,
      subscriptionType: accountInfo?.subscription,
      tokenSource: accountInfo?.tokenSource,
      apiKeySource: accountInfo?.apiKeySource,
      // getAccountInformation() returns undefined under 3P providers, so the
      // other fields are all absent. apiProvider disambiguates "not logged
      // in" (firstParty + tokenSource:none) from "3P, login not applicable".
      apiProvider: getAPIProvider(),
    },
    pid: process.pid,
  }

  if (isFastModeEnabled() && isFastModeAvailable()) {
    const appState = getAppState()
    initResponse.fast_mode_state = getFastModeState(
      options.userSpecifiedModel ?? null,
      appState.fastMode,
    )
  }

  output.enqueue({
    type: 'control_response',
    response: {
      subtype: 'success',
      request_id: requestId,
      response: initResponse,
    },
  })

  // After the initialize message, check the auth status-
  // This will get notified of changes, but we also want to send the
  // initial state.
  if (enableAuthStatus) {
    const authStatusManager = AwsAuthStatusManager.getInstance()
    const status = authStatusManager.getStatus()
    if (status) {
      output.enqueue({
        type: 'auth_status',
        isAuthenticating: status.isAuthenticating,
        output: status.output,
        error: status.error,
        uuid: randomUUID(),
        session_id: getSessionId(),
      })
    }
  }
}
+
+async function handleRewindFiles(
+  userMessageId: UUID,
+  appState: AppState,
+  setAppState: (updater: (prev: AppState) => AppState) => void,
+  dryRun: boolean,
+): Promise<RewindFilesResult> {
+  if (!fileHistoryEnabled()) {
+    return { canRewind: false, error: 'File rewinding is not enabled.' }
+  }
+  if (!fileHistoryCanRestore(appState.fileHistory, userMessageId)) {
+    return {
+      canRewind: false,
+      error: 'No file checkpoint found for this message.',
+    }
+  }
+
+  if (dryRun) {
+    const diffStats = await fileHistoryGetDiffStats(
+      appState.fileHistory,
+      userMessageId,
+    )
+    return {
+      canRewind: true,
+      filesChanged: diffStats?.filesChanged,
+      insertions: diffStats?.insertions,
+      deletions: diffStats?.deletions,
+    }
+  }
+
+  try {
+    await fileHistoryRewind(
+      updater =>
+        setAppState(prev => ({
+          ...prev,
+          fileHistory: updater(prev.fileHistory),
+        })),
+      userMessageId,
+    )
+  } catch (error) {
+    return {
+      canRewind: false,
+      error: `Failed to rewind: ${errorMessage(error)}`,
+    }
+  }
+
+  return { canRewind: true }
+}
+
+function handleSetPermissionMode(
+  request: { mode: InternalPermissionMode },
+  requestId: string,
+  toolPermissionContext: ToolPermissionContext,
+  output: Stream<StdoutMessage>,
+): ToolPermissionContext {
+  // Check if trying to switch to bypassPermissions mode
+  if (request.mode === 'bypassPermissions') {
+    if (isBypassPermissionsModeDisabled()) {
+      output.enqueue({
+        type: 'control_response',
+        response: {
+          subtype: 'error',
+          request_id: requestId,
+          error:
+            'Cannot set permission mode to bypassPermissions because it is disabled by settings or configuration',
+        },
+      })
+      return toolPermissionContext
+    }
+    if (!toolPermissionContext.isBypassPermissionsModeAvailable) {
+      output.enqueue({
+        type: 'control_response',
+        response: {
+          subtype: 'error',
+          request_id: requestId,
+          error:
+            'Cannot set permission mode to bypassPermissions because the session was not launched with --dangerously-skip-permissions',
+        },
+      })
+      return toolPermissionContext
+    }
+  }
+
+  // Check if trying to switch to auto mode without the classifier gate
+  if (
+    feature('TRANSCRIPT_CLASSIFIER') &&
+    request.mode === 'auto' &&
+    !isAutoModeGateEnabled()
+  ) {
+    const reason = getAutoModeUnavailableReason()
+    output.enqueue({
+      type: 'control_response',
+      response: {
+        subtype: 'error',
+        request_id: requestId,
+        error: reason
+          ? `Cannot set permission mode to auto: ${getAutoModeUnavailableNotification(reason)}`
+          : 'Cannot set permission mode to auto',
+      },
+    })
+    return toolPermissionContext
+  }
+
+  // Allow the mode switch
+  output.enqueue({
+    type: 'control_response',
+    response: {
+      subtype: 'success',
+      request_id: requestId,
+      response: {
+        mode: request.mode,
+      },
+    },
+  })
+
+  return {
+    ...transitionPermissionMode(
+      toolPermissionContext.mode,
+      request.mode,
+      toolPermissionContext,
+    ),
+    mode: request.mode,
+  }
+}
+
/**
 * IDE-triggered channel enable. Derives the ChannelEntry from the connection's
 * pluginSource (IDE can't spoof kind/marketplace — we only take the server
 * name), appends it to session allowedChannels, and runs the full gate. On
 * gate failure, rolls back the append. On success, registers a notification
 * handler that enqueues channel messages at priority:'next' — drainCommandQueue
 * picks them up between turns.
 *
 * Intentionally does NOT register the claude/channel/permission handler that
 * useManageMCPConnections sets up for interactive mode. That handler resolves
 * a pending dialog inside handleInteractivePermission — but print.ts never
 * calls handleInteractivePermission. When SDK permission lands on 'ask', it
 * goes to the consumer's canUseTool callback over stdio; there is no CLI-side
 * dialog for a remote "yes tbxkq" to resolve. If an IDE wants channel-relayed
 * tool approval, that's IDE-side plumbing against its own pending-map. (Also
 * gated separately by tengu_harbor_permissions — not yet shipping on
 * interactive either.)
 *
 * @param requestId - Control-request id echoed in the success/error response.
 * @param serverName - Name of the MCP server the IDE asked to channel-enable.
 * @param connectionPool - Current MCP connections; only a 'connected' entry
 *   for serverName is usable.
 * @param output - Stream carrying the control_response back to the caller.
 */
function handleChannelEnable(
  requestId: string,
  serverName: string,
  connectionPool: readonly MCPServerConnection[],
  output: Stream<StdoutMessage>,
): void {
  // Emits an error control_response for this request.
  const respondError = (error: string) =>
    output.enqueue({
      type: 'control_response',
      response: { subtype: 'error', request_id: requestId, error },
    })

  if (!(feature('KAIROS') || feature('KAIROS_CHANNELS'))) {
    return respondError('channels feature not available in this build')
  }

  // Only a 'connected' client has .capabilities and .client to register the
  // handler on. The pool spread at the call site matches mcp_status.
  const connection = connectionPool.find(
    c => c.name === serverName && c.type === 'connected',
  )
  if (!connection || connection.type !== 'connected') {
    return respondError(`server ${serverName} is not connected`)
  }

  const pluginSource = connection.config.pluginSource
  const parsed = pluginSource ? parsePluginIdentifier(pluginSource) : undefined
  if (!parsed?.marketplace) {
    // No pluginSource or @-less source — can never pass the {plugin,
    // marketplace}-keyed allowlist. Short-circuit with the same reason the
    // gate would produce.
    return respondError(
      `server ${serverName} is not plugin-sourced; channel_enable requires a marketplace plugin`,
    )
  }

  // Entry is derived server-side from the parsed plugin identifier — the IDE
  // never supplies kind/marketplace directly.
  const entry: ChannelEntry = {
    kind: 'plugin',
    name: parsed.name,
    marketplace: parsed.marketplace,
  }
  // Idempotency: don't double-append on repeat enable.
  const prior = getAllowedChannels()
  const already = prior.some(
    e =>
      e.kind === 'plugin' &&
      e.name === entry.name &&
      e.marketplace === entry.marketplace,
  )
  if (!already) setAllowedChannels([...prior, entry])

  // Run the full gate AFTER the append so it sees the candidate entry.
  const gate = gateChannelServer(
    serverName,
    connection.capabilities,
    pluginSource,
  )
  if (gate.action === 'skip') {
    // Rollback — only remove the entry we appended.
    if (!already) setAllowedChannels(prior)
    return respondError(gate.reason)
  }

  // Analytics identifier in "<plugin>@<marketplace>" form.
  const pluginId =
    `${entry.name}@${entry.marketplace}` as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  logMCPDebug(serverName, 'Channel notifications registered')
  logEvent('tengu_mcp_channel_enable', { plugin: pluginId })

  // Identical enqueue shape to the interactive register block in
  // useManageMCPConnections. drainCommandQueue processes it between turns —
  // channel messages queue at priority 'next' and are seen by the model on
  // the turn after they arrive.
  connection.client.setNotificationHandler(
    ChannelMessageNotificationSchema(),
    async notification => {
      const { content, meta } = notification.params
      logMCPDebug(
        serverName,
        `notifications/claude/channel: ${content.slice(0, 80)}`,
      )
      logEvent('tengu_mcp_channel_message', {
        content_length: content.length,
        meta_key_count: Object.keys(meta ?? {}).length,
        entry_kind:
          'plugin' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        is_dev: false,
        plugin: pluginId,
      })
      enqueue({
        mode: 'prompt',
        value: wrapChannelMessage(serverName, content, meta),
        priority: 'next',
        isMeta: true,
        origin: { kind: 'channel', server: serverName },
        skipSlashCommands: true,
      })
    },
  )

  // Success: no payload beyond the acknowledgment itself.
  output.enqueue({
    type: 'control_response',
    response: {
      subtype: 'success',
      request_id: requestId,
      response: undefined,
    },
  })
}
+
+/**
+ * Re-register the channel notification handler after mcp_reconnect /
+ * mcp_toggle creates a new client. handleChannelEnable bound the handler to
+ * the OLD client object; allowedChannels survives the reconnect but the
+ * handler binding does not. Without this, channel messages silently drop
+ * after a reconnect while the IDE still believes the channel is live.
+ *
+ * Mirrors the interactive CLI's onConnectionAttempt in
+ * useManageMCPConnections, which re-gates on every new connection. Paired
+ * with registerElicitationHandlers at the same call sites.
+ *
+ * No-op if the server was never channel-enabled: gateChannelServer calls
+ * findChannelEntry internally and returns skip/session for an unlisted
+ * server, so reconnecting a non-channel MCP server costs one feature-flag
+ * check.
+ */
+function reregisterChannelHandlerAfterReconnect(
+  connection: MCPServerConnection,
+): void {
+  // Feature-gated; only a live ('connected') client can carry a handler.
+  if (!(feature('KAIROS') || feature('KAIROS_CHANNELS'))) return
+  if (connection.type !== 'connected') return
+
+  // Re-run the same gate used at initial registration; any verdict other
+  // than an explicit 'register' means this server gets no channel handler.
+  const gate = gateChannelServer(
+    connection.name,
+    connection.capabilities,
+    connection.config.pluginSource,
+  )
+  if (gate.action !== 'register') return
+
+  // Resolve the allow-list entry for analytics attribution; plugin-sourced
+  // channels are tagged name@marketplace, anything else stays untagged.
+  const entry = findChannelEntry(connection.name, getAllowedChannels())
+  const pluginId =
+    entry?.kind === 'plugin'
+      ? (`${entry.name}@${entry.marketplace}` as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
+      : undefined
+
+  logMCPDebug(
+    connection.name,
+    'Channel notifications re-registered after reconnect',
+  )
+  // Attach the handler to the NEW client object; the old binding died with
+  // the previous client (see header comment). The handler closes over
+  // `connection`/`entry`, so attribution reflects reconnect-time state.
+  connection.client.setNotificationHandler(
+    ChannelMessageNotificationSchema(),
+    async notification => {
+      const { content, meta } = notification.params
+      logMCPDebug(
+        connection.name,
+        `notifications/claude/channel: ${content.slice(0, 80)}`,
+      )
+      logEvent('tengu_mcp_channel_message', {
+        content_length: content.length,
+        meta_key_count: Object.keys(meta ?? {}).length,
+        entry_kind:
+          entry?.kind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        is_dev: entry?.dev ?? false,
+        plugin: pluginId,
+      })
+      // Channel text enters the main loop as a high-priority meta prompt;
+      // slash-command parsing is explicitly skipped for the injected text.
+      enqueue({
+        mode: 'prompt',
+        value: wrapChannelMessage(connection.name, content, meta),
+        priority: 'next',
+        isMeta: true,
+        origin: { kind: 'channel', server: connection.name },
+        skipSlashCommands: true,
+      })
+    },
+  )
+}
+
+/**
+ * Reports a load-time failure to the user in the requested output format.
+ * For `stream-json`, a synthetic `result` record is written to stdout so SDK
+ * consumers see a structured error; every other format gets the raw message
+ * on stderr.
+ */
+function emitLoadError(
+  message: string,
+  outputFormat: string | undefined,
+): void {
+  if (outputFormat !== 'stream-json') {
+    process.stderr.write(message + '\n')
+    return
+  }
+  process.stdout.write(
+    jsonStringify({
+      type: 'result',
+      subtype: 'error_during_execution',
+      duration_ms: 0,
+      duration_api_ms: 0,
+      is_error: true,
+      num_turns: 0,
+      stop_reason: null,
+      session_id: getSessionId(),
+      total_cost_usd: 0,
+      usage: EMPTY_USAGE,
+      modelUsage: {},
+      permission_denials: [],
+      uuid: randomUUID(),
+      errors: [message],
+    }) + '\n',
+  )
+}
+
+/**
+ * Removes an interrupted user message and its synthetic assistant sentinel
+ * from the message array. Used during gateway-triggered restarts to clean up
+ * the message history before re-enqueuing the interrupted prompt.
+ *
+ * @internal Exported for testing
+ */
+export function removeInterruptedMessage(
+  messages: Message[],
+  interruptedUserMessage: NormalizedUserMessage,
+): void {
+  const idx = messages.findIndex(m => m.uuid === interruptedUserMessage.uuid)
+  if (idx !== -1) {
+    // Remove the user message and the sentinel that immediately follows it.
+    // splice safely handles the case where idx is the last element.
+    messages.splice(idx, 2)
+  }
+}
+
+// Shape returned by loadInitialMessages: the hydrated transcript plus any
+// interruption/agent state recovered from a resumed session log.
+type LoadInitialMessagesResult = {
+  messages: Message[]
+  turnInterruptionState?: TurnInterruptionState
+  agentSetting?: string
+}
+
+/**
+ * Loads the initial message history for print mode based on the
+ * continue/teleport/resume options, restoring session state and metadata
+ * when a prior session is found. When none of the flags match (or --continue
+ * finds no prior session), falls through to running SessionStart hooks for a
+ * fresh session. Error paths log, emit via emitLoadError where applicable,
+ * and request a graceful shutdown with exit code 1.
+ */
+async function loadInitialMessages(
+  setAppState: (f: (prev: AppState) => AppState) => void,
+  options: {
+    continue: boolean | undefined
+    teleport: string | true | null | undefined
+    resume: string | boolean | undefined
+    resumeSessionAt: string | undefined
+    forkSession: boolean | undefined
+    outputFormat: string | undefined
+    sessionStartHooksPromise?: ReturnType<typeof processSessionStartHooks>
+    restoredWorkerState: Promise<SessionExternalMetadata | null>
+  },
+): Promise<LoadInitialMessagesResult> {
+  const persistSession = !isSessionPersistenceDisabled()
+  // Handle continue in print mode
+  if (options.continue) {
+    try {
+      logEvent('tengu_continue_print', {})
+
+      // A falsy result (no prior conversation) falls through to the
+      // fresh-session SessionStart path at the bottom of this function.
+      const result = await loadConversationForResume(
+        undefined /* sessionId */,
+        undefined /* file path */,
+      )
+      if (result) {
+        // Match coordinator mode to the resumed session's mode
+        // NOTE(review): this coordinator-mode refresh is duplicated in the
+        // --resume path below; consider extracting a shared helper.
+        if (feature('COORDINATOR_MODE') && coordinatorModeModule) {
+          const warning = coordinatorModeModule.matchSessionMode(result.mode)
+          if (warning) {
+            process.stderr.write(warning + '\n')
+            // Refresh agent definitions to reflect the mode switch
+            const {
+              getAgentDefinitionsWithOverrides,
+              getActiveAgentsFromList,
+            } =
+              // eslint-disable-next-line @typescript-eslint/no-require-imports
+              require('../tools/AgentTool/loadAgentsDir.js') as typeof import('../tools/AgentTool/loadAgentsDir.js')
+            getAgentDefinitionsWithOverrides.cache.clear?.()
+            const freshAgentDefs = await getAgentDefinitionsWithOverrides(
+              getCwd(),
+            )
+
+            setAppState(prev => ({
+              ...prev,
+              agentDefinitions: {
+                ...freshAgentDefs,
+                allAgents: freshAgentDefs.allAgents,
+                activeAgents: getActiveAgentsFromList(freshAgentDefs.allAgents),
+              },
+            }))
+          }
+        }
+
+        // Reuse the resumed session's ID
+        if (!options.forkSession) {
+          if (result.sessionId) {
+            switchSession(
+              asSessionId(result.sessionId),
+              result.fullPath ? dirname(result.fullPath) : null,
+            )
+            if (persistSession) {
+              await resetSessionFilePointer()
+            }
+          }
+        }
+        restoreSessionStateFromLog(result, setAppState)
+
+        // Restore session metadata so it's re-appended on exit via reAppendSessionMetadata
+        // (forked sessions drop the worktree association).
+        restoreSessionMetadata(
+          options.forkSession
+            ? { ...result, worktreeSession: undefined }
+            : result,
+        )
+
+        // Write mode entry for the resumed session
+        if (feature('COORDINATOR_MODE') && coordinatorModeModule) {
+          saveMode(
+            coordinatorModeModule.isCoordinatorMode()
+              ? 'coordinator'
+              : 'normal',
+          )
+        }
+
+        return {
+          messages: result.messages,
+          turnInterruptionState: result.turnInterruptionState,
+          agentSetting: result.agentSetting,
+        }
+      }
+    } catch (error) {
+      logError(error)
+      gracefulShutdownSync(1)
+      return { messages: [] }
+    }
+  }
+
+  // Handle teleport in print mode
+  if (options.teleport) {
+    try {
+      if (!isPolicyAllowed('allow_remote_sessions')) {
+        throw new Error(
+          "Remote sessions are disabled by your organization's policy.",
+        )
+      }
+
+      logEvent('tengu_teleport_print', {})
+
+      if (typeof options.teleport !== 'string') {
+        throw new Error('No session ID provided for teleport')
+      }
+
+      // Lazy-loaded: teleport machinery is only needed on this path.
+      const {
+        checkOutTeleportedSessionBranch,
+        processMessagesForTeleportResume,
+        teleportResumeCodeSession,
+        validateGitState,
+      } = await import('src/utils/teleport.js')
+      await validateGitState()
+      const teleportResult = await teleportResumeCodeSession(options.teleport)
+      // Branch checkout failure is not fatal; it is folded into the message
+      // processing below rather than thrown.
+      const { branchError } = await checkOutTeleportedSessionBranch(
+        teleportResult.branch,
+      )
+      return {
+        messages: processMessagesForTeleportResume(
+          teleportResult.log,
+          branchError,
+        ),
+      }
+    } catch (error) {
+      logError(error)
+      gracefulShutdownSync(1)
+      return { messages: [] }
+    }
+  }
+
+  // Handle resume in print mode (accepts session ID or URL)
+  // URLs are [ANT-ONLY]
+  if (options.resume) {
+    try {
+      logEvent('tengu_resume_print', {})
+
+      // In print mode - we require a valid session ID, JSONL file or URL
+      const parsedSessionId = parseSessionIdentifier(
+        typeof options.resume === 'string' ? options.resume : '',
+      )
+      if (!parsedSessionId) {
+        let errorMessage =
+          'Error: --resume requires a valid session ID when used with --print. Usage: claude -p --resume <session-id>'
+        if (typeof options.resume === 'string') {
+          errorMessage += `. Session IDs must be in UUID format (e.g., 550e8400-e29b-41d4-a716-446655440000). Provided value "${options.resume}" is not a valid UUID`
+        }
+        emitLoadError(errorMessage, options.outputFormat)
+        gracefulShutdownSync(1)
+        return { messages: [] }
+      }
+
+      // Hydrate local transcript from remote before loading
+      if (isEnvTruthy(process.env.CLAUDE_CODE_USE_CCR_V2)) {
+        // Await restore alongside hydration so SSE catchup lands on
+        // restored state, not a fresh default.
+        const [, metadata] = await Promise.all([
+          hydrateFromCCRv2InternalEvents(parsedSessionId.sessionId),
+          options.restoredWorkerState,
+        ])
+        if (metadata) {
+          setAppState(externalMetadataToAppState(metadata))
+          if (typeof metadata.model === 'string') {
+            setMainLoopModelOverride(metadata.model)
+          }
+        }
+      } else if (
+        parsedSessionId.isUrl &&
+        parsedSessionId.ingressUrl &&
+        isEnvTruthy(process.env.ENABLE_SESSION_PERSISTENCE)
+      ) {
+        // v1: fetch session logs from Session Ingress
+        await hydrateRemoteSession(
+          parsedSessionId.sessionId,
+          parsedSessionId.ingressUrl,
+        )
+      }
+
+      // Load the conversation with the specified session ID
+      const result = await loadConversationForResume(
+        parsedSessionId.sessionId,
+        parsedSessionId.jsonlFile || undefined,
+      )
+
+      // hydrateFromCCRv2InternalEvents writes an empty transcript file for
+      // fresh sessions (writeFile(sessionFile, '') with zero events), so
+      // loadConversationForResume returns {messages: []} not null. Treat
+      // empty the same as null so SessionStart still fires.
+      if (!result || result.messages.length === 0) {
+        // For URL-based or CCR v2 resume, start with empty session (it was hydrated but empty)
+        if (
+          parsedSessionId.isUrl ||
+          isEnvTruthy(process.env.CLAUDE_CODE_USE_CCR_V2)
+        ) {
+          // Execute SessionStart hooks for startup since we're starting a new session
+          return {
+            messages: await (options.sessionStartHooksPromise ??
+              processSessionStartHooks('startup')),
+          }
+        } else {
+          emitLoadError(
+            `No conversation found with session ID: ${parsedSessionId.sessionId}`,
+            options.outputFormat,
+          )
+          gracefulShutdownSync(1)
+          return { messages: [] }
+        }
+      }
+
+      // Handle resumeSessionAt feature
+      if (options.resumeSessionAt) {
+        const index = result.messages.findIndex(
+          m => m.uuid === options.resumeSessionAt,
+        )
+        if (index < 0) {
+          emitLoadError(
+            `No message found with message.uuid of: ${options.resumeSessionAt}`,
+            options.outputFormat,
+          )
+          gracefulShutdownSync(1)
+          return { messages: [] }
+        }
+
+        // NOTE(review): index is guaranteed >= 0 here (guarded above), so
+        // the ternary's [] branch is unreachable; a plain slice would do.
+        result.messages = index >= 0 ? result.messages.slice(0, index + 1) : []
+      }
+
+      // Match coordinator mode to the resumed session's mode
+      if (feature('COORDINATOR_MODE') && coordinatorModeModule) {
+        const warning = coordinatorModeModule.matchSessionMode(result.mode)
+        if (warning) {
+          process.stderr.write(warning + '\n')
+          // Refresh agent definitions to reflect the mode switch
+          const { getAgentDefinitionsWithOverrides, getActiveAgentsFromList } =
+            // eslint-disable-next-line @typescript-eslint/no-require-imports
+            require('../tools/AgentTool/loadAgentsDir.js') as typeof import('../tools/AgentTool/loadAgentsDir.js')
+          getAgentDefinitionsWithOverrides.cache.clear?.()
+          const freshAgentDefs = await getAgentDefinitionsWithOverrides(
+            getCwd(),
+          )
+
+          setAppState(prev => ({
+            ...prev,
+            agentDefinitions: {
+              ...freshAgentDefs,
+              allAgents: freshAgentDefs.allAgents,
+              activeAgents: getActiveAgentsFromList(freshAgentDefs.allAgents),
+            },
+          }))
+        }
+      }
+
+      // Reuse the resumed session's ID
+      if (!options.forkSession && result.sessionId) {
+        switchSession(
+          asSessionId(result.sessionId),
+          result.fullPath ? dirname(result.fullPath) : null,
+        )
+        if (persistSession) {
+          await resetSessionFilePointer()
+        }
+      }
+      restoreSessionStateFromLog(result, setAppState)
+
+      // Restore session metadata so it's re-appended on exit via reAppendSessionMetadata
+      restoreSessionMetadata(
+        options.forkSession
+          ? { ...result, worktreeSession: undefined }
+          : result,
+      )
+
+      // Write mode entry for the resumed session
+      if (feature('COORDINATOR_MODE') && coordinatorModeModule) {
+        saveMode(
+          coordinatorModeModule.isCoordinatorMode() ? 'coordinator' : 'normal',
+        )
+      }
+
+      return {
+        messages: result.messages,
+        turnInterruptionState: result.turnInterruptionState,
+        agentSetting: result.agentSetting,
+      }
+    } catch (error) {
+      logError(error)
+      const errorMessage =
+        error instanceof Error
+          ? `Failed to resume session: ${error.message}`
+          : 'Failed to resume session with --print mode'
+      emitLoadError(errorMessage, options.outputFormat)
+      gracefulShutdownSync(1)
+      return { messages: [] }
+    }
+  }
+
+  // Join the SessionStart hooks promise kicked in main.tsx (or run fresh if
+  // it wasn't kicked — e.g. --continue with no prior session falls through
+  // here with sessionStartHooksPromise undefined because main.tsx guards on continue)
+  return {
+    messages: await (options.sessionStartHooksPromise ??
+      processSessionStartHooks('startup')),
+  }
+}
+
+/**
+ * Builds the structured IO channel for SDK mode. A string prompt is wrapped
+ * into a single SDK user message (an empty/whitespace-only string yields an
+ * empty stream); an async iterable passes through untouched. RemoteIO is
+ * chosen when an sdkUrl is supplied, plain StructuredIO otherwise.
+ */
+function getStructuredIO(
+  inputPrompt: string | AsyncIterable<string>,
+  options: {
+    sdkUrl: string | undefined
+    replayUserMessages?: boolean
+  },
+): StructuredIO {
+  let stream: AsyncIterable<string>
+  if (typeof inputPrompt !== 'string') {
+    // Already streaming — use as-is.
+    stream = inputPrompt
+  } else if (inputPrompt.trim() === '') {
+    // Blank prompt: nothing to feed in.
+    stream = fromArray([])
+  } else {
+    // Lift the one-shot string into the streaming representation.
+    stream = fromArray([
+      jsonStringify({
+        type: 'user',
+        session_id: '',
+        message: {
+          role: 'user',
+          content: inputPrompt,
+        },
+        parent_tool_use_id: null,
+      } satisfies SDKUserMessage),
+    ])
+  }
+
+  // Use RemoteIO if sdkUrl is provided, otherwise use regular StructuredIO
+  if (options.sdkUrl) {
+    return new RemoteIO(options.sdkUrl, stream, options.replayUserMessages)
+  }
+  return new StructuredIO(stream, options.replayUserMessages)
+}
+
+/**
+ * Handles unexpected permission responses by looking up the unresolved tool
+ * call in the transcript and enqueuing it for execution.
+ *
+ * Returns true if a permission was enqueued, false otherwise.
+ */
+export async function handleOrphanedPermissionResponse({
+  message,
+  setAppState,
+  onEnqueued,
+  handledToolUseIds,
+}: {
+  message: SDKControlResponse
+  setAppState: (f: (prev: AppState) => AppState) => void
+  onEnqueued?: () => void
+  handledToolUseIds: Set<string>
+}): Promise<boolean> {
+  // NOTE(review): setAppState is accepted/destructured but never used in
+  // this body — confirm whether callers still need to pass it.
+  if (
+    message.response.subtype === 'success' &&
+    message.response.response?.toolUseID &&
+    typeof message.response.response.toolUseID === 'string'
+  ) {
+    const permissionResult = message.response.response as PermissionResult
+    const { toolUseID } = permissionResult
+    // Defensive re-check after the cast; the outer condition has already
+    // verified toolUseID is a truthy string, so this branch is redundant.
+    if (!toolUseID) {
+      return false
+    }
+
+    logForDebugging(
+      `handleOrphanedPermissionResponse: received orphaned control_response for toolUseID=${toolUseID} request_id=${message.response.request_id}`,
+    )
+
+    // Prevent re-processing the same orphaned tool_use. Without this guard,
+    // duplicate control_response deliveries (e.g. from WebSocket reconnect)
+    // cause the same tool to be executed multiple times, producing duplicate
+    // tool_use IDs in the messages array and a 400 error from the API.
+    // Once corrupted, every retry accumulates more duplicates.
+    if (handledToolUseIds.has(toolUseID)) {
+      logForDebugging(
+        `handleOrphanedPermissionResponse: skipping duplicate orphaned permission for toolUseID=${toolUseID} (already handled)`,
+      )
+      return false
+    }
+
+    const assistantMessage = await findUnresolvedToolUse(toolUseID)
+    if (!assistantMessage) {
+      logForDebugging(
+        `handleOrphanedPermissionResponse: no unresolved tool_use found for toolUseID=${toolUseID} (already resolved in transcript)`,
+      )
+      return false
+    }
+
+    // Mark handled before enqueuing so a racing duplicate delivery cannot
+    // enqueue the same tool use twice.
+    handledToolUseIds.add(toolUseID)
+    logForDebugging(
+      `handleOrphanedPermissionResponse: enqueuing orphaned permission for toolUseID=${toolUseID} messageID=${assistantMessage.message.id}`,
+    )
+    enqueue({
+      mode: 'orphaned-permission' as const,
+      value: [],
+      orphanedPermission: {
+        permissionResult,
+        assistantMessage,
+      },
+    })
+
+    onEnqueued?.()
+    return true
+  }
+  return false
+}
+
+/**
+ * Live state for dynamically-added (process-based) MCP servers: the active
+ * connections, the tools they expose, and the scoped configs they came from.
+ */
+export type DynamicMcpState = {
+  clients: MCPServerConnection[]
+  tools: Tools
+  configs: Record<string, ScopedMcpServerConfig>
+}
+
+/**
+ * Widens a process-transport MCP config into a scoped config by tagging it
+ * with the 'dynamic' scope. McpServerConfigForProcessTransport is a subset
+ * of McpServerConfig (it excludes IDE-specific types like sse-ide and
+ * ws-ide), so a shallow copy plus the scope field is structurally valid.
+ */
+function toScopedConfig(
+  config: McpServerConfigForProcessTransport,
+): ScopedMcpServerConfig {
+  const scoped = { ...config, scope: 'dynamic' as const }
+  return scoped as ScopedMcpServerConfig
+}
+
+/**
+ * State for SDK MCP servers that run in the SDK process.
+ * Mirrors DynamicMcpState: tracked configs, connection entries (which may be
+ * 'pending' until the next query upgrades them), and exposed tools.
+ */
+export type SdkMcpState = {
+  configs: Record<string, McpSdkServerConfig>
+  clients: MCPServerConnection[]
+  tools: Tools
+}
+
+/**
+ * Result of handleMcpSetServers - contains new state and response data.
+ * sdkServersChanged flags whether the SDK-managed server set was modified,
+ * so the caller knows the SDK side needs a refresh.
+ */
+export type McpSetServersResult = {
+  response: SDKControlMcpSetServersResponse
+  newSdkState: SdkMcpState
+  newDynamicState: DynamicMcpState
+  sdkServersChanged: boolean
+}
+
+/**
+ * Handles mcp_set_servers requests by processing both SDK and process-based servers.
+ * SDK servers run in the SDK process; process-based servers are spawned by the CLI.
+ *
+ * Applies enterprise allowedMcpServers/deniedMcpServers policy — same filter as
+ * --mcp-config (see filterMcpServersByPolicy call in main.tsx). Without this,
+ * SDK V2 Query.setMcpServers() was a second policy bypass vector. Blocked servers
+ * are reported in response.errors so the SDK consumer knows why they weren't added.
+ *
+ * @param servers desired full server set (SDK + process transports)
+ * @param sdkState current SDK-managed server state
+ * @param dynamicState current process-based server state
+ * @param setAppState updater used by reconcileMcpServers to publish tools/clients
+ * @returns the control response plus the replacement SDK/dynamic states
+ */
+export async function handleMcpSetServers(
+  servers: Record<string, McpServerConfigForProcessTransport>,
+  sdkState: SdkMcpState,
+  dynamicState: DynamicMcpState,
+  setAppState: (f: (prev: AppState) => AppState) => void,
+): Promise<McpSetServersResult> {
+  // Enforce enterprise MCP policy on process-based servers (stdio/http/sse).
+  // Mirrors the --mcp-config filter in main.tsx — both user-controlled injection
+  // paths must have the same gate. type:'sdk' servers are exempt (SDK-managed,
+  // CLI never spawns/connects for them — see filterMcpServersByPolicy jsdoc).
+  // Blocked servers go into response.errors so the SDK caller sees why.
+  const { allowed: allowedServers, blocked } = filterMcpServersByPolicy(servers)
+  const policyErrors: Record<string, string> = {}
+  for (const name of blocked) {
+    policyErrors[name] =
+      'Blocked by enterprise policy (allowedMcpServers/deniedMcpServers)'
+  }
+
+  // Separate SDK servers from process-based servers
+  const sdkServers: Record<string, McpSdkServerConfig> = {}
+  const processServers: Record<string, McpServerConfigForProcessTransport> = {}
+
+  for (const [name, config] of Object.entries(allowedServers)) {
+    if (config.type === 'sdk') {
+      sdkServers[name] = config
+    } else {
+      processServers[name] = config
+    }
+  }
+
+  // Handle SDK servers
+  const currentSdkNames = new Set(Object.keys(sdkState.configs))
+  const newSdkNames = new Set(Object.keys(sdkServers))
+  const sdkAdded: string[] = []
+  const sdkRemoved: string[] = []
+
+  const newSdkConfigs = { ...sdkState.configs }
+  let newSdkClients = [...sdkState.clients]
+  let newSdkTools = [...sdkState.tools]
+
+  // Remove SDK servers no longer in desired state
+  for (const name of currentSdkNames) {
+    if (!newSdkNames.has(name)) {
+      const client = newSdkClients.find(c => c.name === name)
+      if (client && client.type === 'connected') {
+        // Guard cleanup like reconcileMcpServers does: a throwing cleanup
+        // must not reject the whole mcp_set_servers request and leave the
+        // reconciliation half-applied. Log and continue removing.
+        try {
+          await client.cleanup()
+        } catch (e) {
+          logError(e)
+        }
+      }
+      newSdkClients = newSdkClients.filter(c => c.name !== name)
+      const prefix = `mcp__${name}__`
+      newSdkTools = newSdkTools.filter(t => !t.name.startsWith(prefix))
+      delete newSdkConfigs[name]
+      sdkRemoved.push(name)
+    }
+  }
+
+  // Add new SDK servers as pending - they'll be upgraded to connected
+  // when updateSdkMcp() runs on the next query
+  for (const [name, config] of Object.entries(sdkServers)) {
+    if (!currentSdkNames.has(name)) {
+      newSdkConfigs[name] = config
+      const pendingClient: MCPServerConnection = {
+        type: 'pending',
+        name,
+        config: { ...config, scope: 'dynamic' as const },
+      }
+      newSdkClients = [...newSdkClients, pendingClient]
+      sdkAdded.push(name)
+    }
+  }
+
+  // Handle process-based servers
+  const processResult = await reconcileMcpServers(
+    processServers,
+    dynamicState,
+    setAppState,
+  )
+
+  return {
+    response: {
+      added: [...sdkAdded, ...processResult.response.added],
+      removed: [...sdkRemoved, ...processResult.response.removed],
+      errors: { ...policyErrors, ...processResult.response.errors },
+    },
+    newSdkState: {
+      configs: newSdkConfigs,
+      clients: newSdkClients,
+      tools: newSdkTools,
+    },
+    newDynamicState: processResult.newState,
+    sdkServersChanged: sdkAdded.length > 0 || sdkRemoved.length > 0,
+  }
+}
+
+/**
+ * Reconciles the current set of dynamic MCP servers with a new desired state.
+ * Handles additions, removals, and config changes.
+ *
+ * Config changes are treated as remove-then-re-add ("replace"); replaced
+ * servers appear in neither `removed` nor as duplicates in `added`'s removal
+ * accounting. Also pushes the resulting tools/clients into AppState.
+ */
+export async function reconcileMcpServers(
+  desiredConfigs: Record<string, McpServerConfigForProcessTransport>,
+  currentState: DynamicMcpState,
+  setAppState: (f: (prev: AppState) => AppState) => void,
+): Promise<{
+  response: SDKControlMcpSetServersResponse
+  newState: DynamicMcpState
+}> {
+  const currentNames = new Set(Object.keys(currentState.configs))
+  const desiredNames = new Set(Object.keys(desiredConfigs))
+
+  const toRemove = [...currentNames].filter(n => !desiredNames.has(n))
+  const toAdd = [...desiredNames].filter(n => !currentNames.has(n))
+
+  // Check for config changes (same name, different config)
+  const toCheck = [...currentNames].filter(n => desiredNames.has(n))
+  const toReplace = toCheck.filter(name => {
+    const currentConfig = currentState.configs[name]
+    const desiredConfigRaw = desiredConfigs[name]
+    // Defensive: a missing config on either side forces a replace.
+    if (!currentConfig || !desiredConfigRaw) return true
+    const desiredConfig = toScopedConfig(desiredConfigRaw)
+    return !areMcpConfigsEqual(currentConfig, desiredConfig)
+  })
+
+  const removed: string[] = []
+  const added: string[] = []
+  const errors: Record<string, string> = {}
+
+  let newClients = [...currentState.clients]
+  let newTools = [...currentState.tools]
+
+  // Remove old servers (including ones being replaced)
+  for (const name of [...toRemove, ...toReplace]) {
+    const client = newClients.find(c => c.name === name)
+    const config = currentState.configs[name]
+    if (client && config) {
+      if (client.type === 'connected') {
+        // Best-effort: a failing cleanup must not abort the reconcile.
+        try {
+          await client.cleanup()
+        } catch (e) {
+          logError(e)
+        }
+      }
+      // Clear the memoization cache
+      await clearServerCache(name, config)
+    }
+
+    // Remove tools from this server
+    const prefix = `mcp__${name}__`
+    newTools = newTools.filter(t => !t.name.startsWith(prefix))
+
+    // Remove from clients list
+    newClients = newClients.filter(c => c.name !== name)
+
+    // Track removal (only for actually removed, not replaced)
+    if (toRemove.includes(name)) {
+      removed.push(name)
+    }
+  }
+
+  // Add new servers (including replacements)
+  for (const name of [...toAdd, ...toReplace]) {
+    const config = desiredConfigs[name]
+    if (!config) continue
+    const scopedConfig = toScopedConfig(config)
+
+    // SDK servers are managed by the SDK process, not the CLI.
+    // Just track them without trying to connect.
+    // (handleMcpSetServers routes 'sdk' configs away before calling this,
+    // so this branch is defensive for direct callers.)
+    if (config.type === 'sdk') {
+      added.push(name)
+      continue
+    }
+
+    try {
+      const client = await connectToServer(name, scopedConfig)
+      newClients.push(client)
+
+      if (client.type === 'connected') {
+        const serverTools = await fetchToolsForClient(client)
+        newTools.push(...serverTools)
+      } else if (client.type === 'failed') {
+        errors[name] = client.error || 'Connection failed'
+      }
+
+      // NOTE(review): a 'failed' connection is recorded in BOTH `added` and
+      // `errors` — confirm consumers expect that double reporting.
+      added.push(name)
+    } catch (e) {
+      const err = toError(e)
+      errors[name] = err.message
+      logError(err)
+    }
+  }
+
+  // Build new configs
+  const newConfigs: Record<string, ScopedMcpServerConfig> = {}
+  for (const name of desiredNames) {
+    const config = desiredConfigs[name]
+    if (config) {
+      newConfigs[name] = toScopedConfig(config)
+    }
+  }
+
+  const newState: DynamicMcpState = {
+    clients: newClients,
+    tools: newTools,
+    configs: newConfigs,
+  }
+
+  // Update AppState with the new tools
+  setAppState(prev => {
+    // Get all dynamic server names (current + new)
+    const allDynamicServerNames = new Set([
+      ...Object.keys(currentState.configs),
+      ...Object.keys(newConfigs),
+    ])
+
+    // Remove old dynamic tools
+    const nonDynamicTools = prev.mcp.tools.filter(t => {
+      for (const serverName of allDynamicServerNames) {
+        if (t.name.startsWith(`mcp__${serverName}__`)) {
+          return false
+        }
+      }
+      return true
+    })
+
+    // Remove old dynamic clients
+    const nonDynamicClients = prev.mcp.clients.filter(c => {
+      return !allDynamicServerNames.has(c.name)
+    })
+
+    return {
+      ...prev,
+      mcp: {
+        ...prev.mcp,
+        tools: [...nonDynamicTools, ...newTools],
+        clients: [...nonDynamicClients, ...newClients],
+      },
+    }
+  })
+
+  return {
+    response: { added, removed, errors },
+    newState,
+  }
+}

+ 255 - 0
src/cli/remoteIO.ts

@@ -0,0 +1,255 @@
+import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
+import { PassThrough } from 'stream'
+import { URL } from 'url'
+import { getSessionId } from '../bootstrap/state.js'
+import { getPollIntervalConfig } from '../bridge/pollConfig.js'
+import { registerCleanup } from '../utils/cleanupRegistry.js'
+import { setCommandLifecycleListener } from '../utils/commandLifecycle.js'
+import { isDebugMode, logForDebugging } from '../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
+import { isEnvTruthy } from '../utils/envUtils.js'
+import { errorMessage } from '../utils/errors.js'
+import { gracefulShutdown } from '../utils/gracefulShutdown.js'
+import { logError } from '../utils/log.js'
+import { writeToStdout } from '../utils/process.js'
+import { getSessionIngressAuthToken } from '../utils/sessionIngressAuth.js'
+import {
+  setSessionMetadataChangedListener,
+  setSessionStateChangedListener,
+} from '../utils/sessionState.js'
+import {
+  setInternalEventReader,
+  setInternalEventWriter,
+} from '../utils/sessionStorage.js'
+import { ndjsonSafeStringify } from './ndjsonSafeStringify.js'
+import { StructuredIO } from './structuredIO.js'
+import { CCRClient, CCRInitError } from './transports/ccrClient.js'
+import { SSETransport } from './transports/SSETransport.js'
+import type { Transport } from './transports/Transport.js'
+import { getTransportForUrl } from './transports/transportUtils.js'
+
+/**
+ * Bidirectional streaming for SDK mode with session tracking
+ * Supports WebSocket transport
+ */
+export class RemoteIO extends StructuredIO {
+  private url: URL
+  private transport: Transport
+  private inputStream: PassThrough
+  private readonly isBridge: boolean = false
+  private readonly isDebug: boolean = false
+  private ccrClient: CCRClient | null = null
+  private keepAliveTimer: ReturnType<typeof setInterval> | null = null
+
+  constructor(
+    streamUrl: string,
+    initialPrompt?: AsyncIterable<string>,
+    replayUserMessages?: boolean,
+  ) {
+    const inputStream = new PassThrough({ encoding: 'utf8' })
+    super(inputStream, replayUserMessages)
+    this.inputStream = inputStream
+    this.url = new URL(streamUrl)
+
+    // Prepare headers with session token if available
+    const headers: Record<string, string> = {}
+    const sessionToken = getSessionIngressAuthToken()
+    if (sessionToken) {
+      headers['Authorization'] = `Bearer ${sessionToken}`
+    } else {
+      logForDebugging('[remote-io] No session ingress token available', {
+        level: 'error',
+      })
+    }
+
+    // Add environment runner version if available (set by Environment Manager)
+    const erVersion = process.env.CLAUDE_CODE_ENVIRONMENT_RUNNER_VERSION
+    if (erVersion) {
+      headers['x-environment-runner-version'] = erVersion
+    }
+
+    // Provide a callback that re-reads the session token dynamically.
+    // When the parent process refreshes the token (via token file or env var),
+    // the transport can pick it up on reconnection.
+    const refreshHeaders = (): Record<string, string> => {
+      const h: Record<string, string> = {}
+      const freshToken = getSessionIngressAuthToken()
+      if (freshToken) {
+        h['Authorization'] = `Bearer ${freshToken}`
+      }
+      const freshErVersion = process.env.CLAUDE_CODE_ENVIRONMENT_RUNNER_VERSION
+      if (freshErVersion) {
+        h['x-environment-runner-version'] = freshErVersion
+      }
+      return h
+    }
+
+    // Get appropriate transport based on URL protocol
+    this.transport = getTransportForUrl(
+      this.url,
+      headers,
+      getSessionId(),
+      refreshHeaders,
+    )
+
+    // Set up data callback
+    this.isBridge = process.env.CLAUDE_CODE_ENVIRONMENT_KIND === 'bridge'
+    this.isDebug = isDebugMode()
+    this.transport.setOnData((data: string) => {
+      this.inputStream.write(data)
+      if (this.isBridge && this.isDebug) {
+        writeToStdout(data.endsWith('\n') ? data : data + '\n')
+      }
+    })
+
+    // Set up close callback to handle connection failures
+    this.transport.setOnClose(() => {
+      // End the input stream to trigger graceful shutdown
+      this.inputStream.end()
+    })
+
+    // Initialize CCR v2 client (heartbeats, epoch, state reporting, event writes).
+    // The CCRClient constructor wires the SSE received-ack handler
+    // synchronously, so new CCRClient() MUST run before transport.connect() —
+    // otherwise early SSE frames hit an unwired onEventCallback and their
+    // 'received' delivery acks are silently dropped.
+    if (isEnvTruthy(process.env.CLAUDE_CODE_USE_CCR_V2)) {
+      // CCR v2 is SSE+POST by definition. getTransportForUrl returns
+      // SSETransport under the same env var, but the two checks live in
+      // different files — assert the invariant so a future decoupling
+      // fails loudly here instead of confusingly inside CCRClient.
+      if (!(this.transport instanceof SSETransport)) {
+        throw new Error(
+          'CCR v2 requires SSETransport; check getTransportForUrl',
+        )
+      }
+      this.ccrClient = new CCRClient(this.transport, this.url)
+      const init = this.ccrClient.initialize()
+      this.restoredWorkerState = init.catch(() => null)
+      init.catch((error: unknown) => {
+        logForDiagnosticsNoPII('error', 'cli_worker_lifecycle_init_failed', {
+          reason: error instanceof CCRInitError ? error.reason : 'unknown',
+        })
+        logError(
+          new Error(`CCRClient initialization failed: ${errorMessage(error)}`),
+        )
+        void gracefulShutdown(1, 'other')
+      })
+      registerCleanup(async () => this.ccrClient?.close())
+
+      // Register internal event writer for transcript persistence.
+      // When set, sessionStorage writes transcript messages as CCR v2
+      // internal events instead of v1 Session Ingress.
+      setInternalEventWriter((eventType, payload, options) =>
+        this.ccrClient!.writeInternalEvent(eventType, payload, options),
+      )
+
+      // Register internal event readers for session resume.
+      // When set, hydrateFromCCRv2InternalEvents() can fetch foreground
+      // and subagent internal events to reconstruct conversation state.
+      setInternalEventReader(
+        () => this.ccrClient!.readInternalEvents(),
+        () => this.ccrClient!.readSubagentInternalEvents(),
+      )
+
+      const LIFECYCLE_TO_DELIVERY = {
+        started: 'processing',
+        completed: 'processed',
+      } as const
+      setCommandLifecycleListener((uuid, state) => {
+        this.ccrClient?.reportDelivery(uuid, LIFECYCLE_TO_DELIVERY[state])
+      })
+      setSessionStateChangedListener((state, details) => {
+        this.ccrClient?.reportState(state, details)
+      })
+      setSessionMetadataChangedListener(metadata => {
+        this.ccrClient?.reportMetadata(metadata)
+      })
+    }
+
+    // Start connection only after all callbacks are wired (setOnData above,
+    // setOnEvent inside new CCRClient() when CCR v2 is enabled).
+    void this.transport.connect()
+
+    // Push a silent keep_alive frame on a fixed interval so upstream
+    // proxies and the session-ingress layer don't GC an otherwise-idle
+    // remote control session. The keep_alive type is filtered before
+    // reaching any client UI (Query.ts drops it; structuredIO.ts drops it;
+    // web/iOS/Android never see it in their message loop). Interval comes
+    // from GrowthBook (tengu_bridge_poll_interval_config
+    // session_keepalive_interval_v2_ms, default 120s); 0 = disabled.
+    // Bridge-only: fixes Envoy idle timeout on bridge-topology sessions
+    // (#21931). byoc workers ran without this before #21931 and do not
+    // need it — different network path.
+    const keepAliveIntervalMs =
+      getPollIntervalConfig().session_keepalive_interval_v2_ms
+    if (this.isBridge && keepAliveIntervalMs > 0) {
+      this.keepAliveTimer = setInterval(() => {
+        logForDebugging('[remote-io] keep_alive sent')
+        void this.write({ type: 'keep_alive' }).catch(err => {
+          logForDebugging(
+            `[remote-io] keep_alive write failed: ${errorMessage(err)}`,
+          )
+        })
+      }, keepAliveIntervalMs)
+      this.keepAliveTimer.unref?.()
+    }
+
+    // Register for graceful shutdown cleanup
+    registerCleanup(async () => this.close())
+
+    // If initial prompt is provided, send it through the input stream
+    if (initialPrompt) {
+      // Convert the initial prompt to the input stream format.
+      // Chunks from stdin may already contain trailing newlines, so strip
+      // them before appending our own to avoid double-newline issues that
+      // cause structuredIO to parse empty lines. String() handles both
+      // string chunks and Buffer objects from process.stdin.
+      const stream = this.inputStream
+      void (async () => {
+        for await (const chunk of initialPrompt) {
+          stream.write(String(chunk).replace(/\n$/, '') + '\n')
+        }
+      })()
+    }
+  }
+
+  override flushInternalEvents(): Promise<void> {
+    return this.ccrClient?.flushInternalEvents() ?? Promise.resolve()
+  }
+
+  override get internalEventsPending(): number {
+    return this.ccrClient?.internalEventsPending ?? 0
+  }
+
+  /**
+   * Send output to the transport.
+   * In bridge mode, control_request messages are always echoed to stdout so the
+   * bridge parent can detect permission requests. Other messages are echoed only
+   * in debug mode.
+   */
+  async write(message: StdoutMessage): Promise<void> {
+    if (this.ccrClient) {
+      await this.ccrClient.writeEvent(message)
+    } else {
+      await this.transport.write(message)
+    }
+    if (this.isBridge) {
+      if (message.type === 'control_request' || this.isDebug) {
+        writeToStdout(ndjsonSafeStringify(message) + '\n')
+      }
+    }
+  }
+
+  /**
+   * Clean up connections gracefully
+   */
+  close(): void {
+    if (this.keepAliveTimer) {
+      clearInterval(this.keepAliveTimer)
+      this.keepAliveTimer = null
+    }
+    this.transport.close()
+    this.inputStream.end()
+  }
+}

+ 859 - 0
src/cli/structuredIO.ts

@@ -0,0 +1,859 @@
+import { feature } from 'bun:bundle'
+import type {
+  ElicitResult,
+  JSONRPCMessage,
+} from '@modelcontextprotocol/sdk/types.js'
+import { randomUUID } from 'crypto'
+import type { AssistantMessage } from 'src//types/message.js'
+import type {
+  HookInput,
+  HookJSONOutput,
+  PermissionUpdate,
+  SDKMessage,
+  SDKUserMessage,
+} from 'src/entrypoints/agentSdkTypes.js'
+import { SDKControlElicitationResponseSchema } from 'src/entrypoints/sdk/controlSchemas.js'
+import type {
+  SDKControlRequest,
+  SDKControlResponse,
+  StdinMessage,
+  StdoutMessage,
+} from 'src/entrypoints/sdk/controlTypes.js'
+import type { CanUseToolFn } from 'src/hooks/useCanUseTool.js'
+import type { Tool, ToolUseContext } from 'src/Tool.js'
+import { type HookCallback, hookJSONOutputSchema } from 'src/types/hooks.js'
+import { logForDebugging } from 'src/utils/debug.js'
+import { logForDiagnosticsNoPII } from 'src/utils/diagLogs.js'
+import { AbortError } from 'src/utils/errors.js'
+import {
+  type Output as PermissionToolOutput,
+  permissionPromptToolResultToPermissionDecision,
+  outputSchema as permissionToolOutputSchema,
+} from 'src/utils/permissions/PermissionPromptToolResultSchema.js'
+import type {
+  PermissionDecision,
+  PermissionDecisionReason,
+} from 'src/utils/permissions/PermissionResult.js'
+import { hasPermissionsToUseTool } from 'src/utils/permissions/permissions.js'
+import { writeToStdout } from 'src/utils/process.js'
+import { jsonStringify } from 'src/utils/slowOperations.js'
+import { z } from 'zod/v4'
+import { notifyCommandLifecycle } from '../utils/commandLifecycle.js'
+import { normalizeControlMessageKeys } from '../utils/controlMessageCompat.js'
+import { executePermissionRequestHooks } from '../utils/hooks.js'
+import {
+  applyPermissionUpdates,
+  persistPermissionUpdates,
+} from '../utils/permissions/PermissionUpdate.js'
+import {
+  notifySessionStateChanged,
+  type RequiresActionDetails,
+  type SessionExternalMetadata,
+} from '../utils/sessionState.js'
+import { jsonParse } from '../utils/slowOperations.js'
+import { Stream } from '../utils/stream.js'
+import { ndjsonSafeStringify } from './ndjsonSafeStringify.js'
+
/**
 * Synthetic tool name used when forwarding sandbox network permission
 * requests via the can_use_tool control_request protocol. SDK hosts
 * see this as a normal tool permission prompt.
 *
 * Used by createSandboxAskCallback() below; the host's allow/deny answer is
 * mapped back onto a boolean network-access decision.
 */
export const SANDBOX_NETWORK_ACCESS_TOOL_NAME = 'SandboxNetworkAccess'
+
+function serializeDecisionReason(
+  reason: PermissionDecisionReason | undefined,
+): string | undefined {
+  if (!reason) {
+    return undefined
+  }
+
+  if (
+    (feature('BASH_CLASSIFIER') || feature('TRANSCRIPT_CLASSIFIER')) &&
+    reason.type === 'classifier'
+  ) {
+    return reason.reason
+  }
+  switch (reason.type) {
+    case 'rule':
+    case 'mode':
+    case 'subcommandResults':
+    case 'permissionPromptTool':
+      return undefined
+    case 'hook':
+    case 'asyncAgent':
+    case 'sandboxOverride':
+    case 'workingDir':
+    case 'safetyCheck':
+    case 'other':
+      return reason.reason
+  }
+}
+
+function buildRequiresActionDetails(
+  tool: Tool,
+  input: Record<string, unknown>,
+  toolUseID: string,
+  requestId: string,
+): RequiresActionDetails {
+  // Per-tool summary methods may throw on malformed input; permission
+  // handling must not break because of a bad description.
+  let description: string
+  try {
+    description =
+      tool.getActivityDescription?.(input) ??
+      tool.getToolUseSummary?.(input) ??
+      tool.userFacingName(input)
+  } catch {
+    description = tool.name
+  }
+  return {
+    tool_name: tool.name,
+    action_description: description,
+    tool_use_id: toolUseID,
+    request_id: requestId,
+    input,
+  }
+}
+
/**
 * Book-keeping for one in-flight control_request: the promise callbacks to
 * settle it, the zod schema used to validate the host's success payload
 * (absent ⇒ any success resolves to {}), and the original request kept for
 * inspection (e.g. getPendingPermissionRequests).
 */
type PendingRequest<T> = {
  resolve: (result: T) => void
  reject: (error: unknown) => void
  schema?: z.Schema
  request: SDKControlRequest
}
+
/**
 * Provides a structured way to read and write SDK messages from stdio,
 * capturing the SDK protocol.
 *
 * NOTE(review): this doc comment describes the StructuredIO class further
 * below — the constant declared next detaches it from the class, so doc
 * tooling will attach it to the constant instead. Consider moving it.
 */
// Maximum number of resolved tool_use IDs to track. Once exceeded, the oldest
// entry is evicted. This bounds memory in very long sessions while keeping
// enough history to catch duplicate control_response deliveries.
const MAX_RESOLVED_TOOL_USE_IDS = 1000
+
/**
 * Reads NDJSON protocol messages from an input stream and writes protocol
 * messages back out, implementing the SDK control_request/control_response
 * handshake (tool permissions, hook callbacks, elicitation, MCP relay).
 */
export class StructuredIO {
  readonly structuredInput: AsyncGenerator<StdinMessage | SDKMessage>
  private readonly pendingRequests = new Map<string, PendingRequest<unknown>>()

  // CCR external_metadata read back on worker start; null when the
  // transport doesn't restore. Assigned by RemoteIO.
  restoredWorkerState: Promise<SessionExternalMetadata | null> =
    Promise.resolve(null)

  private inputClosed = false
  private unexpectedResponseCallback?: (
    response: SDKControlResponse,
  ) => Promise<void>

  // Tracks tool_use IDs that have been resolved through the normal permission
  // flow (or aborted by a hook). When a duplicate control_response arrives
  // after the original was already handled, this Set prevents the orphan
  // handler from re-processing it — which would push duplicate assistant
  // messages into mutableMessages and cause a 400 "tool_use ids must be unique"
  // error from the API.
  private readonly resolvedToolUseIds = new Set<string>()
  private prependedLines: string[] = []
  private onControlRequestSent?: (request: SDKControlRequest) => void
  private onControlRequestResolved?: (requestId: string) => void

  // sendRequest() and print.ts both enqueue here; the drain loop is the
  // only writer. Prevents control_request from overtaking queued stream_events.
  readonly outbound = new Stream<StdoutMessage>()

  /**
   * @param input Raw NDJSON text chunks (typically stdin); chunk boundaries
   *   need not align with line boundaries — read() reassembles them.
   * @param replayUserMessages When true, resolved control_response messages
   *   are also yielded downstream instead of being consumed silently.
   */
  constructor(
    private readonly input: AsyncIterable<string>,
    private readonly replayUserMessages?: boolean,
  ) {
    // NOTE(review): redundant — the `private readonly input` parameter
    // property above already performed this assignment.
    this.input = input
    this.structuredInput = this.read()
  }

  /**
   * Records a tool_use ID as resolved so that late/duplicate control_response
   * messages for the same tool are ignored by the orphan handler.
   */
  private trackResolvedToolUseId(request: SDKControlRequest): void {
    if (request.request.subtype === 'can_use_tool') {
      this.resolvedToolUseIds.add(request.request.tool_use_id)
      if (this.resolvedToolUseIds.size > MAX_RESOLVED_TOOL_USE_IDS) {
        // Evict the oldest entry (Sets iterate in insertion order)
        const first = this.resolvedToolUseIds.values().next().value
        if (first !== undefined) {
          this.resolvedToolUseIds.delete(first)
        }
      }
    }
  }

  /** Flush pending internal events. No-op for non-remote IO. Overridden by RemoteIO. */
  flushInternalEvents(): Promise<void> {
    return Promise.resolve()
  }

  /** Internal-event queue depth. Overridden by RemoteIO; zero otherwise. */
  get internalEventsPending(): number {
    return 0
  }

  /**
   * Queue a user turn to be yielded before the next message from this.input.
   * Works before iteration starts and mid-stream — read() re-checks
   * prependedLines between each yielded message.
   */
  prependUserMessage(content: string): void {
    this.prependedLines.push(
      jsonStringify({
        type: 'user',
        session_id: '',
        message: { role: 'user', content },
        parent_tool_use_id: null,
      } satisfies SDKUserMessage) + '\n',
    )
  }

  private async *read() {
    let content = ''

    // Called once before for-await (an empty this.input otherwise skips the
    // loop body entirely), then again per block. prependedLines re-check is
    // inside the while so a prepend pushed between two messages in the SAME
    // block still lands first.
    const splitAndProcess = async function* (this: StructuredIO) {
      for (;;) {
        if (this.prependedLines.length > 0) {
          content = this.prependedLines.join('') + content
          this.prependedLines = []
        }
        const newline = content.indexOf('\n')
        if (newline === -1) break
        const line = content.slice(0, newline)
        content = content.slice(newline + 1)
        const message = await this.processLine(line)
        if (message) {
          logForDiagnosticsNoPII('info', 'cli_stdin_message_parsed', {
            type: message.type,
          })
          yield message
        }
      }
    }.bind(this)

    yield* splitAndProcess()

    for await (const block of this.input) {
      content += block
      yield* splitAndProcess()
    }
    // Trailing data without a final newline still counts as one last line.
    if (content) {
      const message = await this.processLine(content)
      if (message) {
        yield message
      }
    }
    this.inputClosed = true
    for (const request of this.pendingRequests.values()) {
      // Reject all pending requests: the input stream closed before their
      // responses arrived, so nothing can ever resolve them now.
      request.reject(
        new Error('Tool permission stream closed before response received'),
      )
    }
  }

  /** Control requests still awaiting a response, filtered to can_use_tool. */
  getPendingPermissionRequests() {
    return Array.from(this.pendingRequests.values())
      .map(entry => entry.request)
      .filter(pr => pr.request.subtype === 'can_use_tool')
  }

  /** Register a handler for control_responses whose request_id is unknown (orphans). */
  setUnexpectedResponseCallback(
    callback: (response: SDKControlResponse) => Promise<void>,
  ): void {
    this.unexpectedResponseCallback = callback
  }

  /**
   * Inject a control_response message to resolve a pending permission request.
   * Used by the bridge to feed permission responses from claude.ai into the
   * SDK permission flow.
   *
   * Also sends a control_cancel_request to the SDK consumer so its canUseTool
   * callback is aborted via the signal — otherwise the callback hangs.
   */
  injectControlResponse(response: SDKControlResponse): void {
    const requestId = response.response?.request_id
    if (!requestId) return
    const request = this.pendingRequests.get(requestId)
    if (!request) return
    this.trackResolvedToolUseId(request.request)
    this.pendingRequests.delete(requestId)
    // Cancel the SDK consumer's canUseTool callback — the bridge won.
    void this.write({
      type: 'control_cancel_request',
      request_id: requestId,
    })
    if (response.response.subtype === 'error') {
      request.reject(new Error(response.response.error))
    } else {
      const result = response.response.response
      if (request.schema) {
        try {
          request.resolve(request.schema.parse(result))
        } catch (error) {
          request.reject(error)
        }
      } else {
        // No schema registered: any successful response resolves to {}.
        request.resolve({})
      }
    }
  }

  /**
   * Register a callback invoked whenever a can_use_tool control_request
   * is written to stdout. Used by the bridge to forward permission
   * requests to claude.ai.
   */
  setOnControlRequestSent(
    callback: ((request: SDKControlRequest) => void) | undefined,
  ): void {
    this.onControlRequestSent = callback
  }

  /**
   * Register a callback invoked when a can_use_tool control_response arrives
   * from the SDK consumer (via stdin). Used by the bridge to cancel the
   * stale permission prompt on claude.ai when the SDK consumer wins the race.
   */
  setOnControlRequestResolved(
    callback: ((requestId: string) => void) | undefined,
  ): void {
    this.onControlRequestResolved = callback
  }

  /**
   * Parse one NDJSON line into a protocol message. Protocol-internal types
   * (keep_alive, update_environment_variables, control_response) are handled
   * here and consumed; returns the message to yield downstream, or undefined
   * when the line was consumed or skipped. Exits the process on malformed
   * JSON or structurally invalid messages.
   */
  private async processLine(
    line: string,
  ): Promise<StdinMessage | SDKMessage | undefined> {
    // Skip empty lines (e.g. from double newlines in piped stdin)
    if (!line) {
      return undefined
    }
    try {
      const message = normalizeControlMessageKeys(jsonParse(line)) as
        | StdinMessage
        | SDKMessage
      if (message.type === 'keep_alive') {
        // Silently ignore keep-alive messages
        return undefined
      }
      if (message.type === 'update_environment_variables') {
        // Apply environment variable updates directly to process.env.
        // Used by bridge session runner for auth token refresh
        // (CLAUDE_CODE_SESSION_ACCESS_TOKEN) which must be readable
        // by the REPL process itself, not just child Bash commands.
        const keys = Object.keys(message.variables)
        for (const [key, value] of Object.entries(message.variables)) {
          process.env[key] = value
        }
        logForDebugging(
          `[structuredIO] applied update_environment_variables: ${keys.join(', ')}`,
        )
        return undefined
      }
      if (message.type === 'control_response') {
        // Close lifecycle for every control_response, including duplicates
        // and orphans — orphans don't yield to print.ts's main loop, so this
        // is the only path that sees them. uuid is server-injected into the
        // payload.
        const uuid =
          'uuid' in message && typeof message.uuid === 'string'
            ? message.uuid
            : undefined
        if (uuid) {
          notifyCommandLifecycle(uuid, 'completed')
        }
        const request = this.pendingRequests.get(message.response.request_id)
        if (!request) {
          // Check if this tool_use was already resolved through the normal
          // permission flow. Duplicate control_response deliveries (e.g. from
          // WebSocket reconnects) arrive after the original was handled, and
          // re-processing them would push duplicate assistant messages into
          // the conversation, causing API 400 errors.
          const responsePayload =
            message.response.subtype === 'success'
              ? message.response.response
              : undefined
          const toolUseID = responsePayload?.toolUseID
          if (
            typeof toolUseID === 'string' &&
            this.resolvedToolUseIds.has(toolUseID)
          ) {
            logForDebugging(
              `Ignoring duplicate control_response for already-resolved toolUseID=${toolUseID} request_id=${message.response.request_id}`,
            )
            return undefined
          }
          if (this.unexpectedResponseCallback) {
            await this.unexpectedResponseCallback(message)
          }
          return undefined // Ignore responses for requests we don't know about
        }
        this.trackResolvedToolUseId(request.request)
        this.pendingRequests.delete(message.response.request_id)
        // Notify the bridge when the SDK consumer resolves a can_use_tool
        // request, so it can cancel the stale permission prompt on claude.ai.
        if (
          request.request.request.subtype === 'can_use_tool' &&
          this.onControlRequestResolved
        ) {
          this.onControlRequestResolved(message.response.request_id)
        }

        if (message.response.subtype === 'error') {
          request.reject(new Error(message.response.error))
          return undefined
        }
        const result = message.response.response
        if (request.schema) {
          try {
            request.resolve(request.schema.parse(result))
          } catch (error) {
            request.reject(error)
          }
        } else {
          request.resolve({})
        }
        // Propagate control responses when replay is enabled
        if (this.replayUserMessages) {
          return message
        }
        return undefined
      }
      if (
        message.type !== 'user' &&
        message.type !== 'control_request' &&
        message.type !== 'assistant' &&
        message.type !== 'system'
      ) {
        logForDebugging(`Ignoring unknown message type: ${message.type}`, {
          level: 'warn',
        })
        return undefined
      }
      if (message.type === 'control_request') {
        if (!message.request) {
          exitWithMessage(`Error: Missing request on control_request`)
        }
        return message
      }
      if (message.type === 'assistant' || message.type === 'system') {
        return message
      }
      if (message.message.role !== 'user') {
        exitWithMessage(
          `Error: Expected message role 'user', got '${message.message.role}'`,
        )
      }
      return message
    } catch (error) {
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.error(`Error parsing streaming input line: ${line}: ${error}`)
      // eslint-disable-next-line custom-rules/no-process-exit
      process.exit(1)
    }
  }

  /** Write one message as an NDJSON line to stdout. Overridden by RemoteIO. */
  async write(message: StdoutMessage): Promise<void> {
    writeToStdout(ndjsonSafeStringify(message) + '\n')
  }

  /**
   * Enqueue a control_request on the outbound stream and wait for its
   * control_response, validated against `schema`. Aborting `signal` enqueues
   * a control_cancel_request and rejects immediately with AbortError.
   * Note: the pendingRequests entry is registered synchronously inside the
   * Promise executor below, before any (async) response could arrive.
   */
  private async sendRequest<Response>(
    request: SDKControlRequest['request'],
    schema: z.Schema,
    signal?: AbortSignal,
    requestId: string = randomUUID(),
  ): Promise<Response> {
    const message: SDKControlRequest = {
      type: 'control_request',
      request_id: requestId,
      request,
    }
    if (this.inputClosed) {
      throw new Error('Stream closed')
    }
    if (signal?.aborted) {
      throw new Error('Request aborted')
    }
    this.outbound.enqueue(message)
    if (request.subtype === 'can_use_tool' && this.onControlRequestSent) {
      this.onControlRequestSent(message)
    }
    const aborted = () => {
      this.outbound.enqueue({
        type: 'control_cancel_request',
        request_id: requestId,
      })
      // Immediately reject the outstanding promise, without
      // waiting for the host to acknowledge the cancellation.
      const request = this.pendingRequests.get(requestId)
      if (request) {
        // Track the tool_use ID as resolved before rejecting, so that a
        // late response from the host is ignored by the orphan handler.
        this.trackResolvedToolUseId(request.request)
        request.reject(new AbortError())
      }
    }
    if (signal) {
      signal.addEventListener('abort', aborted, {
        once: true,
      })
    }
    try {
      return await new Promise<Response>((resolve, reject) => {
        this.pendingRequests.set(requestId, {
          request: {
            type: 'control_request',
            request_id: requestId,
            request,
          },
          resolve: result => {
            resolve(result as Response)
          },
          reject,
          schema,
        })
      })
    } finally {
      if (signal) {
        signal.removeEventListener('abort', aborted)
      }
      this.pendingRequests.delete(requestId)
    }
  }

  /**
   * Build the CanUseToolFn that resolves tool permissions by racing
   * PermissionRequest hooks against the SDK host's permission prompt.
   * Whichever decides first wins; the loser is cancelled or ignored.
   */
  createCanUseTool(
    onPermissionPrompt?: (details: RequiresActionDetails) => void,
  ): CanUseToolFn {
    return async (
      tool: Tool,
      input: { [key: string]: unknown },
      toolUseContext: ToolUseContext,
      assistantMessage: AssistantMessage,
      toolUseID: string,
      forceDecision?: PermissionDecision,
    ): Promise<PermissionDecision> => {
      const mainPermissionResult =
        forceDecision ??
        (await hasPermissionsToUseTool(
          tool,
          input,
          toolUseContext,
          assistantMessage,
          toolUseID,
        ))
      // If the tool is allowed or denied, return the result
      if (
        mainPermissionResult.behavior === 'allow' ||
        mainPermissionResult.behavior === 'deny'
      ) {
        return mainPermissionResult
      }

      // Run PermissionRequest hooks in parallel with the SDK permission
      // prompt.  In the terminal CLI, hooks race against the interactive
      // prompt so that e.g. a hook with --delay 20 doesn't block the UI.
      // We need the same behavior here: the SDK host (VS Code, etc.) shows
      // its permission dialog immediately while hooks run in the background.
      // Whichever resolves first wins; the loser is cancelled/ignored.

      // AbortController used to cancel the SDK request if a hook decides first
      const hookAbortController = new AbortController()
      const parentSignal = toolUseContext.abortController.signal
      // Forward parent abort to our local controller
      const onParentAbort = () => hookAbortController.abort()
      parentSignal.addEventListener('abort', onParentAbort, { once: true })

      try {
        // Start the hook evaluation (runs in background)
        const hookPromise = executePermissionRequestHooksForSDK(
          tool.name,
          toolUseID,
          input,
          toolUseContext,
          mainPermissionResult.suggestions,
        ).then(decision => ({ source: 'hook' as const, decision }))

        // Start the SDK permission prompt immediately (don't wait for hooks)
        const requestId = randomUUID()
        onPermissionPrompt?.(
          buildRequiresActionDetails(tool, input, toolUseID, requestId),
        )
        const sdkPromise = this.sendRequest<PermissionToolOutput>(
          {
            subtype: 'can_use_tool',
            tool_name: tool.name,
            input,
            permission_suggestions: mainPermissionResult.suggestions,
            blocked_path: mainPermissionResult.blockedPath,
            decision_reason: serializeDecisionReason(
              mainPermissionResult.decisionReason,
            ),
            tool_use_id: toolUseID,
            agent_id: toolUseContext.agentId,
          },
          permissionToolOutputSchema(),
          hookAbortController.signal,
          requestId,
        ).then(result => ({ source: 'sdk' as const, result }))

        // Race: hook completion vs SDK prompt response.
        // The hook promise always resolves (never rejects), returning
        // undefined if no hook made a decision.
        const winner = await Promise.race([hookPromise, sdkPromise])

        if (winner.source === 'hook') {
          if (winner.decision) {
            // Hook decided — abort the pending SDK request.
            // Suppress the expected AbortError rejection from sdkPromise.
            sdkPromise.catch(() => {})
            hookAbortController.abort()
            return winner.decision
          }
          // Hook passed through (no decision) — wait for the SDK prompt
          const sdkResult = await sdkPromise
          return permissionPromptToolResultToPermissionDecision(
            sdkResult.result,
            tool,
            input,
            toolUseContext,
          )
        }

        // SDK prompt responded first — use its result (hook still running
        // in background but its result will be ignored)
        return permissionPromptToolResultToPermissionDecision(
          winner.result,
          tool,
          input,
          toolUseContext,
        )
      } catch (error) {
        // Any failure in the race (stream closed, schema mismatch, …)
        // resolves to a deny decision rather than throwing to the caller.
        return permissionPromptToolResultToPermissionDecision(
          {
            behavior: 'deny',
            message: `Tool permission request failed: ${error}`,
            toolUseID,
          },
          tool,
          input,
          toolUseContext,
        )
      } finally {
        // Only transition back to 'running' if no other permission prompts
        // are pending (concurrent tool execution can have multiple in-flight).
        if (this.getPendingPermissionRequests().length === 0) {
          notifySessionStateChanged('running')
        }
        parentSignal.removeEventListener('abort', onParentAbort)
      }
    }
  }

  /**
   * Build a HookCallback that forwards hook invocations to the SDK host via
   * a hook_callback control_request; any failure resolves to empty output
   * so a broken host callback never blocks the hook pipeline.
   */
  createHookCallback(callbackId: string, timeout?: number): HookCallback {
    return {
      type: 'callback',
      timeout,
      callback: async (
        input: HookInput,
        toolUseID: string | null,
        abort: AbortSignal | undefined,
      ): Promise<HookJSONOutput> => {
        try {
          const result = await this.sendRequest<HookJSONOutput>(
            {
              subtype: 'hook_callback',
              callback_id: callbackId,
              input,
              tool_use_id: toolUseID || undefined,
            },
            hookJSONOutputSchema(),
            abort,
          )
          return result
        } catch (error) {
          // biome-ignore lint/suspicious/noConsole:: intentional console output
          console.error(`Error in hook callback ${callbackId}:`, error)
          return {}
        }
      },
    }
  }

  /**
   * Sends an elicitation request to the SDK consumer and returns the response.
   */
  async handleElicitation(
    serverName: string,
    message: string,
    requestedSchema?: Record<string, unknown>,
    signal?: AbortSignal,
    mode?: 'form' | 'url',
    url?: string,
    elicitationId?: string,
  ): Promise<ElicitResult> {
    try {
      const result = await this.sendRequest<ElicitResult>(
        {
          subtype: 'elicitation',
          mcp_server_name: serverName,
          message,
          mode,
          url,
          elicitation_id: elicitationId,
          requested_schema: requestedSchema,
        },
        SDKControlElicitationResponseSchema(),
        signal,
      )
      return result
    } catch {
      // Failure (abort, closed stream, invalid payload) maps to cancel.
      return { action: 'cancel' as const }
    }
  }

  /**
   * Creates a SandboxAskCallback that forwards sandbox network permission
   * requests to the SDK host as can_use_tool control_requests.
   *
   * This piggybacks on the existing can_use_tool protocol with a synthetic
   * tool name so that SDK hosts (VS Code, CCR, etc.) can prompt the user
   * for network access without requiring a new protocol subtype.
   */
  createSandboxAskCallback(): (hostPattern: {
    host: string
    port?: number
  }) => Promise<boolean> {
    return async (hostPattern): Promise<boolean> => {
      try {
        const result = await this.sendRequest<PermissionToolOutput>(
          {
            subtype: 'can_use_tool',
            tool_name: SANDBOX_NETWORK_ACCESS_TOOL_NAME,
            input: { host: hostPattern.host },
            tool_use_id: randomUUID(),
            description: `Allow network connection to ${hostPattern.host}?`,
          },
          permissionToolOutputSchema(),
        )
        return result.behavior === 'allow'
      } catch {
        // If the request fails (stream closed, abort, etc.), deny the connection
        return false
      }
    }
  }

  /**
   * Sends an MCP message to an SDK server and waits for the response
   */
  async sendMcpMessage(
    serverName: string,
    message: JSONRPCMessage,
  ): Promise<JSONRPCMessage> {
    const response = await this.sendRequest<{ mcp_response: JSONRPCMessage }>(
      {
        subtype: 'mcp_message',
        server_name: serverName,
        message,
      },
      z.object({
        mcp_response: z.any() as z.Schema<JSONRPCMessage>,
      }),
    )
    return response.mcp_response
  }
}
+
/** Print `message` to stderr and terminate the process with exit code 1. */
function exitWithMessage(message: string): never {
  // biome-ignore lint/suspicious/noConsole:: intentional console output
  console.error(message)
  // eslint-disable-next-line custom-rules/no-process-exit
  process.exit(1)
}
+
+/**
+ * Execute PermissionRequest hooks and return a decision if one is made.
+ * Returns undefined if no hook made a decision.
+ */
+async function executePermissionRequestHooksForSDK(
+  toolName: string,
+  toolUseID: string,
+  input: Record<string, unknown>,
+  toolUseContext: ToolUseContext,
+  suggestions: PermissionUpdate[] | undefined,
+): Promise<PermissionDecision | undefined> {
+  const appState = toolUseContext.getAppState()
+  const permissionMode = appState.toolPermissionContext.mode
+
+  // Iterate directly over the generator instead of using `all`
+  const hookGenerator = executePermissionRequestHooks(
+    toolName,
+    toolUseID,
+    input,
+    toolUseContext,
+    permissionMode,
+    suggestions,
+    toolUseContext.abortController.signal,
+  )
+
+  for await (const hookResult of hookGenerator) {
+    if (
+      hookResult.permissionRequestResult &&
+      (hookResult.permissionRequestResult.behavior === 'allow' ||
+        hookResult.permissionRequestResult.behavior === 'deny')
+    ) {
+      const decision = hookResult.permissionRequestResult
+      if (decision.behavior === 'allow') {
+        const finalInput = decision.updatedInput || input
+
+        // Apply permission updates if provided by hook ("always allow")
+        const permissionUpdates = decision.updatedPermissions ?? []
+        if (permissionUpdates.length > 0) {
+          persistPermissionUpdates(permissionUpdates)
+          const currentAppState = toolUseContext.getAppState()
+          const updatedContext = applyPermissionUpdates(
+            currentAppState.toolPermissionContext,
+            permissionUpdates,
+          )
+          // Update permission context via setAppState
+          toolUseContext.setAppState(prev => {
+            if (prev.toolPermissionContext === updatedContext) return prev
+            return { ...prev, toolPermissionContext: updatedContext }
+          })
+        }
+
+        return {
+          behavior: 'allow',
+          updatedInput: finalInput,
+          userModified: false,
+          decisionReason: {
+            type: 'hook',
+            hookName: 'PermissionRequest',
+          },
+        }
+      } else {
+        // Hook denied the permission
+        return {
+          behavior: 'deny',
+          message:
+            decision.message || 'Permission denied by PermissionRequest hook',
+          decisionReason: {
+            type: 'hook',
+            hookName: 'PermissionRequest',
+          },
+        }
+      }
+    }
+  }
+
+  return undefined
+}

+ 282 - 0
src/cli/transports/HybridTransport.ts

@@ -0,0 +1,282 @@
+import axios, { type AxiosError } from 'axios'
+import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
+import { logForDebugging } from '../../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
+import { getSessionIngressAuthToken } from '../../utils/sessionIngressAuth.js'
+import { SerialBatchEventUploader } from './SerialBatchEventUploader.js'
+import {
+  WebSocketTransport,
+  type WebSocketTransportOptions,
+} from './WebSocketTransport.js'
+
// How long stream_event messages accumulate before being enqueued as one batch.
const BATCH_FLUSH_INTERVAL_MS = 100
// Per-attempt POST timeout. Bounds how long a single stuck POST can block
// the serialized queue. Without this, a hung connection stalls all writes.
const POST_TIMEOUT_MS = 15_000
// Grace period for queued writes on close(). Covers a healthy POST (~100ms)
// plus headroom; best-effort, not a delivery guarantee under degraded network.
// Void-ed (nothing awaits it) so this is a last resort — replBridge teardown
// now closes AFTER archive so archive latency is the primary drain window.
// NOTE: gracefulShutdown's cleanup budget is 2s (not the 5s outer failsafe);
// 3s here exceeds it, but the process lives ~2s longer for hooks+analytics.
const CLOSE_GRACE_MS = 3000

/**
 * Hybrid transport: WebSocket for reads, HTTP POST for writes.
 *
 * Write flow:
 *
 *   write(stream_event) ─┐
 *                        │ (100ms timer)
 *                        │
 *                        ▼
 *   write(other) ────► uploader.enqueue()  (SerialBatchEventUploader)
 *                        ▲    │
 *   writeBatch() ────────┘    │ serial, batched, retries indefinitely,
 *                             │ backpressure at maxQueueSize
 *                             ▼
 *                        postOnce()  (single HTTP POST, throws on retryable)
 *
 * stream_event messages accumulate in streamEventBuffer for up to 100ms
 * before enqueue (reduces POST count for high-volume content deltas). A
 * non-stream write flushes any buffered stream_events first to preserve order.
 *
 * Serialization + retry + backpressure are delegated to SerialBatchEventUploader
 * (same primitive CCR uses). At most one POST in-flight; events arriving during
 * a POST batch into the next one. On failure, the uploader re-queues and retries
 * with exponential backoff + jitter. If the queue fills past maxQueueSize,
 * enqueue() blocks — giving awaiting callers backpressure.
 *
 * Why serialize? Bridge mode fires writes via `void transport.write()`
 * (fire-and-forget). Without this, concurrent POSTs → concurrent Firestore
 * writes to the same document → collisions → retry storms → pages oncall.
 */
export class HybridTransport extends WebSocketTransport {
  /** HTTP endpoint for event POSTs, derived from the WebSocket URL. */
  private postUrl: string
  /** Serializes, batches, and retries all outgoing POSTs — at most one in flight. */
  private uploader: SerialBatchEventUploader<StdoutMessage>

  // stream_event delay buffer — accumulates content deltas for up to
  // BATCH_FLUSH_INTERVAL_MS before enqueueing (reduces POST count)
  private streamEventBuffer: StdoutMessage[] = []
  private streamEventTimer: ReturnType<typeof setTimeout> | null = null

  /**
   * @param url WebSocket URL; the HTTP POST endpoint is derived from it
   *   (see convertWsUrlToPostUrl).
   * @param headers Extra headers, forwarded to WebSocketTransport.
   * @param sessionId Session identifier, forwarded to WebSocketTransport.
   * @param refreshHeaders Header refresher, forwarded to WebSocketTransport
   *   (presumably invoked on reconnect — see base class).
   * @param options Base transport options plus uploader failure knobs:
   *   maxConsecutiveFailures caps retries per batch, onBatchDropped fires
   *   when a batch is dropped after exhausting them.
   */
  constructor(
    url: URL,
    headers: Record<string, string> = {},
    sessionId?: string,
    refreshHeaders?: () => Record<string, string>,
    options?: WebSocketTransportOptions & {
      maxConsecutiveFailures?: number
      onBatchDropped?: (batchSize: number, failures: number) => void
    },
  ) {
    super(url, headers, sessionId, refreshHeaders, options)
    const { maxConsecutiveFailures, onBatchDropped } = options ?? {}
    this.postUrl = convertWsUrlToPostUrl(url)
    this.uploader = new SerialBatchEventUploader<StdoutMessage>({
      // Large cap — session-ingress accepts arbitrary batch sizes. Events
      // naturally batch during in-flight POSTs; this just bounds the payload.
      maxBatchSize: 500,
      // Bridge callers use `void transport.write()` — backpressure doesn't
      // apply (they don't await). A batch >maxQueueSize deadlocks (see
      // SerialBatchEventUploader backpressure check). So set it high enough
      // to be a memory bound only. Wire real backpressure in a follow-up
      // once callers await.
      maxQueueSize: 100_000,
      baseDelayMs: 500,
      maxDelayMs: 8000,
      jitterMs: 1000,
      // Optional cap so a persistently-failing server can't pin the drain
      // loop for the lifetime of the process. Undefined = indefinite retry.
      // replBridge sets this; the 1P transportUtils path does not.
      maxConsecutiveFailures,
      onBatchDropped: (batchSize, failures) => {
        logForDiagnosticsNoPII(
          'error',
          'cli_hybrid_batch_dropped_max_failures',
          {
            batchSize,
            failures,
          },
        )
        onBatchDropped?.(batchSize, failures)
      },
      send: batch => this.postOnce(batch),
    })
    logForDebugging(`HybridTransport: POST URL = ${this.postUrl}`)
    logForDiagnosticsNoPII('info', 'cli_hybrid_transport_initialized')
  }

  /**
   * Enqueue a message and wait for the queue to drain. Returning flush()
   * preserves the contract that `await write()` resolves after the event is
   * POSTed (relied on by tests and replBridge's initial flush). Fire-and-forget
   * callers (`void transport.write()`) are unaffected — they don't await,
   * so the later resolution doesn't add latency.
   */
  override async write(message: StdoutMessage): Promise<void> {
    if (message.type === 'stream_event') {
      // Delay: accumulate stream_events briefly before enqueueing.
      // Promise resolves immediately — callers don't await stream_events.
      this.streamEventBuffer.push(message)
      if (!this.streamEventTimer) {
        this.streamEventTimer = setTimeout(
          () => this.flushStreamEvents(),
          BATCH_FLUSH_INTERVAL_MS,
        )
      }
      return
    }
    // Immediate: flush any buffered stream_events (ordering), then this event.
    await this.uploader.enqueue([...this.takeStreamEvents(), message])
    return this.uploader.flush()
  }

  /**
   * Enqueue several messages at once (after any buffered stream_events, to
   * preserve ordering) and wait for the queue to drain.
   */
  async writeBatch(messages: StdoutMessage[]): Promise<void> {
    await this.uploader.enqueue([...this.takeStreamEvents(), ...messages])
    return this.uploader.flush()
  }

  /** Snapshot before/after writeBatch() to detect silent drops. */
  get droppedBatchCount(): number {
    return this.uploader.droppedBatchCount
  }

  /**
   * Block until all pending events are POSTed. Used by bridge's initial
   * history flush so onStateChange('connected') fires after persistence.
   */
  flush(): Promise<void> {
    void this.uploader.enqueue(this.takeStreamEvents())
    return this.uploader.flush()
  }

  /** Take ownership of buffered stream_events and clear the delay timer. */
  private takeStreamEvents(): StdoutMessage[] {
    if (this.streamEventTimer) {
      clearTimeout(this.streamEventTimer)
      this.streamEventTimer = null
    }
    const buffered = this.streamEventBuffer
    this.streamEventBuffer = []
    return buffered
  }

  /** Delay timer fired — enqueue accumulated stream_events. */
  private flushStreamEvents(): void {
    this.streamEventTimer = null
    void this.uploader.enqueue(this.takeStreamEvents())
  }

  /**
   * Synchronous close: drops any un-enqueued stream_events, gives already
   * queued uploads a bounded grace window, then closes the uploader and the
   * underlying WebSocket.
   */
  override close(): void {
    if (this.streamEventTimer) {
      clearTimeout(this.streamEventTimer)
      this.streamEventTimer = null
    }
    this.streamEventBuffer = []
    // Grace period for queued writes — fallback. replBridge teardown now
    // awaits archive between write and close (see CLOSE_GRACE_MS), so
    // archive latency is the primary drain window and this is a last
    // resort. Keep close() sync (returns immediately) but defer
    // uploader.close() so any remaining queue gets a chance to finish.
    const uploader = this.uploader
    let graceTimer: ReturnType<typeof setTimeout> | undefined
    void Promise.race([
      uploader.flush(),
      new Promise<void>(r => {
        // eslint-disable-next-line no-restricted-syntax -- need timer ref for clearTimeout
        graceTimer = setTimeout(r, CLOSE_GRACE_MS)
      }),
    ]).finally(() => {
      clearTimeout(graceTimer)
      uploader.close()
    })
    super.close()
  }

  /**
   * Single-attempt POST. Throws on retryable failures (429, 5xx, network)
   * so SerialBatchEventUploader re-queues and retries. Returns on success
   * and on permanent failures (4xx non-429, no token) so the uploader moves on.
   */
  private async postOnce(events: StdoutMessage[]): Promise<void> {
    const sessionToken = getSessionIngressAuthToken()
    if (!sessionToken) {
      logForDebugging('HybridTransport: No session token available for POST')
      logForDiagnosticsNoPII('warn', 'cli_hybrid_post_no_token')
      return
    }

    const headers: Record<string, string> = {
      Authorization: `Bearer ${sessionToken}`,
      'Content-Type': 'application/json',
    }

    let response
    try {
      response = await axios.post(
        this.postUrl,
        { events },
        {
          headers,
          validateStatus: () => true, // never throw on status; branch below
          timeout: POST_TIMEOUT_MS,
        },
      )
    } catch (error) {
      const axiosError = error as AxiosError
      logForDebugging(`HybridTransport: POST error: ${axiosError.message}`)
      logForDiagnosticsNoPII('warn', 'cli_hybrid_post_network_error')
      throw error
    }

    if (response.status >= 200 && response.status < 300) {
      logForDebugging(`HybridTransport: POST success count=${events.length}`)
      return
    }

    // 4xx (except 429) are permanent — drop, don't retry.
    if (
      response.status >= 400 &&
      response.status < 500 &&
      response.status !== 429
    ) {
      logForDebugging(
        `HybridTransport: POST returned ${response.status} (permanent), dropping`,
      )
      logForDiagnosticsNoPII('warn', 'cli_hybrid_post_client_error', {
        status: response.status,
      })
      return
    }

    // 429 / 5xx — retryable. Throw so uploader re-queues and backs off.
    logForDebugging(
      `HybridTransport: POST returned ${response.status} (retryable)`,
    )
    logForDiagnosticsNoPII('warn', 'cli_hybrid_post_retryable_error', {
      status: response.status,
    })
    throw new Error(`POST failed with ${response.status}`)
  }
}
+
+/**
+ * Convert a WebSocket URL to the HTTP POST endpoint URL.
+ * From: wss://api.example.com/v2/session_ingress/ws/<session_id>
+ * To: https://api.example.com/v2/session_ingress/session/<session_id>/events
+ */
+function convertWsUrlToPostUrl(wsUrl: URL): string {
+  const protocol = wsUrl.protocol === 'wss:' ? 'https:' : 'http:'
+
+  // Replace /ws/ with /session/ and append /events
+  let pathname = wsUrl.pathname
+  pathname = pathname.replace('/ws/', '/session/')
+  if (!pathname.endsWith('/events')) {
+    pathname = pathname.endsWith('/')
+      ? pathname + 'events'
+      : pathname + '/events'
+  }
+
+  return `${protocol}//${wsUrl.host}${pathname}${wsUrl.search}`
+}

+ 711 - 0
src/cli/transports/SSETransport.ts

@@ -0,0 +1,711 @@
+import axios, { type AxiosError } from 'axios'
+import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
+import { logForDebugging } from '../../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
+import { errorMessage } from '../../utils/errors.js'
+import { getSessionIngressAuthHeaders } from '../../utils/sessionIngressAuth.js'
+import { sleep } from '../../utils/sleep.js'
+import { jsonParse, jsonStringify } from '../../utils/slowOperations.js'
+import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
+import type { Transport } from './Transport.js'
+
+// ---------------------------------------------------------------------------
+// Configuration
+// ---------------------------------------------------------------------------
+
+const RECONNECT_BASE_DELAY_MS = 1000
+const RECONNECT_MAX_DELAY_MS = 30_000
+/** Time budget for reconnection attempts before giving up (10 minutes). */
+const RECONNECT_GIVE_UP_MS = 600_000
+/** Server sends keepalives every 15s; treat connection as dead after 45s of silence. */
+const LIVENESS_TIMEOUT_MS = 45_000
+
+/**
+ * HTTP status codes that indicate a permanent server-side rejection.
+ * The transport transitions to 'closed' immediately without retrying.
+ */
+const PERMANENT_HTTP_CODES = new Set([401, 403, 404])
+
+// POST retry configuration (matches HybridTransport)
+const POST_MAX_RETRIES = 10
+const POST_BASE_DELAY_MS = 500
+const POST_MAX_DELAY_MS = 8000
+
+/** Hoisted TextDecoder options to avoid per-chunk allocation in readStream. */
+const STREAM_DECODE_OPTS: TextDecodeOptions = { stream: true }
+
+/** Hoisted axios validateStatus callback to avoid per-request closure allocation. */
+function alwaysValidStatus(): boolean {
+  return true
+}
+
+// ---------------------------------------------------------------------------
+// SSE Frame Parser
+// ---------------------------------------------------------------------------
+
+type SSEFrame = {
+  event?: string
+  id?: string
+  data?: string
+}
+
+/**
+ * Incrementally parse SSE frames from a text buffer.
+ * Returns parsed frames and the remaining (incomplete) buffer.
+ *
+ * @internal exported for testing
+ */
+export function parseSSEFrames(buffer: string): {
+  frames: SSEFrame[]
+  remaining: string
+} {
+  const frames: SSEFrame[] = []
+  let pos = 0
+
+  // SSE frames are delimited by double newlines
+  let idx: number
+  while ((idx = buffer.indexOf('\n\n', pos)) !== -1) {
+    const rawFrame = buffer.slice(pos, idx)
+    pos = idx + 2
+
+    // Skip empty frames
+    if (!rawFrame.trim()) continue
+
+    const frame: SSEFrame = {}
+    let isComment = false
+
+    for (const line of rawFrame.split('\n')) {
+      if (line.startsWith(':')) {
+        // SSE comment (e.g., `:keepalive`)
+        isComment = true
+        continue
+      }
+
+      const colonIdx = line.indexOf(':')
+      if (colonIdx === -1) continue
+
+      const field = line.slice(0, colonIdx)
+      // Per SSE spec, strip one leading space after colon if present
+      const value =
+        line[colonIdx + 1] === ' '
+          ? line.slice(colonIdx + 2)
+          : line.slice(colonIdx + 1)
+
+      switch (field) {
+        case 'event':
+          frame.event = value
+          break
+        case 'id':
+          frame.id = value
+          break
+        case 'data':
+          // Per SSE spec, multiple data: lines are concatenated with \n
+          frame.data = frame.data ? frame.data + '\n' + value : value
+          break
+        // Ignore other fields (retry:, etc.)
+      }
+    }
+
+    // Only emit frames that have data (or are pure comments which reset liveness)
+    if (frame.data || isComment) {
+      frames.push(frame)
+    }
+  }
+
+  return { frames, remaining: buffer.slice(pos) }
+}
+
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

// Lifecycle: idle → (connect) → reconnecting → connected → … → closing/closed.
// 'reconnecting' also covers the initial connection attempt.
type SSETransportState =
  | 'idle'
  | 'connected'
  | 'reconnecting'
  | 'closing'
  | 'closed'

/**
 * Payload for `event: client_event` frames, matching the StreamClientEvent
 * proto message in session_stream.proto. This is the only event type sent
 * to worker subscribers — delivery_update, session_update, ephemeral_event,
 * and catch_up_truncated are client-channel-only (see notifier.go and
 * event_stream.go SubscriberClient guard).
 */
export type StreamClientEvent = {
  event_id: string
  sequence_num: number
  event_type: string
  source: string
  payload: Record<string, unknown>
  created_at: string
}

// ---------------------------------------------------------------------------
// SSETransport
// ---------------------------------------------------------------------------

/**
 * Transport that uses SSE for reading and HTTP POST for writing.
 *
 * Reads events via Server-Sent Events from the CCR v2 event stream endpoint.
 * Writes events via HTTP POST with retry logic (same pattern as HybridTransport).
 *
 * Each `event: client_event` frame carries a StreamClientEvent proto JSON
 * directly in `data:`. The transport extracts `payload` and passes it to
 * `onData` as newline-delimited JSON for StructuredIO consumers.
 *
 * Supports automatic reconnection with exponential backoff and Last-Event-ID
 * for resumption after disconnection.
 */
export class SSETransport implements Transport {
  private state: SSETransportState = 'idle'
  private onData?: (data: string) => void
  private onCloseCallback?: (closeCode?: number) => void
  private onEventCallback?: (event: StreamClientEvent) => void
  private headers: Record<string, string>
  private sessionId?: string
  private refreshHeaders?: () => Record<string, string>
  private readonly getAuthHeaders: () => Record<string, string>

  // SSE connection state
  private abortController: AbortController | null = null
  // High-water mark of frame ids (sequence numbers) seen on this stream.
  private lastSequenceNum = 0
  // Dedup set for frame ids; pruned in readStream once it grows past 1000.
  private seenSequenceNums = new Set<number>()

  // Reconnection state
  private reconnectAttempts = 0
  private reconnectStartTime: number | null = null
  private reconnectTimer: NodeJS.Timeout | null = null

  // Liveness detection
  private livenessTimer: NodeJS.Timeout | null = null

  // POST URL (derived from SSE URL)
  private postUrl: string

  constructor(
    private readonly url: URL,
    headers: Record<string, string> = {},
    sessionId?: string,
    refreshHeaders?: () => Record<string, string>,
    initialSequenceNum?: number,
    /**
     * Per-instance auth header source. Omit to read the process-wide
     * CLAUDE_CODE_SESSION_ACCESS_TOKEN (single-session callers). Required
     * for concurrent multi-session callers — the env-var path is a process
     * global and would stomp across sessions.
     */
    getAuthHeaders?: () => Record<string, string>,
  ) {
    this.headers = headers
    this.sessionId = sessionId
    this.refreshHeaders = refreshHeaders
    this.getAuthHeaders = getAuthHeaders ?? getSessionIngressAuthHeaders
    this.postUrl = convertSSEUrlToPostUrl(url)
    // Seed with a caller-provided high-water mark so the first connect()
    // sends from_sequence_num / Last-Event-ID. Without this, a fresh
    // SSETransport always asks the server to replay from sequence 0 —
    // the entire session history on every transport swap.
    if (initialSequenceNum !== undefined && initialSequenceNum > 0) {
      this.lastSequenceNum = initialSequenceNum
    }
    logForDebugging(`SSETransport: SSE URL = ${url.href}`)
    logForDebugging(`SSETransport: POST URL = ${this.postUrl}`)
    logForDiagnosticsNoPII('info', 'cli_sse_transport_initialized')
  }

  /**
   * High-water mark of sequence numbers seen on this stream. Callers that
   * recreate the transport (e.g. replBridge onWorkReceived) read this before
   * close() and pass it as `initialSequenceNum` to the next instance so the
   * server resumes from the right point instead of replaying everything.
   */
  getLastSequenceNum(): number {
    return this.lastSequenceNum
  }

  /**
   * Open the SSE stream and consume it until it ends or errors. Resolves
   * when the stream terminates. Retryable failures hand off to
   * handleConnectionError (which schedules a reconnect); permanent HTTP
   * codes transition straight to 'closed' and fire onCloseCallback.
   */
  async connect(): Promise<void> {
    if (this.state !== 'idle' && this.state !== 'reconnecting') {
      logForDebugging(
        `SSETransport: Cannot connect, current state is ${this.state}`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'cli_sse_connect_failed')
      return
    }

    this.state = 'reconnecting'
    const connectStartTime = Date.now()

    // Build SSE URL with sequence number for resumption
    const sseUrl = new URL(this.url.href)
    if (this.lastSequenceNum > 0) {
      sseUrl.searchParams.set('from_sequence_num', String(this.lastSequenceNum))
    }

    // Build headers -- use fresh auth headers (supports Cookie for session keys).
    // Remove stale Authorization header from this.headers when Cookie auth is used,
    // since sending both confuses the auth interceptor.
    const authHeaders = this.getAuthHeaders()
    const headers: Record<string, string> = {
      ...this.headers,
      ...authHeaders,
      Accept: 'text/event-stream',
      'anthropic-version': '2023-06-01',
      'User-Agent': getClaudeCodeUserAgent(),
    }
    if (authHeaders['Cookie']) {
      delete headers['Authorization']
    }
    if (this.lastSequenceNum > 0) {
      headers['Last-Event-ID'] = String(this.lastSequenceNum)
    }

    logForDebugging(`SSETransport: Opening ${sseUrl.href}`)
    logForDiagnosticsNoPII('info', 'cli_sse_connect_opening')

    this.abortController = new AbortController()

    try {
      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
      const response = await fetch(sseUrl.href, {
        headers,
        signal: this.abortController.signal,
      })

      if (!response.ok) {
        const isPermanent = PERMANENT_HTTP_CODES.has(response.status)
        logForDebugging(
          `SSETransport: HTTP ${response.status}${isPermanent ? ' (permanent)' : ''}`,
          { level: 'error' },
        )
        logForDiagnosticsNoPII('error', 'cli_sse_connect_http_error', {
          status: response.status,
        })

        if (isPermanent) {
          this.state = 'closed'
          this.onCloseCallback?.(response.status)
          return
        }

        this.handleConnectionError()
        return
      }

      if (!response.body) {
        logForDebugging('SSETransport: No response body')
        this.handleConnectionError()
        return
      }

      // Successfully connected
      const connectDuration = Date.now() - connectStartTime
      logForDebugging('SSETransport: Connected')
      logForDiagnosticsNoPII('info', 'cli_sse_connect_connected', {
        duration_ms: connectDuration,
      })

      this.state = 'connected'
      this.reconnectAttempts = 0
      this.reconnectStartTime = null
      this.resetLivenessTimer()

      // Read the SSE stream
      await this.readStream(response.body)
    } catch (error) {
      if (this.abortController?.signal.aborted) {
        // Intentional close
        return
      }

      logForDebugging(
        `SSETransport: Connection error: ${errorMessage(error)}`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'cli_sse_connect_error')
      this.handleConnectionError()
    }
  }

  /**
   * Read and process the SSE stream body.
   */
  // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
  private async readStream(body: ReadableStream<Uint8Array>): Promise<void> {
    const reader = body.getReader()
    const decoder = new TextDecoder()
    let buffer = ''

    try {
      while (true) {
        const { done, value } = await reader.read()
        if (done) break

        buffer += decoder.decode(value, STREAM_DECODE_OPTS)
        const { frames, remaining } = parseSSEFrames(buffer)
        buffer = remaining

        for (const frame of frames) {
          // Any frame (including keepalive comments) proves the connection is alive
          this.resetLivenessTimer()

          if (frame.id) {
            const seqNum = parseInt(frame.id, 10)
            if (!isNaN(seqNum)) {
              if (this.seenSequenceNums.has(seqNum)) {
                logForDebugging(
                  `SSETransport: DUPLICATE frame seq=${seqNum} (lastSequenceNum=${this.lastSequenceNum}, seenCount=${this.seenSequenceNums.size})`,
                  { level: 'warn' },
                )
                logForDiagnosticsNoPII('warn', 'cli_sse_duplicate_sequence')
              } else {
                this.seenSequenceNums.add(seqNum)
                // Prevent unbounded growth: once we have many entries, prune
                // old sequence numbers that are well below the high-water mark.
                // Only sequence numbers near lastSequenceNum matter for dedup.
                if (this.seenSequenceNums.size > 1000) {
                  const threshold = this.lastSequenceNum - 200
                  for (const s of this.seenSequenceNums) {
                    if (s < threshold) {
                      this.seenSequenceNums.delete(s)
                    }
                  }
                }
              }
              if (seqNum > this.lastSequenceNum) {
                this.lastSequenceNum = seqNum
              }
            }
          }

          if (frame.event && frame.data) {
            this.handleSSEFrame(frame.event, frame.data)
          } else if (frame.data) {
            // data: without event: — server is emitting the old envelope format
            // or a bug. Log so incidents show as a signal instead of silent drops.
            logForDebugging(
              'SSETransport: Frame has data: but no event: field — dropped',
              { level: 'warn' },
            )
            logForDiagnosticsNoPII('warn', 'cli_sse_frame_missing_event_field')
          }
        }
      }
    } catch (error) {
      if (this.abortController?.signal.aborted) return
      logForDebugging(
        `SSETransport: Stream read error: ${errorMessage(error)}`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'cli_sse_stream_read_error')
    } finally {
      reader.releaseLock()
    }

    // Stream ended — reconnect unless we're closing
    if (this.state !== 'closing' && this.state !== 'closed') {
      logForDebugging('SSETransport: Stream ended, reconnecting')
      this.handleConnectionError()
    }
  }

  /**
   * Handle a single SSE frame. The event: field names the variant; data:
   * carries the inner proto JSON directly (no envelope).
   *
   * Worker subscribers only receive client_event frames (see notifier.go) —
   * any other event type indicates a server-side change that CC doesn't yet
   * understand. Log a diagnostic so we notice in telemetry.
   */
  private handleSSEFrame(eventType: string, data: string): void {
    if (eventType !== 'client_event') {
      logForDebugging(
        `SSETransport: Unexpected SSE event type '${eventType}' on worker stream`,
        { level: 'warn' },
      )
      logForDiagnosticsNoPII('warn', 'cli_sse_unexpected_event_type', {
        event_type: eventType,
      })
      return
    }

    let ev: StreamClientEvent
    try {
      ev = jsonParse(data) as StreamClientEvent
    } catch (error) {
      logForDebugging(
        `SSETransport: Failed to parse client_event data: ${errorMessage(error)}`,
        { level: 'error' },
      )
      return
    }

    const payload = ev.payload
    if (payload && typeof payload === 'object' && 'type' in payload) {
      const sessionLabel = this.sessionId ? ` session=${this.sessionId}` : ''
      logForDebugging(
        `SSETransport: Event seq=${ev.sequence_num} event_id=${ev.event_id} event_type=${ev.event_type} payload_type=${String(payload.type)}${sessionLabel}`,
      )
      logForDiagnosticsNoPII('info', 'cli_sse_message_received')
      // Pass the unwrapped payload as newline-delimited JSON,
      // matching the format that StructuredIO/WebSocketTransport consumers expect
      this.onData?.(jsonStringify(payload) + '\n')
    } else {
      logForDebugging(
        `SSETransport: Ignoring client_event with no type in payload: event_id=${ev.event_id}`,
      )
    }

    this.onEventCallback?.(ev)
  }

  /**
   * Handle connection errors with exponential backoff and time budget.
   */
  private handleConnectionError(): void {
    this.clearLivenessTimer()

    if (this.state === 'closing' || this.state === 'closed') return

    // Abort any in-flight SSE fetch
    this.abortController?.abort()
    this.abortController = null

    const now = Date.now()
    if (!this.reconnectStartTime) {
      this.reconnectStartTime = now
    }

    const elapsed = now - this.reconnectStartTime
    if (elapsed < RECONNECT_GIVE_UP_MS) {
      // Clear any existing timer
      if (this.reconnectTimer) {
        clearTimeout(this.reconnectTimer)
        this.reconnectTimer = null
      }

      // Refresh headers before reconnecting
      if (this.refreshHeaders) {
        const freshHeaders = this.refreshHeaders()
        Object.assign(this.headers, freshHeaders)
        logForDebugging('SSETransport: Refreshed headers for reconnect')
      }

      this.state = 'reconnecting'
      this.reconnectAttempts++

      const baseDelay = Math.min(
        RECONNECT_BASE_DELAY_MS * Math.pow(2, this.reconnectAttempts - 1),
        RECONNECT_MAX_DELAY_MS,
      )
      // Add ±25% jitter
      const delay = Math.max(
        0,
        baseDelay + baseDelay * 0.25 * (2 * Math.random() - 1),
      )

      logForDebugging(
        `SSETransport: Reconnecting in ${Math.round(delay)}ms (attempt ${this.reconnectAttempts}, ${Math.round(elapsed / 1000)}s elapsed)`,
      )
      logForDiagnosticsNoPII('error', 'cli_sse_reconnect_attempt', {
        reconnectAttempts: this.reconnectAttempts,
      })

      this.reconnectTimer = setTimeout(() => {
        this.reconnectTimer = null
        void this.connect()
      }, delay)
    } else {
      logForDebugging(
        `SSETransport: Reconnection time budget exhausted after ${Math.round(elapsed / 1000)}s`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'cli_sse_reconnect_exhausted', {
        reconnectAttempts: this.reconnectAttempts,
        elapsedMs: elapsed,
      })
      this.state = 'closed'
      this.onCloseCallback?.()
    }
  }

  /**
   * Bound timeout callback. Hoisted from an inline closure so that
   * resetLivenessTimer (called per-frame) does not allocate a new closure
   * on every SSE frame.
   */
  private readonly onLivenessTimeout = (): void => {
    this.livenessTimer = null
    logForDebugging('SSETransport: Liveness timeout, reconnecting', {
      level: 'error',
    })
    logForDiagnosticsNoPII('error', 'cli_sse_liveness_timeout')
    this.abortController?.abort()
    this.handleConnectionError()
  }

  /**
   * Reset the liveness timer. If no SSE frame arrives within the timeout,
   * treat the connection as dead and reconnect.
   */
  private resetLivenessTimer(): void {
    this.clearLivenessTimer()
    this.livenessTimer = setTimeout(this.onLivenessTimeout, LIVENESS_TIMEOUT_MS)
  }

  private clearLivenessTimer(): void {
    if (this.livenessTimer) {
      clearTimeout(this.livenessTimer)
      this.livenessTimer = null
    }
  }

  // -----------------------------------------------------------------------
  // Write (HTTP POST) — same pattern as HybridTransport
  // -----------------------------------------------------------------------

  /**
   * POST a single message to the events endpoint, retrying 429/5xx/network
   * errors with exponential backoff up to POST_MAX_RETRIES. Always resolves:
   * permanent 4xx responses and exhausted retries are logged and dropped,
   * never thrown to the caller.
   */
  async write(message: StdoutMessage): Promise<void> {
    const authHeaders = this.getAuthHeaders()
    if (Object.keys(authHeaders).length === 0) {
      logForDebugging('SSETransport: No session token available for POST')
      logForDiagnosticsNoPII('warn', 'cli_sse_post_no_token')
      return
    }

    const headers: Record<string, string> = {
      ...authHeaders,
      'Content-Type': 'application/json',
      'anthropic-version': '2023-06-01',
      'User-Agent': getClaudeCodeUserAgent(),
    }

    logForDebugging(
      `SSETransport: POST body keys=${Object.keys(message as Record<string, unknown>).join(',')}`,
    )

    for (let attempt = 1; attempt <= POST_MAX_RETRIES; attempt++) {
      try {
        const response = await axios.post(this.postUrl, message, {
          headers,
          validateStatus: alwaysValidStatus,
        })

        if (response.status === 200 || response.status === 201) {
          logForDebugging(`SSETransport: POST success type=${message.type}`)
          return
        }

        logForDebugging(
          `SSETransport: POST ${response.status} body=${jsonStringify(response.data).slice(0, 200)}`,
        )
        // 4xx errors (except 429) are permanent - don't retry
        if (
          response.status >= 400 &&
          response.status < 500 &&
          response.status !== 429
        ) {
          logForDebugging(
            `SSETransport: POST returned ${response.status} (client error), not retrying`,
          )
          logForDiagnosticsNoPII('warn', 'cli_sse_post_client_error', {
            status: response.status,
          })
          return
        }

        // 429 or 5xx - retry
        logForDebugging(
          `SSETransport: POST returned ${response.status}, attempt ${attempt}/${POST_MAX_RETRIES}`,
        )
        logForDiagnosticsNoPII('warn', 'cli_sse_post_retryable_error', {
          status: response.status,
          attempt,
        })
      } catch (error) {
        const axiosError = error as AxiosError
        logForDebugging(
          `SSETransport: POST error: ${axiosError.message}, attempt ${attempt}/${POST_MAX_RETRIES}`,
        )
        logForDiagnosticsNoPII('warn', 'cli_sse_post_network_error', {
          attempt,
        })
      }

      if (attempt === POST_MAX_RETRIES) {
        logForDebugging(
          `SSETransport: POST failed after ${POST_MAX_RETRIES} attempts, continuing`,
        )
        logForDiagnosticsNoPII('warn', 'cli_sse_post_retries_exhausted')
        return
      }

      const delayMs = Math.min(
        POST_BASE_DELAY_MS * Math.pow(2, attempt - 1),
        POST_MAX_DELAY_MS,
      )
      await sleep(delayMs)
    }
  }

  // -----------------------------------------------------------------------
  // Transport interface
  // -----------------------------------------------------------------------

  /** True while the SSE stream is open and being read. */
  isConnectedStatus(): boolean {
    return this.state === 'connected'
  }

  /** True once the transport has permanently given up (permanent HTTP code or retry budget exhausted). */
  isClosedStatus(): boolean {
    return this.state === 'closed'
  }

  /** Register the consumer of unwrapped payloads (newline-delimited JSON). */
  setOnData(callback: (data: string) => void): void {
    this.onData = callback
  }

  /**
   * Register the close callback. Receives the HTTP status when the server
   * rejected permanently (401/403/404); undefined when the reconnect budget
   * was exhausted. Not invoked for caller-initiated close().
   */
  setOnClose(callback: (closeCode?: number) => void): void {
    this.onCloseCallback = callback
  }

  /** Register a raw StreamClientEvent observer (fires for every client_event frame). */
  setOnEvent(callback: (event: StreamClientEvent) => void): void {
    this.onEventCallback = callback
  }

  /**
   * Caller-initiated shutdown: cancel reconnect and liveness timers and
   * abort the in-flight fetch. Leaves state at 'closing' and does not
   * invoke onCloseCallback.
   */
  close(): void {
    if (this.reconnectTimer) {
      clearTimeout(this.reconnectTimer)
      this.reconnectTimer = null
    }
    this.clearLivenessTimer()

    this.state = 'closing'
    this.abortController?.abort()
    this.abortController = null
  }
}
+
+// ---------------------------------------------------------------------------
+// URL Conversion
+// ---------------------------------------------------------------------------
+
+/**
+ * Convert an SSE URL to the HTTP POST endpoint URL.
+ * The SSE stream URL and POST URL share the same base; the POST endpoint
+ * is at `/events` (without `/stream`).
+ *
+ * From: https://api.example.com/v2/session_ingress/session/<session_id>/events/stream
+ * To:   https://api.example.com/v2/session_ingress/session/<session_id>/events
+ */
+function convertSSEUrlToPostUrl(sseUrl: URL): string {
+  let pathname = sseUrl.pathname
+  // Remove /stream suffix to get the POST events endpoint
+  if (pathname.endsWith('/stream')) {
+    pathname = pathname.slice(0, -'/stream'.length)
+  }
+  return `${sseUrl.protocol}//${sseUrl.host}${pathname}`
+}

+ 275 - 0
src/cli/transports/SerialBatchEventUploader.ts

@@ -0,0 +1,275 @@
+import { jsonStringify } from '../../utils/slowOperations.js'
+
+/**
+ * Serial ordered event uploader with batching, retry, and backpressure.
+ *
+ * - enqueue() adds events to a pending buffer
+ * - At most 1 POST in-flight at a time
+ * - Drains up to maxBatchSize items per POST
+ * - New events accumulate while in-flight
+ * - On failure: exponential backoff (clamped), retries indefinitely
+ *   until success or close() — unless maxConsecutiveFailures is set,
+ *   in which case the failing batch is dropped and drain advances
+ * - flush() blocks until pending is empty and kicks drain if needed
+ * - Backpressure: enqueue() blocks when maxQueueSize is reached
+ */
+
+/**
+ * Throw from config.send() to make the uploader wait a server-supplied
+ * duration before retrying (e.g. 429 with Retry-After). When retryAfterMs
+ * is set, it overrides exponential backoff for that attempt — clamped to
+ * [baseDelayMs, maxDelayMs] and jittered so a misbehaving server can
+ * neither hot-loop nor stall the client, and many sessions sharing a rate
+ * limit don't all pounce at the same instant. Without retryAfterMs, behaves
+ * like any other thrown error (exponential backoff).
+ */
+export class RetryableError extends Error {
+  constructor(
+    message: string,
+    readonly retryAfterMs?: number,
+  ) {
+    super(message)
+    // Name the subclass explicitly: without this, logs and serialized
+    // errors read "Error: …" instead of "RetryableError: …", which hides
+    // the retry semantics from anyone reading diagnostics.
+    this.name = 'RetryableError'
+  }
+}
+
+/** Constructor options for SerialBatchEventUploader. */
+type SerialBatchEventUploaderConfig<T> = {
+  /** Max items per POST (1 = no batching) */
+  maxBatchSize: number
+  /**
+   * Max serialized bytes per POST. First item always goes in regardless of
+   * size; subsequent items only if cumulative JSON bytes stay under this.
+   * Undefined = no byte limit (count-only batching).
+   */
+  maxBatchBytes?: number
+  /** Max pending items before enqueue() blocks */
+  maxQueueSize: number
+  /** The actual HTTP call — caller controls payload format */
+  send: (batch: T[]) => Promise<void>
+  /** Base delay for exponential backoff (ms) */
+  baseDelayMs: number
+  /** Max delay cap (ms) */
+  maxDelayMs: number
+  /** Random jitter range added to retry delay (ms) */
+  jitterMs: number
+  /**
+   * After this many consecutive send() failures, drop the failing batch
+   * and move on to the next pending item with a fresh failure budget.
+   * Undefined = retry indefinitely (default).
+   */
+  maxConsecutiveFailures?: number
+  /** Called when a batch is dropped for hitting maxConsecutiveFailures. */
+  onBatchDropped?: (batchSize: number, failures: number) => void
+}
+
+export class SerialBatchEventUploader<T> {
+  private pending: T[] = []
+  private pendingAtClose = 0
+  private draining = false
+  private closed = false
+  private backpressureResolvers: Array<() => void> = []
+  private sleepResolve: (() => void) | null = null
+  // Handle for the in-flight retry sleep so close() can cancel it —
+  // otherwise a pending setTimeout keeps the event loop alive for up to
+  // maxDelayMs after shutdown.
+  private sleepTimer: NodeJS.Timeout | null = null
+  private flushResolvers: Array<() => void> = []
+  private droppedBatches = 0
+  private readonly config: SerialBatchEventUploaderConfig<T>
+
+  constructor(config: SerialBatchEventUploaderConfig<T>) {
+    this.config = config
+  }
+
+  /**
+   * Monotonic count of batches dropped via maxConsecutiveFailures. Callers
+   * can snapshot before flush() and compare after to detect silent drops
+   * (flush() resolves normally even when batches were dropped).
+   */
+  get droppedBatchCount(): number {
+    return this.droppedBatches
+  }
+
+  /**
+   * Pending queue depth. After close(), returns the count at close time —
+   * close() clears the queue but shutdown diagnostics may read this after.
+   */
+  get pendingCount(): number {
+    return this.closed ? this.pendingAtClose : this.pending.length
+  }
+
+  /**
+   * Add events to the pending buffer. Returns immediately if space is
+   * available. Blocks (awaits) if the buffer is full — caller pauses
+   * until drain frees space. A single enqueue carrying more items than
+   * maxQueueSize is admitted once the queue is empty: waiting for
+   * "enough" space would deadlock, since drain can never free more
+   * than everything.
+   */
+  async enqueue(events: T | T[]): Promise<void> {
+    if (this.closed) return
+    const items = Array.isArray(events) ? events : [events]
+    if (items.length === 0) return
+
+    // Backpressure: wait until there's space. The pending.length > 0
+    // guard guarantees progress for oversized batches (see doc above).
+    while (
+      this.pending.length > 0 &&
+      this.pending.length + items.length > this.config.maxQueueSize &&
+      !this.closed
+    ) {
+      await new Promise<void>(resolve => {
+        this.backpressureResolvers.push(resolve)
+      })
+    }
+
+    if (this.closed) return
+    this.pending.push(...items)
+    void this.drain()
+  }
+
+  /**
+   * Block until all pending events have been sent.
+   * Used at turn boundaries and graceful shutdown.
+   */
+  flush(): Promise<void> {
+    if (this.pending.length === 0 && !this.draining) {
+      return Promise.resolve()
+    }
+    void this.drain()
+    return new Promise<void>(resolve => {
+      this.flushResolvers.push(resolve)
+    })
+  }
+
+  /**
+   * Drop pending events and stop processing.
+   * Cancels any retry sleep (and its timer, so the process isn't held
+   * open), and resolves any blocked enqueue() and flush() callers.
+   */
+  close(): void {
+    if (this.closed) return
+    this.closed = true
+    this.pendingAtClose = this.pending.length
+    this.pending = []
+    if (this.sleepTimer) {
+      clearTimeout(this.sleepTimer)
+      this.sleepTimer = null
+    }
+    this.sleepResolve?.()
+    this.sleepResolve = null
+    for (const resolve of this.backpressureResolvers) resolve()
+    this.backpressureResolvers = []
+    for (const resolve of this.flushResolvers) resolve()
+    this.flushResolvers = []
+  }
+
+  /**
+   * Drain loop. At most one instance runs at a time (guarded by this.draining).
+   * Sends batches serially. On failure, backs off and retries indefinitely.
+   */
+  private async drain(): Promise<void> {
+    if (this.draining || this.closed) return
+    this.draining = true
+    let failures = 0
+
+    try {
+      while (this.pending.length > 0 && !this.closed) {
+        const batch = this.takeBatch()
+        if (batch.length === 0) continue
+
+        try {
+          await this.config.send(batch)
+          failures = 0
+        } catch (err) {
+          failures++
+          if (
+            this.config.maxConsecutiveFailures !== undefined &&
+            failures >= this.config.maxConsecutiveFailures
+          ) {
+            this.droppedBatches++
+            this.config.onBatchDropped?.(batch.length, failures)
+            failures = 0
+            this.releaseBackpressure()
+            continue
+          }
+          // Re-queue the failed batch at the front. Use concat (single
+          // allocation) instead of unshift(...batch) which shifts every
+          // pending item batch.length times. Only hit on failure path.
+          this.pending = batch.concat(this.pending)
+          const retryAfterMs =
+            err instanceof RetryableError ? err.retryAfterMs : undefined
+          await this.sleep(this.retryDelay(failures, retryAfterMs))
+          continue
+        }
+
+        // Release backpressure waiters if space opened up
+        this.releaseBackpressure()
+      }
+    } finally {
+      this.draining = false
+      // Notify flush waiters if queue is empty
+      if (this.pending.length === 0) {
+        for (const resolve of this.flushResolvers) resolve()
+        this.flushResolvers = []
+      }
+    }
+  }
+
+  /**
+   * Pull the next batch from pending. Respects both maxBatchSize and
+   * maxBatchBytes. The first item is always taken; subsequent items only
+   * if adding them keeps the cumulative JSON size under maxBatchBytes.
+   *
+   * Un-serializable items (BigInt, circular refs, throwing toJSON) are
+   * dropped in place — they can never be sent and leaving them at
+   * pending[0] would poison the queue and hang flush() forever.
+   */
+  private takeBatch(): T[] {
+    const { maxBatchSize, maxBatchBytes } = this.config
+    if (maxBatchBytes === undefined) {
+      return this.pending.splice(0, maxBatchSize)
+    }
+    let bytes = 0
+    let count = 0
+    while (count < this.pending.length && count < maxBatchSize) {
+      let itemBytes: number
+      try {
+        itemBytes = Buffer.byteLength(jsonStringify(this.pending[count]))
+      } catch {
+        this.pending.splice(count, 1)
+        continue
+      }
+      if (count > 0 && bytes + itemBytes > maxBatchBytes) break
+      bytes += itemBytes
+      count++
+    }
+    return this.pending.splice(0, count)
+  }
+
+  private retryDelay(failures: number, retryAfterMs?: number): number {
+    const jitter = Math.random() * this.config.jitterMs
+    if (retryAfterMs !== undefined) {
+      // Jitter on top of the server's hint prevents thundering herd when
+      // many sessions share a rate limit and all receive the same
+      // Retry-After. Clamp first, then spread — same shape as the
+      // exponential path (effective ceiling is maxDelayMs + jitterMs).
+      const clamped = Math.max(
+        this.config.baseDelayMs,
+        Math.min(retryAfterMs, this.config.maxDelayMs),
+      )
+      return clamped + jitter
+    }
+    const exponential = Math.min(
+      this.config.baseDelayMs * 2 ** (failures - 1),
+      this.config.maxDelayMs,
+    )
+    return exponential + jitter
+  }
+
+  private releaseBackpressure(): void {
+    const resolvers = this.backpressureResolvers
+    this.backpressureResolvers = []
+    for (const resolve of resolvers) resolve()
+  }
+
+  /** Resolvable retry delay: close() can cut it short and cancel its timer. */
+  private sleep(ms: number): Promise<void> {
+    return new Promise(resolve => {
+      this.sleepResolve = resolve
+      this.sleepTimer = setTimeout(() => {
+        this.sleepTimer = null
+        this.sleepResolve = null
+        resolve()
+      }, ms)
+    })
+  }
+}

+ 800 - 0
src/cli/transports/WebSocketTransport.ts

@@ -0,0 +1,800 @@
+import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
+import type WsWebSocket from 'ws'
+import { logEvent } from '../../services/analytics/index.js'
+import { CircularBuffer } from '../../utils/CircularBuffer.js'
+import { logForDebugging } from '../../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
+import { isEnvTruthy } from '../../utils/envUtils.js'
+import { getWebSocketTLSOptions } from '../../utils/mtls.js'
+import {
+  getWebSocketProxyAgent,
+  getWebSocketProxyUrl,
+} from '../../utils/proxy.js'
+import {
+  registerSessionActivityCallback,
+  unregisterSessionActivityCallback,
+} from '../../utils/sessionActivity.js'
+import { jsonStringify } from '../../utils/slowOperations.js'
+import type { Transport } from './Transport.js'
+
+// Pre-serialized NDJSON keep_alive data frame — presumably sent on the
+// keepalive interval to reset proxy idle timers (usage not visible here;
+// TODO confirm against startKeepaliveInterval).
+const KEEP_ALIVE_FRAME = '{"type":"keep_alive"}\n'
+
+/** Replay buffer capacity — messages retained for reconnect replay. */
+const DEFAULT_MAX_BUFFER_SIZE = 1000
+/** First reconnect backoff delay (ms); doubles per attempt. */
+const DEFAULT_BASE_RECONNECT_DELAY = 1000
+/** Ceiling for the reconnect backoff delay (ms). */
+const DEFAULT_MAX_RECONNECT_DELAY = 30000
+/** Time budget for reconnection attempts before giving up (10 minutes). */
+const DEFAULT_RECONNECT_GIVE_UP_MS = 600_000
+/** Cadence of health-check pings (ms). */
+const DEFAULT_PING_INTERVAL = 10000
+const DEFAULT_KEEPALIVE_INTERVAL = 300_000 // 5 minutes
+
+/**
+ * Threshold for detecting system sleep/wake. If the gap between consecutive
+ * reconnection attempts exceeds this, the machine likely slept. We reset
+ * the reconnection budget and retry — the server will reject with permanent
+ * close codes (4001/1002) if the session was reaped during sleep.
+ */
+const SLEEP_DETECTION_THRESHOLD_MS = DEFAULT_MAX_RECONNECT_DELAY * 2 // 60s
+
+/**
+ * WebSocket close codes that indicate a permanent server-side rejection.
+ * The transport transitions to 'closed' immediately without retrying.
+ */
+const PERMANENT_CLOSE_CODES = new Set([
+  1002, // protocol error — server rejected handshake (e.g. session reaped)
+  4001, // session expired / not found
+  4003, // unauthorized
+])
+
+export type WebSocketTransportOptions = {
+  /** When false, the transport does not attempt automatic reconnection on
+   *  disconnect. Use this when the caller has its own recovery mechanism
+   *  (e.g. the REPL bridge poll loop). Defaults to true. */
+  autoReconnect?: boolean
+  /** Gates the tengu_ws_transport_* telemetry events. Set true at the
+   *  REPL-bridge construction site so only Remote Control sessions (the
+   *  Cloudflare-idle-timeout population) emit; print-mode workers stay
+   *  silent. Defaults to false. */
+  isBridge?: boolean
+}
+
+/** Lifecycle states; see handleConnectionError for the transitions. */
+type WebSocketTransportState =
+  | 'idle'
+  | 'connected'
+  | 'reconnecting'
+  | 'closing'
+  | 'closed'
+
+// Common interface between globalThis.WebSocket and ws.WebSocket
+type WebSocketLike = {
+  close(): void
+  send(data: string): void
+  ping?(): void // Bun & ws both support this
+}
+
+export class WebSocketTransport implements Transport {
+  // Active socket — Bun native WebSocket or Node 'ws' instance; null
+  // whenever disconnected.
+  private ws: WebSocketLike | null = null
+  // UUID of the most recent outbound message added to the replay buffer.
+  // Sent as X-Last-Request-Id on reconnect so the server can dedupe/ack.
+  private lastSentId: string | null = null
+  protected url: URL
+  protected state: WebSocketTransportState = 'idle'
+  // Consumer callbacks; see setOnData / setOnClose / setOnConnect.
+  protected onData?: (data: string) => void
+  private onCloseCallback?: (closeCode?: number) => void
+  private onConnectCallback?: () => void
+  // Upgrade-request headers; mutated in place when refreshHeaders() runs.
+  private headers: Record<string, string>
+  // Used only to label debug logs (see write()).
+  private sessionId?: string
+  private autoReconnect: boolean
+  private isBridge: boolean
+
+  // Reconnection state
+  private reconnectAttempts = 0
+  private reconnectStartTime: number | null = null
+  private reconnectTimer: NodeJS.Timeout | null = null
+  private lastReconnectAttemptTime: number | null = null
+  // Wall-clock of last WS data-frame activity (inbound message or outbound
+  // ws.send). Used to compute idle time at close — the signal for diagnosing
+  // proxy idle-timeout RSTs (e.g. Cloudflare 5-min). Excludes ping/pong
+  // control frames (proxies don't count those).
+  private lastActivityTime = 0
+
+  // Ping interval for connection health checks
+  private pingInterval: NodeJS.Timeout | null = null
+  private pongReceived = true
+
+  // Periodic keep_alive data frames to reset proxy idle timers
+  private keepAliveInterval: NodeJS.Timeout | null = null
+
+  // Message buffering for replay on reconnection
+  private messageBuffer: CircularBuffer<StdoutMessage>
+  // Track which runtime's WS we're using so we can detach listeners
+  // with the matching API (removeEventListener vs. off).
+  private isBunWs = false
+
+  // Captured at connect() time for handleOpenEvent timing. Stored as an
+  // instance field so the onOpen handler can be a stable class-property
+  // arrow function (removable in doDisconnect) instead of a closure over
+  // a local variable.
+  private connectStartTime = 0
+
+  // Optional supplier of fresh headers (e.g. a re-minted auth token),
+  // consulted before reconnect attempts and on 4003 closes.
+  private refreshHeaders?: () => Record<string, string>
+
+  /**
+   * @param url WebSocket endpoint to connect to.
+   * @param headers Extra upgrade-request headers (e.g. Authorization).
+   * @param sessionId Label for debug logging only.
+   * @param refreshHeaders Called to obtain fresh headers before reconnects.
+   * @param options See WebSocketTransportOptions.
+   */
+  constructor(
+    url: URL,
+    headers: Record<string, string> = {},
+    sessionId?: string,
+    refreshHeaders?: () => Record<string, string>,
+    options?: WebSocketTransportOptions,
+  ) {
+    this.url = url
+    this.headers = headers
+    this.sessionId = sessionId
+    this.refreshHeaders = refreshHeaders
+    this.autoReconnect = options?.autoReconnect ?? true
+    this.isBridge = options?.isBridge ?? false
+    this.messageBuffer = new CircularBuffer(DEFAULT_MAX_BUFFER_SIZE)
+  }
+
+  /**
+   * Open the WebSocket. No-ops (with an error log) unless the state is
+   * 'idle' or 'reconnecting'. Picks the socket implementation by runtime:
+   * Bun's native WebSocket under Bun, otherwise the 'ws' package
+   * (dynamically imported so Bun never loads it). When we have previously
+   * sent messages, adds X-Last-Request-Id so the server can dedupe replays.
+   *
+   * NOTE(review): if the dynamic import or WS constructor throws, this
+   * promise rejects; callers invoke it as `void this.connect()` in the
+   * reconnect timer, so such failures surface as unhandled rejections —
+   * confirm that is intended.
+   */
+  public async connect(): Promise<void> {
+    if (this.state !== 'idle' && this.state !== 'reconnecting') {
+      logForDebugging(
+        `WebSocketTransport: Cannot connect, current state is ${this.state}`,
+        { level: 'error' },
+      )
+      logForDiagnosticsNoPII('error', 'cli_websocket_connect_failed')
+      return
+    }
+    this.state = 'reconnecting'
+
+    this.connectStartTime = Date.now()
+    logForDebugging(`WebSocketTransport: Opening ${this.url.href}`)
+    logForDiagnosticsNoPII('info', 'cli_websocket_connect_opening')
+
+    // Start with provided headers and add runtime headers
+    const headers = { ...this.headers }
+    if (this.lastSentId) {
+      headers['X-Last-Request-Id'] = this.lastSentId
+      logForDebugging(
+        `WebSocketTransport: Adding X-Last-Request-Id header: ${this.lastSentId}`,
+      )
+    }
+
+    if (typeof Bun !== 'undefined') {
+      // Bun's WebSocket supports headers/proxy options but the DOM typings don't
+      // (hence the double cast on the options argument).
+      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
+      const ws = new globalThis.WebSocket(this.url.href, {
+        headers,
+        proxy: getWebSocketProxyUrl(this.url.href),
+        tls: getWebSocketTLSOptions() || undefined,
+      } as unknown as string[])
+      this.ws = ws
+      this.isBunWs = true
+
+      ws.addEventListener('open', this.onBunOpen)
+      ws.addEventListener('message', this.onBunMessage)
+      ws.addEventListener('error', this.onBunError)
+      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
+      ws.addEventListener('close', this.onBunClose)
+      // 'pong' is Bun-specific — not in DOM typings.
+      ws.addEventListener('pong', this.onPong)
+    } else {
+      const { default: WS } = await import('ws')
+      const ws = new WS(this.url.href, {
+        headers,
+        agent: getWebSocketProxyAgent(this.url.href),
+        ...getWebSocketTLSOptions(),
+      })
+      this.ws = ws
+      this.isBunWs = false
+
+      ws.on('open', this.onNodeOpen)
+      ws.on('message', this.onNodeMessage)
+      ws.on('error', this.onNodeError)
+      ws.on('close', this.onNodeClose)
+      ws.on('pong', this.onPong)
+    }
+  }
+
+  // --- Bun (native WebSocket) event handlers ---
+  // Stored as class-property arrow functions so they can be removed in
+  // doDisconnect(). Without removal, each reconnect orphans the old WS
+  // object + its 5 closures until GC, which accumulates under network
+  // instability. Mirrors the pattern in src/utils/mcpWebSocketTransport.ts.
+
+  private onBunOpen = () => {
+    this.handleOpenEvent()
+    // Bun's WebSocket doesn't expose upgrade response headers,
+    // so replay all buffered messages. The server deduplicates by UUID.
+    if (this.lastSentId) {
+      this.replayBufferedMessages('')
+    }
+  }
+
+  private onBunMessage = (event: MessageEvent) => {
+    // NOTE(review): non-string frames (Blob/ArrayBuffer) stringify as
+    // "[object …]" here — assumes the server only sends text frames;
+    // confirm against the server protocol.
+    const message =
+      typeof event.data === 'string' ? event.data : String(event.data)
+    // Data-frame activity: feeds the msSinceLastActivity diagnostic at close.
+    this.lastActivityTime = Date.now()
+    logForDiagnosticsNoPII('info', 'cli_websocket_message_received', {
+      length: message.length,
+    })
+    if (this.onData) {
+      this.onData(message)
+    }
+  }
+
+  private onBunError = () => {
+    logForDebugging('WebSocketTransport: Error', {
+      level: 'error',
+    })
+    logForDiagnosticsNoPII('error', 'cli_websocket_connect_error')
+    // close event fires after error — let it call handleConnectionError
+  }
+
+  // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
+  private onBunClose = (event: CloseEvent) => {
+    const isClean = event.code === 1000 || event.code === 1001
+    logForDebugging(
+      `WebSocketTransport: Closed: ${event.code}`,
+      isClean ? undefined : { level: 'error' },
+    )
+    logForDiagnosticsNoPII('error', 'cli_websocket_connect_closed')
+    this.handleConnectionError(event.code)
+  }
+
+  // --- Node (ws package) event handlers ---
+
+  private onNodeOpen = () => {
+    // Capture ws before handleOpenEvent() invokes onConnectCallback — if the
+    // callback synchronously closes the transport, this.ws becomes null.
+    // The old inline-closure code had this safety implicitly via closure capture.
+    const ws = this.ws
+    this.handleOpenEvent()
+    if (!ws) return
+    // Check for last-id in upgrade response headers (ws package only).
+    // NOTE(review): 'upgradeReq' is not documented on ws client sockets —
+    // verify this field is actually populated, else this branch is dead
+    // and Node falls back to no replay on reconnect.
+    const nws = ws as unknown as WsWebSocket & {
+      upgradeReq?: { headers?: Record<string, string> }
+    }
+    const upgradeResponse = nws.upgradeReq
+    if (upgradeResponse?.headers?.['x-last-request-id']) {
+      const serverLastId = upgradeResponse.headers['x-last-request-id']
+      this.replayBufferedMessages(serverLastId)
+    }
+  }
+
+  private onNodeMessage = (data: Buffer) => {
+    const message = data.toString()
+    // Data-frame activity: feeds the msSinceLastActivity diagnostic at close.
+    this.lastActivityTime = Date.now()
+    logForDiagnosticsNoPII('info', 'cli_websocket_message_received', {
+      length: message.length,
+    })
+    if (this.onData) {
+      this.onData(message)
+    }
+  }
+
+  private onNodeError = (err: Error) => {
+    logForDebugging(`WebSocketTransport: Error: ${err.message}`, {
+      level: 'error',
+    })
+    logForDiagnosticsNoPII('error', 'cli_websocket_connect_error')
+    // close event fires after error — let it call handleConnectionError
+  }
+
+  private onNodeClose = (code: number, _reason: Buffer) => {
+    const isClean = code === 1000 || code === 1001
+    logForDebugging(
+      `WebSocketTransport: Closed: ${code}`,
+      isClean ? undefined : { level: 'error' },
+    )
+    logForDiagnosticsNoPII('error', 'cli_websocket_connect_closed')
+    this.handleConnectionError(code)
+  }
+
+  // --- Shared handlers ---
+
+  // Marks the connection healthy; read by the ping-interval health check.
+  private onPong = () => {
+    this.pongReceived = true
+  }
+
+  /**
+   * Shared open handler for both runtimes: records timing telemetry,
+   * resets reconnection bookkeeping, transitions to 'connected', notifies
+   * the consumer, and starts the ping/keepalive timers.
+   */
+  private handleOpenEvent(): void {
+    const connectDuration = Date.now() - this.connectStartTime
+    logForDebugging('WebSocketTransport: Connected')
+    logForDiagnosticsNoPII('info', 'cli_websocket_connect_connected', {
+      duration_ms: connectDuration,
+    })
+
+    // Reconnect success — capture attempt count + downtime before resetting.
+    // reconnectStartTime is null on first connect, non-null on reopen.
+    if (this.isBridge && this.reconnectStartTime !== null) {
+      logEvent('tengu_ws_transport_reconnected', {
+        attempts: this.reconnectAttempts,
+        downtimeMs: Date.now() - this.reconnectStartTime,
+      })
+    }
+
+    this.reconnectAttempts = 0
+    this.reconnectStartTime = null
+    this.lastReconnectAttemptTime = null
+    this.lastActivityTime = Date.now()
+    this.state = 'connected'
+    this.onConnectCallback?.()
+
+    // Start periodic pings to detect dead connections
+    this.startPingInterval()
+
+    // Start periodic keep_alive data frames to reset proxy idle timers
+    this.startKeepaliveInterval()
+
+    // Register callback for session activity signals.
+    // NOTE(review): registered on every (re)open while the matching
+    // unregisterSessionActivityCallback runs in doDisconnect — balanced
+    // if the registry holds a single callback; confirm register semantics.
+    registerSessionActivityCallback(() => {
+      void this.write({ type: 'keep_alive' })
+    })
+  }
+
+  /**
+   * Push one raw line over the socket. Returns false when the transport
+   * is not connected or the send throws — the failure path also kicks
+   * off connection-error handling.
+   */
+  protected sendLine(line: string): boolean {
+    if (this.state !== 'connected' || !this.ws) {
+      logForDebugging('WebSocketTransport: Not connected')
+      logForDiagnosticsNoPII('info', 'cli_websocket_send_not_connected')
+      return false
+    }
+
+    try {
+      this.ws.send(line)
+      this.lastActivityTime = Date.now()
+      return true
+    } catch (error) {
+      logForDebugging(`WebSocketTransport: Failed to send: ${error}`, {
+        level: 'error',
+      })
+      logForDiagnosticsNoPII('error', 'cli_websocket_send_error')
+      // Cleanup is owned by doDisconnect() (reached via handleConnectionError)
+      // so listeners are removed before the WS is released — don't null
+      // this.ws here.
+      this.handleConnectionError()
+      return false
+    }
+  }
+
+  /**
+   * Remove all listeners attached in connect() for the given WebSocket.
+   * Without this, each reconnect orphans the old WS object + its closures
+   * until GC — these accumulate under network instability. Mirrors the
+   * pattern in src/utils/mcpWebSocketTransport.ts.
+   */
+  private removeWsListeners(ws: WebSocketLike): void {
+    if (this.isBunWs) {
+      const nws = ws as unknown as globalThis.WebSocket
+      nws.removeEventListener('open', this.onBunOpen)
+      nws.removeEventListener('message', this.onBunMessage)
+      nws.removeEventListener('error', this.onBunError)
+      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
+      nws.removeEventListener('close', this.onBunClose)
+      // 'pong' is Bun-specific — the cast is needed because it is not in
+      // the DOM event map typings
+      nws.removeEventListener('pong' as 'message', this.onPong)
+    } else {
+      const nws = ws as unknown as WsWebSocket
+      nws.off('open', this.onNodeOpen)
+      nws.off('message', this.onNodeMessage)
+      nws.off('error', this.onNodeError)
+      nws.off('close', this.onNodeClose)
+      nws.off('pong', this.onPong)
+    }
+  }
+
+  /**
+   * Tear down the current socket: stop health/keepalive timers, detach
+   * the session-activity hook, and release the WS with its listeners.
+   */
+  protected doDisconnect(): void {
+    this.stopPingInterval()
+    this.stopKeepaliveInterval()
+
+    unregisterSessionActivityCallback()
+
+    const socket = this.ws
+    if (socket === null) return
+    // Detach listeners BEFORE close() so the old WS + closures can be
+    // GC'd promptly instead of lingering until the next mark-and-sweep.
+    this.removeWsListeners(socket)
+    socket.close()
+    this.ws = null
+  }
+
+  /**
+   * Central close/error handler. Decision ladder:
+   *  1) emit diagnostics (plus bridge telemetry on every close),
+   *  2) tear down the socket via doDisconnect(),
+   *  3) bail if already closing/closed,
+   *  4) permanent close codes -> 'closed' (except 4003 when refreshHeaders
+   *     yields a new token),
+   *  5) autoReconnect off -> 'closed' (caller owns recovery),
+   *  6) otherwise schedule a jittered exponential-backoff reconnect within
+   *     the 10-minute budget, resetting the budget after a detected sleep.
+   */
+  private handleConnectionError(closeCode?: number): void {
+    logForDebugging(
+      `WebSocketTransport: Disconnected from ${this.url.href}` +
+        (closeCode != null ? ` (code ${closeCode})` : ''),
+    )
+    logForDiagnosticsNoPII('info', 'cli_websocket_disconnected')
+    if (this.isBridge) {
+      // Fire on every close — including intermediate ones during a reconnect
+      // storm (those never surface to the onCloseCallback consumer). For the
+      // Cloudflare-5min-idle hypothesis: cluster msSinceLastActivity; if the
+      // peak sits at ~300s with closeCode 1006, that's the proxy RST.
+      logEvent('tengu_ws_transport_closed', {
+        closeCode,
+        msSinceLastActivity:
+          this.lastActivityTime > 0 ? Date.now() - this.lastActivityTime : -1,
+        // 'connected' = healthy drop (the Cloudflare case); 'reconnecting' =
+        // connect-rejection mid-storm. State isn't mutated until the branches
+        // below, so this reads the pre-close value.
+        wasConnected: this.state === 'connected',
+        reconnectAttempts: this.reconnectAttempts,
+      })
+    }
+    this.doDisconnect()
+
+    if (this.state === 'closing' || this.state === 'closed') return
+
+    // Permanent codes: don't retry — server has definitively ended the session.
+    // Exception: 4003 (unauthorized) can be retried when refreshHeaders is
+    // available and returns a new token (e.g. after the parent process mints
+    // a fresh session ingress token during reconnection).
+    let headersRefreshed = false
+    if (closeCode === 4003 && this.refreshHeaders) {
+      const freshHeaders = this.refreshHeaders()
+      if (freshHeaders.Authorization !== this.headers.Authorization) {
+        Object.assign(this.headers, freshHeaders)
+        headersRefreshed = true
+        logForDebugging(
+          'WebSocketTransport: 4003 received but headers refreshed, scheduling reconnect',
+        )
+        logForDiagnosticsNoPII('info', 'cli_websocket_4003_token_refreshed')
+      }
+    }
+
+    if (
+      closeCode != null &&
+      PERMANENT_CLOSE_CODES.has(closeCode) &&
+      !headersRefreshed
+    ) {
+      logForDebugging(
+        `WebSocketTransport: Permanent close code ${closeCode}, not reconnecting`,
+        { level: 'error' },
+      )
+      logForDiagnosticsNoPII('error', 'cli_websocket_permanent_close', {
+        closeCode,
+      })
+      this.state = 'closed'
+      this.onCloseCallback?.(closeCode)
+      return
+    }
+
+    // When autoReconnect is disabled, go straight to closed state.
+    // The caller (e.g. REPL bridge poll loop) handles recovery.
+    if (!this.autoReconnect) {
+      this.state = 'closed'
+      this.onCloseCallback?.(closeCode)
+      return
+    }
+
+    // Schedule reconnection with exponential backoff and time budget
+    const now = Date.now()
+    if (!this.reconnectStartTime) {
+      this.reconnectStartTime = now
+    }
+
+    // Detect system sleep/wake: if the gap since our last reconnection
+    // attempt greatly exceeds the max delay, the machine likely slept
+    // (e.g. laptop lid closed). Reset the budget and retry from scratch —
+    // the server will reject with permanent close codes (4001/1002) if
+    // the session was reaped while we were asleep.
+    if (
+      this.lastReconnectAttemptTime !== null &&
+      now - this.lastReconnectAttemptTime > SLEEP_DETECTION_THRESHOLD_MS
+    ) {
+      logForDebugging(
+        `WebSocketTransport: Detected system sleep (${Math.round((now - this.lastReconnectAttemptTime) / 1000)}s gap), resetting reconnection budget`,
+      )
+      logForDiagnosticsNoPII('info', 'cli_websocket_sleep_detected', {
+        gapMs: now - this.lastReconnectAttemptTime,
+      })
+      this.reconnectStartTime = now
+      this.reconnectAttempts = 0
+    }
+    this.lastReconnectAttemptTime = now
+
+    const elapsed = now - this.reconnectStartTime
+    if (elapsed < DEFAULT_RECONNECT_GIVE_UP_MS) {
+      // Clear any existing reconnection timer to avoid duplicates
+      if (this.reconnectTimer) {
+        clearTimeout(this.reconnectTimer)
+        this.reconnectTimer = null
+      }
+
+      // Refresh headers before reconnecting (e.g. to pick up a new session token).
+      // Skip if already refreshed by the 4003 path above.
+      if (!headersRefreshed && this.refreshHeaders) {
+        const freshHeaders = this.refreshHeaders()
+        Object.assign(this.headers, freshHeaders)
+        logForDebugging('WebSocketTransport: Refreshed headers for reconnect')
+      }
+
+      this.state = 'reconnecting'
+      this.reconnectAttempts++
+
+      const baseDelay = Math.min(
+        DEFAULT_BASE_RECONNECT_DELAY * Math.pow(2, this.reconnectAttempts - 1),
+        DEFAULT_MAX_RECONNECT_DELAY,
+      )
+      // Add ±25% jitter to avoid thundering herd
+      const delay = Math.max(
+        0,
+        baseDelay + baseDelay * 0.25 * (2 * Math.random() - 1),
+      )
+
+      logForDebugging(
+        `WebSocketTransport: Reconnecting in ${Math.round(delay)}ms (attempt ${this.reconnectAttempts}, ${Math.round(elapsed / 1000)}s elapsed)`,
+      )
+      logForDiagnosticsNoPII('error', 'cli_websocket_reconnect_attempt', {
+        reconnectAttempts: this.reconnectAttempts,
+      })
+      if (this.isBridge) {
+        logEvent('tengu_ws_transport_reconnecting', {
+          attempt: this.reconnectAttempts,
+          elapsedMs: elapsed,
+          delayMs: Math.round(delay),
+        })
+      }
+
+      this.reconnectTimer = setTimeout(() => {
+        this.reconnectTimer = null
+        void this.connect()
+      }, delay)
+    } else {
+      logForDebugging(
+        `WebSocketTransport: Reconnection time budget exhausted after ${Math.round(elapsed / 1000)}s for ${this.url.href}`,
+        { level: 'error' },
+      )
+      logForDiagnosticsNoPII('error', 'cli_websocket_reconnect_exhausted', {
+        reconnectAttempts: this.reconnectAttempts,
+        elapsedMs: elapsed,
+      })
+      this.state = 'closed'
+
+      // Notify close callback
+      if (this.onCloseCallback) {
+        this.onCloseCallback(closeCode)
+      }
+    }
+  }
+
+  /**
+   * Caller-initiated shutdown: cancel reconnect/ping/keepalive timers,
+   * detach the session-activity hook, and drop the socket.
+   *
+   * NOTE(review): state ends at 'closing' and never reaches 'closed', so
+   * isClosedStatus() stays false after a manual close; onCloseCallback is
+   * also not invoked (doDisconnect removes listeners before ws.close(), so
+   * no close event fires). Matches the sibling SSE transport — confirm
+   * callers rely on this distinction between manual and server-side close.
+   */
+  close(): void {
+    // Clear any pending reconnection timer
+    if (this.reconnectTimer) {
+      clearTimeout(this.reconnectTimer)
+      this.reconnectTimer = null
+    }
+
+    // Clear ping and keepalive intervals
+    this.stopPingInterval()
+    this.stopKeepaliveInterval()
+
+    // Unregister session activity callback
+    unregisterSessionActivityCallback()
+
+    this.state = 'closing'
+    this.doDisconnect()
+  }
+
+  /**
+   * Re-send buffered outbound messages after a reconnect. When the server
+   * reports the last UUID it received (lastId), the confirmed prefix is
+   * evicted from the buffer first; with an empty lastId everything is
+   * replayed and the server dedupes by UUID.
+   */
+  private replayBufferedMessages(lastId: string): void {
+    const messages = this.messageBuffer.toArray()
+    if (messages.length === 0) return
+
+    // Find where to start replay based on server's last received message
+    let startIndex = 0
+    if (lastId) {
+      const lastConfirmedIndex = messages.findIndex(
+        message => 'uuid' in message && message.uuid === lastId,
+      )
+      if (lastConfirmedIndex >= 0) {
+        // Server confirmed messages up to lastConfirmedIndex — evict them
+        startIndex = lastConfirmedIndex + 1
+        // Rebuild the buffer with only unconfirmed messages
+        const remaining = messages.slice(startIndex)
+        this.messageBuffer.clear()
+        this.messageBuffer.addAll(remaining)
+        if (remaining.length === 0) {
+          this.lastSentId = null
+        }
+        logForDebugging(
+          `WebSocketTransport: Evicted ${startIndex} confirmed messages, ${remaining.length} remaining`,
+        )
+        logForDiagnosticsNoPII(
+          'info',
+          'cli_websocket_evicted_confirmed_messages',
+          {
+            evicted: startIndex,
+            remaining: remaining.length,
+          },
+        )
+      }
+    }
+
+    const messagesToReplay = messages.slice(startIndex)
+    if (messagesToReplay.length === 0) {
+      logForDebugging('WebSocketTransport: No new messages to replay')
+      logForDiagnosticsNoPII('info', 'cli_websocket_no_messages_to_replay')
+      return
+    }
+
+    logForDebugging(
+      `WebSocketTransport: Replaying ${messagesToReplay.length} buffered messages`,
+    )
+    logForDiagnosticsNoPII('info', 'cli_websocket_messages_to_replay', {
+      count: messagesToReplay.length,
+    })
+
+    for (const message of messagesToReplay) {
+      const line = jsonStringify(message) + '\n'
+      const success = this.sendLine(line)
+      if (!success) {
+        this.handleConnectionError()
+        break
+      }
+    }
+    // Do NOT clear the buffer after replay — messages remain buffered until
+    // the server confirms receipt on the next reconnection. This prevents
+    // message loss if the connection drops after replay but before the server
+    // processes the messages.
+  }
+
+  /** True while the socket is in the 'connected' state. */
+  isConnectedStatus(): boolean {
+    return this.state === 'connected'
+  }
+
+  /** True once the transport has reached the terminal 'closed' state. */
+  isClosedStatus(): boolean {
+    return this.state === 'closed'
+  }
+
+  /** Register the callback invoked with each inbound data string. */
+  setOnData(callback: (data: string) => void): void {
+    this.onData = callback
+  }
+
+  /** Register the callback invoked after a (re)connection succeeds. */
+  setOnConnect(callback: () => void): void {
+    this.onConnectCallback = callback
+  }
+
+  /** Register the callback invoked on close, with the close code if known. */
+  setOnClose(callback: (closeCode?: number) => void): void {
+    this.onCloseCallback = callback
+  }
+
+  /** Current connection state as a plain string, for logging/diagnostics. */
+  getStateLabel(): string {
+    return this.state
+  }
+
+  /**
+   * Buffer (when the message carries a string uuid) and send one message as
+   * an NDJSON line. While disconnected, uuid-bearing messages are only
+   * buffered — replayBufferedMessages() re-sends them after reconnect;
+   * messages without a uuid are not retained while disconnected.
+   */
+  async write(message: StdoutMessage): Promise<void> {
+    if ('uuid' in message && typeof message.uuid === 'string') {
+      this.messageBuffer.add(message)
+      this.lastSentId = message.uuid
+    }
+
+    const line = jsonStringify(message) + '\n'
+
+    if (this.state !== 'connected') {
+      // Message buffered for replay when connected (if it has a UUID)
+      return
+    }
+
+    const sessionLabel = this.sessionId ? ` session=${this.sessionId}` : ''
+    const detailLabel = this.getControlMessageDetailLabel(message)
+
+    logForDebugging(
+      `WebSocketTransport: Sending message type=${message.type}${sessionLabel}${detailLabel}`,
+    )
+
+    this.sendLine(line)
+  }
+
+  /**
+   * Extra log detail for control messages: subtype, request id, and (for
+   * can_use_tool requests) the tool name. Empty string for all other types.
+   */
+  private getControlMessageDetailLabel(message: StdoutMessage): string {
+    if (message.type === 'control_request') {
+      const { request_id, request } = message
+      const toolName =
+        request.subtype === 'can_use_tool' ? request.tool_name : ''
+      return ` subtype=${request.subtype} request_id=${request_id}${toolName ? ` tool=${toolName}` : ''}`
+    }
+    if (message.type === 'control_response') {
+      const { subtype, request_id } = message.response
+      return ` subtype=${subtype} request_id=${request_id}`
+    }
+    return ''
+  }
+
+  /**
+   * Start the liveness loop. Each tick: (1) measure the wall-clock gap since
+   * the previous tick to detect process suspension, (2) verify the prior
+   * ping was answered with a pong, (3) send the next ping. Any failure path
+   * funnels into handleConnectionError() to trigger a reconnect.
+   */
+  private startPingInterval(): void {
+    // Clear any existing interval
+    this.stopPingInterval()
+
+    this.pongReceived = true
+    let lastTickTime = Date.now()
+
+    // Send ping periodically to detect dead connections.
+    // If the previous ping got no pong, treat the connection as dead.
+    this.pingInterval = setInterval(() => {
+      if (this.state === 'connected' && this.ws) {
+        const now = Date.now()
+        const gap = now - lastTickTime
+        lastTickTime = now
+
+        // Process-suspension detector. If the wall-clock gap between ticks
+        // greatly exceeds the 10s interval, the process was suspended
+        // (laptop lid, SIGSTOP, VM pause). setInterval does not queue
+        // missed ticks — it coalesces — so on wake this callback fires
+        // once with a huge gap. The socket is almost certainly dead:
+        // NAT mappings drop in 30s–5min, and the server has been
+        // retransmitting into the void. Don't wait for a ping/pong
+        // round-trip to confirm (ws.ping() on a dead socket returns
+        // immediately with no error — bytes go into the kernel send
+        // buffer). Assume dead and reconnect now. A spurious reconnect
+        // after a short sleep is cheap — replayBufferedMessages() handles
+        // it and the server dedups by UUID.
+        if (gap > SLEEP_DETECTION_THRESHOLD_MS) {
+          logForDebugging(
+            `WebSocketTransport: ${Math.round(gap / 1000)}s tick gap detected — process was suspended, forcing reconnect`,
+          )
+          logForDiagnosticsNoPII(
+            'info',
+            'cli_websocket_sleep_detected_on_ping',
+            { gapMs: gap },
+          )
+          this.handleConnectionError()
+          return
+        }
+
+        if (!this.pongReceived) {
+          logForDebugging(
+            'WebSocketTransport: No pong received, connection appears dead',
+            { level: 'error' },
+          )
+          logForDiagnosticsNoPII('error', 'cli_websocket_pong_timeout')
+          this.handleConnectionError()
+          return
+        }
+
+        // Flag is set back to true by the pong handler before the next tick.
+        this.pongReceived = false
+        try {
+          this.ws.ping?.()
+        } catch (error) {
+          logForDebugging(`WebSocketTransport: Ping failed: ${error}`, {
+            level: 'error',
+          })
+          logForDiagnosticsNoPII('error', 'cli_websocket_ping_failed')
+        }
+      }
+    }, DEFAULT_PING_INTERVAL)
+  }
+
+  /** Cancel the liveness ping loop, if one is running. */
+  private stopPingInterval(): void {
+    if (this.pingInterval) {
+      clearInterval(this.pingInterval)
+      this.pingInterval = null
+    }
+  }
+
+  /**
+   * Start periodic keep_alive data frames so idle intermediaries don't drop
+   * the socket. Skipped when CLAUDE_CODE_REMOTE is truthy — CCR session
+   * activity heartbeats cover keep-alives there.
+   */
+  private startKeepaliveInterval(): void {
+    this.stopKeepaliveInterval()
+
+    // In CCR sessions, session activity heartbeats handle keep-alives
+    if (isEnvTruthy(process.env.CLAUDE_CODE_REMOTE)) {
+      return
+    }
+
+    this.keepAliveInterval = setInterval(() => {
+      if (this.state === 'connected' && this.ws) {
+        try {
+          this.ws.send(KEEP_ALIVE_FRAME)
+          this.lastActivityTime = Date.now()
+          logForDebugging(
+            'WebSocketTransport: Sent periodic keep_alive data frame',
+          )
+        } catch (error) {
+          logForDebugging(
+            `WebSocketTransport: Periodic keep_alive failed: ${error}`,
+            { level: 'error' },
+          )
+          logForDiagnosticsNoPII('error', 'cli_websocket_keepalive_failed')
+        }
+      }
+    }, DEFAULT_KEEPALIVE_INTERVAL)
+  }
+
+  /** Cancel the keep-alive loop, if one is running. */
+  private stopKeepaliveInterval(): void {
+    if (this.keepAliveInterval) {
+      clearInterval(this.keepAliveInterval)
+      this.keepAliveInterval = null
+    }
+  }
+}

+ 131 - 0
src/cli/transports/WorkerStateUploader.ts

@@ -0,0 +1,131 @@
+import { sleep } from '../../utils/sleep.js'
+
+/**
+ * Coalescing uploader for PUT /worker (session state + metadata).
+ *
+ * - 1 in-flight PUT + 1 pending patch
+ * - New calls coalesce into pending (never grows beyond 1 slot)
+ * - On success: send pending if exists
+ * - On failure: exponential backoff (clamped), retries indefinitely
+ *   until success or close(). Absorbs any pending patches before each retry.
+ * - No backpressure needed — naturally bounded at 2 slots
+ *
+ * Coalescing rules:
+ * - Top-level keys (worker_status, external_metadata) — last value wins
+ * - Inside external_metadata / internal_metadata — RFC 7396 merge:
+ *   keys are added/overwritten, null values preserved (server deletes)
+ */
+
+type WorkerStateUploaderConfig = {
+  /** Performs one PUT /worker; resolves true on success, false to retry. */
+  send: (body: Record<string, unknown>) => Promise<boolean>
+  /** Base delay for exponential backoff (ms) */
+  baseDelayMs: number
+  /** Max delay cap (ms) */
+  maxDelayMs: number
+  /** Random jitter range added to retry delay (ms) */
+  jitterMs: number
+}
+
+export class WorkerStateUploader {
+  /** Resolves when the in-flight PUT chain settles; null when idle. */
+  private inflight: Promise<void> | null = null
+  /** Single coalesced pending patch (the "1 pending slot"). */
+  private pending: Record<string, unknown> | null = null
+  private closed = false
+  private readonly config: WorkerStateUploaderConfig
+
+  constructor(config: WorkerStateUploaderConfig) {
+    this.config = config
+  }
+
+  /**
+   * Enqueue a patch to PUT /worker. Coalesces with any existing pending
+   * patch. Fire-and-forget — callers don't need to await.
+   */
+  enqueue(patch: Record<string, unknown>): void {
+    if (this.closed) return
+    this.pending = this.pending ? coalescePatches(this.pending, patch) : patch
+    void this.drain()
+  }
+
+  /** Stop accepting patches and drop the pending slot; retries will cease. */
+  close(): void {
+    this.closed = true
+    this.pending = null
+  }
+
+  /**
+   * Start an upload if none is in flight, taking the pending patch.
+   * NOTE(review): assumes config.send resolves (true/false) rather than
+   * rejecting — a rejection would propagate through sendWithRetry, leave
+   * `inflight` set forever, and wedge the uploader. The send wired in
+   * CCRClient swallows errors, but confirm for any other caller.
+   */
+  private async drain(): Promise<void> {
+    if (this.inflight || this.closed) return
+    if (!this.pending) return
+
+    const payload = this.pending
+    this.pending = null
+
+    this.inflight = this.sendWithRetry(payload).then(() => {
+      this.inflight = null
+      if (this.pending && !this.closed) {
+        void this.drain()
+      }
+    })
+  }
+
+  /** Retries indefinitely with exponential backoff until success or close(). */
+  private async sendWithRetry(payload: Record<string, unknown>): Promise<void> {
+    let current = payload
+    let failures = 0
+    while (!this.closed) {
+      const ok = await this.config.send(current)
+      if (ok) return
+
+      failures++
+      await sleep(this.retryDelay(failures))
+
+      // Absorb any patches that arrived during the retry
+      if (this.pending && !this.closed) {
+        current = coalescePatches(current, this.pending)
+        this.pending = null
+      }
+    }
+  }
+
+  /** Clamped exponential backoff (base * 2^(n-1), capped) plus jitter. */
+  private retryDelay(failures: number): number {
+    const exponential = Math.min(
+      this.config.baseDelayMs * 2 ** (failures - 1),
+      this.config.maxDelayMs,
+    )
+    const jitter = Math.random() * this.config.jitterMs
+    return exponential + jitter
+  }
+}
+
+/**
+ * Coalesce two patches for PUT /worker.
+ *
+ * Top-level keys: overlay replaces base (last value wins).
+ * Metadata keys (external_metadata, internal_metadata): RFC 7396 merge
+ * one level deep — overlay keys are added/overwritten, null values
+ * preserved for server-side delete.
+ *
+ * @param base Earlier patch (its non-metadata keys may be overwritten).
+ * @param overlay Later patch (wins on conflicts).
+ * @returns A new object; neither input is mutated.
+ */
+function coalescePatches(
+  base: Record<string, unknown>,
+  overlay: Record<string, unknown>,
+): Record<string, unknown> {
+  const merged = { ...base }
+
+  for (const [key, value] of Object.entries(overlay)) {
+    if (
+      (key === 'external_metadata' || key === 'internal_metadata') &&
+      merged[key] &&
+      typeof merged[key] === 'object' &&
+      typeof value === 'object' &&
+      value !== null
+    ) {
+      // RFC 7396 merge — overlay keys win, nulls preserved for server
+      merged[key] = {
+        ...(merged[key] as Record<string, unknown>),
+        ...(value as Record<string, unknown>),
+      }
+    } else {
+      // Either a top-level key, or metadata where one side isn't an object —
+      // plain last-value-wins replacement.
+      merged[key] = value
+    }
+  }
+
+  return merged
+}

+ 998 - 0
src/cli/transports/ccrClient.ts

@@ -0,0 +1,998 @@
+import { randomUUID } from 'crypto'
+import type {
+  SDKPartialAssistantMessage,
+  StdoutMessage,
+} from 'src/entrypoints/sdk/controlTypes.js'
+import { decodeJwtExpiry } from '../../bridge/jwtUtils.js'
+import { logForDebugging } from '../../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
+import { errorMessage, getErrnoCode } from '../../utils/errors.js'
+import { createAxiosInstance } from '../../utils/proxy.js'
+import {
+  registerSessionActivityCallback,
+  unregisterSessionActivityCallback,
+} from '../../utils/sessionActivity.js'
+import {
+  getSessionIngressAuthHeaders,
+  getSessionIngressAuthToken,
+} from '../../utils/sessionIngressAuth.js'
+import type {
+  RequiresActionDetails,
+  SessionState,
+} from '../../utils/sessionState.js'
+import { sleep } from '../../utils/sleep.js'
+import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
+import {
+  RetryableError,
+  SerialBatchEventUploader,
+} from './SerialBatchEventUploader.js'
+import type { SSETransport, StreamClientEvent } from './SSETransport.js'
+import { WorkerStateUploader } from './WorkerStateUploader.js'
+
+/** Default interval between heartbeat events (20s; server TTL is 60s). */
+// Consumed as the default for heartbeatIntervalMs in the CCRClient constructor.
+const DEFAULT_HEARTBEAT_INTERVAL_MS = 20_000
+
+/**
+ * stream_event messages accumulate in a delay buffer for up to this many ms
+ * before enqueue. Mirrors HybridTransport's batching window. text_delta
+ * events for the same content block accumulate into a single full-so-far
+ * snapshot per flush — each emitted event is self-contained so a client
+ * connecting mid-stream sees complete text, not a fragment.
+ */
+const STREAM_EVENT_FLUSH_INTERVAL_MS = 100
+
+/**
+ * Hoisted axios validateStatus callback to avoid per-request closure
+ * allocation. Accepting every status lets request() branch on the code
+ * itself instead of catching axios's non-2xx throw.
+ */
+function alwaysValidStatus(): boolean {
+  return true
+}
+
+/** Why initialize() gave up — consumed by the diagnostics classifier. */
+export type CCRInitFailReason =
+  | 'no_auth_headers'
+  | 'missing_epoch'
+  | 'worker_register_failed'
+
+/** Thrown by initialize(); carries a typed reason for the diag classifier. */
+export class CCRInitError extends Error {
+  // Parameter property: `reason` is stored for callers to classify on.
+  constructor(readonly reason: CCRInitFailReason) {
+    super(`CCRClient init failed: ${reason}`)
+  }
+}
+
+/**
+ * Consecutive 401/403 with a VALID-LOOKING token before giving up. An
+ * expired JWT short-circuits this (exits immediately — deterministic,
+ * retry is futile). This threshold is for the uncertain case: token's
+ * exp is in the future but server says 401 (userauth down, KMS hiccup,
+ * clock skew). 10 × 20s heartbeat ≈ 200s to ride it out.
+ */
+const MAX_CONSECUTIVE_AUTH_FAILURES = 10
+
+/** Minimal event shape posted to /worker/events; uuid is the dedup key. */
+type EventPayload = {
+  uuid: string
+  type: string
+  [key: string]: unknown
+}
+
+/** Queue entry for the client-event uploader. */
+type ClientEvent = {
+  payload: EventPayload
+  // NOTE(review): presumably marks events the server need not persist —
+  // confirm against CCR's /worker/events contract.
+  ephemeral?: boolean
+}
+
+/**
+ * Structural subset of a stream_event carrying a text_delta. Not a narrowing
+ * of SDKPartialAssistantMessage — RawMessageStreamEvent's delta is a union and
+ * narrowing through two levels defeats the discriminant.
+ */
+type CoalescedStreamEvent = {
+  type: 'stream_event'
+  /** First text_delta UUID seen for this block in the flush — stable for retries. */
+  uuid: string
+  session_id: string
+  parent_tool_use_id: string | null
+  event: {
+    type: 'content_block_delta'
+    index: number
+    /** Full accumulated block text so far, not an incremental fragment. */
+    delta: { type: 'text_delta'; text: string }
+  }
+}
+
+/**
+ * Accumulator state for text_delta coalescing. Keyed by API message ID so
+ * lifetime is tied to the assistant message — cleared when the complete
+ * SDKAssistantMessage arrives (writeEvent), which is reliable even when
+ * abort/error paths skip content_block_stop/message_stop delivery.
+ * See clearStreamAccumulatorForMessage() for the cleanup path.
+ */
+export type StreamAccumulatorState = {
+  /** API message ID (msg_...) → blocks[blockIndex] → chunk array. */
+  byMessage: Map<string, string[][]>
+  /**
+   * {session_id}:{parent_tool_use_id} → active message ID.
+   * content_block_delta events don't carry the message ID (only
+   * message_start does), so we track which message is currently streaming
+   * for each scope. At most one message streams per scope at a time.
+   */
+  scopeToMessage: Map<string, string>
+}
+
+/** Fresh, empty accumulator state (no messages, no active scopes). */
+export function createStreamAccumulator(): StreamAccumulatorState {
+  return { byMessage: new Map(), scopeToMessage: new Map() }
+}
+
+/** Accumulator scope key: `{session_id}:{parent_tool_use_id ?? ''}`. */
+function scopeKey(m: {
+  session_id: string
+  parent_tool_use_id: string | null
+}): string {
+  return `${m.session_id}:${m.parent_tool_use_id ?? ''}`
+}
+
+/**
+ * Accumulate text_delta stream_events into full-so-far snapshots per content
+ * block. Each flush emits ONE event per touched block containing the FULL
+ * accumulated text from the start of the block — a client connecting
+ * mid-stream receives a self-contained snapshot, not a fragment.
+ *
+ * Non-text-delta events pass through unchanged. message_start records the
+ * active message ID for the scope; content_block_delta appends chunks;
+ * the snapshot event reuses the first text_delta UUID seen for that block in
+ * this flush so server-side idempotency remains stable across retries.
+ *
+ * Cleanup happens in writeEvent when the complete assistant message arrives
+ * (reliable), not here on stop events (abort/error paths skip those).
+ */
+export function accumulateStreamEvents(
+  buffer: SDKPartialAssistantMessage[],
+  state: StreamAccumulatorState,
+): EventPayload[] {
+  const out: EventPayload[] = []
+  // chunks[] → snapshot already in `out` this flush. Keyed by the chunks
+  // array reference (stable per {messageId, index}) so subsequent deltas
+  // rewrite the same entry instead of emitting one event per delta.
+  const touched = new Map<string[], CoalescedStreamEvent>()
+  for (const msg of buffer) {
+    switch (msg.event.type) {
+      case 'message_start': {
+        const id = msg.event.message.id
+        // A new message in this scope supersedes any unfinished one —
+        // drop the stale accumulator entry before switching over.
+        const prevId = state.scopeToMessage.get(scopeKey(msg))
+        if (prevId) state.byMessage.delete(prevId)
+        state.scopeToMessage.set(scopeKey(msg), id)
+        state.byMessage.set(id, [])
+        out.push(msg)
+        break
+      }
+      case 'content_block_delta': {
+        if (msg.event.delta.type !== 'text_delta') {
+          // Only text deltas are coalesced; other delta kinds pass through.
+          out.push(msg)
+          break
+        }
+        const messageId = state.scopeToMessage.get(scopeKey(msg))
+        const blocks = messageId ? state.byMessage.get(messageId) : undefined
+        if (!blocks) {
+          // Delta without a preceding message_start (reconnect mid-stream,
+          // or message_start was in a prior buffer that got dropped). Pass
+          // through raw — can't produce a full-so-far snapshot without the
+          // prior chunks anyway.
+          out.push(msg)
+          break
+        }
+        const chunks = (blocks[msg.event.index] ??= [])
+        chunks.push(msg.event.delta.text)
+        const existing = touched.get(chunks)
+        if (existing) {
+          // Block already snapshotted this flush — update it in place.
+          existing.event.delta.text = chunks.join('')
+          break
+        }
+        const snapshot: CoalescedStreamEvent = {
+          type: 'stream_event',
+          uuid: msg.uuid,
+          session_id: msg.session_id,
+          parent_tool_use_id: msg.parent_tool_use_id,
+          event: {
+            type: 'content_block_delta',
+            index: msg.event.index,
+            delta: { type: 'text_delta', text: chunks.join('') },
+          },
+        }
+        touched.set(chunks, snapshot)
+        out.push(snapshot)
+        break
+      }
+      default:
+        // Stops, content_block_start, message_delta, etc. — untouched.
+        out.push(msg)
+    }
+  }
+  return out
+}
+
+/**
+ * Clear accumulator entries for a completed assistant message. Called from
+ * writeEvent when the SDKAssistantMessage arrives — the reliable end-of-stream
+ * signal that fires even when abort/interrupt/error skip SSE stop events.
+ */
+export function clearStreamAccumulatorForMessage(
+  state: StreamAccumulatorState,
+  assistant: {
+    session_id: string
+    parent_tool_use_id: string | null
+    message: { id: string }
+  },
+): void {
+  state.byMessage.delete(assistant.message.id)
+  const scope = scopeKey(assistant)
+  // Only clear the scope mapping if it still points at this message — a
+  // newer message_start may already have claimed the scope.
+  if (state.scopeToMessage.get(scope) === assistant.message.id) {
+    state.scopeToMessage.delete(scope)
+  }
+}
+
+/** Outcome of request(); retryAfterMs carries a 429 Retry-After hint. */
+type RequestResult = { ok: true } | { ok: false; retryAfterMs?: number }
+
+/** Queue entry for the internal-event uploader (/worker/internal-events). */
+type WorkerEvent = {
+  payload: EventPayload
+  is_compaction?: boolean
+  agent_id?: string
+}
+
+/** An internal event as stored/returned by CCR. */
+export type InternalEvent = {
+  event_id: string
+  event_type: string
+  payload: Record<string, unknown>
+  event_metadata?: Record<string, unknown> | null
+  is_compaction: boolean
+  created_at: string
+  agent_id?: string
+}
+
+/** Paginated internal-events listing (cursor absent on the last page). */
+type ListInternalEventsResponse = {
+  data: InternalEvent[]
+  next_cursor?: string
+}
+
+/** Subset of GET /worker used to restore the prior worker's metadata. */
+type WorkerStateResponse = {
+  worker?: {
+    external_metadata?: Record<string, unknown>
+  }
+}
+
+/**
+ * Manages the worker lifecycle protocol with CCR v2:
+ * - Epoch management: reads worker_epoch from CLAUDE_CODE_WORKER_EPOCH env var
+ * - Runtime state reporting: PUT /sessions/{id}/worker
+ * - Heartbeat: POST /sessions/{id}/worker/heartbeat for liveness detection
+ *
+ * All writes go through this.request().
+ */
+export class CCRClient {
+  // Lifecycle/identity state. `closed` short-circuits late async work;
+  // heartbeatTimer/heartbeatInFlight guard the periodic heartbeat loop;
+  // consecutiveAuthFailures feeds the MAX_CONSECUTIVE_AUTH_FAILURES check.
+  private workerEpoch = 0
+  private readonly heartbeatIntervalMs: number
+  private readonly heartbeatJitterFraction: number
+  private heartbeatTimer: NodeJS.Timeout | null = null
+  private heartbeatInFlight = false
+  private closed = false
+  private consecutiveAuthFailures = 0
+  private currentState: SessionState | null = null
+  private readonly sessionBaseUrl: string
+  private readonly sessionId: string
+  private readonly http = createAxiosInstance({ keepAlive: true })
+
+  // stream_event delay buffer — accumulates content deltas for up to
+  // STREAM_EVENT_FLUSH_INTERVAL_MS before enqueueing (reduces POST count
+  // and enables text_delta coalescing). Mirrors HybridTransport's pattern.
+  private streamEventBuffer: SDKPartialAssistantMessage[] = []
+  private streamEventTimer: ReturnType<typeof setTimeout> | null = null
+  // Full-so-far text accumulator. Persists across flushes so each emitted
+  // text_delta event carries the complete text from the start of the block —
+  // mid-stream reconnects see a self-contained snapshot. Keyed by API message
+  // ID; cleared in writeEvent when the complete assistant message arrives.
+  private streamTextAccumulator = createStreamAccumulator()
+
+  // Serial uploaders — one per endpoint, each with its own queue/backoff.
+  private readonly workerState: WorkerStateUploader
+  private readonly eventUploader: SerialBatchEventUploader<ClientEvent>
+  private readonly internalEventUploader: SerialBatchEventUploader<WorkerEvent>
+  private readonly deliveryUploader: SerialBatchEventUploader<{
+    eventId: string
+    status: 'received' | 'processing' | 'processed'
+  }>
+
+  /**
+   * Called when the server returns 409 (a newer worker epoch superseded ours).
+   * Default: process.exit(1) — correct for spawn-mode children where the
+   * parent bridge re-spawns. In-process callers (replBridge) MUST override
+   * this to close gracefully instead; exit would kill the user's REPL.
+   */
+  private readonly onEpochMismatch: () => never
+
+  /**
+   * Auth header source. Defaults to the process-wide session-ingress token
+   * (CLAUDE_CODE_SESSION_ACCESS_TOKEN env var). Callers managing multiple
+   * concurrent sessions with distinct JWTs MUST inject this — the env-var
+   * path is a process global and would stomp across sessions.
+   */
+  private readonly getAuthHeaders: () => Record<string, string>
+
+  /**
+   * Wire up uploaders and the delivery-ack callback.
+   *
+   * @param transport SSE transport; its incoming client_events are acked via
+   *   reportDelivery the moment this constructor returns (see note below).
+   * @param sessionUrl Full session URL (https://host/v1/code/sessions/{id});
+   *   must be http(s). The last path segment becomes the session ID.
+   * @param opts Optional overrides — see inline docs on each option.
+   * @throws Error when sessionUrl is not http(s).
+   */
+  constructor(
+    transport: SSETransport,
+    sessionUrl: URL,
+    opts?: {
+      onEpochMismatch?: () => never
+      heartbeatIntervalMs?: number
+      heartbeatJitterFraction?: number
+      /**
+       * Per-instance auth header source. Omit to read the process-wide
+       * CLAUDE_CODE_SESSION_ACCESS_TOKEN (single-session callers — REPL,
+       * daemon). Required for concurrent multi-session callers.
+       */
+      getAuthHeaders?: () => Record<string, string>
+    },
+  ) {
+    this.onEpochMismatch =
+      opts?.onEpochMismatch ??
+      (() => {
+        // eslint-disable-next-line custom-rules/no-process-exit
+        process.exit(1)
+      })
+    this.heartbeatIntervalMs =
+      opts?.heartbeatIntervalMs ?? DEFAULT_HEARTBEAT_INTERVAL_MS
+    this.heartbeatJitterFraction = opts?.heartbeatJitterFraction ?? 0
+    this.getAuthHeaders = opts?.getAuthHeaders ?? getSessionIngressAuthHeaders
+    // Session URL: https://host/v1/code/sessions/{id}
+    if (sessionUrl.protocol !== 'http:' && sessionUrl.protocol !== 'https:') {
+      throw new Error(
+        `CCRClient: Expected http(s) URL, got ${sessionUrl.protocol}`,
+      )
+    }
+    const pathname = sessionUrl.pathname.replace(/\/$/, '')
+    this.sessionBaseUrl = `${sessionUrl.protocol}//${sessionUrl.host}${pathname}`
+    // Extract session ID from the URL path (last segment)
+    this.sessionId = pathname.split('/').pop() || ''
+
+    // worker_epoch is spread first so the caller's body can't omit it;
+    // the epoch value is read at send time, not at construction.
+    this.workerState = new WorkerStateUploader({
+      send: body =>
+        this.request(
+          'put',
+          '/worker',
+          { worker_epoch: this.workerEpoch, ...body },
+          'PUT worker',
+        ).then(r => r.ok),
+      baseDelayMs: 500,
+      maxDelayMs: 30_000,
+      jitterMs: 500,
+    })
+
+    this.eventUploader = new SerialBatchEventUploader<ClientEvent>({
+      maxBatchSize: 100,
+      maxBatchBytes: 10 * 1024 * 1024,
+      // flushStreamEventBuffer() enqueues a full 100ms window of accumulated
+      // stream_events in one call. A burst of mixed delta types that don't
+      // fold into a single snapshot could exceed the old cap (50) and deadlock
+      // on the SerialBatchEventUploader backpressure check. Match
+      // HybridTransport's bound — high enough to be memory-only.
+      maxQueueSize: 100_000,
+      send: async batch => {
+        const result = await this.request(
+          'post',
+          '/worker/events',
+          { worker_epoch: this.workerEpoch, events: batch },
+          'client events',
+        )
+        if (!result.ok) {
+          throw new RetryableError(
+            'client event POST failed',
+            result.retryAfterMs,
+          )
+        }
+      },
+      baseDelayMs: 500,
+      maxDelayMs: 30_000,
+      jitterMs: 500,
+    })
+
+    this.internalEventUploader = new SerialBatchEventUploader<WorkerEvent>({
+      maxBatchSize: 100,
+      maxBatchBytes: 10 * 1024 * 1024,
+      maxQueueSize: 200,
+      send: async batch => {
+        const result = await this.request(
+          'post',
+          '/worker/internal-events',
+          { worker_epoch: this.workerEpoch, events: batch },
+          'internal events',
+        )
+        if (!result.ok) {
+          throw new RetryableError(
+            'internal event POST failed',
+            result.retryAfterMs,
+          )
+        }
+      },
+      baseDelayMs: 500,
+      maxDelayMs: 30_000,
+      jitterMs: 500,
+    })
+
+    this.deliveryUploader = new SerialBatchEventUploader<{
+      eventId: string
+      status: 'received' | 'processing' | 'processed'
+    }>({
+      maxBatchSize: 64,
+      maxQueueSize: 64,
+      send: async batch => {
+        const result = await this.request(
+          'post',
+          '/worker/events/delivery',
+          {
+            worker_epoch: this.workerEpoch,
+            updates: batch.map(d => ({
+              event_id: d.eventId,
+              status: d.status,
+            })),
+          },
+          'delivery batch',
+        )
+        if (!result.ok) {
+          throw new RetryableError('delivery POST failed', result.retryAfterMs)
+        }
+      },
+      baseDelayMs: 500,
+      maxDelayMs: 30_000,
+      jitterMs: 500,
+    })
+
+    // Ack each received client_event so CCR can track delivery status.
+    // Wired here (not in initialize()) so the callback is registered the
+    // moment new CCRClient() returns — remoteIO must be free to call
+    // transport.connect() immediately after without racing the first
+    // SSE catch-up frame against an unwired onEventCallback.
+    transport.setOnEvent((event: StreamClientEvent) => {
+      this.reportDelivery(event.event_id, 'received')
+    })
+  }
+
+  /**
+   * Initialize the session worker:
+   * 1. Take worker_epoch from the argument, or fall back to
+   *    CLAUDE_CODE_WORKER_EPOCH (set by env-manager / bridge spawner)
+   * 2. Report state as 'idle'
+   * 3. Start heartbeat timer
+   *
+   * In-process callers (replBridge) pass the epoch directly — they
+   * registered the worker themselves and there is no parent process
+   * setting env vars.
+   *
+   * @returns The prior worker's external_metadata (read back via
+   *   GET /worker), or null when none was stored.
+   * @throws CCRInitError on missing auth headers, missing/invalid epoch,
+   *   or a failed worker-registration PUT.
+   */
+  async initialize(epoch?: number): Promise<Record<string, unknown> | null> {
+    const startMs = Date.now()
+    if (Object.keys(this.getAuthHeaders()).length === 0) {
+      throw new CCRInitError('no_auth_headers')
+    }
+    if (epoch === undefined) {
+      const rawEpoch = process.env.CLAUDE_CODE_WORKER_EPOCH
+      epoch = rawEpoch ? parseInt(rawEpoch, 10) : NaN
+    }
+    if (isNaN(epoch)) {
+      throw new CCRInitError('missing_epoch')
+    }
+    this.workerEpoch = epoch
+
+    // Concurrent with the init PUT — neither depends on the other.
+    const restoredPromise = this.getWorkerState()
+
+    const result = await this.request(
+      'put',
+      '/worker',
+      {
+        worker_status: 'idle',
+        worker_epoch: this.workerEpoch,
+        // Clear stale pending_action/task_summary left by a prior
+        // worker crash — the in-session clears don't survive process restart.
+        external_metadata: {
+          pending_action: null,
+          task_summary: null,
+        },
+      },
+      'PUT worker (init)',
+    )
+    if (!result.ok) {
+      // 409 → onEpochMismatch may throw, but request() catches it and returns
+      // false. Without this check we'd continue to startHeartbeat(), leaking a
+      // 20s timer against a dead epoch. Throw so connect()'s rejection handler
+      // fires instead of the success path.
+      throw new CCRInitError('worker_register_failed')
+    }
+    this.currentState = 'idle'
+    this.startHeartbeat()
+
+    // sessionActivity's refcount-gated timer fires while an API call or tool
+    // is in-flight; without a write the container lease can expire mid-wait.
+    // v1 wires this in WebSocketTransport per-connection.
+    registerSessionActivityCallback(() => {
+      void this.writeEvent({ type: 'keep_alive' })
+    })
+
+    logForDebugging(`CCRClient: initialized, epoch=${this.workerEpoch}`)
+    logForDiagnosticsNoPII('info', 'cli_worker_lifecycle_initialized', {
+      epoch: this.workerEpoch,
+      duration_ms: Date.now() - startMs,
+    })
+
+    // Await the concurrent GET and log state_restored here, after the PUT
+    // has succeeded — logging inside getWorkerState() raced: if the GET
+    // resolved before the PUT failed, diagnostics showed both init_failed
+    // and state_restored for the same session.
+    const { metadata, durationMs } = await restoredPromise
+    if (!this.closed) {
+      logForDiagnosticsNoPII('info', 'cli_worker_state_restored', {
+        duration_ms: durationMs,
+        had_state: metadata !== null,
+      })
+    }
+    return metadata
+  }
+
+  // Control_requests are marked processed and not re-delivered on
+  // restart, so read back what the prior worker wrote. Returns null
+  // metadata when no auth headers are available or the GET yields nothing;
+  // durationMs is wall-clock time spent on the GET (0 when skipped).
+  private async getWorkerState(): Promise<{
+    metadata: Record<string, unknown> | null
+    durationMs: number
+  }> {
+    const startMs = Date.now()
+    const authHeaders = this.getAuthHeaders()
+    if (Object.keys(authHeaders).length === 0) {
+      return { metadata: null, durationMs: 0 }
+    }
+    const data = await this.getWithRetry<WorkerStateResponse>(
+      `${this.sessionBaseUrl}/worker`,
+      authHeaders,
+      'worker_state',
+    )
+    return {
+      metadata: data?.worker?.external_metadata ?? null,
+      durationMs: Date.now() - startMs,
+    }
+  }
+
+  /**
+   * Send an authenticated HTTP request to CCR. Handles auth headers,
+   * 409 epoch mismatch, and error logging. Returns { ok: true } on 2xx.
+   * On 429, reads Retry-After (integer seconds) so the uploader can honor
+   * the server's backoff hint instead of blindly exponentiating.
+   *
+   * @param method HTTP verb ('post' | 'put').
+   * @param path Path appended to the session base URL (e.g. '/worker').
+   * @param body JSON-serializable request body.
+   * @param label Human-readable tag used in debug/diagnostic logs.
+   * @returns { ok: false } on missing auth, non-2xx, or network error —
+   *   never throws; network errors are caught and logged.
+   */
+  private async request(
+    method: 'post' | 'put',
+    path: string,
+    body: unknown,
+    label: string,
+    { timeout = 10_000 }: { timeout?: number } = {},
+  ): Promise<RequestResult> {
+    const authHeaders = this.getAuthHeaders()
+    if (Object.keys(authHeaders).length === 0) return { ok: false }
+
+    try {
+      const response = await this.http[method](
+        `${this.sessionBaseUrl}${path}`,
+        body,
+        {
+          headers: {
+            ...authHeaders,
+            'Content-Type': 'application/json',
+            'anthropic-version': '2023-06-01',
+            'User-Agent': getClaudeCodeUserAgent(),
+          },
+          validateStatus: alwaysValidStatus,
+          timeout,
+        },
+      )
+
+      if (response.status >= 200 && response.status < 300) {
+        this.consecutiveAuthFailures = 0
+        return { ok: true }
+      }
+      if (response.status === 409) {
+        this.handleEpochMismatch()
+      }
+      if (response.status === 401 || response.status === 403) {
+        // A 401 with an expired JWT is deterministic — no retry will
+        // ever succeed. Check the token's own exp before burning
+        // wall-clock on the threshold loop.
+        const tok = getSessionIngressAuthToken()
+        const exp = tok ? decodeJwtExpiry(tok) : null
+        if (exp !== null && exp * 1000 < Date.now()) {
+          logForDebugging(
+            `CCRClient: session_token expired (exp=${new Date(exp * 1000).toISOString()}) — no refresh was delivered, exiting`,
+            { level: 'error' },
+          )
+          logForDiagnosticsNoPII('error', 'cli_worker_token_expired_no_refresh')
+          this.onEpochMismatch()
+        }
+        // Token looks valid but server says 401 — possible server-side
+        // blip (userauth down, KMS hiccup). Count toward threshold.
+        this.consecutiveAuthFailures++
+        if (this.consecutiveAuthFailures >= MAX_CONSECUTIVE_AUTH_FAILURES) {
+          logForDebugging(
+            `CCRClient: ${this.consecutiveAuthFailures} consecutive auth failures with a valid-looking token — server-side auth unrecoverable, exiting`,
+            { level: 'error' },
+          )
+          logForDiagnosticsNoPII('error', 'cli_worker_auth_failures_exhausted')
+          this.onEpochMismatch()
+        }
+      }
+      logForDebugging(`CCRClient: ${label} returned ${response.status}`, {
+        level: 'warn',
+      })
+      logForDiagnosticsNoPII('warn', 'cli_worker_request_failed', {
+        method,
+        path,
+        status: response.status,
+      })
+      if (response.status === 429) {
+        // Retry-After here is integer seconds; date-format values parse to
+        // NaN and fall through to the plain { ok: false }.
+        const raw = response.headers?.['retry-after']
+        const seconds = typeof raw === 'string' ? parseInt(raw, 10) : NaN
+        if (!isNaN(seconds) && seconds >= 0) {
+          return { ok: false, retryAfterMs: seconds * 1000 }
+        }
+      }
+      return { ok: false }
+    } catch (error) {
+      logForDebugging(`CCRClient: ${label} failed: ${errorMessage(error)}`, {
+        level: 'warn',
+      })
+      logForDiagnosticsNoPII('warn', 'cli_worker_request_error', {
+        method,
+        path,
+        error_code: getErrnoCode(error),
+      })
+      return { ok: false }
+    }
+  }
+
+  /** Report worker state to CCR via PUT /sessions/{id}/worker. */
+  reportState(state: SessionState, details?: RequiresActionDetails): void {
+    if (state === this.currentState && !details) return
+    this.currentState = state
+    this.workerState.enqueue({
+      worker_status: state,
+      requires_action_details: details
+        ? {
+            tool_name: details.tool_name,
+            action_description: details.action_description,
+            request_id: details.request_id,
+          }
+        : null,
+    })
+  }
+
  /**
   * Report external metadata to CCR via PUT /worker.
   * Fire-and-forget: the payload is queued on the worker-state uploader
   * rather than sent inline.
   */
  reportMetadata(metadata: Record<string, unknown>): void {
    this.workerState.enqueue({ external_metadata: metadata })
  }
+
  /**
   * Handle epoch mismatch (409 Conflict). A newer CC instance has replaced
   * this one — exit immediately.
   *
   * Typed `never`: relies on the injected onEpochMismatch callback not
   * returning (it is expected to terminate this worker).
   */
  private handleEpochMismatch(): never {
    logForDebugging('CCRClient: Epoch mismatch (409), shutting down', {
      level: 'error',
    })
    logForDiagnosticsNoPII('error', 'cli_worker_epoch_mismatch')
    this.onEpochMismatch()
  }
+
+  /** Start periodic heartbeat. */
+  private startHeartbeat(): void {
+    this.stopHeartbeat()
+    const schedule = (): void => {
+      const jitter =
+        this.heartbeatIntervalMs *
+        this.heartbeatJitterFraction *
+        (2 * Math.random() - 1)
+      this.heartbeatTimer = setTimeout(tick, this.heartbeatIntervalMs + jitter)
+    }
+    const tick = (): void => {
+      void this.sendHeartbeat()
+      // stopHeartbeat nulls the timer; check after the fire-and-forget send
+      // but before rescheduling so close() during sendHeartbeat is honored.
+      if (this.heartbeatTimer === null) return
+      schedule()
+    }
+    schedule()
+  }
+
+  /** Stop heartbeat timer. */
+  private stopHeartbeat(): void {
+    if (this.heartbeatTimer) {
+      clearTimeout(this.heartbeatTimer)
+      this.heartbeatTimer = null
+    }
+  }
+
+  /** Send a heartbeat via POST /sessions/{id}/worker/heartbeat. */
+  private async sendHeartbeat(): Promise<void> {
+    if (this.heartbeatInFlight) return
+    this.heartbeatInFlight = true
+    try {
+      const result = await this.request(
+        'post',
+        '/worker/heartbeat',
+        { session_id: this.sessionId, worker_epoch: this.workerEpoch },
+        'Heartbeat',
+        { timeout: 5_000 },
+      )
+      if (result.ok) {
+        logForDebugging('CCRClient: Heartbeat sent')
+      }
+    } finally {
+      this.heartbeatInFlight = false
+    }
+  }
+
+  /**
+   * Write a StdoutMessage as a client event via POST /sessions/{id}/worker/events.
+   * These events are visible to frontend clients via the SSE stream.
+   * Injects a UUID if missing to ensure server-side idempotency on retry.
+   *
+   * stream_event messages are held in a 100ms delay buffer and accumulated
+   * (text_deltas for the same content block emit a full-so-far snapshot per
+   * flush). A non-stream_event write flushes the buffer first so downstream
+   * ordering is preserved.
+   */
+  async writeEvent(message: StdoutMessage): Promise<void> {
+    if (message.type === 'stream_event') {
+      this.streamEventBuffer.push(message)
+      if (!this.streamEventTimer) {
+        this.streamEventTimer = setTimeout(
+          () => void this.flushStreamEventBuffer(),
+          STREAM_EVENT_FLUSH_INTERVAL_MS,
+        )
+      }
+      return
+    }
+    await this.flushStreamEventBuffer()
+    if (message.type === 'assistant') {
+      clearStreamAccumulatorForMessage(this.streamTextAccumulator, message)
+    }
+    await this.eventUploader.enqueue(this.toClientEvent(message))
+  }
+
+  /** Wrap a StdoutMessage as a ClientEvent, injecting a UUID if missing. */
+  private toClientEvent(message: StdoutMessage): ClientEvent {
+    const msg = message as unknown as Record<string, unknown>
+    return {
+      payload: {
+        ...msg,
+        uuid: typeof msg.uuid === 'string' ? msg.uuid : randomUUID(),
+      } as EventPayload,
+    }
+  }
+
+  /**
+   * Drain the stream_event delay buffer: accumulate text_deltas into
+   * full-so-far snapshots, clear the timer, enqueue the resulting events.
+   * Called from the timer, from writeEvent on a non-stream message, and from
+   * flush(). close() drops the buffer — call flush() first if you need
+   * delivery.
+   */
+  private async flushStreamEventBuffer(): Promise<void> {
+    if (this.streamEventTimer) {
+      clearTimeout(this.streamEventTimer)
+      this.streamEventTimer = null
+    }
+    if (this.streamEventBuffer.length === 0) return
+    const buffered = this.streamEventBuffer
+    this.streamEventBuffer = []
+    const payloads = accumulateStreamEvents(
+      buffered,
+      this.streamTextAccumulator,
+    )
+    await this.eventUploader.enqueue(
+      payloads.map(payload => ({ payload, ephemeral: true })),
+    )
+  }
+
+  /**
+   * Write an internal worker event via POST /sessions/{id}/worker/internal-events.
+   * These events are NOT visible to frontend clients — they store worker-internal
+   * state (transcript messages, compaction markers) needed for session resume.
+   */
+  async writeInternalEvent(
+    eventType: string,
+    payload: Record<string, unknown>,
+    {
+      isCompaction = false,
+      agentId,
+    }: {
+      isCompaction?: boolean
+      agentId?: string
+    } = {},
+  ): Promise<void> {
+    const event: WorkerEvent = {
+      payload: {
+        type: eventType,
+        ...payload,
+        uuid: typeof payload.uuid === 'string' ? payload.uuid : randomUUID(),
+      } as EventPayload,
+      ...(isCompaction && { is_compaction: true }),
+      ...(agentId && { agent_id: agentId }),
+    }
+    await this.internalEventUploader.enqueue(event)
+  }
+
+  /**
+   * Flush pending internal events. Call between turns and on shutdown
+   * to ensure transcript entries are persisted.
+   */
+  flushInternalEvents(): Promise<void> {
+    return this.internalEventUploader.flush()
+  }
+
+  /**
+   * Flush pending client events (writeEvent queue). Call before close()
+   * when the caller needs delivery confirmation — close() abandons the
+   * queue. Resolves once the uploader drains or rejects; returns
+   * regardless of whether individual POSTs succeeded (check server state
+   * separately if that matters).
+   */
+  async flush(): Promise<void> {
+    await this.flushStreamEventBuffer()
+    return this.eventUploader.flush()
+  }
+
+  /**
+   * Read foreground agent internal events from
+   * GET /sessions/{id}/worker/internal-events.
+   * Returns transcript entries from the last compaction boundary, or null on failure.
+   * Used for session resume.
+   */
+  async readInternalEvents(): Promise<InternalEvent[] | null> {
+    return this.paginatedGet('/worker/internal-events', {}, 'internal_events')
+  }
+
+  /**
+   * Read all subagent internal events from
+   * GET /sessions/{id}/worker/internal-events?subagents=true.
+   * Returns a merged stream across all non-foreground agents, each from its
+   * compaction point. Used for session resume.
+   */
+  async readSubagentInternalEvents(): Promise<InternalEvent[] | null> {
+    return this.paginatedGet(
+      '/worker/internal-events',
+      { subagents: 'true' },
+      'subagent_events',
+    )
+  }
+
+  /**
+   * Paginated GET with retry. Fetches all pages from a list endpoint,
+   * retrying each page on failure with exponential backoff + jitter.
+   */
+  private async paginatedGet(
+    path: string,
+    params: Record<string, string>,
+    context: string,
+  ): Promise<InternalEvent[] | null> {
+    const authHeaders = this.getAuthHeaders()
+    if (Object.keys(authHeaders).length === 0) return null
+
+    const allEvents: InternalEvent[] = []
+    let cursor: string | undefined
+
+    do {
+      const url = new URL(`${this.sessionBaseUrl}${path}`)
+      for (const [k, v] of Object.entries(params)) {
+        url.searchParams.set(k, v)
+      }
+      if (cursor) {
+        url.searchParams.set('cursor', cursor)
+      }
+
+      const page = await this.getWithRetry<ListInternalEventsResponse>(
+        url.toString(),
+        authHeaders,
+        context,
+      )
+      if (!page) return null
+
+      allEvents.push(...(page.data ?? []))
+      cursor = page.next_cursor
+    } while (cursor)
+
+    logForDebugging(
+      `CCRClient: Read ${allEvents.length} internal events from ${path}${params.subagents ? ' (subagents)' : ''}`,
+    )
+    return allEvents
+  }
+
+  /**
+   * Single GET request with retry. Returns the parsed response body
+   * on success, null if all retries are exhausted.
+   */
+  private async getWithRetry<T>(
+    url: string,
+    authHeaders: Record<string, string>,
+    context: string,
+  ): Promise<T | null> {
+    for (let attempt = 1; attempt <= 10; attempt++) {
+      let response
+      try {
+        response = await this.http.get<T>(url, {
+          headers: {
+            ...authHeaders,
+            'anthropic-version': '2023-06-01',
+            'User-Agent': getClaudeCodeUserAgent(),
+          },
+          validateStatus: alwaysValidStatus,
+          timeout: 30_000,
+        })
+      } catch (error) {
+        logForDebugging(
+          `CCRClient: GET ${url} failed (attempt ${attempt}/10): ${errorMessage(error)}`,
+          { level: 'warn' },
+        )
+        if (attempt < 10) {
+          const delay =
+            Math.min(500 * 2 ** (attempt - 1), 30_000) + Math.random() * 500
+          await sleep(delay)
+        }
+        continue
+      }
+
+      if (response.status >= 200 && response.status < 300) {
+        return response.data
+      }
+      if (response.status === 409) {
+        this.handleEpochMismatch()
+      }
+      logForDebugging(
+        `CCRClient: GET ${url} returned ${response.status} (attempt ${attempt}/10)`,
+        { level: 'warn' },
+      )
+
+      if (attempt < 10) {
+        const delay =
+          Math.min(500 * 2 ** (attempt - 1), 30_000) + Math.random() * 500
+        await sleep(delay)
+      }
+    }
+
+    logForDebugging('CCRClient: GET retries exhausted', { level: 'error' })
+    logForDiagnosticsNoPII('error', 'cli_worker_get_retries_exhausted', {
+      context,
+    })
+    return null
+  }
+
+  /**
+   * Report delivery status for a client-to-worker event.
+   * POST /v1/code/sessions/{id}/worker/events/delivery (batch endpoint)
+   */
+  reportDelivery(
+    eventId: string,
+    status: 'received' | 'processing' | 'processed',
+  ): void {
+    void this.deliveryUploader.enqueue({ eventId, status })
+  }
+
  /**
   * The worker epoch this client is operating under (for external use).
   * Sent on heartbeats so CCR can detect superseded workers.
   */
  getWorkerEpoch(): number {
    return this.workerEpoch
  }
+
  /**
   * Depth of the internal-event upload queue. Used as a backpressure
   * signal when snapshotting during shutdown.
   */
  get internalEventsPending(): number {
    return this.internalEventUploader.pendingCount
  }
+
+  /** Clean up uploaders and timers. */
+  close(): void {
+    this.closed = true
+    this.stopHeartbeat()
+    unregisterSessionActivityCallback()
+    if (this.streamEventTimer) {
+      clearTimeout(this.streamEventTimer)
+      this.streamEventTimer = null
+    }
+    this.streamEventBuffer = []
+    this.streamTextAccumulator.byMessage.clear()
+    this.streamTextAccumulator.scopeToMessage.clear()
+    this.workerState.close()
+    this.eventUploader.close()
+    this.internalEventUploader.close()
+    this.deliveryUploader.close()
+  }
+}

+ 45 - 0
src/cli/transports/transportUtils.ts

@@ -0,0 +1,45 @@
+import { URL } from 'url'
+import { isEnvTruthy } from '../../utils/envUtils.js'
+import { HybridTransport } from './HybridTransport.js'
+import { SSETransport } from './SSETransport.js'
+import type { Transport } from './Transport.js'
+import { WebSocketTransport } from './WebSocketTransport.js'
+
+/**
+ * Helper function to get the appropriate transport for a URL.
+ *
+ * Transport selection priority:
+ * 1. SSETransport (SSE reads + POST writes) when CLAUDE_CODE_USE_CCR_V2 is set
+ * 2. HybridTransport (WS reads + POST writes) when CLAUDE_CODE_POST_FOR_SESSION_INGRESS_V2 is set
+ * 3. WebSocketTransport (WS reads + WS writes) — default
+ */
+export function getTransportForUrl(
+  url: URL,
+  headers: Record<string, string> = {},
+  sessionId?: string,
+  refreshHeaders?: () => Record<string, string>,
+): Transport {
+  if (isEnvTruthy(process.env.CLAUDE_CODE_USE_CCR_V2)) {
+    // v2: SSE for reads, HTTP POST for writes
+    // --sdk-url is the session URL (.../sessions/{id});
+    // derive the SSE stream URL by appending /worker/events/stream
+    const sseUrl = new URL(url.href)
+    if (sseUrl.protocol === 'wss:') {
+      sseUrl.protocol = 'https:'
+    } else if (sseUrl.protocol === 'ws:') {
+      sseUrl.protocol = 'http:'
+    }
+    sseUrl.pathname =
+      sseUrl.pathname.replace(/\/$/, '') + '/worker/events/stream'
+    return new SSETransport(sseUrl, headers, sessionId, refreshHeaders)
+  }
+
+  if (url.protocol === 'ws:' || url.protocol === 'wss:') {
+    if (isEnvTruthy(process.env.CLAUDE_CODE_POST_FOR_SESSION_INGRESS_V2)) {
+      return new HybridTransport(url, headers, sessionId, refreshHeaders)
+    }
+    return new WebSocketTransport(url, headers, sessionId, refreshHeaders)
+  } else {
+    throw new Error(`Unsupported protocol: ${url.protocol}`)
+  }
+}

+ 422 - 0
src/cli/update.ts

@@ -0,0 +1,422 @@
+import chalk from 'chalk'
+import { logEvent } from 'src/services/analytics/index.js'
+import {
+  getLatestVersion,
+  type InstallStatus,
+  installGlobalPackage,
+} from 'src/utils/autoUpdater.js'
+import { regenerateCompletionCache } from 'src/utils/completionCache.js'
+import {
+  getGlobalConfig,
+  type InstallMethod,
+  saveGlobalConfig,
+} from 'src/utils/config.js'
+import { logForDebugging } from 'src/utils/debug.js'
+import { getDoctorDiagnostic } from 'src/utils/doctorDiagnostic.js'
+import { gracefulShutdown } from 'src/utils/gracefulShutdown.js'
+import {
+  installOrUpdateClaudePackage,
+  localInstallationExists,
+} from 'src/utils/localInstaller.js'
+import {
+  installLatest as installLatestNative,
+  removeInstalledSymlink,
+} from 'src/utils/nativeInstaller/index.js'
+import { getPackageManager } from 'src/utils/nativeInstaller/packageManagers.js'
+import { writeToStdout } from 'src/utils/process.js'
+import { gte } from 'src/utils/semver.js'
+import { getInitialSettings } from 'src/utils/settings/settings.js'
+
+export async function update() {
+  logEvent('tengu_update_check', {})
+  writeToStdout(`Current version: ${MACRO.VERSION}\n`)
+
+  const channel = getInitialSettings()?.autoUpdatesChannel ?? 'latest'
+  writeToStdout(`Checking for updates to ${channel} version...\n`)
+
+  logForDebugging('update: Starting update check')
+
+  // Run diagnostic to detect potential issues
+  logForDebugging('update: Running diagnostic')
+  const diagnostic = await getDoctorDiagnostic()
+  logForDebugging(`update: Installation type: ${diagnostic.installationType}`)
+  logForDebugging(
+    `update: Config install method: ${diagnostic.configInstallMethod}`,
+  )
+
+  // Check for multiple installations
+  if (diagnostic.multipleInstallations.length > 1) {
+    writeToStdout('\n')
+    writeToStdout(chalk.yellow('Warning: Multiple installations found') + '\n')
+    for (const install of diagnostic.multipleInstallations) {
+      const current =
+        diagnostic.installationType === install.type
+          ? ' (currently running)'
+          : ''
+      writeToStdout(`- ${install.type} at ${install.path}${current}\n`)
+    }
+  }
+
+  // Display warnings if any exist
+  if (diagnostic.warnings.length > 0) {
+    writeToStdout('\n')
+    for (const warning of diagnostic.warnings) {
+      logForDebugging(`update: Warning detected: ${warning.issue}`)
+
+      // Don't skip PATH warnings - they're always relevant
+      // The user needs to know that 'which claude' points elsewhere
+      logForDebugging(`update: Showing warning: ${warning.issue}`)
+
+      writeToStdout(chalk.yellow(`Warning: ${warning.issue}\n`))
+
+      writeToStdout(chalk.bold(`Fix: ${warning.fix}\n`))
+    }
+  }
+
+  // Update config if installMethod is not set (but skip for package managers)
+  const config = getGlobalConfig()
+  if (
+    !config.installMethod &&
+    diagnostic.installationType !== 'package-manager'
+  ) {
+    writeToStdout('\n')
+    writeToStdout('Updating configuration to track installation method...\n')
+    let detectedMethod: 'local' | 'native' | 'global' | 'unknown' = 'unknown'
+
+    // Map diagnostic installation type to config install method
+    switch (diagnostic.installationType) {
+      case 'npm-local':
+        detectedMethod = 'local'
+        break
+      case 'native':
+        detectedMethod = 'native'
+        break
+      case 'npm-global':
+        detectedMethod = 'global'
+        break
+      default:
+        detectedMethod = 'unknown'
+    }
+
+    saveGlobalConfig(current => ({
+      ...current,
+      installMethod: detectedMethod,
+    }))
+    writeToStdout(`Installation method set to: ${detectedMethod}\n`)
+  }
+
+  // Check if running from development build
+  if (diagnostic.installationType === 'development') {
+    writeToStdout('\n')
+    writeToStdout(
+      chalk.yellow('Warning: Cannot update development build') + '\n',
+    )
+    await gracefulShutdown(1)
+  }
+
+  // Check if running from a package manager
+  if (diagnostic.installationType === 'package-manager') {
+    const packageManager = await getPackageManager()
+    writeToStdout('\n')
+
+    if (packageManager === 'homebrew') {
+      writeToStdout('Claude is managed by Homebrew.\n')
+      const latest = await getLatestVersion(channel)
+      if (latest && !gte(MACRO.VERSION, latest)) {
+        writeToStdout(`Update available: ${MACRO.VERSION} → ${latest}\n`)
+        writeToStdout('\n')
+        writeToStdout('To update, run:\n')
+        writeToStdout(chalk.bold('  brew upgrade claude-code') + '\n')
+      } else {
+        writeToStdout('Claude is up to date!\n')
+      }
+    } else if (packageManager === 'winget') {
+      writeToStdout('Claude is managed by winget.\n')
+      const latest = await getLatestVersion(channel)
+      if (latest && !gte(MACRO.VERSION, latest)) {
+        writeToStdout(`Update available: ${MACRO.VERSION} → ${latest}\n`)
+        writeToStdout('\n')
+        writeToStdout('To update, run:\n')
+        writeToStdout(
+          chalk.bold('  winget upgrade Anthropic.ClaudeCode') + '\n',
+        )
+      } else {
+        writeToStdout('Claude is up to date!\n')
+      }
+    } else if (packageManager === 'apk') {
+      writeToStdout('Claude is managed by apk.\n')
+      const latest = await getLatestVersion(channel)
+      if (latest && !gte(MACRO.VERSION, latest)) {
+        writeToStdout(`Update available: ${MACRO.VERSION} → ${latest}\n`)
+        writeToStdout('\n')
+        writeToStdout('To update, run:\n')
+        writeToStdout(chalk.bold('  apk upgrade claude-code') + '\n')
+      } else {
+        writeToStdout('Claude is up to date!\n')
+      }
+    } else {
+      // pacman, deb, and rpm don't get specific commands because they each have
+      // multiple frontends (pacman: yay/paru/makepkg, deb: apt/apt-get/aptitude/nala,
+      // rpm: dnf/yum/zypper)
+      writeToStdout('Claude is managed by a package manager.\n')
+      writeToStdout('Please use your package manager to update.\n')
+    }
+
+    await gracefulShutdown(0)
+  }
+
+  // Check for config/reality mismatch (skip for package-manager installs)
+  if (
+    config.installMethod &&
+    diagnostic.configInstallMethod !== 'not set' &&
+    diagnostic.installationType !== 'package-manager'
+  ) {
+    const runningType = diagnostic.installationType
+    const configExpects = diagnostic.configInstallMethod
+
+    // Map installation types for comparison
+    const typeMapping: Record<string, string> = {
+      'npm-local': 'local',
+      'npm-global': 'global',
+      native: 'native',
+      development: 'development',
+      unknown: 'unknown',
+    }
+
+    const normalizedRunningType = typeMapping[runningType] || runningType
+
+    if (
+      normalizedRunningType !== configExpects &&
+      configExpects !== 'unknown'
+    ) {
+      writeToStdout('\n')
+      writeToStdout(chalk.yellow('Warning: Configuration mismatch') + '\n')
+      writeToStdout(`Config expects: ${configExpects} installation\n`)
+      writeToStdout(`Currently running: ${runningType}\n`)
+      writeToStdout(
+        chalk.yellow(
+          `Updating the ${runningType} installation you are currently using`,
+        ) + '\n',
+      )
+
+      // Update config to match reality
+      saveGlobalConfig(current => ({
+        ...current,
+        installMethod: normalizedRunningType as InstallMethod,
+      }))
+      writeToStdout(
+        `Config updated to reflect current installation method: ${normalizedRunningType}\n`,
+      )
+    }
+  }
+
+  // Handle native installation updates first
+  if (diagnostic.installationType === 'native') {
+    logForDebugging(
+      'update: Detected native installation, using native updater',
+    )
+    try {
+      const result = await installLatestNative(channel, true)
+
+      // Handle lock contention gracefully
+      if (result.lockFailed) {
+        const pidInfo = result.lockHolderPid
+          ? ` (PID ${result.lockHolderPid})`
+          : ''
+        writeToStdout(
+          chalk.yellow(
+            `Another Claude process${pidInfo} is currently running. Please try again in a moment.`,
+          ) + '\n',
+        )
+        await gracefulShutdown(0)
+      }
+
+      if (!result.latestVersion) {
+        process.stderr.write('Failed to check for updates\n')
+        await gracefulShutdown(1)
+      }
+
+      if (result.latestVersion === MACRO.VERSION) {
+        writeToStdout(
+          chalk.green(`Claude Code is up to date (${MACRO.VERSION})`) + '\n',
+        )
+      } else {
+        writeToStdout(
+          chalk.green(
+            `Successfully updated from ${MACRO.VERSION} to version ${result.latestVersion}`,
+          ) + '\n',
+        )
+        await regenerateCompletionCache()
+      }
+      await gracefulShutdown(0)
+    } catch (error) {
+      process.stderr.write('Error: Failed to install native update\n')
+      process.stderr.write(String(error) + '\n')
+      process.stderr.write('Try running "claude doctor" for diagnostics\n')
+      await gracefulShutdown(1)
+    }
+  }
+
+  // Fallback to existing JS/npm-based update logic
+  // Remove native installer symlink since we're not using native installation
+  // But only if user hasn't migrated to native installation
+  if (config.installMethod !== 'native') {
+    await removeInstalledSymlink()
+  }
+
+  logForDebugging('update: Checking npm registry for latest version')
+  logForDebugging(`update: Package URL: ${MACRO.PACKAGE_URL}`)
+  const npmTag = channel === 'stable' ? 'stable' : 'latest'
+  const npmCommand = `npm view ${MACRO.PACKAGE_URL}@${npmTag} version`
+  logForDebugging(`update: Running: ${npmCommand}`)
+  const latestVersion = await getLatestVersion(channel)
+  logForDebugging(
+    `update: Latest version from npm: ${latestVersion || 'FAILED'}`,
+  )
+
+  if (!latestVersion) {
+    logForDebugging('update: Failed to get latest version from npm registry')
+    process.stderr.write(chalk.red('Failed to check for updates') + '\n')
+    process.stderr.write('Unable to fetch latest version from npm registry\n')
+    process.stderr.write('\n')
+    process.stderr.write('Possible causes:\n')
+    process.stderr.write('  • Network connectivity issues\n')
+    process.stderr.write('  • npm registry is unreachable\n')
+    process.stderr.write('  • Corporate proxy/firewall blocking npm\n')
+    if (MACRO.PACKAGE_URL && !MACRO.PACKAGE_URL.startsWith('@anthropic')) {
+      process.stderr.write(
+        '  • Internal/development build not published to npm\n',
+      )
+    }
+    process.stderr.write('\n')
+    process.stderr.write('Try:\n')
+    process.stderr.write('  • Check your internet connection\n')
+    process.stderr.write('  • Run with --debug flag for more details\n')
+    const packageName =
+      MACRO.PACKAGE_URL ||
+      (process.env.USER_TYPE === 'ant'
+        ? '@anthropic-ai/claude-cli'
+        : '@anthropic-ai/claude-code')
+    process.stderr.write(
+      `  • Manually check: npm view ${packageName} version\n`,
+    )
+
+    process.stderr.write('  • Check if you need to login: npm whoami\n')
+    await gracefulShutdown(1)
+  }
+
+  // Check if versions match exactly, including any build metadata (like SHA)
+  if (latestVersion === MACRO.VERSION) {
+    writeToStdout(
+      chalk.green(`Claude Code is up to date (${MACRO.VERSION})`) + '\n',
+    )
+    await gracefulShutdown(0)
+  }
+
+  writeToStdout(
+    `New version available: ${latestVersion} (current: ${MACRO.VERSION})\n`,
+  )
+  writeToStdout('Installing update...\n')
+
+  // Determine update method based on what's actually running
+  let useLocalUpdate = false
+  let updateMethodName = ''
+
+  switch (diagnostic.installationType) {
+    case 'npm-local':
+      useLocalUpdate = true
+      updateMethodName = 'local'
+      break
+    case 'npm-global':
+      useLocalUpdate = false
+      updateMethodName = 'global'
+      break
+    case 'unknown': {
+      // Fallback to detection if we can't determine installation type
+      const isLocal = await localInstallationExists()
+      useLocalUpdate = isLocal
+      updateMethodName = isLocal ? 'local' : 'global'
+      writeToStdout(
+        chalk.yellow('Warning: Could not determine installation type') + '\n',
+      )
+      writeToStdout(
+        `Attempting ${updateMethodName} update based on file detection...\n`,
+      )
+      break
+    }
+    default:
+      process.stderr.write(
+        `Error: Cannot update ${diagnostic.installationType} installation\n`,
+      )
+      await gracefulShutdown(1)
+  }
+
+  writeToStdout(`Using ${updateMethodName} installation update method...\n`)
+
+  logForDebugging(`update: Update method determined: ${updateMethodName}`)
+  logForDebugging(`update: useLocalUpdate: ${useLocalUpdate}`)
+
+  let status: InstallStatus
+
+  if (useLocalUpdate) {
+    logForDebugging(
+      'update: Calling installOrUpdateClaudePackage() for local update',
+    )
+    status = await installOrUpdateClaudePackage(channel)
+  } else {
+    logForDebugging('update: Calling installGlobalPackage() for global update')
+    status = await installGlobalPackage()
+  }
+
+  logForDebugging(`update: Installation status: ${status}`)
+
+  switch (status) {
+    case 'success':
+      writeToStdout(
+        chalk.green(
+          `Successfully updated from ${MACRO.VERSION} to version ${latestVersion}`,
+        ) + '\n',
+      )
+      await regenerateCompletionCache()
+      break
+    case 'no_permissions':
+      process.stderr.write(
+        'Error: Insufficient permissions to install update\n',
+      )
+      if (useLocalUpdate) {
+        process.stderr.write('Try manually updating with:\n')
+        process.stderr.write(
+          `  cd ~/.claude/local && npm update ${MACRO.PACKAGE_URL}\n`,
+        )
+      } else {
+        process.stderr.write('Try running with sudo or fix npm permissions\n')
+        process.stderr.write(
+          'Or consider using native installation with: claude install\n',
+        )
+      }
+      await gracefulShutdown(1)
+      break
+    case 'install_failed':
+      process.stderr.write('Error: Failed to install update\n')
+      if (useLocalUpdate) {
+        process.stderr.write('Try manually updating with:\n')
+        process.stderr.write(
+          `  cd ~/.claude/local && npm update ${MACRO.PACKAGE_URL}\n`,
+        )
+      } else {
+        process.stderr.write(
+          'Or consider using native installation with: claude install\n',
+        )
+      }
+      await gracefulShutdown(1)
+      break
+    case 'in_progress':
+      process.stderr.write(
+        'Error: Another instance is currently performing an update\n',
+      )
+      process.stderr.write('Please wait and try again later\n')
+      await gracefulShutdown(1)
+      break
+  }
+  await gracefulShutdown(0)
+}

+ 754 - 0
src/commands.ts

@@ -0,0 +1,754 @@
+// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
+import addDir from './commands/add-dir/index.js'
+import autofixPr from './commands/autofix-pr/index.js'
+import backfillSessions from './commands/backfill-sessions/index.js'
+import btw from './commands/btw/index.js'
+import goodClaude from './commands/good-claude/index.js'
+import issue from './commands/issue/index.js'
+import feedback from './commands/feedback/index.js'
+import clear from './commands/clear/index.js'
+import color from './commands/color/index.js'
+import commit from './commands/commit.js'
+import copy from './commands/copy/index.js'
+import desktop from './commands/desktop/index.js'
+import commitPushPr from './commands/commit-push-pr.js'
+import compact from './commands/compact/index.js'
+import config from './commands/config/index.js'
+import { context, contextNonInteractive } from './commands/context/index.js'
+import cost from './commands/cost/index.js'
+import diff from './commands/diff/index.js'
+import ctx_viz from './commands/ctx_viz/index.js'
+import doctor from './commands/doctor/index.js'
+import memory from './commands/memory/index.js'
+import help from './commands/help/index.js'
+import ide from './commands/ide/index.js'
+import init from './commands/init.js'
+import initVerifiers from './commands/init-verifiers.js'
+import keybindings from './commands/keybindings/index.js'
+import login from './commands/login/index.js'
+import logout from './commands/logout/index.js'
+import installGitHubApp from './commands/install-github-app/index.js'
+import installSlackApp from './commands/install-slack-app/index.js'
+import breakCache from './commands/break-cache/index.js'
+import mcp from './commands/mcp/index.js'
+import mobile from './commands/mobile/index.js'
+import onboarding from './commands/onboarding/index.js'
+import pr_comments from './commands/pr_comments/index.js'
+import releaseNotes from './commands/release-notes/index.js'
+import rename from './commands/rename/index.js'
+import resume from './commands/resume/index.js'
+import review, { ultrareview } from './commands/review.js'
+import session from './commands/session/index.js'
+import share from './commands/share/index.js'
+import skills from './commands/skills/index.js'
+import status from './commands/status/index.js'
+import tasks from './commands/tasks/index.js'
+import teleport from './commands/teleport/index.js'
+/* eslint-disable @typescript-eslint/no-require-imports */
+const agentsPlatform =
+  process.env.USER_TYPE === 'ant'
+    ? require('./commands/agents-platform/index.js').default
+    : null
+/* eslint-enable @typescript-eslint/no-require-imports */
+import securityReview from './commands/security-review.js'
+import bughunter from './commands/bughunter/index.js'
+import terminalSetup from './commands/terminalSetup/index.js'
+import usage from './commands/usage/index.js'
+import theme from './commands/theme/index.js'
+import vim from './commands/vim/index.js'
+import { feature } from 'bun:bundle'
+// Dead code elimination: conditional imports
+/* eslint-disable @typescript-eslint/no-require-imports */
+const proactive =
+  feature('PROACTIVE') || feature('KAIROS')
+    ? require('./commands/proactive.js').default
+    : null
+const briefCommand =
+  feature('KAIROS') || feature('KAIROS_BRIEF')
+    ? require('./commands/brief.js').default
+    : null
+const assistantCommand = feature('KAIROS')
+  ? require('./commands/assistant/index.js').default
+  : null
+const bridge = feature('BRIDGE_MODE')
+  ? require('./commands/bridge/index.js').default
+  : null
+const remoteControlServerCommand =
+  feature('DAEMON') && feature('BRIDGE_MODE')
+    ? require('./commands/remoteControlServer/index.js').default
+    : null
+const voiceCommand = feature('VOICE_MODE')
+  ? require('./commands/voice/index.js').default
+  : null
+const forceSnip = feature('HISTORY_SNIP')
+  ? require('./commands/force-snip.js').default
+  : null
+const workflowsCmd = feature('WORKFLOW_SCRIPTS')
+  ? (
+      require('./commands/workflows/index.js') as typeof import('./commands/workflows/index.js')
+    ).default
+  : null
+const webCmd = feature('CCR_REMOTE_SETUP')
+  ? (
+      require('./commands/remote-setup/index.js') as typeof import('./commands/remote-setup/index.js')
+    ).default
+  : null
+const clearSkillIndexCache = feature('EXPERIMENTAL_SKILL_SEARCH')
+  ? (
+      require('./services/skillSearch/localSearch.js') as typeof import('./services/skillSearch/localSearch.js')
+    ).clearSkillIndexCache
+  : null
+const subscribePr = feature('KAIROS_GITHUB_WEBHOOKS')
+  ? require('./commands/subscribe-pr.js').default
+  : null
+const ultraplan = feature('ULTRAPLAN')
+  ? require('./commands/ultraplan.js').default
+  : null
+const torch = feature('TORCH') ? require('./commands/torch.js').default : null
+const peersCmd = feature('UDS_INBOX')
+  ? (
+      require('./commands/peers/index.js') as typeof import('./commands/peers/index.js')
+    ).default
+  : null
+const forkCmd = feature('FORK_SUBAGENT')
+  ? (
+      require('./commands/fork/index.js') as typeof import('./commands/fork/index.js')
+    ).default
+  : null
+const buddy = feature('BUDDY')
+  ? (
+      require('./commands/buddy/index.js') as typeof import('./commands/buddy/index.js')
+    ).default
+  : null
+/* eslint-enable @typescript-eslint/no-require-imports */
+import thinkback from './commands/thinkback/index.js'
+import thinkbackPlay from './commands/thinkback-play/index.js'
+import permissions from './commands/permissions/index.js'
+import plan from './commands/plan/index.js'
+import fast from './commands/fast/index.js'
+import passes from './commands/passes/index.js'
+import privacySettings from './commands/privacy-settings/index.js'
+import hooks from './commands/hooks/index.js'
+import files from './commands/files/index.js'
+import branch from './commands/branch/index.js'
+import agents from './commands/agents/index.js'
+import plugin from './commands/plugin/index.js'
+import reloadPlugins from './commands/reload-plugins/index.js'
+import rewind from './commands/rewind/index.js'
+import heapDump from './commands/heapdump/index.js'
+import mockLimits from './commands/mock-limits/index.js'
+import bridgeKick from './commands/bridge-kick.js'
+import version from './commands/version.js'
+import summary from './commands/summary/index.js'
+import {
+  resetLimits,
+  resetLimitsNonInteractive,
+} from './commands/reset-limits/index.js'
+import antTrace from './commands/ant-trace/index.js'
+import perfIssue from './commands/perf-issue/index.js'
+import sandboxToggle from './commands/sandbox-toggle/index.js'
+import chrome from './commands/chrome/index.js'
+import stickers from './commands/stickers/index.js'
+import advisor from './commands/advisor.js'
+import { logError } from './utils/log.js'
+import { toError } from './utils/errors.js'
+import { logForDebugging } from './utils/debug.js'
+import {
+  getSkillDirCommands,
+  clearSkillCaches,
+  getDynamicSkills,
+} from './skills/loadSkillsDir.js'
+import { getBundledSkills } from './skills/bundledSkills.js'
+import { getBuiltinPluginSkillCommands } from './plugins/builtinPlugins.js'
+import {
+  getPluginCommands,
+  clearPluginCommandCache,
+  getPluginSkills,
+  clearPluginSkillsCache,
+} from './utils/plugins/loadPluginCommands.js'
+import memoize from 'lodash-es/memoize.js'
+import { isUsing3PServices, isClaudeAISubscriber } from './utils/auth.js'
+import { isFirstPartyAnthropicBaseUrl } from './utils/model/providers.js'
+import env from './commands/env/index.js'
+import exit from './commands/exit/index.js'
+import exportCommand from './commands/export/index.js'
+import model from './commands/model/index.js'
+import tag from './commands/tag/index.js'
+import outputStyle from './commands/output-style/index.js'
+import remoteEnv from './commands/remote-env/index.js'
+import upgrade from './commands/upgrade/index.js'
+import {
+  extraUsage,
+  extraUsageNonInteractive,
+} from './commands/extra-usage/index.js'
+import rateLimitOptions from './commands/rate-limit-options/index.js'
+import statusline from './commands/statusline.js'
+import effort from './commands/effort/index.js'
+import stats from './commands/stats/index.js'
+// insights.ts is 113KB (3200 lines, includes diffLines/html rendering). Lazy
+// shim defers the heavy module until /insights is actually invoked.
+const usageReport: Command = {
+  type: 'prompt',
+  name: 'insights',
+  description: 'Generate a report analyzing your Claude Code sessions',
+  contentLength: 0,
+  progressMessage: 'analyzing your sessions',
+  source: 'builtin',
+  async getPromptForCommand(args, context) {
+    const real = (await import('./commands/insights.js')).default
+    if (real.type !== 'prompt') throw new Error('unreachable')
+    return real.getPromptForCommand(args, context)
+  },
+}
+import oauthRefresh from './commands/oauth-refresh/index.js'
+import debugToolCall from './commands/debug-tool-call/index.js'
+import { getSettingSourceName } from './utils/settings/constants.js'
+import {
+  type Command,
+  getCommandName,
+  isCommandEnabled,
+} from './types/command.js'
+
+// Re-export types from the centralized location
+export type {
+  Command,
+  CommandBase,
+  CommandResultDisplay,
+  LocalCommandResult,
+  LocalJSXCommandContext,
+  PromptCommand,
+  ResumeEntrypoint,
+} from './types/command.js'
+export { getCommandName, isCommandEnabled } from './types/command.js'
+
+// Commands that get eliminated from the external build
+export const INTERNAL_ONLY_COMMANDS = [
+  backfillSessions,
+  breakCache,
+  bughunter,
+  commit,
+  commitPushPr,
+  ctx_viz,
+  goodClaude,
+  issue,
+  initVerifiers,
+  ...(forceSnip ? [forceSnip] : []),
+  mockLimits,
+  bridgeKick,
+  version,
+  ...(ultraplan ? [ultraplan] : []),
+  ...(subscribePr ? [subscribePr] : []),
+  resetLimits,
+  resetLimitsNonInteractive,
+  onboarding,
+  share,
+  summary,
+  teleport,
+  antTrace,
+  perfIssue,
+  env,
+  oauthRefresh,
+  debugToolCall,
+  agentsPlatform,
+  autofixPr,
+].filter(Boolean)
+
// Declared as a function so that we don't run this until getCommands is called,
// since underlying functions read from config, which can't be read at module initialization time
const COMMANDS = memoize((): Command[] => [
  addDir,
  advisor,
  agents,
  branch,
  btw,
  chrome,
  clear,
  color,
  compact,
  config,
  copy,
  desktop,
  context,
  contextNonInteractive,
  cost,
  diff,
  doctor,
  effort,
  exit,
  fast,
  files,
  heapDump,
  help,
  ide,
  init,
  keybindings,
  installGitHubApp,
  installSlackApp,
  mcp,
  memory,
  mobile,
  model,
  outputStyle,
  remoteEnv,
  plugin,
  pr_comments,
  releaseNotes,
  reloadPlugins,
  rename,
  resume,
  session,
  skills,
  stats,
  status,
  statusline,
  stickers,
  tag,
  theme,
  feedback,
  review,
  ultrareview,
  rewind,
  securityReview,
  terminalSetup,
  upgrade,
  extraUsage,
  extraUsageNonInteractive,
  rateLimitOptions,
  usage,
  usageReport,
  vim,
  // Feature-gated commands: each of these consts is null when its feature
  // flag is off, so the spread contributes nothing in that case.
  ...(webCmd ? [webCmd] : []),
  ...(forkCmd ? [forkCmd] : []),
  ...(buddy ? [buddy] : []),
  ...(proactive ? [proactive] : []),
  ...(briefCommand ? [briefCommand] : []),
  ...(assistantCommand ? [assistantCommand] : []),
  ...(bridge ? [bridge] : []),
  ...(remoteControlServerCommand ? [remoteControlServerCommand] : []),
  ...(voiceCommand ? [voiceCommand] : []),
  thinkback,
  thinkbackPlay,
  permissions,
  plan,
  privacySettings,
  hooks,
  exportCommand,
  sandboxToggle,
  // Auth commands are omitted when routing through third-party services.
  // Note login() is a factory call, evaluated at list-build time.
  ...(!isUsing3PServices() ? [logout, login()] : []),
  passes,
  ...(peersCmd ? [peersCmd] : []),
  tasks,
  ...(workflowsCmd ? [workflowsCmd] : []),
  ...(torch ? [torch] : []),
  // Internal-only commands: ant users only, and never in demo mode.
  ...(process.env.USER_TYPE === 'ant' && !process.env.IS_DEMO
    ? INTERNAL_ONLY_COMMANDS
    : []),
])
+
+export const builtInCommandNames = memoize(
+  (): Set<string> =>
+    new Set(COMMANDS().flatMap(_ => [_.name, ...(_.aliases ?? [])])),
+)
+
+async function getSkills(cwd: string): Promise<{
+  skillDirCommands: Command[]
+  pluginSkills: Command[]
+  bundledSkills: Command[]
+  builtinPluginSkills: Command[]
+}> {
+  try {
+    const [skillDirCommands, pluginSkills] = await Promise.all([
+      getSkillDirCommands(cwd).catch(err => {
+        logError(toError(err))
+        logForDebugging(
+          'Skill directory commands failed to load, continuing without them',
+        )
+        return []
+      }),
+      getPluginSkills().catch(err => {
+        logError(toError(err))
+        logForDebugging('Plugin skills failed to load, continuing without them')
+        return []
+      }),
+    ])
+    // Bundled skills are registered synchronously at startup
+    const bundledSkills = getBundledSkills()
+    // Built-in plugin skills come from enabled built-in plugins
+    const builtinPluginSkills = getBuiltinPluginSkillCommands()
+    logForDebugging(
+      `getSkills returning: ${skillDirCommands.length} skill dir commands, ${pluginSkills.length} plugin skills, ${bundledSkills.length} bundled skills, ${builtinPluginSkills.length} builtin plugin skills`,
+    )
+    return {
+      skillDirCommands,
+      pluginSkills,
+      bundledSkills,
+      builtinPluginSkills,
+    }
+  } catch (err) {
+    // This should never happen since we catch at the Promise level, but defensive
+    logError(toError(err))
+    logForDebugging('Unexpected error in getSkills, returning empty')
+    return {
+      skillDirCommands: [],
+      pluginSkills: [],
+      bundledSkills: [],
+      builtinPluginSkills: [],
+    }
+  }
+}
+
// Conditional require (instead of a static import) so the bundler's dead
// code elimination can drop the WorkflowTool module entirely when the
// WORKFLOW_SCRIPTS feature flag is compiled off.
/* eslint-disable @typescript-eslint/no-require-imports */
const getWorkflowCommands = feature('WORKFLOW_SCRIPTS')
  ? (
      require('./tools/WorkflowTool/createWorkflowCommand.js') as typeof import('./tools/WorkflowTool/createWorkflowCommand.js')
    ).getWorkflowCommands
  : null
/* eslint-enable @typescript-eslint/no-require-imports */
+
+/**
+ * Filters commands by their declared `availability` (auth/provider requirement).
+ * Commands without `availability` are treated as universal.
+ * This runs before `isEnabled()` so that provider-gated commands are hidden
+ * regardless of feature-flag state.
+ *
+ * Not memoized — auth state can change mid-session (e.g. after /login),
+ * so this must be re-evaluated on every getCommands() call.
+ */
+export function meetsAvailabilityRequirement(cmd: Command): boolean {
+  if (!cmd.availability) return true
+  for (const a of cmd.availability) {
+    switch (a) {
+      case 'claude-ai':
+        if (isClaudeAISubscriber()) return true
+        break
+      case 'console':
+        // Console API key user = direct 1P API customer (not 3P, not claude.ai).
+        // Excludes 3P (Bedrock/Vertex/Foundry) who don't set ANTHROPIC_BASE_URL
+        // and gateway users who proxy through a custom base URL.
+        if (
+          !isClaudeAISubscriber() &&
+          !isUsing3PServices() &&
+          isFirstPartyAnthropicBaseUrl()
+        )
+          return true
+        break
+      default: {
+        const _exhaustive: never = a
+        void _exhaustive
+        break
+      }
+    }
+  }
+  return false
+}
+
+/**
+ * Loads all command sources (skills, plugins, workflows). Memoized by cwd
+ * because loading is expensive (disk I/O, dynamic imports).
+ */
+const loadAllCommands = memoize(async (cwd: string): Promise<Command[]> => {
+  const [
+    { skillDirCommands, pluginSkills, bundledSkills, builtinPluginSkills },
+    pluginCommands,
+    workflowCommands,
+  ] = await Promise.all([
+    getSkills(cwd),
+    getPluginCommands(),
+    getWorkflowCommands ? getWorkflowCommands(cwd) : Promise.resolve([]),
+  ])
+
+  return [
+    ...bundledSkills,
+    ...builtinPluginSkills,
+    ...skillDirCommands,
+    ...workflowCommands,
+    ...pluginCommands,
+    ...pluginSkills,
+    ...COMMANDS(),
+  ]
+})
+
+/**
+ * Returns commands available to the current user. The expensive loading is
+ * memoized, but availability and isEnabled checks run fresh every call so
+ * auth changes (e.g. /login) take effect immediately.
+ */
+export async function getCommands(cwd: string): Promise<Command[]> {
+  const allCommands = await loadAllCommands(cwd)
+
+  // Get dynamic skills discovered during file operations
+  const dynamicSkills = getDynamicSkills()
+
+  // Build base commands without dynamic skills
+  const baseCommands = allCommands.filter(
+    _ => meetsAvailabilityRequirement(_) && isCommandEnabled(_),
+  )
+
+  if (dynamicSkills.length === 0) {
+    return baseCommands
+  }
+
+  // Dedupe dynamic skills - only add if not already present
+  const baseCommandNames = new Set(baseCommands.map(c => c.name))
+  const uniqueDynamicSkills = dynamicSkills.filter(
+    s =>
+      !baseCommandNames.has(s.name) &&
+      meetsAvailabilityRequirement(s) &&
+      isCommandEnabled(s),
+  )
+
+  if (uniqueDynamicSkills.length === 0) {
+    return baseCommands
+  }
+
+  // Insert dynamic skills after plugin skills but before built-in commands
+  const builtInNames = new Set(COMMANDS().map(c => c.name))
+  const insertIndex = baseCommands.findIndex(c => builtInNames.has(c.name))
+
+  if (insertIndex === -1) {
+    return [...baseCommands, ...uniqueDynamicSkills]
+  }
+
+  return [
+    ...baseCommands.slice(0, insertIndex),
+    ...uniqueDynamicSkills,
+    ...baseCommands.slice(insertIndex),
+  ]
+}
+
/**
 * Clears only the memoization caches for commands, WITHOUT clearing skill caches.
 * Use this when dynamic skills are added to invalidate cached command lists.
 */
export function clearCommandMemoizationCaches(): void {
  // NOTE(review): optional chaining here is defensive — lodash memoize
  // normally always exposes `.cache` with `.clear()`; confirm whether it
  // can actually be absent before simplifying.
  loadAllCommands.cache?.clear?.()
  getSkillToolCommands.cache?.clear?.()
  getSlashCommandToolSkills.cache?.clear?.()
  // getSkillIndex in skillSearch/localSearch.ts is a separate memoization layer
  // built ON TOP of getSkillToolCommands/getCommands. Clearing only the inner
  // caches is a no-op for the outer — lodash memoize returns the cached result
  // without ever reaching the cleared inners. Must clear it explicitly.
  // (clearSkillIndexCache is null unless EXPERIMENTAL_SKILL_SEARCH is on.)
  clearSkillIndexCache?.()
}
+
/**
 * Full cache reset: clears the command memoization caches plus the
 * underlying plugin and skill caches, so the next getCommands() call
 * reloads everything from disk.
 */
export function clearCommandsCache(): void {
  clearCommandMemoizationCaches()
  clearPluginCommandCache()
  clearPluginSkillsCache()
  clearSkillCaches()
}
+
+/**
+ * Filter AppState.mcp.commands to MCP-provided skills (prompt-type,
+ * model-invocable, loaded from MCP). These live outside getCommands() so
+ * callers that need MCP skills in their skill index thread them through
+ * separately.
+ */
+export function getMcpSkillCommands(
+  mcpCommands: readonly Command[],
+): readonly Command[] {
+  if (feature('MCP_SKILLS')) {
+    return mcpCommands.filter(
+      cmd =>
+        cmd.type === 'prompt' &&
+        cmd.loadedFrom === 'mcp' &&
+        !cmd.disableModelInvocation,
+    )
+  }
+  return []
+}
+
+// SkillTool shows ALL prompt-based commands that the model can invoke
+// This includes both skills (from /skills/) and commands (from /commands/)
+export const getSkillToolCommands = memoize(
+  async (cwd: string): Promise<Command[]> => {
+    const allCommands = await getCommands(cwd)
+    return allCommands.filter(
+      cmd =>
+        cmd.type === 'prompt' &&
+        !cmd.disableModelInvocation &&
+        cmd.source !== 'builtin' &&
+        // Always include skills from /skills/ dirs, bundled skills, and legacy /commands/ entries
+        // (they all get an auto-derived description from the first line if frontmatter is missing).
+        // Plugin/MCP commands still require an explicit description to appear in the listing.
+        (cmd.loadedFrom === 'bundled' ||
+          cmd.loadedFrom === 'skills' ||
+          cmd.loadedFrom === 'commands_DEPRECATED' ||
+          cmd.hasUserSpecifiedDescription ||
+          cmd.whenToUse),
+    )
+  },
+)
+
+// Filters commands to include only skills. Skills are commands that provide
+// specialized capabilities for the model to use. They are identified by
+// loadedFrom being 'skills', 'plugin', or 'bundled', or having disableModelInvocation set.
+export const getSlashCommandToolSkills = memoize(
+  async (cwd: string): Promise<Command[]> => {
+    try {
+      const allCommands = await getCommands(cwd)
+      return allCommands.filter(
+        cmd =>
+          cmd.type === 'prompt' &&
+          cmd.source !== 'builtin' &&
+          (cmd.hasUserSpecifiedDescription || cmd.whenToUse) &&
+          (cmd.loadedFrom === 'skills' ||
+            cmd.loadedFrom === 'plugin' ||
+            cmd.loadedFrom === 'bundled' ||
+            cmd.disableModelInvocation),
+      )
+    } catch (error) {
+      logError(toError(error))
+      // Return empty array rather than throwing - skills are non-critical
+      // This prevents skill loading failures from breaking the entire system
+      logForDebugging('Returning empty skills array due to load failure')
+      return []
+    }
+  },
+)
+
/**
 * Commands that are safe to use in remote mode (--remote).
 * These only affect local TUI state and don't depend on local filesystem,
 * git, shell, IDE, MCP, or other local execution context.
 *
 * Membership is checked by object identity against the imported command
 * singletons (see filterCommandsForRemoteMode), not by name.
 *
 * Used in two places:
 * 1. Pre-filtering commands in main.tsx before REPL renders (prevents race with CCR init)
 * 2. Preserving local-only commands in REPL's handleRemoteInit after CCR filters
 */
export const REMOTE_SAFE_COMMANDS: Set<Command> = new Set([
  session, // Shows QR code / URL for remote session
  exit, // Exit the TUI
  clear, // Clear screen
  help, // Show help
  theme, // Change terminal theme
  color, // Change agent color
  vim, // Toggle vim mode
  cost, // Show session cost (local cost tracking)
  usage, // Show usage info
  copy, // Copy last message
  btw, // Quick note
  feedback, // Send feedback
  plan, // Plan mode toggle
  keybindings, // Keybinding management
  statusline, // Status line toggle
  stickers, // Stickers
  mobile, // Mobile QR code
])
+
+/**
+ * Builtin commands of type 'local' that ARE safe to execute when received
+ * over the Remote Control bridge. These produce text output that streams
+ * back to the mobile/web client and have no terminal-only side effects.
+ *
+ * 'local-jsx' commands are blocked by type (they render Ink UI) and
+ * 'prompt' commands are allowed by type (they expand to text sent to the
+ * model) — this set only gates 'local' commands.
+ *
+ * When adding a new 'local' command that should work from mobile, add it
+ * here. Default is blocked.
+ */
+export const BRIDGE_SAFE_COMMANDS: Set<Command> = new Set(
+  [
+    compact, // Shrink context — useful mid-session from a phone
+    clear, // Wipe transcript
+    cost, // Show session cost
+    summary, // Summarize conversation
+    releaseNotes, // Show changelog
+    files, // List tracked files
+  ].filter((c): c is Command => c !== null),
+)
+
+/**
+ * Whether a slash command is safe to execute when its input arrived over the
+ * Remote Control bridge (mobile/web client).
+ *
+ * PR #19134 blanket-blocked all slash commands from bridge inbound because
+ * `/model` from iOS was popping the local Ink picker. This predicate relaxes
+ * that with an explicit allowlist: 'prompt' commands (skills) expand to text
+ * and are safe by construction; 'local' commands need an explicit opt-in via
+ * BRIDGE_SAFE_COMMANDS; 'local-jsx' commands render Ink UI and stay blocked.
+ */
+export function isBridgeSafeCommand(cmd: Command): boolean {
+  if (cmd.type === 'local-jsx') return false
+  if (cmd.type === 'prompt') return true
+  return BRIDGE_SAFE_COMMANDS.has(cmd)
+}
+
+/**
+ * Filter commands to only include those safe for remote mode.
+ * Used to pre-filter commands when rendering the REPL in --remote mode,
+ * preventing local-only commands from being briefly available before
+ * the CCR init message arrives.
+ */
+export function filterCommandsForRemoteMode(commands: Command[]): Command[] {
+  return commands.filter(cmd => REMOTE_SAFE_COMMANDS.has(cmd))
+}
+
+export function findCommand(
+  commandName: string,
+  commands: Command[],
+): Command | undefined {
+  return commands.find(
+    _ =>
+      _.name === commandName ||
+      getCommandName(_) === commandName ||
+      _.aliases?.includes(commandName),
+  )
+}
+
+export function hasCommand(commandName: string, commands: Command[]): boolean {
+  return findCommand(commandName, commands) !== undefined
+}
+
+export function getCommand(commandName: string, commands: Command[]): Command {
+  const command = findCommand(commandName, commands)
+  if (!command) {
+    throw ReferenceError(
+      `Command ${commandName} not found. Available commands: ${commands
+        .map(_ => {
+          const name = getCommandName(_)
+          return _.aliases ? `${name} (aliases: ${_.aliases.join(', ')})` : name
+        })
+        .sort((a, b) => a.localeCompare(b))
+        .join(', ')}`,
+    )
+  }
+
+  return command
+}
+
+/**
+ * Formats a command's description with its source annotation for user-facing UI.
+ * Use this in typeahead, help screens, and other places where users need to see
+ * where a command comes from.
+ *
+ * For model-facing prompts (like SkillTool), use cmd.description directly.
+ */
+export function formatDescriptionWithSource(cmd: Command): string {
+  if (cmd.type !== 'prompt') {
+    return cmd.description
+  }
+
+  if (cmd.kind === 'workflow') {
+    return `${cmd.description} (workflow)`
+  }
+
+  if (cmd.source === 'plugin') {
+    const pluginName = cmd.pluginInfo?.pluginManifest.name
+    if (pluginName) {
+      return `(${pluginName}) ${cmd.description}`
+    }
+    return `${cmd.description} (plugin)`
+  }
+
+  if (cmd.source === 'builtin' || cmd.source === 'mcp') {
+    return cmd.description
+  }
+
+  if (cmd.source === 'bundled') {
+    return `${cmd.description} (bundled)`
+  }
+
+  return `${cmd.description} (${getSettingSourceName(cmd.source)})`
+}

Різницю між файлами не показано, бо вона завелика
+ 125 - 0
src/commands/add-dir/add-dir.tsx


+ 11 - 0
src/commands/add-dir/index.ts

@@ -0,0 +1,11 @@
+import type { Command } from '../../commands.js'
+
// /add-dir: lazy local-jsx command — the UI module is only imported when
// the user actually runs it (see load()).
const addDir = {
  type: 'local-jsx',
  name: 'add-dir',
  description: 'Add a new working directory',
  argumentHint: '<path>',
  load: () => import('./add-dir.js'),
} satisfies Command

export default addDir

+ 110 - 0
src/commands/add-dir/validation.ts

@@ -0,0 +1,110 @@
+import chalk from 'chalk'
+import { stat } from 'fs/promises'
+import { dirname, resolve } from 'path'
+import type { ToolPermissionContext } from '../../Tool.js'
+import { getErrnoCode } from '../../utils/errors.js'
+import { expandPath } from '../../utils/path.js'
+import {
+  allWorkingDirectories,
+  pathInWorkingPath,
+} from '../../utils/permissions/filesystem.js'
+
/**
 * Outcome of validating a candidate working directory.
 * Discriminated on `resultType`; addDirHelpMessage maps each variant to
 * user-facing text.
 */
export type AddDirectoryResult =
  | {
      resultType: 'success'
      absolutePath: string
    }
  | {
      // The user supplied no path at all.
      resultType: 'emptyPath'
    }
  | {
      // Path is missing/inaccessible, or exists but is not a directory.
      resultType: 'pathNotFound' | 'notADirectory'
      directoryPath: string
      absolutePath: string
    }
  | {
      // Path is already reachable through an existing working directory.
      resultType: 'alreadyInWorkingDirectory'
      directoryPath: string
      workingDir: string
    }
+
+export async function validateDirectoryForWorkspace(
+  directoryPath: string,
+  permissionContext: ToolPermissionContext,
+): Promise<AddDirectoryResult> {
+  if (!directoryPath) {
+    return {
+      resultType: 'emptyPath',
+    }
+  }
+
+  // resolve() strips the trailing slash expandPath can leave on absolute
+  // inputs, so /foo and /foo/ map to the same storage key (CC-33).
+  const absolutePath = resolve(expandPath(directoryPath))
+
+  // Check if path exists and is a directory (single syscall)
+  try {
+    const stats = await stat(absolutePath)
+    if (!stats.isDirectory()) {
+      return {
+        resultType: 'notADirectory',
+        directoryPath,
+        absolutePath,
+      }
+    }
+  } catch (e: unknown) {
+    const code = getErrnoCode(e)
+    // Match prior existsSync() semantics: treat any of these as "not found"
+    // rather than re-throwing. EACCES/EPERM in particular must not crash
+    // startup when a settings-configured additional directory is inaccessible.
+    if (
+      code === 'ENOENT' ||
+      code === 'ENOTDIR' ||
+      code === 'EACCES' ||
+      code === 'EPERM'
+    ) {
+      return {
+        resultType: 'pathNotFound',
+        directoryPath,
+        absolutePath,
+      }
+    }
+    throw e
+  }
+
+  // Get current permission context
+  const currentWorkingDirs = allWorkingDirectories(permissionContext)
+
+  // Check if already within an existing working directory
+  for (const workingDir of currentWorkingDirs) {
+    if (pathInWorkingPath(absolutePath, workingDir)) {
+      return {
+        resultType: 'alreadyInWorkingDirectory',
+        directoryPath,
+        workingDir,
+      }
+    }
+  }
+
+  return {
+    resultType: 'success',
+    absolutePath,
+  }
+}
+
+export function addDirHelpMessage(result: AddDirectoryResult): string {
+  switch (result.resultType) {
+    case 'emptyPath':
+      return 'Please provide a directory path.'
+    case 'pathNotFound':
+      return `Path ${chalk.bold(result.absolutePath)} was not found.`
+    case 'notADirectory': {
+      const parentDir = dirname(result.absolutePath)
+      return `${chalk.bold(result.directoryPath)} is not a directory. Did you mean to add the parent directory ${chalk.bold(parentDir)}?`
+    }
+    case 'alreadyInWorkingDirectory':
+      return `${chalk.bold(result.directoryPath)} is already accessible within the existing working directory ${chalk.bold(result.workingDir)}.`
+    case 'success':
+      return `Added ${chalk.bold(result.absolutePath)} as a working directory.`
+  }
+}

+ 109 - 0
src/commands/advisor.ts

@@ -0,0 +1,109 @@
+import type { Command } from '../commands.js'
+import type { LocalCommandCall } from '../types/command.js'
+import {
+  canUserConfigureAdvisor,
+  isValidAdvisorModel,
+  modelSupportsAdvisor,
+} from '../utils/advisor.js'
+import {
+  getDefaultMainLoopModelSetting,
+  normalizeModelStringForAPI,
+  parseUserSpecifiedModel,
+} from '../utils/model/model.js'
+import { validateModel } from '../utils/model/validateModel.js'
+import { updateSettingsForSource } from '../utils/settings/settings.js'
+
/**
 * Handler for /advisor.
 * - no argument: report the current advisor model and whether it is active
 * - "unset" / "off": clear the advisor from app state and user settings
 * - anything else: validate the given model and persist it as the advisor
 */
const call: LocalCommandCall = async (args, context) => {
  const arg = args.trim().toLowerCase()
  // Whether the advisor is actually active depends on the main-loop model.
  const baseModel = parseUserSpecifiedModel(
    context.getAppState().mainLoopModel ?? getDefaultMainLoopModelSetting(),
  )

  // No argument: status report only, no state changes.
  if (!arg) {
    const current = context.getAppState().advisorModel
    if (!current) {
      return {
        type: 'text',
        value:
          'Advisor: not set\nUse "/advisor <model>" to enable (e.g. "/advisor opus").',
      }
    }
    if (!modelSupportsAdvisor(baseModel)) {
      return {
        type: 'text',
        value: `Advisor: ${current} (inactive)\nThe current model (${baseModel}) does not support advisors.`,
      }
    }
    return {
      type: 'text',
      value: `Advisor: ${current}\nUse "/advisor unset" to disable or "/advisor <model>" to change.`,
    }
  }

  if (arg === 'unset' || arg === 'off') {
    const prev = context.getAppState().advisorModel
    // Returning the same state object when already unset avoids a no-op update.
    context.setAppState(s => {
      if (s.advisorModel === undefined) return s
      return { ...s, advisorModel: undefined }
    })
    // Persist so the advisor stays unset across sessions.
    updateSettingsForSource('userSettings', { advisorModel: undefined })
    return {
      type: 'text',
      value: prev
        ? `Advisor disabled (was ${prev}).`
        : 'Advisor already unset.',
    }
  }

  // Normalized form is what gets stored; resolved form is what we validate.
  const normalizedModel = normalizeModelStringForAPI(arg)
  const resolvedModel = parseUserSpecifiedModel(arg)
  const { valid, error } = await validateModel(resolvedModel)
  if (!valid) {
    return {
      type: 'text',
      value: error
        ? `Invalid advisor model: ${error}`
        : `Unknown model: ${arg} (${resolvedModel})`,
    }
  }

  if (!isValidAdvisorModel(resolvedModel)) {
    return {
      type: 'text',
      value: `The model ${arg} (${resolvedModel}) cannot be used as an advisor`,
    }
  }

  // Valid: store in app state (skipping no-op updates) and persist.
  context.setAppState(s => {
    if (s.advisorModel === normalizedModel) return s
    return { ...s, advisorModel: normalizedModel }
  })
  updateSettingsForSource('userSettings', { advisorModel: normalizedModel })

  // Saved either way; warn when the current base model can't use it yet.
  if (!modelSupportsAdvisor(baseModel)) {
    return {
      type: 'text',
      value: `Advisor set to ${normalizedModel}.\nNote: Your current model (${baseModel}) does not support advisors. Switch to a supported model to use the advisor.`,
    }
  }

  return {
    type: 'text',
    value: `Advisor set to ${normalizedModel}.`,
  }
}
+
// /advisor command definition. Enablement and visibility both track
// canUserConfigureAdvisor(); isHidden is a getter so it is re-evaluated
// at read time rather than frozen at module load.
const advisor = {
  type: 'local',
  name: 'advisor',
  description: 'Configure the advisor model',
  argumentHint: '[<model>|off]',
  isEnabled: () => canUserConfigureAdvisor(),
  get isHidden() {
    return !canUserConfigureAdvisor()
  },
  supportsNonInteractive: true,
  load: () => Promise.resolve({ call }),
} satisfies Command

export default advisor

+ 12 - 0
src/commands/agents/agents.tsx

@@ -0,0 +1,12 @@
+import * as React from 'react';
+import { AgentsMenu } from '../../components/agents/AgentsMenu.js';
+import type { ToolUseContext } from '../../Tool.js';
+import { getTools } from '../../tools.js';
+import type { LocalJSXCommandOnDone } from '../../types/command.js';
+export async function call(onDone: LocalJSXCommandOnDone, context: ToolUseContext): Promise<React.ReactNode> {
+  const appState = context.getAppState();
+  const permissionContext = appState.toolPermissionContext;
+  const tools = getTools(permissionContext);
+  return <AgentsMenu tools={tools} onExit={onDone} />;
+}
+//# sourceMappingURL=data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJuYW1lcyI6WyJSZWFjdCIsIkFnZW50c01lbnUiLCJUb29sVXNlQ29udGV4dCIsImdldFRvb2xzIiwiTG9jYWxKU1hDb21tYW5kT25Eb25lIiwiY2FsbCIsIm9uRG9uZSIsImNvbnRleHQiLCJQcm9taXNlIiwiUmVhY3ROb2RlIiwiYXBwU3RhdGUiLCJnZXRBcHBTdGF0ZSIsInBlcm1pc3Npb25Db250ZXh0IiwidG9vbFBlcm1pc3Npb25Db250ZXh0IiwidG9vbHMiXSwic291cmNlcyI6WyJhZ2VudHMudHN4Il0sInNvdXJjZXNDb250ZW50IjpbImltcG9ydCAqIGFzIFJlYWN0IGZyb20gJ3JlYWN0J1xuaW1wb3J0IHsgQWdlbnRzTWVudSB9IGZyb20gJy4uLy4uL2NvbXBvbmVudHMvYWdlbnRzL0FnZW50c01lbnUuanMnXG5pbXBvcnQgdHlwZSB7IFRvb2xVc2VDb250ZXh0IH0gZnJvbSAnLi4vLi4vVG9vbC5qcydcbmltcG9ydCB7IGdldFRvb2xzIH0gZnJvbSAnLi4vLi4vdG9vbHMuanMnXG5pbXBvcnQgdHlwZSB7IExvY2FsSlNYQ29tbWFuZE9uRG9uZSB9IGZyb20gJy4uLy4uL3R5cGVzL2NvbW1hbmQuanMnXG5cbmV4cG9ydCBhc3luYyBmdW5jdGlvbiBjYWxsKFxuICBvbkRvbmU6IExvY2FsSlNYQ29tbWFuZE9uRG9uZSxcbiAgY29udGV4dDogVG9vbFVzZUNvbnRleHQsXG4pOiBQcm9taXNlPFJlYWN0LlJlYWN0Tm9kZT4ge1xuICBjb25zdCBhcHBTdGF0ZSA9IGNvbnRleHQuZ2V0QXBwU3RhdGUoKVxuICBjb25zdCBwZXJtaXNzaW9uQ29udGV4dCA9IGFwcFN0YXRlLnRvb2xQZXJtaXNzaW9uQ29udGV4dFxuICBjb25zdCB0b29scyA9IGdldFRvb2xzKHBlcm1pc3Npb25Db250ZXh0KVxuXG4gIHJldHVybiA8QWdlbnRzTWVudSB0b29scz17dG9vbHN9IG9uRXhpdD17b25Eb25lfSAvPlxufVxuIl0sIm1hcHBpbmdzIjoiQUFBQSxPQUFPLEtBQUtBLEtBQUssTUFBTSxPQUFPO0FBQzlCLFNBQVNDLFVBQVUsUUFBUSx1Q0FBdUM7QUFDbEUsY0FBY0MsY0FBYyxRQUFRLGVBQWU7QUFDbkQsU0FBU0MsUUFBUSxRQUFRLGdCQUFnQjtBQUN6QyxjQUFjQyxxQkFBcUIsUUFBUSx3QkFBd0I7QUFFbkUsT0FBTyxlQUFlQyxJQUFJQSxDQUN4QkMsTUFBTSxFQUFFRixxQkFBcUIsRUFDN0JHLE9BQU8sRUFBRUwsY0FBYyxDQUN4QixFQUFFTSxPQUFPLENBQUNSLEtBQUssQ0FBQ1MsU0FBUyxDQUFDLENBQUM7RUFDMUIsTUFBTUMsUUFBUSxHQUFHSCxPQUFPLENBQUNJLFdBQVcsQ0FBQyxDQUFDO0VBQ3RDLE1BQU1DLGlCQUFpQixHQUFHRixRQUFRLENBQUNHLHFCQUFxQjtFQUN4RCxNQUFNQyxLQUFLLEdBQUdYLFFBQVEsQ0FBQ1MsaUJBQWlCLENBQUM7RUFFekMsT0FBTyxDQUFDLFVBQVUsQ0FBQyxLQUFLLENBQUMsQ0FBQ0UsS0FBSyxDQUFDLENBQUMsTUFBTSxDQUFDLENBQUNSLE1BQU0sQ0FBQyxHQUFHO0FBQ3JEIiwiaWdub3JlTGlzdCI6W119

+ 10 - 0
src/commands/agents/index.ts

@@ -0,0 +1,10 @@
+import type { Command } from '../../commands.js'
+
+// /agents — opens the interactive agent-configuration menu.
+// The JSX implementation is lazy-loaded so the menu's React tree is not paid
+// for at startup.
+const agents = {
+  type: 'local-jsx',
+  name: 'agents',
+  description: 'Manage agent configurations',
+  load: () => import('./agents.js'),
+} satisfies Command
+
+export default agents

+ 1 - 0
src/commands/ant-trace/index.js

@@ -0,0 +1 @@
+// Stub: command compiled out of this build — never enabled, never listed.
+export default { isEnabled: () => false, isHidden: true, name: 'stub' };

+ 1 - 0
src/commands/autofix-pr/index.js

@@ -0,0 +1 @@
+// Stub: command compiled out of this build — never enabled, never listed.
+export default { isEnabled: () => false, isHidden: true, name: 'stub' };

+ 1 - 0
src/commands/backfill-sessions/index.js

@@ -0,0 +1 @@
+// Stub: command compiled out of this build — never enabled, never listed.
+export default { isEnabled: () => false, isHidden: true, name: 'stub' };

+ 296 - 0
src/commands/branch/branch.ts

@@ -0,0 +1,296 @@
+import { randomUUID, type UUID } from 'crypto'
+import { mkdir, readFile, writeFile } from 'fs/promises'
+import { getOriginalCwd, getSessionId } from '../../bootstrap/state.js'
+import type { LocalJSXCommandContext } from '../../commands.js'
+import { logEvent } from '../../services/analytics/index.js'
+import type { LocalJSXCommandOnDone } from '../../types/command.js'
+import type {
+  ContentReplacementEntry,
+  Entry,
+  LogOption,
+  SerializedMessage,
+  TranscriptMessage,
+} from '../../types/logs.js'
+import { parseJSONL } from '../../utils/json.js'
+import {
+  getProjectDir,
+  getTranscriptPath,
+  getTranscriptPathForSession,
+  isTranscriptMessage,
+  saveCustomTitle,
+  searchSessionsByCustomTitle,
+} from '../../utils/sessionStorage.js'
+import { jsonStringify } from '../../utils/slowOperations.js'
+import { escapeRegExp } from '../../utils/stringUtils.js'
+
+// Transcript entry extended with fork provenance: which session and message
+// this entry was branched from. Written by createFork for traceability.
+type TranscriptEntry = TranscriptMessage & {
+  forkedFrom?: {
+    sessionId: string
+    messageUuid: UUID
+  }
+}
+
+/**
+ * Derive a single-line title base from the first user message.
+ * Whitespace is collapsed because a multiline first message (pasted stack
+ * trace, code) would otherwise flow into the saved title and break the
+ * resume hint. Falls back to 'Branched conversation' when no usable text
+ * exists.
+ */
+export function deriveFirstPrompt(
+  firstUserMessage: Extract<SerializedMessage, { type: 'user' }> | undefined,
+): string {
+  const fallback = 'Branched conversation'
+
+  const content = firstUserMessage?.message?.content
+  if (!content) return fallback
+
+  // Content is either a plain string or an array of blocks; take the first
+  // text block in the array case.
+  let raw: string | undefined
+  if (typeof content === 'string') {
+    raw = content
+  } else {
+    const textBlock = content.find(
+      (block): block is { type: 'text'; text: string } =>
+        block.type === 'text',
+    )
+    raw = textBlock?.text
+  }
+  if (!raw) return fallback
+
+  const singleLine = raw.replace(/\s+/g, ' ').trim().slice(0, 100)
+  return singleLine || fallback
+}
+
+/**
+ * Creates a fork of the current conversation by copying from the transcript file.
+ * Preserves all original metadata (timestamps, gitBranch, etc.) while updating
+ * sessionId and adding forkedFrom traceability.
+ *
+ * @param customTitle - optional user-supplied title; surfaced unchanged as
+ *   `title` in the result (uniqueness/suffixing is the caller's job).
+ * @returns the fork's session id and transcript path, the rewritten messages,
+ *   and any carried-over content-replacement records.
+ * @throws Error('No conversation to branch') when the current transcript is
+ *   missing or empty.
+ * @throws Error('No messages to branch') when the transcript holds no
+ *   main-conversation messages.
+ */
+async function createFork(customTitle?: string): Promise<{
+  sessionId: UUID
+  title: string | undefined
+  forkPath: string
+  serializedMessages: SerializedMessage[]
+  contentReplacementRecords: ContentReplacementEntry['replacements']
+}> {
+  const forkSessionId = randomUUID() as UUID
+  const originalSessionId = getSessionId()
+  const projectDir = getProjectDir(getOriginalCwd())
+  const forkSessionPath = getTranscriptPathForSession(forkSessionId)
+  const currentTranscriptPath = getTranscriptPath()
+
+  // Ensure project directory exists
+  await mkdir(projectDir, { recursive: true, mode: 0o700 })
+
+  // Read current transcript file
+  let transcriptContent: Buffer
+  try {
+    transcriptContent = await readFile(currentTranscriptPath)
+  } catch {
+    throw new Error('No conversation to branch')
+  }
+
+  if (transcriptContent.length === 0) {
+    throw new Error('No conversation to branch')
+  }
+
+  // Parse all transcript entries (messages + metadata entries like content-replacement)
+  const entries = parseJSONL<Entry>(transcriptContent)
+
+  // Filter to only main conversation messages (exclude sidechains and non-message entries)
+  const mainConversationEntries = entries.filter(
+    (entry): entry is TranscriptMessage =>
+      isTranscriptMessage(entry) && !entry.isSidechain,
+  )
+
+  // Content-replacement entries for the original session. These record which
+  // tool_result blocks were replaced with previews by the per-message budget.
+  // Without them in the fork JSONL, `claude -r {forkId}` reconstructs state
+  // with an empty replacements Map → previously-replaced results are classified
+  // as FROZEN and sent as full content (prompt cache miss + permanent overage).
+  // sessionId must be rewritten since loadTranscriptFile keys lookup by the
+  // session's messages' sessionId.
+  const contentReplacementRecords = entries
+    .filter(
+      (entry): entry is ContentReplacementEntry =>
+        entry.type === 'content-replacement' &&
+        entry.sessionId === originalSessionId,
+    )
+    .flatMap(entry => entry.replacements)
+
+  if (mainConversationEntries.length === 0) {
+    throw new Error('No messages to branch')
+  }
+
+  // Build forked entries with new sessionId and preserved metadata
+  let parentUuid: UUID | null = null
+  const lines: string[] = []
+  const serializedMessages: SerializedMessage[] = []
+
+  for (const entry of mainConversationEntries) {
+    // Create forked transcript entry preserving all original metadata
+    const forkedEntry: TranscriptEntry = {
+      ...entry,
+      sessionId: forkSessionId,
+      parentUuid,
+      isSidechain: false,
+      forkedFrom: {
+        sessionId: originalSessionId,
+        messageUuid: entry.uuid,
+      },
+    }
+
+    // Build serialized message for LogOption
+    const serialized: SerializedMessage = {
+      ...entry,
+      sessionId: forkSessionId,
+    }
+
+    serializedMessages.push(serialized)
+    lines.push(jsonStringify(forkedEntry))
+    // Progress entries are written out but never become a parent in the chain.
+    if (entry.type !== 'progress') {
+      parentUuid = entry.uuid
+    }
+  }
+
+  // Append content-replacement entry (if any) with the fork's sessionId.
+  // Written as a SINGLE entry (same shape as insertContentReplacement) so
+  // loadTranscriptFile's content-replacement branch picks it up.
+  if (contentReplacementRecords.length > 0) {
+    const forkedReplacementEntry: ContentReplacementEntry = {
+      type: 'content-replacement',
+      sessionId: forkSessionId,
+      replacements: contentReplacementRecords,
+    }
+    lines.push(jsonStringify(forkedReplacementEntry))
+  }
+
+  // Write the fork session file
+  await writeFile(forkSessionPath, lines.join('\n') + '\n', {
+    encoding: 'utf8',
+    mode: 0o600,
+  })
+
+  return {
+    sessionId: forkSessionId,
+    title: customTitle,
+    forkPath: forkSessionPath,
+    serializedMessages,
+    contentReplacementRecords,
+  }
+}
+
+/**
+ * Generates a unique fork name by checking for collisions with existing session names.
+ * If "baseName (Branch)" already exists, tries "baseName (Branch 2)", "baseName (Branch 3)", etc.
+ */
+async function getUniqueForkName(baseName: string): Promise<string> {
+  const candidateName = `${baseName} (Branch)`
+
+  // Check if this exact name already exists
+  const existingWithExactName = await searchSessionsByCustomTitle(
+    candidateName,
+    { exact: true },
+  )
+
+  if (existingWithExactName.length === 0) {
+    return candidateName
+  }
+
+  // Name collision - find a unique numbered suffix
+  // Search for all sessions that start with the base pattern
+  const existingForks = await searchSessionsByCustomTitle(`${baseName} (Branch`)
+
+  // Extract existing fork numbers to find the next available
+  const usedNumbers = new Set<number>([1]) // Consider " (Branch)" as number 1
+  const forkNumberPattern = new RegExp(
+    `^${escapeRegExp(baseName)} \\(Branch(?: (\\d+))?\\)$`,
+  )
+
+  for (const session of existingForks) {
+    const match = session.customTitle?.match(forkNumberPattern)
+    if (match) {
+      if (match[1]) {
+        usedNumbers.add(parseInt(match[1], 10))
+      } else {
+        usedNumbers.add(1) // " (Branch)" without number is treated as 1
+      }
+    }
+  }
+
+  // Find the next available number
+  let nextNumber = 2
+  while (usedNumbers.has(nextNumber)) {
+    nextNumber++
+  }
+
+  return `${baseName} (Branch ${nextNumber})`
+}
+
+/**
+ * /branch implementation: fork the current conversation, persist a unique
+ * custom title, then resume into the fork in place (or print a manual resume
+ * hint when the host context cannot resume).
+ *
+ * @param onDone - completion callback; receives the user-facing message.
+ * @param context - command context; `context.resume` (when present) switches
+ *   the live session into the fork.
+ * @param args - optional custom title for the branch; blank means "derive
+ *   from the first user message".
+ */
+export async function call(
+  onDone: LocalJSXCommandOnDone,
+  context: LocalJSXCommandContext,
+  args: string,
+): Promise<React.ReactNode> {
+  const customTitle = args?.trim() || undefined
+
+  const originalSessionId = getSessionId()
+
+  try {
+    const {
+      sessionId,
+      title,
+      forkPath,
+      serializedMessages,
+      contentReplacementRecords,
+    } = await createFork(customTitle)
+
+    // Build LogOption for resume
+    const now = new Date()
+    const firstPrompt = deriveFirstPrompt(
+      serializedMessages.find(m => m.type === 'user'),
+    )
+
+    // Save custom title - use provided title or firstPrompt as default
+    // This ensures /status and /resume show the same session name
+    // Always add " (Branch)" suffix to make it clear this is a branched session
+    // Handle collisions by adding a number suffix (e.g., " (Branch 2)", " (Branch 3)")
+    const baseName = title ?? firstPrompt
+    const effectiveTitle = await getUniqueForkName(baseName)
+    await saveCustomTitle(sessionId, effectiveTitle, forkPath)
+
+    logEvent('tengu_conversation_forked', {
+      message_count: serializedMessages.length,
+      has_custom_title: !!title,
+    })
+
+    const forkLog: LogOption = {
+      date: now.toISOString().split('T')[0]!,
+      messages: serializedMessages,
+      fullPath: forkPath,
+      value: now.getTime(),
+      created: now,
+      modified: now,
+      firstPrompt,
+      messageCount: serializedMessages.length,
+      isSidechain: false,
+      sessionId,
+      customTitle: effectiveTitle,
+      contentReplacements: contentReplacementRecords,
+    }
+
+    // Resume into the fork
+    const titleInfo = title ? ` "${title}"` : ''
+    const resumeHint = `\nTo resume the original: claude -r ${originalSessionId}`
+    const successMessage = `Branched conversation${titleInfo}. You are now in the branch.${resumeHint}`
+
+    if (context.resume) {
+      await context.resume(sessionId, forkLog, 'fork')
+      onDone(successMessage, { display: 'system' })
+    } else {
+      // Fallback if resume not available
+      onDone(
+        `Branched conversation${titleInfo}. Resume with: /resume ${sessionId}`,
+      )
+    }
+
+    return null
+  } catch (error) {
+    const message =
+      error instanceof Error ? error.message : 'Unknown error occurred'
+    onDone(`Failed to branch conversation: ${message}`)
+    return null
+  }
+}

+ 14 - 0
src/commands/branch/index.ts

@@ -0,0 +1,14 @@
+import { feature } from 'bun:bundle'
+import type { Command } from '../../commands.js'
+
+// /branch — forks the current conversation at this point.
+// `feature` is a bun:bundle compile-time flag check, so the alias list is
+// fixed at build time.
+const branch = {
+  type: 'local-jsx',
+  name: 'branch',
+  // 'fork' alias only when /fork doesn't exist as its own command
+  aliases: feature('FORK_SUBAGENT') ? [] : ['fork'],
+  description: 'Create a branch of the current conversation at this point',
+  argumentHint: '[name]',
+  load: () => import('./branch.js'),
+} satisfies Command
+
+export default branch

+ 1 - 0
src/commands/break-cache/index.js

@@ -0,0 +1 @@
+// Stub: command compiled out of this build — never enabled, never listed.
+export default { isEnabled: () => false, isHidden: true, name: 'stub' };

+ 200 - 0
src/commands/bridge-kick.ts

@@ -0,0 +1,200 @@
+import { getBridgeDebugHandle } from '../bridge/bridgeDebug.js'
+import type { Command } from '../commands.js'
+import type { LocalCommandCall } from '../types/command.js'
+
+/**
+ * Ant-only: inject bridge failure states to manually test recovery paths.
+ *
+ *   /bridge-kick close 1002            — fire ws_closed with code 1002
+ *   /bridge-kick close 1006            — fire ws_closed with code 1006
+ *   /bridge-kick poll 404              — next poll throws 404/not_found_error
+ *   /bridge-kick poll 404 <type>       — next poll throws 404 with error_type
+ *   /bridge-kick poll 401              — next poll throws 401 (auth)
+ *   /bridge-kick poll transient        — next poll throws axios-style rejection
+ *   /bridge-kick register fail         — next register (inside doReconnect) transient-fails
+ *   /bridge-kick register fail 3       — next 3 registers transient-fail
+ *   /bridge-kick register fatal        — next register 403s (terminal)
+ *   /bridge-kick reconnect-session fail — POST /bridge/reconnect fails (→ Strategy 2)
+ *   /bridge-kick heartbeat 401         — next heartbeat 401s (JWT expired)
+ *   /bridge-kick reconnect             — call doReconnect directly (= SIGUSR2)
+ *   /bridge-kick status                — print current bridge state
+ *
+ * Workflow: connect Remote Control, run a subcommand, `tail -f debug.log`
+ * and watch [bridge:repl] / [bridge:debug] lines for the recovery reaction.
+ *
+ * Composite sequences — the failure modes in the BQ data are chains, not
+ * single events. Queue faults then fire the trigger:
+ *
+ *   # #22148 residual: ws_closed → register transient-blips → teardown?
+ *   /bridge-kick register fail 2
+ *   /bridge-kick close 1002
+ *   → expect: doReconnect tries register, fails, returns false → teardown
+ *     (demonstrates the retry gap that needs fixing)
+ *
+ *   # Dead gate: poll 404/not_found_error → does onEnvironmentLost fire?
+ *   /bridge-kick poll 404
+ *   → expect: tengu_bridge_repl_fatal_error (gate is dead — 147K/wk)
+ *     after fix: tengu_bridge_repl_env_lost → doReconnect
+ */
+
+// Help text printed verbatim for unknown subcommands and malformed arguments.
+const USAGE = `/bridge-kick <subcommand>
+  close <code>              fire ws_closed with the given code (e.g. 1002)
+  poll <status> [type]      next poll throws BridgeFatalError(status, type)
+  poll transient            next poll throws axios-style rejection (5xx/net)
+  register fail [N]         next N registers transient-fail (default 1)
+  register fatal            next register 403s (terminal)
+  reconnect-session fail    next POST /bridge/reconnect fails
+  heartbeat <status>        next heartbeat throws BridgeFatalError(status)
+  reconnect                 call reconnectEnvironmentWithSession directly
+  status                    print bridge state`
+
+// Dispatch a /bridge-kick subcommand against the live bridge debug handle.
+// Each case either fires an event immediately or queues a fault that the
+// next matching bridge API call consumes.
+const call: LocalCommandCall = async args => {
+  const h = getBridgeDebugHandle()
+  if (!h) {
+    return {
+      type: 'text',
+      value:
+        'No bridge debug handle registered. Remote Control must be connected (USER_TYPE=ant).',
+    }
+  }
+
+  // Tokenize: subcommand plus up to two positional args; extras are ignored.
+  const [sub, a, b] = args.trim().split(/\s+/)
+
+  switch (sub) {
+    case 'close': {
+      const code = Number(a)
+      if (!Number.isFinite(code)) {
+        return { type: 'text', value: `close: need a numeric code\n${USAGE}` }
+      }
+      h.fireClose(code)
+      return {
+        type: 'text',
+        value: `Fired transport close(${code}). Watch debug.log for [bridge:repl] recovery.`,
+      }
+    }
+
+    case 'poll': {
+      if (a === 'transient') {
+        h.injectFault({
+          method: 'pollForWork',
+          kind: 'transient',
+          status: 503,
+          count: 1,
+        })
+        h.wakePollLoop()
+        return {
+          type: 'text',
+          value:
+            'Next poll will throw a transient (axios rejection). Poll loop woken.',
+        }
+      }
+      const status = Number(a)
+      if (!Number.isFinite(status)) {
+        return {
+          type: 'text',
+          value: `poll: need 'transient' or a status code\n${USAGE}`,
+        }
+      }
+      // Default to what the server ACTUALLY sends for 404 (BQ-verified),
+      // so `/bridge-kick poll 404` reproduces the real 147K/week state.
+      const errorType =
+        b ?? (status === 404 ? 'not_found_error' : 'authentication_error')
+      h.injectFault({
+        method: 'pollForWork',
+        kind: 'fatal',
+        status,
+        errorType,
+        count: 1,
+      })
+      h.wakePollLoop()
+      return {
+        type: 'text',
+        value: `Next poll will throw BridgeFatalError(${status}, ${errorType}). Poll loop woken.`,
+      }
+    }
+
+    case 'register': {
+      if (a === 'fatal') {
+        h.injectFault({
+          method: 'registerBridgeEnvironment',
+          kind: 'fatal',
+          status: 403,
+          errorType: 'permission_error',
+          count: 1,
+        })
+        return {
+          type: 'text',
+          value:
+            'Next registerBridgeEnvironment will 403. Trigger with close/reconnect.',
+        }
+      }
+      // `register fail [N]` — N defaults to 1 (and any non-numeric N becomes 1).
+      const n = Number(b) || 1
+      h.injectFault({
+        method: 'registerBridgeEnvironment',
+        kind: 'transient',
+        status: 503,
+        count: n,
+      })
+      return {
+        type: 'text',
+        value: `Next ${n} registerBridgeEnvironment call(s) will transient-fail. Trigger with close/reconnect.`,
+      }
+    }
+
+    case 'reconnect-session': {
+      h.injectFault({
+        method: 'reconnectSession',
+        kind: 'fatal',
+        status: 404,
+        errorType: 'not_found_error',
+        count: 2,
+      })
+      return {
+        type: 'text',
+        value:
+          'Next 2 POST /bridge/reconnect calls will 404. doReconnect Strategy 1 falls through to Strategy 2.',
+      }
+    }
+
+    case 'heartbeat': {
+      // Non-numeric or missing status falls back to 401 (the common JWT case).
+      const status = Number(a) || 401
+      h.injectFault({
+        method: 'heartbeatWork',
+        kind: 'fatal',
+        status,
+        errorType: status === 401 ? 'authentication_error' : 'not_found_error',
+        count: 1,
+      })
+      return {
+        type: 'text',
+        value: `Next heartbeat will ${status}. Watch for onHeartbeatFatal → work-state teardown.`,
+      }
+    }
+
+    case 'reconnect': {
+      h.forceReconnect()
+      return {
+        type: 'text',
+        value: 'Called reconnectEnvironmentWithSession(). Watch debug.log.',
+      }
+    }
+
+    case 'status': {
+      return { type: 'text', value: h.describe() }
+    }
+
+    default:
+      return { type: 'text', value: USAGE }
+  }
+}
+
+// Ant-only (USER_TYPE=ant) debugging command; supportsNonInteractive is
+// false, so it is only reachable from interactive sessions.
+const bridgeKick = {
+  type: 'local',
+  name: 'bridge-kick',
+  description: 'Inject bridge failure states for manual recovery testing',
+  isEnabled: () => process.env.USER_TYPE === 'ant',
+  supportsNonInteractive: false,
+  load: () => Promise.resolve({ call }),
+} satisfies Command
+
+export default bridgeKick

Різницю між файлами не показано, бо вона завелика
+ 508 - 0
src/commands/bridge/bridge.tsx


+ 26 - 0
src/commands/bridge/index.ts

@@ -0,0 +1,26 @@
+import { feature } from 'bun:bundle'
+import { isBridgeEnabled } from '../../bridge/bridgeEnabled.js'
+import type { Command } from '../../commands.js'
+
+// Enabled only when the BRIDGE_MODE compile-time flag is in the build AND the
+// runtime bridge gate is on.
+// NOTE(review): `feature` is a bun:bundle compile-time check — the
+// early-return shape presumably helps the bundler dead-code-eliminate the
+// bridge path (cf. the COMMIT_ATTRIBUTION comment in clear/caches.ts);
+// confirm before collapsing this into a single boolean expression.
+function isEnabled(): boolean {
+  if (!feature('BRIDGE_MODE')) {
+    return false
+  }
+  return isBridgeEnabled()
+}
+
+// /remote-control (alias /rc). Getter so hidden-state is re-evaluated on
+// every read rather than frozen at module load.
+const bridge = {
+  type: 'local-jsx',
+  name: 'remote-control',
+  aliases: ['rc'],
+  description: 'Connect this terminal for remote-control sessions',
+  argumentHint: '[name]',
+  isEnabled,
+  get isHidden() {
+    return !isEnabled()
+  },
+  immediate: true,
+  load: () => import('./bridge.js'),
+} satisfies Command
+
+export default bridge

+ 130 - 0
src/commands/brief.ts

@@ -0,0 +1,130 @@
+import { feature } from 'bun:bundle'
+import { z } from 'zod/v4'
+import { getKairosActive, setUserMsgOptIn } from '../bootstrap/state.js'
+import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  logEvent,
+} from '../services/analytics/index.js'
+import type { ToolUseContext } from '../Tool.js'
+import { isBriefEntitled } from '../tools/BriefTool/BriefTool.js'
+import { BRIEF_TOOL_NAME } from '../tools/BriefTool/prompt.js'
+import type {
+  Command,
+  LocalJSXCommandContext,
+  LocalJSXCommandOnDone,
+} from '../types/command.js'
+import { lazySchema } from '../utils/lazySchema.js'
+
+// Zod guards against fat-fingered GB pushes (same pattern as pollConfig.ts /
+// cronScheduler.ts). A malformed config falls back to DEFAULT_BRIEF_CONFIG
+// entirely rather than being partially trusted.
+const briefConfigSchema = lazySchema(() =>
+  z.object({
+    enable_slash_command: z.boolean(),
+  }),
+)
+// Inferred config shape; the schema is lazily constructed, hence ReturnType.
+type BriefConfig = z.infer<ReturnType<typeof briefConfigSchema>>
+
+// Shipped default: the slash command stays hidden unless GB enables it.
+const DEFAULT_BRIEF_CONFIG: BriefConfig = {
+  enable_slash_command: false,
+}
+
+// No TTL — this gate controls slash-command *visibility*, not a kill switch.
+// CACHED_MAY_BE_STALE still has one background-update flip (first call kicks
+// off fetch; second call sees fresh value), but no additional flips after that.
+// The tool-availability gate (tengu_kairos_brief in isBriefEnabled) keeps its
+// 5-min TTL because that one IS a kill switch.
+// Read the brief config from the (possibly stale) GrowthBook cache and
+// validate it; any shape mismatch discards the payload wholesale in favor of
+// DEFAULT_BRIEF_CONFIG rather than trusting it partially.
+function getBriefConfig(): BriefConfig {
+  const candidate = getFeatureValue_CACHED_MAY_BE_STALE<unknown>(
+    'tengu_kairos_brief_config',
+    DEFAULT_BRIEF_CONFIG,
+  )
+  const result = briefConfigSchema().safeParse(candidate)
+  if (!result.success) {
+    return DEFAULT_BRIEF_CONFIG
+  }
+  return result.data
+}
+
+/**
+ * /brief — toggles brief-only mode for the session.
+ * Visibility of the command is GB-gated (getBriefConfig); the on-transition
+ * is additionally entitlement-gated, while turning it off is always allowed
+ * (see inline notes).
+ */
+const brief = {
+  type: 'local-jsx',
+  name: 'brief',
+  description: 'Toggle brief-only mode',
+  isEnabled: () => {
+    if (feature('KAIROS') || feature('KAIROS_BRIEF')) {
+      return getBriefConfig().enable_slash_command
+    }
+    return false
+  },
+  immediate: true,
+  load: () =>
+    Promise.resolve({
+      async call(
+        onDone: LocalJSXCommandOnDone,
+        context: ToolUseContext & LocalJSXCommandContext,
+      ): Promise<React.ReactNode> {
+        const current = context.getAppState().isBriefOnly
+        const newState = !current
+
+        // Entitlement check only gates the on-transition — off is always
+        // allowed so a user whose GB gate flipped mid-session isn't stuck.
+        if (newState && !isBriefEntitled()) {
+          logEvent('tengu_brief_mode_toggled', {
+            enabled: false,
+            gated: true,
+            source:
+              'slash_command' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+          })
+          onDone('Brief tool is not enabled for your account', {
+            display: 'system',
+          })
+          return null
+        }
+
+        // Two-way: userMsgOptIn tracks isBriefOnly so the tool is available
+        // exactly when brief mode is on. This invalidates prompt cache on
+        // each toggle (tool list changes), but a stale tool list is worse —
+        // when /brief is enabled mid-session the model was previously left
+        // without the tool, emitting plain text the filter hides.
+        setUserMsgOptIn(newState)
+
+        // Functional update that bails out when the flag already matches,
+        // avoiding a no-op state change.
+        context.setAppState(prev => {
+          if (prev.isBriefOnly === newState) return prev
+          return { ...prev, isBriefOnly: newState }
+        })
+
+        logEvent('tengu_brief_mode_toggled', {
+          enabled: newState,
+          gated: false,
+          source:
+            'slash_command' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        })
+
+        // The tool list change alone isn't a strong enough signal mid-session
+        // (model may keep emitting plain text from inertia, or keep calling a
+        // tool that just vanished). Inject an explicit reminder into the next
+        // turn's context so the transition is unambiguous.
+        // Skip when Kairos is active: isBriefEnabled() short-circuits on
+        // getKairosActive() so the tool never actually leaves the list, and
+        // the Kairos system prompt already mandates SendUserMessage.
+        // Inline <system-reminder> wrap — importing wrapInSystemReminder from
+        // utils/messages.ts pulls constants/xml.ts into the bridge SDK bundle
+        // via this module's import chain, tripping the excluded-strings check.
+        const metaMessages = getKairosActive()
+          ? undefined
+          : [
+              `<system-reminder>\n${
+                newState
+                  ? `Brief mode is now enabled. Use the ${BRIEF_TOOL_NAME} tool for all user-facing output — plain text outside it is hidden from the user's view.`
+                  : `Brief mode is now disabled. The ${BRIEF_TOOL_NAME} tool is no longer available — reply with plain text.`
+              }\n</system-reminder>`,
+            ]
+
+        onDone(
+          newState ? 'Brief-only mode enabled' : 'Brief-only mode disabled',
+          { display: 'system', metaMessages },
+        )
+        return null
+      },
+    }),
+} satisfies Command
+
+export default brief

Різницю між файлами не показано, бо вона завелика
+ 242 - 0
src/commands/btw/btw.tsx


+ 13 - 0
src/commands/btw/index.ts

@@ -0,0 +1,13 @@
+import type { Command } from '../../commands.js'
+
+// /btw — quick side question without touching the main conversation.
+// Implementation is lazy-loaded JSX.
+const btw = {
+  type: 'local-jsx',
+  name: 'btw',
+  description:
+    'Ask a quick side question without interrupting the main conversation',
+  immediate: true,
+  argumentHint: '<question>',
+  load: () => import('./btw.js'),
+} satisfies Command
+
+export default btw

+ 1 - 0
src/commands/bughunter/index.js

@@ -0,0 +1 @@
+// Stub: command compiled out of this build — never enabled, never listed.
+export default { isEnabled: () => false, isHidden: true, name: 'stub' };

Різницю між файлами не показано, бо вона завелика
+ 284 - 0
src/commands/chrome/chrome.tsx


+ 13 - 0
src/commands/chrome/index.ts

@@ -0,0 +1,13 @@
+import { getIsNonInteractiveSession } from '../../bootstrap/state.js'
+import type { Command } from '../../commands.js'
+
+const command: Command = {
+  name: 'chrome',
+  description: 'Claude in Chrome (Beta) settings',
+  availability: ['claude-ai'],
+  isEnabled: () => !getIsNonInteractiveSession(),
+  type: 'local-jsx',
+  load: () => import('./chrome.js'),
+}
+
+export default command

+ 144 - 0
src/commands/clear/caches.ts

@@ -0,0 +1,144 @@
+/**
+ * Session cache clearing utilities.
+ * This module is imported at startup by main.tsx, so keep imports minimal.
+ */
+import { feature } from 'bun:bundle'
+import {
+  clearInvokedSkills,
+  setLastEmittedDate,
+} from '../../bootstrap/state.js'
+import { clearCommandsCache } from '../../commands.js'
+import { getSessionStartDate } from '../../constants/common.js'
+import {
+  getGitStatus,
+  getSystemContext,
+  getUserContext,
+  setSystemPromptInjection,
+} from '../../context.js'
+import { clearFileSuggestionCaches } from '../../hooks/fileSuggestions.js'
+import { clearAllPendingCallbacks } from '../../hooks/useSwarmPermissionPoller.js'
+import { clearAllDumpState } from '../../services/api/dumpPrompts.js'
+import { resetPromptCacheBreakDetection } from '../../services/api/promptCacheBreakDetection.js'
+import { clearAllSessions } from '../../services/api/sessionIngress.js'
+import { runPostCompactCleanup } from '../../services/compact/postCompactCleanup.js'
+import { resetAllLSPDiagnosticState } from '../../services/lsp/LSPDiagnosticRegistry.js'
+import { clearTrackedMagicDocs } from '../../services/MagicDocs/magicDocs.js'
+import { clearDynamicSkills } from '../../skills/loadSkillsDir.js'
+import { resetSentSkillNames } from '../../utils/attachments.js'
+import { clearCommandPrefixCaches } from '../../utils/bash/commands.js'
+import { resetGetMemoryFilesCache } from '../../utils/claudemd.js'
+import { clearRepositoryCaches } from '../../utils/detectRepository.js'
+import { clearResolveGitDirCache } from '../../utils/git/gitFilesystem.js'
+import { clearStoredImagePaths } from '../../utils/imageStore.js'
+import { clearSessionEnvVars } from '../../utils/sessionEnvVars.js'
+
+/**
+ * Clear all session-related caches.
+ * Call this when resuming a session to ensure fresh file/skill discovery.
+ * This is a subset of what clearConversation does - it only clears caches
+ * without affecting messages, session ID, or triggering hooks.
+ *
+ * @param preservedAgentIds - Agent IDs whose per-agent state should survive
+ *   the clear (e.g., background tasks preserved across /clear). When non-empty,
+ *   agentId-keyed state (invoked skills) is selectively cleared and requestId-keyed
+ *   state (pending permission callbacks, dump state, cache-break tracking) is left
+ *   intact since it cannot be safely scoped to the main session.
+ */
+export function clearSessionCaches(
+  preservedAgentIds: ReadonlySet<string> = new Set(),
+): void {
+  // When any agent survives, the requestId-keyed resets below are skipped
+  // entirely (they cannot be scoped per-agent — see TSDoc).
+  const hasPreserved = preservedAgentIds.size > 0
+  // Clear context caches
+  getUserContext.cache.clear?.()
+  getSystemContext.cache.clear?.()
+  getGitStatus.cache.clear?.()
+  getSessionStartDate.cache.clear?.()
+  // Clear file suggestion caches (for @ mentions)
+  clearFileSuggestionCaches()
+
+  // Clear commands/skills cache
+  clearCommandsCache()
+
+  // Clear prompt cache break detection state
+  if (!hasPreserved) resetPromptCacheBreakDetection()
+
+  // Clear system prompt injection (cache breaker)
+  setSystemPromptInjection(null)
+
+  // Clear last emitted date so it's re-detected on next turn
+  setLastEmittedDate(null)
+
+  // Run post-compaction cleanup (clears system prompt sections, microcompact tracking,
+  // classifier approvals, speculative checks, and — for main-thread compacts — memory
+  // files cache with load_reason 'compact').
+  runPostCompactCleanup()
+  // Reset sent skill names so the skill listing is re-sent after /clear.
+  // runPostCompactCleanup intentionally does NOT reset this (post-compact
+  // re-injection costs ~4K tokens), but /clear wipes messages entirely so
+  // the model needs the full listing again.
+  resetSentSkillNames()
+  // Override the memory cache reset with 'session_start': clearSessionCaches is called
+  // from /clear and --resume/--continue, which are NOT compaction events. Without this,
+  // the InstructionsLoaded hook would fire with load_reason 'compact' instead of
+  // 'session_start' on the next getMemoryFiles() call.
+  resetGetMemoryFilesCache('session_start')
+
+  // Clear stored image paths cache
+  clearStoredImagePaths()
+
+  // Clear all session ingress caches (lastUuidMap, sequentialAppendBySession)
+  clearAllSessions()
+  // Clear swarm permission pending callbacks
+  if (!hasPreserved) clearAllPendingCallbacks()
+
+  // NOTE(review): the fire-and-forget dynamic imports below carry no .catch —
+  // an import failure would surface as an unhandled rejection; confirm that
+  // is acceptable for a bundled build.
+  // Clear tungsten session usage tracking
+  if (process.env.USER_TYPE === 'ant') {
+    void import('../../tools/TungstenTool/TungstenTool.js').then(
+      ({ clearSessionsWithTungstenUsage, resetInitializationState }) => {
+        clearSessionsWithTungstenUsage()
+        resetInitializationState()
+      },
+    )
+  }
+  // Clear attribution caches (file content cache, pending bash states)
+  // Dynamic import to preserve dead code elimination for COMMIT_ATTRIBUTION feature flag
+  if (feature('COMMIT_ATTRIBUTION')) {
+    void import('../../utils/attributionHooks.js').then(
+      ({ clearAttributionCaches }) => clearAttributionCaches(),
+    )
+  }
+  // Clear repository detection caches
+  clearRepositoryCaches()
+  // Clear bash command prefix caches (Haiku-extracted prefixes)
+  clearCommandPrefixCaches()
+  // Clear dump prompts state
+  if (!hasPreserved) clearAllDumpState()
+  // Clear invoked skills cache (each entry holds full skill file content)
+  clearInvokedSkills(preservedAgentIds)
+  // Clear git dir resolution cache
+  clearResolveGitDirCache()
+  // Clear dynamic skills (loaded from skill directories)
+  clearDynamicSkills()
+  // Clear LSP diagnostic tracking state
+  resetAllLSPDiagnosticState()
+  // Clear tracked magic docs
+  clearTrackedMagicDocs()
+  // Clear session environment variables
+  clearSessionEnvVars()
+  // Clear WebFetch URL cache (up to 50MB of cached page content)
+  void import('../../tools/WebFetchTool/utils.js').then(
+    ({ clearWebFetchCache }) => clearWebFetchCache(),
+  )
+  // Clear ToolSearch description cache (full tool prompts, ~500KB for 50 MCP tools)
+  void import('../../tools/ToolSearchTool/ToolSearchTool.js').then(
+    ({ clearToolSearchDescriptionCache }) => clearToolSearchDescriptionCache(),
+  )
+  // Clear agent definitions cache (accumulates per-cwd via EnterWorktreeTool)
+  void import('../../tools/AgentTool/loadAgentsDir.js').then(
+    ({ clearAgentDefinitionsCache }) => clearAgentDefinitionsCache(),
+  )
+  // Clear SkillTool prompt cache (accumulates per project root)
+  void import('../../tools/SkillTool/prompt.js').then(({ clearPromptCache }) =>
+    clearPromptCache(),
+  )
+}

+ 7 - 0
src/commands/clear/clear.ts

@@ -0,0 +1,7 @@
+import type { LocalCommandCall } from '../../types/command.js'
+import { clearConversation } from './conversation.js'
+
+export const call: LocalCommandCall = async (_, context) => {
+  await clearConversation(context)
+  return { type: 'text', value: '' }
+}

+ 251 - 0
src/commands/clear/conversation.ts

@@ -0,0 +1,251 @@
+/**
+ * Conversation clearing utility.
+ * This module has heavier dependencies and should be lazy-loaded when possible.
+ */
+import { feature } from 'bun:bundle'
+import { randomUUID, type UUID } from 'crypto'
+import {
+  getLastMainRequestId,
+  getOriginalCwd,
+  getSessionId,
+  regenerateSessionId,
+} from '../../bootstrap/state.js'
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  logEvent,
+} from '../../services/analytics/index.js'
+import type { AppState } from '../../state/AppState.js'
+import { isInProcessTeammateTask } from '../../tasks/InProcessTeammateTask/types.js'
+import {
+  isLocalAgentTask,
+  type LocalAgentTaskState,
+} from '../../tasks/LocalAgentTask/LocalAgentTask.js'
+import { isLocalShellTask } from '../../tasks/LocalShellTask/guards.js'
+import { asAgentId } from '../../types/ids.js'
+import type { Message } from '../../types/message.js'
+import { createEmptyAttributionState } from '../../utils/commitAttribution.js'
+import type { FileStateCache } from '../../utils/fileStateCache.js'
+import {
+  executeSessionEndHooks,
+  getSessionEndHookTimeoutMs,
+} from '../../utils/hooks.js'
+import { logError } from '../../utils/log.js'
+import { clearAllPlanSlugs } from '../../utils/plans.js'
+import { setCwd } from '../../utils/Shell.js'
+import { processSessionStartHooks } from '../../utils/sessionStart.js'
+import {
+  clearSessionMetadata,
+  getAgentTranscriptPath,
+  resetSessionFilePointer,
+  saveWorktreeState,
+} from '../../utils/sessionStorage.js'
+import {
+  evictTaskOutput,
+  initTaskOutputAsSymlink,
+} from '../../utils/task/diskOutput.js'
+import { getCurrentWorktreeSession } from '../../utils/worktree.js'
+import { clearSessionCaches } from './caches.js'
+
/**
 * Fully resets the current conversation for /clear: runs SessionEnd hooks,
 * wipes messages and session caches, kills non-backgrounded tasks while
 * preserving backgrounded ones, regenerates the session ID, re-persists
 * mode/worktree state, and finally runs SessionStart hooks (whose output
 * becomes the new message list). Statement order matters throughout: caches
 * are cleared before the session ID changes, and preserved-task symlinks are
 * re-pointed only after the new session file pointer exists.
 *
 * @param setMessages Updater used to wipe and later re-seed the message list.
 * @param readFileState Per-session file content cache; cleared in place.
 * @param discoveredSkillNames Skill names discovered this session; cleared.
 * @param loadedNestedMemoryPaths Loaded nested memory file paths; cleared.
 * @param getAppState Optional app-state reader; used to find tasks to preserve.
 * @param setAppState Optional app-state writer; when present, foreground tasks
 *   are killed and attribution/file-history/MCP state is reset.
 * @param setConversationId Optional setter bumped to a fresh UUID to force a
 *   logo re-render.
 */
export async function clearConversation({
  setMessages,
  readFileState,
  discoveredSkillNames,
  loadedNestedMemoryPaths,
  getAppState,
  setAppState,
  setConversationId,
}: {
  setMessages: (updater: (prev: Message[]) => Message[]) => void
  readFileState: FileStateCache
  discoveredSkillNames?: Set<string>
  loadedNestedMemoryPaths?: Set<string>
  getAppState?: () => AppState
  setAppState?: (f: (prev: AppState) => AppState) => void
  setConversationId?: (id: UUID) => void
}): Promise<void> {
  // Execute SessionEnd hooks before clearing (bounded by
  // CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS, default 1.5s)
  const sessionEndTimeoutMs = getSessionEndHookTimeoutMs()
  await executeSessionEndHooks('clear', {
    getAppState,
    setAppState,
    signal: AbortSignal.timeout(sessionEndTimeoutMs),
    timeoutMs: sessionEndTimeoutMs,
  })

  // Signal to inference that this conversation's cache can be evicted.
  const lastRequestId = getLastMainRequestId()
  if (lastRequestId) {
    logEvent('tengu_cache_eviction_hint', {
      scope:
        'conversation_clear' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      last_request_id:
        lastRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
  }

  // Compute preserved tasks up front so their per-agent state survives the
  // cache wipe below. A task is preserved unless it explicitly has
  // isBackgrounded === false. Main-session tasks (Ctrl+B) are preserved —
  // they write to an isolated per-task transcript and run under an agent
  // context, so they're safe across session ID regeneration. See
  // LocalMainSessionTask.ts startBackgroundSession.
  const preservedAgentIds = new Set<string>()
  const preservedLocalAgents: LocalAgentTaskState[] = []
  const shouldKillTask = (task: AppState['tasks'][string]): boolean =>
    'isBackgrounded' in task && task.isBackgrounded === false
  if (getAppState) {
    for (const task of Object.values(getAppState().tasks)) {
      if (shouldKillTask(task)) continue
      if (isLocalAgentTask(task)) {
        preservedAgentIds.add(task.agentId)
        preservedLocalAgents.push(task)
      } else if (isInProcessTeammateTask(task)) {
        preservedAgentIds.add(task.identity.agentId)
      }
    }
  }

  // Drop the visible conversation immediately.
  setMessages(() => [])

  // Clear context-blocked flag so proactive ticks resume after /clear
  if (feature('PROACTIVE') || feature('KAIROS')) {
    /* eslint-disable @typescript-eslint/no-require-imports */
    const { setContextBlocked } = require('../../proactive/index.js')
    /* eslint-enable @typescript-eslint/no-require-imports */
    setContextBlocked(false)
  }

  // Force logo re-render by updating conversationId
  if (setConversationId) {
    setConversationId(randomUUID())
  }

  // Clear all session-related caches. Per-agent state for preserved background
  // tasks (invoked skills, pending permission callbacks, dump state, cache-break
  // tracking) is retained so those agents keep functioning.
  clearSessionCaches(preservedAgentIds)

  // Reset the working directory and per-session lookup sets.
  setCwd(getOriginalCwd())
  readFileState.clear()
  discoveredSkillNames?.clear()
  loadedNestedMemoryPaths?.clear()

  // Clean out necessary items from App State
  if (setAppState) {
    setAppState(prev => {
      // Partition tasks using the same predicate computed above:
      // kill+remove foreground tasks, preserve everything else.
      const nextTasks: AppState['tasks'] = {}
      for (const [taskId, task] of Object.entries(prev.tasks)) {
        if (!shouldKillTask(task)) {
          nextTasks[taskId] = task
          continue
        }
        // Foreground task: kill it and drop from state
        try {
          if (task.status === 'running') {
            if (isLocalShellTask(task)) {
              task.shellCommand?.kill()
              task.shellCommand?.cleanup()
              if (task.cleanupTimeoutId) {
                clearTimeout(task.cleanupTimeoutId)
              }
            }
            if ('abortController' in task) {
              task.abortController?.abort()
            }
            if ('unregisterCleanup' in task) {
              task.unregisterCleanup?.()
            }
          }
        } catch (error) {
          // Best-effort teardown: a failing kill shouldn't block the clear.
          logError(error)
        }
        void evictTaskOutput(taskId)
      }

      return {
        ...prev,
        tasks: nextTasks,
        attribution: createEmptyAttributionState(),
        // Clear standalone agent context (name/color set by /rename, /color)
        // so the new session doesn't display the old session's identity badge
        standaloneAgentContext: undefined,
        fileHistory: {
          snapshots: [],
          trackedFiles: new Set(),
          snapshotSequence: 0,
        },
        // Reset MCP state to default to trigger re-initialization.
        // Preserve pluginReconnectKey so /clear doesn't cause a no-op
        // (it's only bumped by /reload-plugins).
        mcp: {
          clients: [],
          tools: [],
          commands: [],
          resources: {},
          pluginReconnectKey: prev.mcp.pluginReconnectKey,
        },
      }
    })
  }

  // Clear plan slug cache so a new plan file is used after /clear
  clearAllPlanSlugs()

  // Clear cached session metadata (title, tag, agent name/color)
  // so the new session doesn't inherit the previous session's identity
  clearSessionMetadata()

  // Generate new session ID to provide fresh state
  // Set the old session as parent for analytics lineage tracking
  regenerateSessionId({ setCurrentAsParent: true })
  // Update the environment variable so subprocesses use the new session ID
  if (process.env.USER_TYPE === 'ant' && process.env.CLAUDE_CODE_SESSION_ID) {
    process.env.CLAUDE_CODE_SESSION_ID = getSessionId()
  }
  await resetSessionFilePointer()

  // Preserved local_agent tasks had their TaskOutput symlink baked against the
  // old session ID at spawn time, but post-clear transcript writes land under
  // the new session directory (appendEntry re-reads getSessionId()). Re-point
  // the symlinks so TaskOutput reads the live file instead of a frozen pre-clear
  // snapshot. Only re-point running tasks — finished tasks will never write
  // again, so re-pointing would replace a valid symlink with a dangling one.
  // Main-session tasks use the same per-agent path (they write via
  // recordSidechainTranscript to getAgentTranscriptPath), so no special case.
  for (const task of preservedLocalAgents) {
    if (task.status !== 'running') continue
    void initTaskOutputAsSymlink(
      task.id,
      getAgentTranscriptPath(asAgentId(task.agentId)),
    )
  }

  // Re-persist mode and worktree state after the clear so future --resume
  // knows what the new post-clear session was in. clearSessionMetadata
  // wiped both from the cache, but the process is still in the same mode
  // and (if applicable) the same worktree directory.
  if (feature('COORDINATOR_MODE')) {
    /* eslint-disable @typescript-eslint/no-require-imports */
    const { saveMode } = require('../../utils/sessionStorage.js')
    const {
      isCoordinatorMode,
    } = require('../../coordinator/coordinatorMode.js')
    /* eslint-enable @typescript-eslint/no-require-imports */
    saveMode(isCoordinatorMode() ? 'coordinator' : 'normal')
  }
  const worktreeSession = getCurrentWorktreeSession()
  if (worktreeSession) {
    saveWorktreeState(worktreeSession)
  }

  // Execute SessionStart hooks after clearing
  const hookMessages = await processSessionStartHooks('clear')

  // Update messages with hook results
  if (hookMessages.length > 0) {
    setMessages(() => hookMessages)
  }
}

+ 19 - 0
src/commands/clear/index.ts

@@ -0,0 +1,19 @@
+/**
+ * Clear command - minimal metadata only.
+ * Implementation is lazy-loaded from clear.ts to reduce startup time.
+ * Utility functions:
+ * - clearSessionCaches: import from './clear/caches.js'
+ * - clearConversation: import from './clear/conversation.js'
+ */
+import type { Command } from '../../commands.js'
+
+const clear = {
+  type: 'local',
+  name: 'clear',
+  description: 'Clear conversation history and free up context',
+  aliases: ['reset', 'new'],
+  supportsNonInteractive: false, // Should just create a new session
+  load: () => import('./clear.js'),
+} satisfies Command
+
+export default clear

+ 93 - 0
src/commands/color/color.ts

@@ -0,0 +1,93 @@
+import type { UUID } from 'crypto'
+import { getSessionId } from '../../bootstrap/state.js'
+import type { ToolUseContext } from '../../Tool.js'
+import {
+  AGENT_COLORS,
+  type AgentColorName,
+} from '../../tools/AgentTool/agentColorManager.js'
+import type {
+  LocalJSXCommandContext,
+  LocalJSXCommandOnDone,
+} from '../../types/command.js'
+import {
+  getTranscriptPath,
+  saveAgentColor,
+} from '../../utils/sessionStorage.js'
+import { isTeammate } from '../../utils/teammate.js'
+
+const RESET_ALIASES = ['default', 'reset', 'none', 'gray', 'grey'] as const
+
+export async function call(
+  onDone: LocalJSXCommandOnDone,
+  context: ToolUseContext & LocalJSXCommandContext,
+  args: string,
+): Promise<null> {
+  // Teammates cannot set their own color
+  if (isTeammate()) {
+    onDone(
+      'Cannot set color: This session is a swarm teammate. Teammate colors are assigned by the team leader.',
+      { display: 'system' },
+    )
+    return null
+  }
+
+  if (!args || args.trim() === '') {
+    const colorList = AGENT_COLORS.join(', ')
+    onDone(`Please provide a color. Available colors: ${colorList}, default`, {
+      display: 'system',
+    })
+    return null
+  }
+
+  const colorArg = args.trim().toLowerCase()
+
+  // Handle reset to default (gray)
+  if (RESET_ALIASES.includes(colorArg as (typeof RESET_ALIASES)[number])) {
+    const sessionId = getSessionId() as UUID
+    const fullPath = getTranscriptPath()
+
+    // Use "default" sentinel (not empty string) so truthiness guards
+    // in sessionStorage.ts persist the reset across session restarts
+    await saveAgentColor(sessionId, 'default', fullPath)
+
+    context.setAppState(prev => ({
+      ...prev,
+      standaloneAgentContext: {
+        ...prev.standaloneAgentContext,
+        name: prev.standaloneAgentContext?.name ?? '',
+        color: undefined,
+      },
+    }))
+
+    onDone('Session color reset to default', { display: 'system' })
+    return null
+  }
+
+  if (!AGENT_COLORS.includes(colorArg as AgentColorName)) {
+    const colorList = AGENT_COLORS.join(', ')
+    onDone(
+      `Invalid color "${colorArg}". Available colors: ${colorList}, default`,
+      { display: 'system' },
+    )
+    return null
+  }
+
+  const sessionId = getSessionId() as UUID
+  const fullPath = getTranscriptPath()
+
+  // Save to transcript for persistence across sessions
+  await saveAgentColor(sessionId, colorArg, fullPath)
+
+  // Update AppState for immediate effect
+  context.setAppState(prev => ({
+    ...prev,
+    standaloneAgentContext: {
+      ...prev.standaloneAgentContext,
+      name: prev.standaloneAgentContext?.name ?? '',
+      color: colorArg as AgentColorName,
+    },
+  }))
+
+  onDone(`Session color set to: ${colorArg}`, { display: 'system' })
+  return null
+}

+ 16 - 0
src/commands/color/index.ts

@@ -0,0 +1,16 @@
+/**
+ * Color command - minimal metadata only.
+ * Implementation is lazy-loaded from color.ts to reduce startup time.
+ */
+import type { Command } from '../../commands.js'
+
+const color = {
+  type: 'local-jsx',
+  name: 'color',
+  description: 'Set the prompt bar color for this session',
+  immediate: true,
+  argumentHint: '<color|default>',
+  load: () => import('./color.js'),
+} satisfies Command
+
+export default color

+ 158 - 0
src/commands/commit-push-pr.ts

@@ -0,0 +1,158 @@
+import type { Command } from '../commands.js'
+import {
+  getAttributionTexts,
+  getEnhancedPRAttribution,
+} from '../utils/attribution.js'
+import { getDefaultBranch } from '../utils/git.js'
+import { executeShellCommandsInPrompt } from '../utils/promptShellExecution.js'
+import { getUndercoverInstructions, isUndercover } from '../utils/undercover.js'
+
// Permission allowlist injected for /commit-push-pr: branching, staging,
// committing, pushing, GitHub PR management via gh, plus optional Slack
// posting through MCP tools.
const ALLOWED_TOOLS = [
  // NOTE(review): `git checkout --branch` is not a valid git-checkout flag
  // (`--branch` belongs to git clone; checkout uses `-b`, allowed on the next
  // line) — confirm whether this entry is intentional or dead.
  'Bash(git checkout --branch:*)',
  'Bash(git checkout -b:*)',
  'Bash(git add:*)',
  'Bash(git status:*)',
  'Bash(git push:*)',
  'Bash(git commit:*)',
  'Bash(gh pr create:*)',
  'Bash(gh pr edit:*)',
  'Bash(gh pr view:*)',
  'Bash(gh pr merge:*)',
  'ToolSearch',
  'mcp__slack__send_message',
  'mcp__claude_ai_Slack__slack_send_message',
]
+
/**
 * Builds the /commit-push-pr prompt body.
 *
 * @param defaultBranch Repository default branch, used in diff ranges and in
 *   the branch-creation instruction.
 * @param prAttribution Optional PR attribution text; falls back to the default
 *   from getAttributionTexts() when omitted.
 * @returns Prompt markdown containing !`...` shell placeholders that the
 *   caller expands via executeShellCommandsInPrompt.
 */
function getPromptContent(
  defaultBranch: string,
  prAttribution?: string,
): string {
  const { commit: commitAttribution, pr: defaultPrAttribution } =
    getAttributionTexts()
  // Use provided PR attribution or fall back to default
  const effectivePrAttribution = prAttribution ?? defaultPrAttribution
  const safeUser = process.env.SAFEUSER || ''
  const username = process.env.USER || ''

  let prefix = ''
  let reviewerArg = ' and `--reviewer anthropics/claude-code`'
  let addReviewerArg = ' (and add `--add-reviewer anthropics/claude-code`)'
  let changelogSection = `

## Changelog
<!-- CHANGELOG:START -->
[If this PR contains user-facing changes, add a changelog entry here. Otherwise, remove this section.]
<!-- CHANGELOG:END -->`
  let slackStep = `

5. After creating/updating the PR, check if the user's CLAUDE.md mentions posting to Slack channels. If it does, use ToolSearch to search for "slack send message" tools. If ToolSearch finds a Slack tool, ask the user if they'd like you to post the PR URL to the relevant Slack channel. Only post if the user confirms. If ToolSearch returns no results or errors, skip this step silently—do not mention the failure, do not attempt workarounds, and do not try alternative approaches.`
  // Undercover ant sessions: prepend the undercover instructions and strip the
  // reviewer/changelog/Slack steps.
  if (process.env.USER_TYPE === 'ant' && isUndercover()) {
    prefix = getUndercoverInstructions() + '\n'
    reviewerArg = ''
    addReviewerArg = ''
    changelogSection = ''
    slackStep = ''
  }

  return `${prefix}## Context

- \`SAFEUSER\`: ${safeUser}
- \`whoami\`: ${username}
- \`git status\`: !\`git status\`
- \`git diff HEAD\`: !\`git diff HEAD\`
- \`git branch --show-current\`: !\`git branch --show-current\`
- \`git diff ${defaultBranch}...HEAD\`: !\`git diff ${defaultBranch}...HEAD\`
- \`gh pr view --json number 2>/dev/null || true\`: !\`gh pr view --json number 2>/dev/null || true\`

## Git Safety Protocol

- NEVER update the git config
- NEVER run destructive/irreversible git commands (like push --force, hard reset, etc) unless the user explicitly requests them
- NEVER skip hooks (--no-verify, --no-gpg-sign, etc) unless the user explicitly requests it
- NEVER run force push to main/master, warn the user if they request it
- Do not commit files that likely contain secrets (.env, credentials.json, etc)
- Never use git commands with the -i flag (like git rebase -i or git add -i) since they require interactive input which is not supported

## Your task

Analyze all changes that will be included in the pull request, making sure to look at all relevant commits (NOT just the latest commit, but ALL commits that will be included in the pull request from the git diff ${defaultBranch}...HEAD output above).

Based on the above changes:
1. Create a new branch if on ${defaultBranch} (use SAFEUSER from context above for the branch name prefix, falling back to whoami if SAFEUSER is empty, e.g., \`username/feature-name\`)
2. Create a single commit with an appropriate message using heredoc syntax${commitAttribution ? `, ending with the attribution text shown in the example below` : ''}:
\`\`\`
git commit -m "$(cat <<'EOF'
Commit message here.${commitAttribution ? `\n\n${commitAttribution}` : ''}
EOF
)"
\`\`\`
3. Push the branch to origin
4. If a PR already exists for this branch (check the gh pr view output above), update the PR title and body using \`gh pr edit\` to reflect the current diff${addReviewerArg}. Otherwise, create a pull request using \`gh pr create\` with heredoc syntax for the body${reviewerArg}.
   - IMPORTANT: Keep PR titles short (under 70 characters). Use the body for details.
\`\`\`
gh pr create --title "Short, descriptive title" --body "$(cat <<'EOF'
## Summary
<1-3 bullet points>

## Test plan
[Bulleted markdown checklist of TODOs for testing the pull request...]${changelogSection}${effectivePrAttribution ? `\n\n${effectivePrAttribution}` : ''}
EOF
)"
\`\`\`

You have the capability to call multiple tools in a single response. You MUST do all of the above in a single message.${slackStep}

Return the PR URL when you're done, so the user can see it.`
}
+
+const command = {
+  type: 'prompt',
+  name: 'commit-push-pr',
+  description: 'Commit, push, and open a PR',
+  allowedTools: ALLOWED_TOOLS,
+  get contentLength() {
+    // Use 'main' as estimate for content length calculation
+    return getPromptContent('main').length
+  },
+  progressMessage: 'creating commit and PR',
+  source: 'builtin',
+  async getPromptForCommand(args, context) {
+    // Get default branch and enhanced PR attribution
+    const [defaultBranch, prAttribution] = await Promise.all([
+      getDefaultBranch(),
+      getEnhancedPRAttribution(context.getAppState),
+    ])
+    let promptContent = getPromptContent(defaultBranch, prAttribution)
+
+    // Append user instructions if args provided
+    const trimmedArgs = args?.trim()
+    if (trimmedArgs) {
+      promptContent += `\n\n## Additional instructions from user\n\n${trimmedArgs}`
+    }
+
+    const finalContent = await executeShellCommandsInPrompt(
+      promptContent,
+      {
+        ...context,
+        getAppState() {
+          const appState = context.getAppState()
+          return {
+            ...appState,
+            toolPermissionContext: {
+              ...appState.toolPermissionContext,
+              alwaysAllowRules: {
+                ...appState.toolPermissionContext.alwaysAllowRules,
+                command: ALLOWED_TOOLS,
+              },
+            },
+          }
+        },
+      },
+      '/commit-push-pr',
+    )
+
+    return [{ type: 'text', text: finalContent }]
+  },
+} satisfies Command
+
+export default command

+ 92 - 0
src/commands/commit.ts

@@ -0,0 +1,92 @@
+import type { Command } from '../commands.js'
+import { getAttributionTexts } from '../utils/attribution.js'
+import { executeShellCommandsInPrompt } from '../utils/promptShellExecution.js'
+import { getUndercoverInstructions, isUndercover } from '../utils/undercover.js'
+
// Permission allowlist for /commit: staging, status inspection, and
// committing only — no push, branch, or PR operations.
const ALLOWED_TOOLS = [
  'Bash(git add:*)',
  'Bash(git status:*)',
  'Bash(git commit:*)',
]
+
/**
 * Builds the /commit prompt body.
 *
 * @returns Prompt markdown containing !`...` shell placeholders that the
 *   caller expands via executeShellCommandsInPrompt, instructing the model to
 *   create exactly one commit (with attribution text when configured).
 */
function getPromptContent(): string {
  const { commit: commitAttribution } = getAttributionTexts()

  // Undercover ant sessions get the undercover instructions prepended.
  let prefix = ''
  if (process.env.USER_TYPE === 'ant' && isUndercover()) {
    prefix = getUndercoverInstructions() + '\n'
  }

  return `${prefix}## Context

- Current git status: !\`git status\`
- Current git diff (staged and unstaged changes): !\`git diff HEAD\`
- Current branch: !\`git branch --show-current\`
- Recent commits: !\`git log --oneline -10\`

## Git Safety Protocol

- NEVER update the git config
- NEVER skip hooks (--no-verify, --no-gpg-sign, etc) unless the user explicitly requests it
- CRITICAL: ALWAYS create NEW commits. NEVER use git commit --amend, unless the user explicitly requests it
- Do not commit files that likely contain secrets (.env, credentials.json, etc). Warn the user if they specifically request to commit those files
- If there are no changes to commit (i.e., no untracked files and no modifications), do not create an empty commit
- Never use git commands with the -i flag (like git rebase -i or git add -i) since they require interactive input which is not supported

## Your task

Based on the above changes, create a single git commit:

1. Analyze all staged changes and draft a commit message:
   - Look at the recent commits above to follow this repository's commit message style
   - Summarize the nature of the changes (new feature, enhancement, bug fix, refactoring, test, docs, etc.)
   - Ensure the message accurately reflects the changes and their purpose (i.e. "add" means a wholly new feature, "update" means an enhancement to an existing feature, "fix" means a bug fix, etc.)
   - Draft a concise (1-2 sentences) commit message that focuses on the "why" rather than the "what"

2. Stage relevant files and create the commit using HEREDOC syntax:
\`\`\`
git commit -m "$(cat <<'EOF'
Commit message here.${commitAttribution ? `\n\n${commitAttribution}` : ''}
EOF
)"
\`\`\`

You have the capability to call multiple tools in a single response. Stage and create the commit using a single message. Do not use any other tools or do anything else. Do not send any other text or messages besides these tool calls.`
}
+
+const command = {
+  type: 'prompt',
+  name: 'commit',
+  description: 'Create a git commit',
+  allowedTools: ALLOWED_TOOLS,
+  contentLength: 0, // Dynamic content
+  progressMessage: 'creating commit',
+  source: 'builtin',
+  async getPromptForCommand(_args, context) {
+    const promptContent = getPromptContent()
+    const finalContent = await executeShellCommandsInPrompt(
+      promptContent,
+      {
+        ...context,
+        getAppState() {
+          const appState = context.getAppState()
+          return {
+            ...appState,
+            toolPermissionContext: {
+              ...appState.toolPermissionContext,
+              alwaysAllowRules: {
+                ...appState.toolPermissionContext.alwaysAllowRules,
+                command: ALLOWED_TOOLS,
+              },
+            },
+          }
+        },
+      },
+      '/commit',
+    )
+
+    return [{ type: 'text', text: finalContent }]
+  },
+} satisfies Command
+
+export default command

+ 287 - 0
src/commands/compact/compact.ts

@@ -0,0 +1,287 @@
+import { feature } from 'bun:bundle'
+import chalk from 'chalk'
+import { markPostCompaction } from 'src/bootstrap/state.js'
+import { getSystemPrompt } from '../../constants/prompts.js'
+import { getSystemContext, getUserContext } from '../../context.js'
+import { getShortcutDisplay } from '../../keybindings/shortcutFormat.js'
+import { notifyCompaction } from '../../services/api/promptCacheBreakDetection.js'
+import {
+  type CompactionResult,
+  compactConversation,
+  ERROR_MESSAGE_INCOMPLETE_RESPONSE,
+  ERROR_MESSAGE_NOT_ENOUGH_MESSAGES,
+  ERROR_MESSAGE_USER_ABORT,
+  mergeHookInstructions,
+} from '../../services/compact/compact.js'
+import { suppressCompactWarning } from '../../services/compact/compactWarningState.js'
+import { microcompactMessages } from '../../services/compact/microCompact.js'
+import { runPostCompactCleanup } from '../../services/compact/postCompactCleanup.js'
+import { trySessionMemoryCompaction } from '../../services/compact/sessionMemoryCompact.js'
+import { setLastSummarizedMessageId } from '../../services/SessionMemory/sessionMemoryUtils.js'
+import type { ToolUseContext } from '../../Tool.js'
+import type { LocalCommandCall } from '../../types/command.js'
+import type { Message } from '../../types/message.js'
+import { hasExactErrorMessage } from '../../utils/errors.js'
+import { executePreCompactHooks } from '../../utils/hooks.js'
+import { logError } from '../../utils/log.js'
+import { getMessagesAfterCompactBoundary } from '../../utils/messages.js'
+import { getUpgradeMessage } from '../../utils/model/contextWindowUpgradeCheck.js'
+import {
+  buildEffectiveSystemPrompt,
+  type SystemPrompt,
+} from '../../utils/systemPrompt.js'
+
/* eslint-disable @typescript-eslint/no-require-imports */
// Conditional synchronous require keyed on the REACTIVE_COMPACT feature flag
// so bundlers can dead-code-eliminate the module when the flag is off; the
// `typeof import(...)` cast recovers the module's static types.
const reactiveCompact = feature('REACTIVE_COMPACT')
  ? (require('../../services/compact/reactiveCompact.js') as typeof import('../../services/compact/reactiveCompact.js'))
  : null
/* eslint-enable @typescript-eslint/no-require-imports */
+
/**
 * /compact implementation: summarizes the conversation to free context.
 * Tries session-memory compaction first (when no custom instructions were
 * given), then the reactive-only path when enabled, and finally falls back to
 * microcompact + traditional summarization. Returns a 'compact' command
 * result carrying the CompactionResult and a dimmed display line; errors are
 * normalized into user-facing messages in the catch block.
 *
 * @param args Optional custom summarization instructions (trimmed).
 * @param context Tool-use context for the current REPL turn.
 */
export const call: LocalCommandCall = async (args, context) => {
  const { abortController } = context
  let { messages } = context

  // REPL keeps snipped messages for UI scrollback — project so the compact
  // model doesn't summarize content that was intentionally removed.
  messages = getMessagesAfterCompactBoundary(messages)

  if (messages.length === 0) {
    throw new Error('No messages to compact')
  }

  const customInstructions = args.trim()

  try {
    // Try session memory compaction first if no custom instructions
    // (session memory compaction doesn't support custom instructions)
    if (!customInstructions) {
      const sessionMemoryResult = await trySessionMemoryCompaction(
        messages,
        context.agentId,
      )
      if (sessionMemoryResult) {
        getUserContext.cache.clear?.()
        runPostCompactCleanup()
        // Reset cache read baseline so the post-compact drop isn't flagged
        // as a break. compactConversation does this internally; SM-compact doesn't.
        if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
          notifyCompaction(
            context.options.querySource ?? 'compact',
            context.agentId,
          )
        }
        markPostCompaction()
        // Suppress warning immediately after successful compaction
        suppressCompactWarning()

        return {
          type: 'compact',
          compactionResult: sessionMemoryResult,
          displayText: buildDisplayText(context),
        }
      }
    }

    // Reactive-only mode: route /compact through the reactive path.
    // Checked after session-memory (that path is cheap and orthogonal).
    if (reactiveCompact?.isReactiveOnlyMode()) {
      return await compactViaReactive(
        messages,
        context,
        customInstructions,
        reactiveCompact,
      )
    }

    // Fall back to traditional compaction
    // Run microcompact first to reduce tokens before summarization
    const microcompactResult = await microcompactMessages(messages, context)
    const messagesForCompact = microcompactResult.messages

    const result = await compactConversation(
      messagesForCompact,
      context,
      await getCacheSharingParams(context, messagesForCompact),
      false,
      customInstructions,
      false,
    )

    // Reset lastSummarizedMessageId since legacy compaction replaces all messages
    // and the old message UUID will no longer exist in the new messages array
    setLastSummarizedMessageId(undefined)

    // Suppress the "Context left until auto-compact" warning after successful compaction
    suppressCompactWarning()

    getUserContext.cache.clear?.()
    runPostCompactCleanup()

    return {
      type: 'compact',
      compactionResult: result,
      displayText: buildDisplayText(context, result.userDisplayMessage),
    }
  } catch (error) {
    // Normalize failures: abort wins, known sentinel messages re-throw as-is,
    // anything else is logged and wrapped.
    if (abortController.signal.aborted) {
      throw new Error('Compaction canceled.')
    } else if (hasExactErrorMessage(error, ERROR_MESSAGE_NOT_ENOUGH_MESSAGES)) {
      throw new Error(ERROR_MESSAGE_NOT_ENOUGH_MESSAGES)
    } else if (hasExactErrorMessage(error, ERROR_MESSAGE_INCOMPLETE_RESPONSE)) {
      throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
    } else {
      logError(error)
      throw new Error(`Error during compaction: ${error}`)
    }
  }
}
+
/**
 * Routes /compact through the reactive compaction path (reactive-only mode).
 * Runs PreCompact hooks concurrently with cache-sharing param construction,
 * delegates to reactiveCompactOnPromptTooLong, and translates its failure
 * reasons into the same sentinel error messages the traditional path throws
 * so the outer catch in `call` handles both uniformly. UI status/progress
 * state is always reset in the finally block.
 *
 * @throws Error with an ERROR_MESSAGE_* sentinel when compaction fails.
 */
async function compactViaReactive(
  messages: Message[],
  context: ToolUseContext,
  customInstructions: string,
  reactive: NonNullable<typeof reactiveCompact>,
): Promise<{
  type: 'compact'
  compactionResult: CompactionResult
  displayText: string
}> {
  context.onCompactProgress?.({
    type: 'hooks_start',
    hookType: 'pre_compact',
  })
  context.setSDKStatus?.('compacting')

  try {
    // Hooks and cache-param build are independent — run concurrently.
    // getCacheSharingParams walks all tools to build the system prompt;
    // pre-compact hooks spawn subprocesses. Neither depends on the other.
    const [hookResult, cacheSafeParams] = await Promise.all([
      executePreCompactHooks(
        { trigger: 'manual', customInstructions: customInstructions || null },
        context.abortController.signal,
      ),
      getCacheSharingParams(context, messages),
    ])
    const mergedInstructions = mergeHookInstructions(
      customInstructions,
      hookResult.newCustomInstructions,
    )

    context.setStreamMode?.('requesting')
    context.setResponseLength?.(() => 0)
    context.onCompactProgress?.({ type: 'compact_start' })

    const outcome = await reactive.reactiveCompactOnPromptTooLong(
      messages,
      cacheSafeParams,
      { customInstructions: mergedInstructions, trigger: 'manual' },
    )

    if (!outcome.ok) {
      // The outer catch in `call` translates these: aborted → "Compaction
      // canceled." (via abortController.signal.aborted check), NOT_ENOUGH →
      // re-thrown as-is, everything else → "Error during compaction: …".
      switch (outcome.reason) {
        case 'too_few_groups':
          throw new Error(ERROR_MESSAGE_NOT_ENOUGH_MESSAGES)
        case 'aborted':
          throw new Error(ERROR_MESSAGE_USER_ABORT)
        case 'exhausted':
        case 'error':
        case 'media_unstrippable':
          throw new Error(ERROR_MESSAGE_INCOMPLETE_RESPONSE)
      }
    }

    // Mirrors the post-success cleanup in tryReactiveCompact, minus
    // resetMicrocompactState — processSlashCommand calls that for all
    // type:'compact' results.
    setLastSummarizedMessageId(undefined)
    runPostCompactCleanup()
    suppressCompactWarning()
    getUserContext.cache.clear?.()

    // reactiveCompactOnPromptTooLong runs PostCompact hooks but not PreCompact
    // — both callers (here and tryReactiveCompact) run PreCompact outside so
    // they can merge its userDisplayMessage with PostCompact's here. This
    // caller additionally runs it concurrently with getCacheSharingParams.
    const combinedMessage =
      [hookResult.userDisplayMessage, outcome.result.userDisplayMessage]
        .filter(Boolean)
        .join('\n') || undefined

    return {
      type: 'compact',
      compactionResult: {
        ...outcome.result,
        userDisplayMessage: combinedMessage,
      },
      displayText: buildDisplayText(context, combinedMessage),
    }
  } finally {
    // Always restore UI state, whether compaction succeeded or threw.
    context.setStreamMode?.('requesting')
    context.setResponseLength?.(() => 0)
    context.onCompactProgress?.({ type: 'compact_end' })
    context.setSDKStatus?.(null)
  }
}
+
+function buildDisplayText(
+  context: ToolUseContext,
+  userDisplayMessage?: string,
+): string {
+  const upgradeMessage = getUpgradeMessage('tip')
+  const expandShortcut = getShortcutDisplay(
+    'app:toggleTranscript',
+    'Global',
+    'ctrl+o',
+  )
+  const dimmed = [
+    ...(context.options.verbose
+      ? []
+      : [`(${expandShortcut} to see full summary)`]),
+    ...(userDisplayMessage ? [userDisplayMessage] : []),
+    ...(upgradeMessage ? [upgradeMessage] : []),
+  ]
+  return chalk.dim('Compacted ' + dimmed.join('\n'))
+}
+
+async function getCacheSharingParams(
+  context: ToolUseContext,
+  forkContextMessages: Message[],
+): Promise<{
+  systemPrompt: SystemPrompt
+  userContext: { [k: string]: string }
+  systemContext: { [k: string]: string }
+  toolUseContext: ToolUseContext
+  forkContextMessages: Message[]
+}> {
+  const appState = context.getAppState()
+  const defaultSysPrompt = await getSystemPrompt(
+    context.options.tools,
+    context.options.mainLoopModel,
+    Array.from(
+      appState.toolPermissionContext.additionalWorkingDirectories.keys(),
+    ),
+    context.options.mcpClients,
+  )
+  const systemPrompt = buildEffectiveSystemPrompt({
+    mainThreadAgentDefinition: undefined,
+    toolUseContext: context,
+    customSystemPrompt: context.options.customSystemPrompt,
+    defaultSystemPrompt: defaultSysPrompt,
+    appendSystemPrompt: context.options.appendSystemPrompt,
+  })
+  const [userContext, systemContext] = await Promise.all([
+    getUserContext(),
+    getSystemContext(),
+  ])
+  return {
+    systemPrompt,
+    userContext,
+    systemContext,
+    toolUseContext: context,
+    forkContextMessages,
+  }
+}

+ 15 - 0
src/commands/compact/index.ts

@@ -0,0 +1,15 @@
+import type { Command } from '../../commands.js'
+import { isEnvTruthy } from '../../utils/envUtils.js'
+
+// /compact — summarizes the conversation and clears the rest from context.
+// Can be disabled entirely via the DISABLE_COMPACT environment variable.
+const compact = {
+  type: 'local',
+  name: 'compact',
+  description:
+    'Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]',
+  isEnabled: () => !isEnvTruthy(process.env.DISABLE_COMPACT),
+  supportsNonInteractive: true,
+  argumentHint: '<optional custom summarization instructions>',
+  // Implementation is loaded lazily on first invocation.
+  load: () => import('./compact.js'),
+} satisfies Command
+
+export default compact

+ 7 - 0
src/commands/config/config.tsx

@@ -0,0 +1,7 @@
+import * as React from 'react';
+import { Settings } from '../../components/Settings/Settings.js';
+import type { LocalJSXCommandCall } from '../../types/command.js';
+
+/**
+ * /config entry point: renders the Settings panel opened on the Config tab.
+ * `onDone` is invoked by the panel to close it and return to the REPL.
+ *
+ * Note: a stale inline `//# sourceMappingURL=` base64 comment (an artifact
+ * of compiled output being checked in as source) was removed — inline maps
+ * in hand-written source mislead debuggers and bloat the file.
+ */
+export const call: LocalJSXCommandCall = async (onDone, context) => {
+  return <Settings onClose={onDone} context={context} defaultTab="Config" />;
+};

+ 11 - 0
src/commands/config/index.ts

@@ -0,0 +1,11 @@
+import type { Command } from '../../commands.js'
+
+// /config (alias: /settings) — opens the interactive settings panel.
+const config = {
+  aliases: ['settings'],
+  type: 'local-jsx',
+  name: 'config',
+  description: 'Open config panel',
+  // JSX implementation is loaded lazily on first invocation.
+  load: () => import('./config.js'),
+} satisfies Command
+
+export default config

+ 325 - 0
src/commands/context/context-noninteractive.ts

@@ -0,0 +1,325 @@
+import { feature } from 'bun:bundle'
+import { microcompactMessages } from '../../services/compact/microCompact.js'
+import type { AppState } from '../../state/AppStateStore.js'
+import type { Tools, ToolUseContext } from '../../Tool.js'
+import type { AgentDefinitionsResult } from '../../tools/AgentTool/loadAgentsDir.js'
+import type { Message } from '../../types/message.js'
+import {
+  analyzeContextUsage,
+  type ContextData,
+} from '../../utils/analyzeContext.js'
+import { formatTokens } from '../../utils/format.js'
+import { getMessagesAfterCompactBoundary } from '../../utils/messages.js'
+import { getSourceDisplayName } from '../../utils/settings/constants.js'
+import { plural } from '../../utils/stringUtils.js'
+
+/**
+ * Shared data-collection path for `/context` (slash command) and the SDK
+ * `get_context_usage` control request. Mirrors query.ts's pre-API transforms
+ * (compact boundary, projectView, microcompact) so the token count reflects
+ * what the model actually sees.
+ */
+// Structural subset of ToolUseContext that collectContextData actually reads;
+// a full ToolUseContext satisfies it (see call() below), and the SDK control
+// path can supply just these fields.
+type CollectContextDataInput = {
+  messages: Message[]
+  getAppState: () => AppState
+  options: {
+    mainLoopModel: string
+    tools: Tools
+    agentDefinitions: AgentDefinitionsResult
+    customSystemPrompt?: string
+    appendSystemPrompt?: string
+  }
+}
+
+/**
+ * Computes context-usage statistics for the current conversation, applying
+ * the same pre-API message transforms as the main query loop — compact
+ * boundary trim, optional context-collapse projection, then microcompact —
+ * so the reported token counts reflect what the model actually sees.
+ */
+export async function collectContextData(
+  context: CollectContextDataInput,
+): Promise<ContextData> {
+  const {
+    messages,
+    getAppState,
+    options: {
+      mainLoopModel,
+      tools,
+      agentDefinitions,
+      customSystemPrompt,
+      appendSystemPrompt,
+    },
+  } = context
+
+  // Drop everything before the most recent compact boundary first.
+  let apiView = getMessagesAfterCompactBoundary(messages)
+  if (feature('CONTEXT_COLLAPSE')) {
+    // require() inside the gate — presumably so the module is only loaded
+    // when the feature is compiled/enabled; verify against bundler config.
+    /* eslint-disable @typescript-eslint/no-require-imports */
+    const { projectView } =
+      require('../../services/contextCollapse/operations.js') as typeof import('../../services/contextCollapse/operations.js')
+    /* eslint-enable @typescript-eslint/no-require-imports */
+    apiView = projectView(apiView)
+  }
+
+  // Microcompact runs last, on the already-projected view.
+  const { messages: compactedMessages } = await microcompactMessages(apiView)
+  const appState = getAppState()
+
+  return analyzeContextUsage(
+    compactedMessages,
+    mainLoopModel,
+    async () => appState.toolPermissionContext,
+    tools,
+    agentDefinitions,
+    undefined, // terminalWidth
+    // analyzeContextUsage only reads options.{customSystemPrompt,appendSystemPrompt}
+    // but its signature declares the full Pick<ToolUseContext, 'options'>.
+    { options: { customSystemPrompt, appendSystemPrompt } } as Pick<
+      ToolUseContext,
+      'options'
+    >,
+    undefined, // mainThreadAgentDefinition
+    apiView, // original messages for API usage extraction
+  )
+}
+
+/**
+ * Non-interactive `/context` entry point: gathers usage data for the
+ * current conversation and renders it as a markdown report.
+ */
+export async function call(
+  _args: string,
+  context: ToolUseContext,
+): Promise<{ type: 'text'; value: string }> {
+  const usage = await collectContextData(context)
+  const value = formatContextAsMarkdownTable(usage)
+  return { type: 'text' as const, value }
+}
+
+function formatContextAsMarkdownTable(data: ContextData): string {
+  const {
+    categories,
+    totalTokens,
+    rawMaxTokens,
+    percentage,
+    model,
+    memoryFiles,
+    mcpTools,
+    agents,
+    skills,
+    messageBreakdown,
+    systemTools,
+    systemPromptSections,
+  } = data
+
+  let output = `## Context Usage\n\n`
+  output += `**Model:** ${model}  \n`
+  output += `**Tokens:** ${formatTokens(totalTokens)} / ${formatTokens(rawMaxTokens)} (${percentage}%)\n`
+
+  // Context-collapse status. Always show when the runtime gate is on —
+  // the user needs to know which strategy is managing their context
+  // even before anything has fired.
+  if (feature('CONTEXT_COLLAPSE')) {
+    /* eslint-disable @typescript-eslint/no-require-imports */
+    const { getStats, isContextCollapseEnabled } =
+      require('../../services/contextCollapse/index.js') as typeof import('../../services/contextCollapse/index.js')
+    /* eslint-enable @typescript-eslint/no-require-imports */
+    if (isContextCollapseEnabled()) {
+      const s = getStats()
+      const { health: h } = s
+
+      const parts = []
+      if (s.collapsedSpans > 0) {
+        parts.push(
+          `${s.collapsedSpans} ${plural(s.collapsedSpans, 'span')} summarized (${s.collapsedMessages} messages)`,
+        )
+      }
+      if (s.stagedSpans > 0) parts.push(`${s.stagedSpans} staged`)
+      const summary =
+        parts.length > 0
+          ? parts.join(', ')
+          : h.totalSpawns > 0
+            ? `${h.totalSpawns} ${plural(h.totalSpawns, 'spawn')}, nothing staged yet`
+            : 'waiting for first trigger'
+      output += `**Context strategy:** collapse (${summary})\n`
+
+      if (h.totalErrors > 0) {
+        output += `**Collapse errors:** ${h.totalErrors}/${h.totalSpawns} spawns failed`
+        if (h.lastError) {
+          output += ` (last: ${h.lastError.slice(0, 80)})`
+        }
+        output += '\n'
+      } else if (h.emptySpawnWarningEmitted) {
+        output += `**Collapse idle:** ${h.totalEmptySpawns} consecutive empty runs\n`
+      }
+    }
+  }
+  output += '\n'
+
+  // Main categories table
+  const visibleCategories = categories.filter(
+    cat =>
+      cat.tokens > 0 &&
+      cat.name !== 'Free space' &&
+      cat.name !== 'Autocompact buffer',
+  )
+
+  if (visibleCategories.length > 0) {
+    output += `### Estimated usage by category\n\n`
+    output += `| Category | Tokens | Percentage |\n`
+    output += `|----------|--------|------------|\n`
+
+    for (const cat of visibleCategories) {
+      const percentDisplay = ((cat.tokens / rawMaxTokens) * 100).toFixed(1)
+      output += `| ${cat.name} | ${formatTokens(cat.tokens)} | ${percentDisplay}% |\n`
+    }
+
+    const freeSpaceCategory = categories.find(c => c.name === 'Free space')
+    if (freeSpaceCategory && freeSpaceCategory.tokens > 0) {
+      const percentDisplay = (
+        (freeSpaceCategory.tokens / rawMaxTokens) *
+        100
+      ).toFixed(1)
+      output += `| Free space | ${formatTokens(freeSpaceCategory.tokens)} | ${percentDisplay}% |\n`
+    }
+
+    const autocompactCategory = categories.find(
+      c => c.name === 'Autocompact buffer',
+    )
+    if (autocompactCategory && autocompactCategory.tokens > 0) {
+      const percentDisplay = (
+        (autocompactCategory.tokens / rawMaxTokens) *
+        100
+      ).toFixed(1)
+      output += `| Autocompact buffer | ${formatTokens(autocompactCategory.tokens)} | ${percentDisplay}% |\n`
+    }
+
+    output += `\n`
+  }
+
+  // MCP tools
+  if (mcpTools.length > 0) {
+    output += `### MCP Tools\n\n`
+    output += `| Tool | Server | Tokens |\n`
+    output += `|------|--------|--------|\n`
+    for (const tool of mcpTools) {
+      output += `| ${tool.name} | ${tool.serverName} | ${formatTokens(tool.tokens)} |\n`
+    }
+    output += `\n`
+  }
+
+  // System tools (ant-only)
+  if (
+    systemTools &&
+    systemTools.length > 0 &&
+    process.env.USER_TYPE === 'ant'
+  ) {
+    output += `### [ANT-ONLY] System Tools\n\n`
+    output += `| Tool | Tokens |\n`
+    output += `|------|--------|\n`
+    for (const tool of systemTools) {
+      output += `| ${tool.name} | ${formatTokens(tool.tokens)} |\n`
+    }
+    output += `\n`
+  }
+
+  // System prompt sections (ant-only)
+  if (
+    systemPromptSections &&
+    systemPromptSections.length > 0 &&
+    process.env.USER_TYPE === 'ant'
+  ) {
+    output += `### [ANT-ONLY] System Prompt Sections\n\n`
+    output += `| Section | Tokens |\n`
+    output += `|---------|--------|\n`
+    for (const section of systemPromptSections) {
+      output += `| ${section.name} | ${formatTokens(section.tokens)} |\n`
+    }
+    output += `\n`
+  }
+
+  // Custom agents
+  if (agents.length > 0) {
+    output += `### Custom Agents\n\n`
+    output += `| Agent Type | Source | Tokens |\n`
+    output += `|------------|--------|--------|\n`
+    for (const agent of agents) {
+      let sourceDisplay: string
+      switch (agent.source) {
+        case 'projectSettings':
+          sourceDisplay = 'Project'
+          break
+        case 'userSettings':
+          sourceDisplay = 'User'
+          break
+        case 'localSettings':
+          sourceDisplay = 'Local'
+          break
+        case 'flagSettings':
+          sourceDisplay = 'Flag'
+          break
+        case 'policySettings':
+          sourceDisplay = 'Policy'
+          break
+        case 'plugin':
+          sourceDisplay = 'Plugin'
+          break
+        case 'built-in':
+          sourceDisplay = 'Built-in'
+          break
+        default:
+          sourceDisplay = String(agent.source)
+      }
+      output += `| ${agent.agentType} | ${sourceDisplay} | ${formatTokens(agent.tokens)} |\n`
+    }
+    output += `\n`
+  }
+
+  // Memory files
+  if (memoryFiles.length > 0) {
+    output += `### Memory Files\n\n`
+    output += `| Type | Path | Tokens |\n`
+    output += `|------|------|--------|\n`
+    for (const file of memoryFiles) {
+      output += `| ${file.type} | ${file.path} | ${formatTokens(file.tokens)} |\n`
+    }
+    output += `\n`
+  }
+
+  // Skills
+  if (skills && skills.tokens > 0 && skills.skillFrontmatter.length > 0) {
+    output += `### Skills\n\n`
+    output += `| Skill | Source | Tokens |\n`
+    output += `|-------|--------|--------|\n`
+    for (const skill of skills.skillFrontmatter) {
+      output += `| ${skill.name} | ${getSourceDisplayName(skill.source)} | ${formatTokens(skill.tokens)} |\n`
+    }
+    output += `\n`
+  }
+
+  // Message breakdown (ant-only)
+  if (messageBreakdown && process.env.USER_TYPE === 'ant') {
+    output += `### [ANT-ONLY] Message Breakdown\n\n`
+    output += `| Category | Tokens |\n`
+    output += `|----------|--------|\n`
+    output += `| Tool calls | ${formatTokens(messageBreakdown.toolCallTokens)} |\n`
+    output += `| Tool results | ${formatTokens(messageBreakdown.toolResultTokens)} |\n`
+    output += `| Attachments | ${formatTokens(messageBreakdown.attachmentTokens)} |\n`
+    output += `| Assistant messages (non-tool) | ${formatTokens(messageBreakdown.assistantMessageTokens)} |\n`
+    output += `| User messages (non-tool-result) | ${formatTokens(messageBreakdown.userMessageTokens)} |\n`
+    output += `\n`
+
+    if (messageBreakdown.toolCallsByType.length > 0) {
+      output += `#### Top Tools\n\n`
+      output += `| Tool | Call Tokens | Result Tokens |\n`
+      output += `|------|-------------|---------------|\n`
+      for (const tool of messageBreakdown.toolCallsByType) {
+        output += `| ${tool.name} | ${formatTokens(tool.callTokens)} | ${formatTokens(tool.resultTokens)} |\n`
+      }
+      output += `\n`
+    }
+
+    if (messageBreakdown.attachmentsByType.length > 0) {
+      output += `#### Top Attachments\n\n`
+      output += `| Attachment | Tokens |\n`
+      output += `|------------|--------|\n`
+      for (const attachment of messageBreakdown.attachmentsByType) {
+        output += `| ${attachment.name} | ${formatTokens(attachment.tokens)} |\n`
+      }
+      output += `\n`
+    }
+  }
+
+  return output
+}

Різницю між файлами не показано, бо вона завелика
+ 63 - 0
src/commands/context/context.tsx


Деякі файли не було показано, через те що забагато файлів було змінено