promptSuggestion.ts 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524
  1. import { getIsNonInteractiveSession } from '../../bootstrap/state.js'
  2. import type { AppState } from '../../state/AppState.js'
  3. import type { Message } from '../../types/message.js'
  4. import { isAgentSwarmsEnabled } from '../../utils/agentSwarmsEnabled.js'
  5. import { count } from '../../utils/array.js'
  6. import { isEnvDefinedFalsy, isEnvTruthy } from '../../utils/envUtils.js'
  7. import { toError } from '../../utils/errors.js'
  8. import {
  9. type CacheSafeParams,
  10. createCacheSafeParams,
  11. runForkedAgent,
  12. } from '../../utils/forkedAgent.js'
  13. import type { REPLHookContext } from '../../utils/hooks/postSamplingHooks.js'
  14. import { logError } from '../../utils/log.js'
  15. import {
  16. createUserMessage,
  17. getLastAssistantMessage,
  18. } from '../../utils/messages.js'
  19. import { getInitialSettings } from '../../utils/settings/settings.js'
  20. import { isTeammate } from '../../utils/teammate.js'
  21. import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
  22. import {
  23. type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  24. logEvent,
  25. } from '../analytics/index.js'
  26. import { currentLimits } from '../claudeAiLimits.js'
  27. import { isSpeculationEnabled, startSpeculation } from './speculation.js'
  28. let currentAbortController: AbortController | null = null
  29. export type PromptVariant = 'user_intent' | 'stated_intent'
  30. export function getPromptVariant(): PromptVariant {
  31. return 'user_intent'
  32. }
  33. export function shouldEnablePromptSuggestion(): boolean {
  34. // Env var overrides everything (for testing)
  35. const envOverride = process.env.CLAUDE_CODE_ENABLE_PROMPT_SUGGESTION
  36. if (isEnvDefinedFalsy(envOverride)) {
  37. logEvent('tengu_prompt_suggestion_init', {
  38. enabled: false,
  39. source:
  40. 'env' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  41. })
  42. return false
  43. }
  44. if (isEnvTruthy(envOverride)) {
  45. logEvent('tengu_prompt_suggestion_init', {
  46. enabled: true,
  47. source:
  48. 'env' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  49. })
  50. return true
  51. }
  52. // Keep default in sync with Config.tsx (settings toggle visibility)
  53. if (!getFeatureValue_CACHED_MAY_BE_STALE('tengu_chomp_inflection', false)) {
  54. logEvent('tengu_prompt_suggestion_init', {
  55. enabled: false,
  56. source:
  57. 'growthbook' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  58. })
  59. return false
  60. }
  61. // Disable in non-interactive mode (print mode, piped input, SDK)
  62. if (getIsNonInteractiveSession()) {
  63. logEvent('tengu_prompt_suggestion_init', {
  64. enabled: false,
  65. source:
  66. 'non_interactive' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  67. })
  68. return false
  69. }
  70. // Disable for swarm teammates (only leader should show suggestions)
  71. if (isAgentSwarmsEnabled() && isTeammate()) {
  72. logEvent('tengu_prompt_suggestion_init', {
  73. enabled: false,
  74. source:
  75. 'swarm_teammate' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  76. })
  77. return false
  78. }
  79. const enabled = getInitialSettings()?.promptSuggestionEnabled !== false
  80. logEvent('tengu_prompt_suggestion_init', {
  81. enabled,
  82. source:
  83. 'setting' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  84. })
  85. return enabled
  86. }
  87. export function abortPromptSuggestion(): void {
  88. if (currentAbortController) {
  89. currentAbortController.abort()
  90. currentAbortController = null
  91. }
  92. }
  93. /**
  94. * Returns a suppression reason if suggestions should not be generated,
  95. * or null if generation is allowed. Shared by main and pipelined paths.
  96. */
  97. export function getSuggestionSuppressReason(appState: AppState): string | null {
  98. if (!appState.promptSuggestionEnabled) return 'disabled'
  99. if (appState.pendingWorkerRequest || appState.pendingSandboxRequest)
  100. return 'pending_permission'
  101. if (appState.elicitation.queue.length > 0) return 'elicitation_active'
  102. if (appState.toolPermissionContext.mode === 'plan') return 'plan_mode'
  103. if (
  104. process.env.USER_TYPE === 'external' &&
  105. currentLimits.status !== 'allowed'
  106. )
  107. return 'rate_limit'
  108. return null
  109. }
  110. /**
  111. * Shared guard + generation logic used by both CLI TUI and SDK push paths.
  112. * Returns the suggestion with metadata, or null if suppressed/filtered.
  113. */
  114. export async function tryGenerateSuggestion(
  115. abortController: AbortController,
  116. messages: Message[],
  117. getAppState: () => AppState,
  118. cacheSafeParams: CacheSafeParams,
  119. source?: 'cli' | 'sdk',
  120. ): Promise<{
  121. suggestion: string
  122. promptId: PromptVariant
  123. generationRequestId: string | null
  124. } | null> {
  125. if (abortController.signal.aborted) {
  126. logSuggestionSuppressed('aborted', undefined, undefined, source)
  127. return null
  128. }
  129. const assistantTurnCount = count(messages, m => m.type === 'assistant')
  130. if (assistantTurnCount < 2) {
  131. logSuggestionSuppressed('early_conversation', undefined, undefined, source)
  132. return null
  133. }
  134. const lastAssistantMessage = getLastAssistantMessage(messages)
  135. if (lastAssistantMessage?.isApiErrorMessage) {
  136. logSuggestionSuppressed('last_response_error', undefined, undefined, source)
  137. return null
  138. }
  139. const cacheReason = getParentCacheSuppressReason(lastAssistantMessage)
  140. if (cacheReason) {
  141. logSuggestionSuppressed(cacheReason, undefined, undefined, source)
  142. return null
  143. }
  144. const appState = getAppState()
  145. const suppressReason = getSuggestionSuppressReason(appState)
  146. if (suppressReason) {
  147. logSuggestionSuppressed(suppressReason, undefined, undefined, source)
  148. return null
  149. }
  150. const promptId = getPromptVariant()
  151. const { suggestion, generationRequestId } = await generateSuggestion(
  152. abortController,
  153. promptId,
  154. cacheSafeParams,
  155. )
  156. if (abortController.signal.aborted) {
  157. logSuggestionSuppressed('aborted', undefined, undefined, source)
  158. return null
  159. }
  160. if (!suggestion) {
  161. logSuggestionSuppressed('empty', undefined, promptId, source)
  162. return null
  163. }
  164. if (shouldFilterSuggestion(suggestion, promptId, source)) return null
  165. return { suggestion, promptId, generationRequestId }
  166. }
  167. export async function executePromptSuggestion(
  168. context: REPLHookContext,
  169. ): Promise<void> {
  170. if (context.querySource !== 'repl_main_thread') return
  171. currentAbortController = new AbortController()
  172. const abortController = currentAbortController
  173. const cacheSafeParams = createCacheSafeParams(context)
  174. try {
  175. const result = await tryGenerateSuggestion(
  176. abortController,
  177. context.messages,
  178. context.toolUseContext.getAppState,
  179. cacheSafeParams,
  180. 'cli',
  181. )
  182. if (!result) return
  183. context.toolUseContext.setAppState(prev => ({
  184. ...prev,
  185. promptSuggestion: {
  186. text: result.suggestion,
  187. promptId: result.promptId,
  188. shownAt: 0,
  189. acceptedAt: 0,
  190. generationRequestId: result.generationRequestId,
  191. },
  192. }))
  193. if (isSpeculationEnabled() && result.suggestion) {
  194. void startSpeculation(
  195. result.suggestion,
  196. context,
  197. context.toolUseContext.setAppState,
  198. false,
  199. cacheSafeParams,
  200. )
  201. }
  202. } catch (error) {
  203. if (
  204. error instanceof Error &&
  205. (error.name === 'AbortError' || error.name === 'APIUserAbortError')
  206. ) {
  207. logSuggestionSuppressed('aborted', undefined, undefined, 'cli')
  208. return
  209. }
  210. logError(toError(error))
  211. } finally {
  212. if (currentAbortController === abortController) {
  213. currentAbortController = null
  214. }
  215. }
  216. }
  217. const MAX_PARENT_UNCACHED_TOKENS = 10_000
  218. export function getParentCacheSuppressReason(
  219. lastAssistantMessage: ReturnType<typeof getLastAssistantMessage>,
  220. ): string | null {
  221. if (!lastAssistantMessage) return null
  222. const usage = lastAssistantMessage.message.usage
  223. const inputTokens = usage.input_tokens ?? 0
  224. const cacheWriteTokens = usage.cache_creation_input_tokens ?? 0
  225. // The fork re-processes the parent's output (never cached) plus its own prompt.
  226. const outputTokens = usage.output_tokens ?? 0
  227. return (inputTokens as number) + (cacheWriteTokens as number) + (outputTokens as number) >
  228. MAX_PARENT_UNCACHED_TOKENS
  229. ? 'cache_cold'
  230. : null
  231. }
  232. const SUGGESTION_PROMPT = `[SUGGESTION MODE: Suggest what the user might naturally type next into Claude Code.]
  233. FIRST: Look at the user's recent messages and original request.
  234. Your job is to predict what THEY would type - not what you think they should do.
  235. THE TEST: Would they think "I was just about to type that"?
  236. EXAMPLES:
  237. User asked "fix the bug and run tests", bug is fixed → "run the tests"
  238. After code written → "try it out"
  239. Claude offers options → suggest the one the user would likely pick, based on conversation
  240. Claude asks to continue → "yes" or "go ahead"
  241. Task complete, obvious follow-up → "commit this" or "push it"
  242. After error or misunderstanding → silence (let them assess/correct)
  243. Be specific: "run the tests" beats "continue".
  244. NEVER SUGGEST:
  245. - Evaluative ("looks good", "thanks")
  246. - Questions ("what about...?")
  247. - Claude-voice ("Let me...", "I'll...", "Here's...")
  248. - New ideas they didn't ask about
  249. - Multiple sentences
  250. Stay silent if the next step isn't obvious from what the user said.
  251. Format: 2-12 words, match the user's style. Or nothing.
  252. Reply with ONLY the suggestion, no quotes or explanation.`
  253. const SUGGESTION_PROMPTS: Record<PromptVariant, string> = {
  254. user_intent: SUGGESTION_PROMPT,
  255. stated_intent: SUGGESTION_PROMPT,
  256. }
  257. export async function generateSuggestion(
  258. abortController: AbortController,
  259. promptId: PromptVariant,
  260. cacheSafeParams: CacheSafeParams,
  261. ): Promise<{ suggestion: string | null; generationRequestId: string | null }> {
  262. const prompt = SUGGESTION_PROMPTS[promptId]
  263. // Deny tools via callback, NOT by passing tools:[] - that busts cache (0% hit)
  264. const canUseTool = async () => ({
  265. behavior: 'deny' as const,
  266. message: 'No tools needed for suggestion',
  267. decisionReason: { type: 'other' as const, reason: 'suggestion only' },
  268. })
  269. // DO NOT override any API parameter that differs from the parent request.
  270. // The fork piggybacks on the main thread's prompt cache by sending identical
  271. // cache-key params. The billing cache key includes more than just
  272. // system/tools/model/messages/thinking — empirically, setting effortValue
  273. // or maxOutputTokens on the fork (even via output_config or getAppState)
  274. // busts cache. PR #18143 tried effort:'low' and caused a 45x spike in cache
  275. // writes (92.7% → 61% hit rate). The only safe overrides are:
  276. // - abortController (not sent to API)
  277. // - skipTranscript (client-side only)
  278. // - skipCacheWrite (controls cache_control markers, not the cache key)
  279. // - canUseTool (client-side permission check)
  280. const result = await runForkedAgent({
  281. promptMessages: [createUserMessage({ content: prompt })],
  282. cacheSafeParams, // Don't override tools/thinking settings - busts cache
  283. canUseTool,
  284. querySource: 'prompt_suggestion',
  285. forkLabel: 'prompt_suggestion',
  286. overrides: {
  287. abortController,
  288. },
  289. skipTranscript: true,
  290. skipCacheWrite: true,
  291. })
  292. // Check ALL messages - model may loop (try tool → denied → text in next message)
  293. // Also extract the requestId from the first assistant message for RL dataset joins
  294. const firstAssistantMsg = result.messages.find(m => m.type === 'assistant')
  295. const generationRequestId =
  296. firstAssistantMsg?.type === 'assistant'
  297. ? ((firstAssistantMsg.requestId as string) ?? null)
  298. : null
  299. for (const msg of result.messages) {
  300. if (msg.type !== 'assistant') continue
  301. const contentArr = Array.isArray(msg.message.content) ? msg.message.content as Array<{ type: string; text?: string }> : []
  302. const textBlock = contentArr.find(b => b.type === 'text')
  303. if (textBlock?.type === 'text' && typeof textBlock.text === 'string') {
  304. const suggestion = textBlock.text.trim()
  305. if (suggestion) {
  306. return { suggestion, generationRequestId }
  307. }
  308. }
  309. }
  310. return { suggestion: null as (string | null), generationRequestId }
  311. }
  312. export function shouldFilterSuggestion(
  313. suggestion: string | null,
  314. promptId: PromptVariant,
  315. source?: 'cli' | 'sdk',
  316. ): boolean {
  317. if (!suggestion) {
  318. logSuggestionSuppressed('empty', undefined, promptId, source)
  319. return true
  320. }
  321. const lower = suggestion.toLowerCase()
  322. const wordCount = suggestion.trim().split(/\s+/).length
  323. const filters: Array<[string, () => boolean]> = [
  324. ['done', () => lower === 'done'],
  325. [
  326. 'meta_text',
  327. () =>
  328. lower === 'nothing found' ||
  329. lower === 'nothing found.' ||
  330. lower.startsWith('nothing to suggest') ||
  331. lower.startsWith('no suggestion') ||
  332. // Model spells out the prompt's "stay silent" instruction
  333. /\bsilence is\b|\bstay(s|ing)? silent\b/.test(lower) ||
  334. // Model outputs bare "silence" wrapped in punctuation/whitespace
  335. /^\W*silence\W*$/.test(lower),
  336. ],
  337. [
  338. 'meta_wrapped',
  339. // Model wraps meta-reasoning in parens/brackets: (silence — ...), [no suggestion]
  340. () => /^\(.*\)$|^\[.*\]$/.test(suggestion),
  341. ],
  342. [
  343. 'error_message',
  344. () =>
  345. lower.startsWith('api error:') ||
  346. lower.startsWith('prompt is too long') ||
  347. lower.startsWith('request timed out') ||
  348. lower.startsWith('invalid api key') ||
  349. lower.startsWith('image was too large'),
  350. ],
  351. ['prefixed_label', () => /^\w+:\s/.test(suggestion)],
  352. [
  353. 'too_few_words',
  354. () => {
  355. if (wordCount >= 2) return false
  356. // Allow slash commands — these are valid user commands
  357. if (suggestion.startsWith('/')) return false
  358. // Allow common single-word inputs that are valid user commands
  359. const ALLOWED_SINGLE_WORDS = new Set([
  360. // Affirmatives
  361. 'yes',
  362. 'yeah',
  363. 'yep',
  364. 'yea',
  365. 'yup',
  366. 'sure',
  367. 'ok',
  368. 'okay',
  369. // Actions
  370. 'push',
  371. 'commit',
  372. 'deploy',
  373. 'stop',
  374. 'continue',
  375. 'check',
  376. 'exit',
  377. 'quit',
  378. // Negation
  379. 'no',
  380. ])
  381. return !ALLOWED_SINGLE_WORDS.has(lower)
  382. },
  383. ],
  384. ['too_many_words', () => wordCount > 12],
  385. ['too_long', () => suggestion.length >= 100],
  386. ['multiple_sentences', () => /[.!?]\s+[A-Z]/.test(suggestion)],
  387. ['has_formatting', () => /[\n*]|\*\*/.test(suggestion)],
  388. [
  389. 'evaluative',
  390. () =>
  391. /thanks|thank you|looks good|sounds good|that works|that worked|that's all|nice|great|perfect|makes sense|awesome|excellent/.test(
  392. lower,
  393. ),
  394. ],
  395. [
  396. 'claude_voice',
  397. () =>
  398. /^(let me|i'll|i've|i'm|i can|i would|i think|i notice|here's|here is|here are|that's|this is|this will|you can|you should|you could|sure,|of course|certainly)/i.test(
  399. suggestion,
  400. ),
  401. ],
  402. ]
  403. for (const [reason, check] of filters) {
  404. if (check()) {
  405. logSuggestionSuppressed(reason, suggestion, promptId, source)
  406. return true
  407. }
  408. }
  409. return false
  410. }
  411. /**
  412. * Log acceptance/ignoring of a prompt suggestion. Used by the SDK push path
  413. * to track outcomes when the next user message arrives.
  414. */
  415. export function logSuggestionOutcome(
  416. suggestion: string,
  417. userInput: string,
  418. emittedAt: number,
  419. promptId: PromptVariant,
  420. generationRequestId: string | null,
  421. ): void {
  422. const similarity =
  423. Math.round((userInput.length / (suggestion.length || 1)) * 100) / 100
  424. const wasAccepted = userInput === suggestion
  425. const timeMs = Math.max(0, Date.now() - emittedAt)
  426. logEvent('tengu_prompt_suggestion', {
  427. source: 'sdk' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  428. outcome: (wasAccepted
  429. ? 'accepted'
  430. : 'ignored') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  431. prompt_id:
  432. promptId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  433. ...(generationRequestId && {
  434. generationRequestId:
  435. generationRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  436. }),
  437. ...(wasAccepted && {
  438. timeToAcceptMs: timeMs,
  439. }),
  440. ...(!wasAccepted && { timeToIgnoreMs: timeMs }),
  441. similarity,
  442. ...(process.env.USER_TYPE === 'ant' && {
  443. suggestion:
  444. suggestion as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  445. userInput:
  446. userInput as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  447. }),
  448. })
  449. }
  450. export function logSuggestionSuppressed(
  451. reason: string,
  452. suggestion?: string,
  453. promptId?: PromptVariant,
  454. source?: 'cli' | 'sdk',
  455. ): void {
  456. const resolvedPromptId = promptId ?? getPromptVariant()
  457. logEvent('tengu_prompt_suggestion', {
  458. ...(source && {
  459. source:
  460. source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  461. }),
  462. outcome:
  463. 'suppressed' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  464. reason:
  465. reason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  466. prompt_id:
  467. resolvedPromptId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  468. ...(process.env.USER_TYPE === 'ant' &&
  469. suggestion && {
  470. suggestion:
  471. suggestion as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  472. }),
  473. })
  474. }