| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524 |
- import { getIsNonInteractiveSession } from '../../bootstrap/state.js'
- import type { AppState } from '../../state/AppState.js'
- import type { Message } from '../../types/message.js'
- import { isAgentSwarmsEnabled } from '../../utils/agentSwarmsEnabled.js'
- import { count } from '../../utils/array.js'
- import { isEnvDefinedFalsy, isEnvTruthy } from '../../utils/envUtils.js'
- import { toError } from '../../utils/errors.js'
- import {
- type CacheSafeParams,
- createCacheSafeParams,
- runForkedAgent,
- } from '../../utils/forkedAgent.js'
- import type { REPLHookContext } from '../../utils/hooks/postSamplingHooks.js'
- import { logError } from '../../utils/log.js'
- import {
- createUserMessage,
- getLastAssistantMessage,
- } from '../../utils/messages.js'
- import { getInitialSettings } from '../../utils/settings/settings.js'
- import { isTeammate } from '../../utils/teammate.js'
- import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
- import {
- type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- logEvent,
- } from '../analytics/index.js'
- import { currentLimits } from '../claudeAiLimits.js'
- import { isSpeculationEnabled, startSpeculation } from './speculation.js'
- let currentAbortController: AbortController | null = null
- export type PromptVariant = 'user_intent' | 'stated_intent'
- export function getPromptVariant(): PromptVariant {
- return 'user_intent'
- }
- export function shouldEnablePromptSuggestion(): boolean {
- // Env var overrides everything (for testing)
- const envOverride = process.env.CLAUDE_CODE_ENABLE_PROMPT_SUGGESTION
- if (isEnvDefinedFalsy(envOverride)) {
- logEvent('tengu_prompt_suggestion_init', {
- enabled: false,
- source:
- 'env' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- })
- return false
- }
- if (isEnvTruthy(envOverride)) {
- logEvent('tengu_prompt_suggestion_init', {
- enabled: true,
- source:
- 'env' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- })
- return true
- }
- // Keep default in sync with Config.tsx (settings toggle visibility)
- if (!getFeatureValue_CACHED_MAY_BE_STALE('tengu_chomp_inflection', false)) {
- logEvent('tengu_prompt_suggestion_init', {
- enabled: false,
- source:
- 'growthbook' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- })
- return false
- }
- // Disable in non-interactive mode (print mode, piped input, SDK)
- if (getIsNonInteractiveSession()) {
- logEvent('tengu_prompt_suggestion_init', {
- enabled: false,
- source:
- 'non_interactive' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- })
- return false
- }
- // Disable for swarm teammates (only leader should show suggestions)
- if (isAgentSwarmsEnabled() && isTeammate()) {
- logEvent('tengu_prompt_suggestion_init', {
- enabled: false,
- source:
- 'swarm_teammate' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- })
- return false
- }
- const enabled = getInitialSettings()?.promptSuggestionEnabled !== false
- logEvent('tengu_prompt_suggestion_init', {
- enabled,
- source:
- 'setting' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- })
- return enabled
- }
- export function abortPromptSuggestion(): void {
- if (currentAbortController) {
- currentAbortController.abort()
- currentAbortController = null
- }
- }
- /**
- * Returns a suppression reason if suggestions should not be generated,
- * or null if generation is allowed. Shared by main and pipelined paths.
- */
- export function getSuggestionSuppressReason(appState: AppState): string | null {
- if (!appState.promptSuggestionEnabled) return 'disabled'
- if (appState.pendingWorkerRequest || appState.pendingSandboxRequest)
- return 'pending_permission'
- if (appState.elicitation.queue.length > 0) return 'elicitation_active'
- if (appState.toolPermissionContext.mode === 'plan') return 'plan_mode'
- if (
- process.env.USER_TYPE === 'external' &&
- currentLimits.status !== 'allowed'
- )
- return 'rate_limit'
- return null
- }
- /**
- * Shared guard + generation logic used by both CLI TUI and SDK push paths.
- * Returns the suggestion with metadata, or null if suppressed/filtered.
- */
- export async function tryGenerateSuggestion(
- abortController: AbortController,
- messages: Message[],
- getAppState: () => AppState,
- cacheSafeParams: CacheSafeParams,
- source?: 'cli' | 'sdk',
- ): Promise<{
- suggestion: string
- promptId: PromptVariant
- generationRequestId: string | null
- } | null> {
- if (abortController.signal.aborted) {
- logSuggestionSuppressed('aborted', undefined, undefined, source)
- return null
- }
- const assistantTurnCount = count(messages, m => m.type === 'assistant')
- if (assistantTurnCount < 2) {
- logSuggestionSuppressed('early_conversation', undefined, undefined, source)
- return null
- }
- const lastAssistantMessage = getLastAssistantMessage(messages)
- if (lastAssistantMessage?.isApiErrorMessage) {
- logSuggestionSuppressed('last_response_error', undefined, undefined, source)
- return null
- }
- const cacheReason = getParentCacheSuppressReason(lastAssistantMessage)
- if (cacheReason) {
- logSuggestionSuppressed(cacheReason, undefined, undefined, source)
- return null
- }
- const appState = getAppState()
- const suppressReason = getSuggestionSuppressReason(appState)
- if (suppressReason) {
- logSuggestionSuppressed(suppressReason, undefined, undefined, source)
- return null
- }
- const promptId = getPromptVariant()
- const { suggestion, generationRequestId } = await generateSuggestion(
- abortController,
- promptId,
- cacheSafeParams,
- )
- if (abortController.signal.aborted) {
- logSuggestionSuppressed('aborted', undefined, undefined, source)
- return null
- }
- if (!suggestion) {
- logSuggestionSuppressed('empty', undefined, promptId, source)
- return null
- }
- if (shouldFilterSuggestion(suggestion, promptId, source)) return null
- return { suggestion, promptId, generationRequestId }
- }
- export async function executePromptSuggestion(
- context: REPLHookContext,
- ): Promise<void> {
- if (context.querySource !== 'repl_main_thread') return
- currentAbortController = new AbortController()
- const abortController = currentAbortController
- const cacheSafeParams = createCacheSafeParams(context)
- try {
- const result = await tryGenerateSuggestion(
- abortController,
- context.messages,
- context.toolUseContext.getAppState,
- cacheSafeParams,
- 'cli',
- )
- if (!result) return
- context.toolUseContext.setAppState(prev => ({
- ...prev,
- promptSuggestion: {
- text: result.suggestion,
- promptId: result.promptId,
- shownAt: 0,
- acceptedAt: 0,
- generationRequestId: result.generationRequestId,
- },
- }))
- if (isSpeculationEnabled() && result.suggestion) {
- void startSpeculation(
- result.suggestion,
- context,
- context.toolUseContext.setAppState,
- false,
- cacheSafeParams,
- )
- }
- } catch (error) {
- if (
- error instanceof Error &&
- (error.name === 'AbortError' || error.name === 'APIUserAbortError')
- ) {
- logSuggestionSuppressed('aborted', undefined, undefined, 'cli')
- return
- }
- logError(toError(error))
- } finally {
- if (currentAbortController === abortController) {
- currentAbortController = null
- }
- }
- }
- const MAX_PARENT_UNCACHED_TOKENS = 10_000
- export function getParentCacheSuppressReason(
- lastAssistantMessage: ReturnType<typeof getLastAssistantMessage>,
- ): string | null {
- if (!lastAssistantMessage) return null
- const usage = lastAssistantMessage.message.usage
- const inputTokens = usage.input_tokens ?? 0
- const cacheWriteTokens = usage.cache_creation_input_tokens ?? 0
- // The fork re-processes the parent's output (never cached) plus its own prompt.
- const outputTokens = usage.output_tokens ?? 0
- return (inputTokens as number) + (cacheWriteTokens as number) + (outputTokens as number) >
- MAX_PARENT_UNCACHED_TOKENS
- ? 'cache_cold'
- : null
- }
- const SUGGESTION_PROMPT = `[SUGGESTION MODE: Suggest what the user might naturally type next into Claude Code.]
- FIRST: Look at the user's recent messages and original request.
- Your job is to predict what THEY would type - not what you think they should do.
- THE TEST: Would they think "I was just about to type that"?
- EXAMPLES:
- User asked "fix the bug and run tests", bug is fixed → "run the tests"
- After code written → "try it out"
- Claude offers options → suggest the one the user would likely pick, based on conversation
- Claude asks to continue → "yes" or "go ahead"
- Task complete, obvious follow-up → "commit this" or "push it"
- After error or misunderstanding → silence (let them assess/correct)
- Be specific: "run the tests" beats "continue".
- NEVER SUGGEST:
- - Evaluative ("looks good", "thanks")
- - Questions ("what about...?")
- - Claude-voice ("Let me...", "I'll...", "Here's...")
- - New ideas they didn't ask about
- - Multiple sentences
- Stay silent if the next step isn't obvious from what the user said.
- Format: 2-12 words, match the user's style. Or nothing.
- Reply with ONLY the suggestion, no quotes or explanation.`
- const SUGGESTION_PROMPTS: Record<PromptVariant, string> = {
- user_intent: SUGGESTION_PROMPT,
- stated_intent: SUGGESTION_PROMPT,
- }
- export async function generateSuggestion(
- abortController: AbortController,
- promptId: PromptVariant,
- cacheSafeParams: CacheSafeParams,
- ): Promise<{ suggestion: string | null; generationRequestId: string | null }> {
- const prompt = SUGGESTION_PROMPTS[promptId]
- // Deny tools via callback, NOT by passing tools:[] - that busts cache (0% hit)
- const canUseTool = async () => ({
- behavior: 'deny' as const,
- message: 'No tools needed for suggestion',
- decisionReason: { type: 'other' as const, reason: 'suggestion only' },
- })
- // DO NOT override any API parameter that differs from the parent request.
- // The fork piggybacks on the main thread's prompt cache by sending identical
- // cache-key params. The billing cache key includes more than just
- // system/tools/model/messages/thinking — empirically, setting effortValue
- // or maxOutputTokens on the fork (even via output_config or getAppState)
- // busts cache. PR #18143 tried effort:'low' and caused a 45x spike in cache
- // writes (92.7% → 61% hit rate). The only safe overrides are:
- // - abortController (not sent to API)
- // - skipTranscript (client-side only)
- // - skipCacheWrite (controls cache_control markers, not the cache key)
- // - canUseTool (client-side permission check)
- const result = await runForkedAgent({
- promptMessages: [createUserMessage({ content: prompt })],
- cacheSafeParams, // Don't override tools/thinking settings - busts cache
- canUseTool,
- querySource: 'prompt_suggestion',
- forkLabel: 'prompt_suggestion',
- overrides: {
- abortController,
- },
- skipTranscript: true,
- skipCacheWrite: true,
- })
- // Check ALL messages - model may loop (try tool → denied → text in next message)
- // Also extract the requestId from the first assistant message for RL dataset joins
- const firstAssistantMsg = result.messages.find(m => m.type === 'assistant')
- const generationRequestId =
- firstAssistantMsg?.type === 'assistant'
- ? ((firstAssistantMsg.requestId as string) ?? null)
- : null
- for (const msg of result.messages) {
- if (msg.type !== 'assistant') continue
- const contentArr = Array.isArray(msg.message.content) ? msg.message.content as Array<{ type: string; text?: string }> : []
- const textBlock = contentArr.find(b => b.type === 'text')
- if (textBlock?.type === 'text' && typeof textBlock.text === 'string') {
- const suggestion = textBlock.text.trim()
- if (suggestion) {
- return { suggestion, generationRequestId }
- }
- }
- }
- return { suggestion: null as (string | null), generationRequestId }
- }
- export function shouldFilterSuggestion(
- suggestion: string | null,
- promptId: PromptVariant,
- source?: 'cli' | 'sdk',
- ): boolean {
- if (!suggestion) {
- logSuggestionSuppressed('empty', undefined, promptId, source)
- return true
- }
- const lower = suggestion.toLowerCase()
- const wordCount = suggestion.trim().split(/\s+/).length
- const filters: Array<[string, () => boolean]> = [
- ['done', () => lower === 'done'],
- [
- 'meta_text',
- () =>
- lower === 'nothing found' ||
- lower === 'nothing found.' ||
- lower.startsWith('nothing to suggest') ||
- lower.startsWith('no suggestion') ||
- // Model spells out the prompt's "stay silent" instruction
- /\bsilence is\b|\bstay(s|ing)? silent\b/.test(lower) ||
- // Model outputs bare "silence" wrapped in punctuation/whitespace
- /^\W*silence\W*$/.test(lower),
- ],
- [
- 'meta_wrapped',
- // Model wraps meta-reasoning in parens/brackets: (silence — ...), [no suggestion]
- () => /^\(.*\)$|^\[.*\]$/.test(suggestion),
- ],
- [
- 'error_message',
- () =>
- lower.startsWith('api error:') ||
- lower.startsWith('prompt is too long') ||
- lower.startsWith('request timed out') ||
- lower.startsWith('invalid api key') ||
- lower.startsWith('image was too large'),
- ],
- ['prefixed_label', () => /^\w+:\s/.test(suggestion)],
- [
- 'too_few_words',
- () => {
- if (wordCount >= 2) return false
- // Allow slash commands — these are valid user commands
- if (suggestion.startsWith('/')) return false
- // Allow common single-word inputs that are valid user commands
- const ALLOWED_SINGLE_WORDS = new Set([
- // Affirmatives
- 'yes',
- 'yeah',
- 'yep',
- 'yea',
- 'yup',
- 'sure',
- 'ok',
- 'okay',
- // Actions
- 'push',
- 'commit',
- 'deploy',
- 'stop',
- 'continue',
- 'check',
- 'exit',
- 'quit',
- // Negation
- 'no',
- ])
- return !ALLOWED_SINGLE_WORDS.has(lower)
- },
- ],
- ['too_many_words', () => wordCount > 12],
- ['too_long', () => suggestion.length >= 100],
- ['multiple_sentences', () => /[.!?]\s+[A-Z]/.test(suggestion)],
- ['has_formatting', () => /[\n*]|\*\*/.test(suggestion)],
- [
- 'evaluative',
- () =>
- /thanks|thank you|looks good|sounds good|that works|that worked|that's all|nice|great|perfect|makes sense|awesome|excellent/.test(
- lower,
- ),
- ],
- [
- 'claude_voice',
- () =>
- /^(let me|i'll|i've|i'm|i can|i would|i think|i notice|here's|here is|here are|that's|this is|this will|you can|you should|you could|sure,|of course|certainly)/i.test(
- suggestion,
- ),
- ],
- ]
- for (const [reason, check] of filters) {
- if (check()) {
- logSuggestionSuppressed(reason, suggestion, promptId, source)
- return true
- }
- }
- return false
- }
- /**
- * Log acceptance/ignoring of a prompt suggestion. Used by the SDK push path
- * to track outcomes when the next user message arrives.
- */
- export function logSuggestionOutcome(
- suggestion: string,
- userInput: string,
- emittedAt: number,
- promptId: PromptVariant,
- generationRequestId: string | null,
- ): void {
- const similarity =
- Math.round((userInput.length / (suggestion.length || 1)) * 100) / 100
- const wasAccepted = userInput === suggestion
- const timeMs = Math.max(0, Date.now() - emittedAt)
- logEvent('tengu_prompt_suggestion', {
- source: 'sdk' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- outcome: (wasAccepted
- ? 'accepted'
- : 'ignored') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- prompt_id:
- promptId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- ...(generationRequestId && {
- generationRequestId:
- generationRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- }),
- ...(wasAccepted && {
- timeToAcceptMs: timeMs,
- }),
- ...(!wasAccepted && { timeToIgnoreMs: timeMs }),
- similarity,
- ...(process.env.USER_TYPE === 'ant' && {
- suggestion:
- suggestion as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- userInput:
- userInput as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- }),
- })
- }
- export function logSuggestionSuppressed(
- reason: string,
- suggestion?: string,
- promptId?: PromptVariant,
- source?: 'cli' | 'sdk',
- ): void {
- const resolvedPromptId = promptId ?? getPromptVariant()
- logEvent('tengu_prompt_suggestion', {
- ...(source && {
- source:
- source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- }),
- outcome:
- 'suppressed' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- reason:
- reason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- prompt_id:
- resolvedPromptId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- ...(process.env.USER_TYPE === 'ant' &&
- suggestion && {
- suggestion:
- suggestion as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
- }),
- })
- }
|