| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222 |
- import type Anthropic from '@anthropic-ai/sdk'
- import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages.js'
- import {
- getLastApiCompletionTimestamp,
- setLastApiCompletionTimestamp,
- } from '../bootstrap/state.js'
- import { STRUCTURED_OUTPUTS_BETA_HEADER } from '../constants/betas.js'
- import type { QuerySource } from '../constants/querySource.js'
- import {
- getAttributionHeader,
- getCLISyspromptPrefix,
- } from '../constants/system.js'
- import { logEvent } from '../services/analytics/index.js'
- import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from '../services/analytics/metadata.js'
- import { getAPIMetadata } from '../services/api/claude.js'
- import { getAnthropicClient } from '../services/api/client.js'
- import { getModelBetas, modelSupportsStructuredOutputs } from './betas.js'
- import { computeFingerprint } from './fingerprint.js'
- import { normalizeModelStringForAPI } from './model/model.js'
// Local shorthands for the SDK types this module uses.

// Stable Messages API shapes.
type MessageParam = Anthropic.MessageParam
type TextBlockParam = Anthropic.TextBlockParam
type Tool = Anthropic.Tool
type ToolChoice = Anthropic.ToolChoice

// Beta Messages API shapes.
type BetaMessage = Anthropic.Beta.Messages.BetaMessage
type BetaJSONOutputFormat = Anthropic.Beta.Messages.BetaJSONOutputFormat
type BetaThinkingConfigParam = Anthropic.Beta.Messages.BetaThinkingConfigParam
/**
 * Options accepted by `sideQuery`.
 *
 * Only `model`, `messages` and `querySource` are required; every other field
 * is forwarded to the request only when the caller supplies it.
 */
export type SideQueryOptions = {
  /** Model identifier to query (normalized before it is sent to the API). */
  model: string
  /**
   * Caller-supplied system prompt, either a plain string or pre-built text blocks.
   *
   * It is appended after the attribution header and (unless skipped) the CLI
   * system prompt prefix. The attribution header always lives in its own
   * TextBlockParam so that server-side parsing can extract the cc_entrypoint
   * value without picking up any system prompt content.
   */
  system?: string | TextBlockParam[]
  /** Conversation messages to send (content blocks may carry cache_control). */
  messages: MessageParam[]
  /** Tools to expose: either standard Tool[] or BetaToolUnion[] for custom tool types. */
  tools?: Tool[] | BetaToolUnion[]
  /** Tool choice; pass { type: 'tool', name: 'x' } to force output through tool `x`. */
  tool_choice?: ToolChoice
  /** JSON output format for structured responses. */
  output_format?: BetaJSONOutputFormat
  /** Maximum tokens to generate. Defaults to 1024. */
  max_tokens?: number
  /** Maximum number of retries handed to the API client. Defaults to 2. */
  maxRetries?: number
  /** Signal used to abort the in-flight request. */
  signal?: AbortSignal
  /**
   * When true, the CLI system prompt prefix is omitted (the attribution header
   * for OAuth is still sent). Intended for internal classifiers that bring
   * their own prompt.
   */
  skipSystemPromptPrefix?: boolean
  /** Sampling temperature override. */
  temperature?: number
  /**
   * A number enables thinking with that token budget (capped at
   * `max_tokens - 1`); `false` sends `{ type: 'disabled' }`; leaving it
   * unset sends no thinking configuration at all.
   */
  thinking?: number | false
  /** Strings that end generation as soon as any one of them is emitted. */
  stop_sequences?: string[]
  /** Attributes this call in tengu_api_success for COGS joining against reporting.sampling_calls. */
  querySource: QuerySource
}
/**
 * Returns the text used as fingerprint input: the text of the first message
 * whose role is 'user'.
 *
 * - String content is returned verbatim.
 * - For block content, the text of the first 'text' block is returned.
 * - If there is no user message, or its blocks contain no text, returns ''.
 *
 * Later user messages are never consulted, even when the first one has no text.
 */
function extractFirstUserMessageText(messages: MessageParam[]): string {
  for (const message of messages) {
    if (message.role !== 'user') continue

    const { content } = message
    if (typeof content === 'string') return content

    // Block content: the first text block wins.
    for (const block of content) {
      if (block.type === 'text') return block.text
    }
    return ''
  }
  return ''
}
/**
 * Lightweight API wrapper for one-off "side queries" issued outside the main
 * conversation loop.
 *
 * Call this rather than client.beta.messages.create() directly so the request
 * carries what OAuth token validation expects (fingerprint attribution header).
 *
 * Steps performed, in order:
 * 1. Obtain a client via getAnthropicClient (source 'side_query').
 * 2. Collect the model's betas, adding the structured-outputs beta when
 *    `output_format` is given and the model supports it.
 * 3. Compute a fingerprint from the first user message text and MACRO.VERSION,
 *    and derive the attribution header from it.
 * 4. Assemble the system blocks: attribution header (own block), CLI system
 *    prompt prefix (unless `skipSystemPromptPrefix`), then the caller's system.
 * 5. Normalize the model string for the API (strips [1m] suffix) and send the
 *    request together with the API metadata.
 * 6. Log `tengu_api_success` and record the completion timestamp.
 *
 * Errors from the client or the request are not caught here; in that case
 * nothing is logged and the completion timestamp is left untouched.
 *
 * @param opts - See SideQueryOptions. `max_tokens` defaults to 1024 and
 *   `maxRetries` to 2.
 * @returns The BetaMessage returned by the API.
 *
 * @example
 * // Permission explainer
 * await sideQuery({ querySource: 'permission_explainer', model, system: SYSTEM_PROMPT, messages, tools, tool_choice })
 *
 * @example
 * // Session search
 * await sideQuery({ querySource: 'session_search', model, system: SEARCH_PROMPT, messages })
 *
 * @example
 * // Model validation
 * await sideQuery({ querySource: 'model_validation', model, max_tokens: 1, messages: [{ role: 'user', content: 'Hi' }] })
 */
export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
  const {
    model,
    system,
    messages,
    tools,
    tool_choice: toolChoice,
    output_format: outputFormat,
    max_tokens: maxTokens = 1024,
    maxRetries = 2,
    signal,
    skipSystemPromptPrefix,
    temperature,
    thinking,
    stop_sequences: stopSequences,
  } = opts

  const client = await getAnthropicClient({
    maxRetries,
    model,
    source: 'side_query',
  })

  // Copy the model's betas so the extra header below never mutates the source list.
  const betas = [...getModelBetas(model)]
  if (outputFormat && modelSupportsStructuredOutputs(model)) {
    // Structured outputs need their beta header; avoid adding it twice.
    if (!betas.includes(STRUCTURED_OUTPUTS_BETA_HEADER)) {
      betas.push(STRUCTURED_OUTPUTS_BETA_HEADER)
    }
  }

  // OAuth attribution: fingerprint of the first user message text + CLI version.
  const attributionHeader = getAttributionHeader(
    computeFingerprint(extractFirstUserMessageText(messages), MACRO.VERSION),
  )

  // The attribution header gets a block of its own so server-side parsing of
  // cc_entrypoint cannot swallow system prompt content.
  const headerBlock: TextBlockParam | null = attributionHeader
    ? { type: 'text', text: attributionHeader }
    : null

  // Internal classifiers that ship their own prompt opt out of the CLI prefix.
  const prefixBlocks: TextBlockParam[] = []
  if (!skipSystemPromptPrefix) {
    prefixBlocks.push({
      type: 'text',
      text: getCLISyspromptPrefix({
        isNonInteractive: false,
        hasAppendSystemPrompt: false,
      }),
    })
  }

  // Caller's system prompt: blocks pass through as-is, a non-empty string is wrapped.
  let callerBlocks: TextBlockParam[] = []
  if (Array.isArray(system)) {
    callerBlocks = system
  } else if (system) {
    callerBlocks = [{ type: 'text', text: system }]
  }

  const systemBlocks: TextBlockParam[] = [
    headerBlock,
    ...prefixBlocks,
    ...callerBlocks,
  ].filter((block): block is TextBlockParam => block !== null)

  // `false` explicitly disables thinking; a number enables it with a budget
  // capped at one token below max_tokens; undefined sends nothing.
  // NOTE(review): with max_tokens of 1 the cap evaluates to 0 — confirm callers
  // never combine a thinking budget with such a small max_tokens.
  const thinkingConfig: BetaThinkingConfigParam | undefined =
    thinking === false
      ? { type: 'disabled' }
      : thinking !== undefined
        ? {
            type: 'enabled',
            budget_tokens: Math.min(thinking, maxTokens - 1),
          }
        : undefined

  const normalizedModel = normalizeModelStringForAPI(model)

  const startedAt = Date.now()
  // biome-ignore lint/plugin: this IS the wrapper that handles OAuth attribution
  const response = await client.beta.messages.create(
    {
      model: normalizedModel,
      max_tokens: maxTokens,
      system: systemBlocks,
      messages,
      // Optional fields are included only when the caller provided them.
      ...(tools && { tools }),
      ...(toolChoice && { tool_choice: toolChoice }),
      ...(outputFormat && { output_config: { format: outputFormat } }),
      ...(temperature !== undefined && { temperature }),
      ...(stopSequences && { stop_sequences: stopSequences }),
      ...(thinkingConfig && { thinking: thinkingConfig }),
      ...(betas.length > 0 && { betas }),
      metadata: getAPIMetadata(),
    },
    { signal },
  )

  // _request_id is read through a cast; a null/absent id is reported as undefined.
  const requestId =
    (response as { _request_id?: string | null })._request_id ?? undefined

  const finishedAt = Date.now()
  const previousCompletion = getLastApiCompletionTimestamp()
  const { usage } = response

  logEvent('tengu_api_success', {
    requestId:
      requestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    querySource:
      opts.querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    model:
      normalizedModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    inputTokens: usage.input_tokens,
    outputTokens: usage.output_tokens,
    cachedInputTokens: usage.cache_read_input_tokens ?? 0,
    uncachedInputTokens: usage.cache_creation_input_tokens ?? 0,
    // Measured around the whole create() call, so client-side retries are included.
    durationMsIncludingRetries: finishedAt - startedAt,
    timeSinceLastApiCallMs:
      previousCompletion === null ? undefined : finishedAt - previousCompletion,
  })
  setLastApiCompletionTimestamp(finishedAt)

  return response
}
|