// sideQuery.ts (8.0 KB)
  1. import type Anthropic from '@anthropic-ai/sdk'
  2. import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages.js'
  3. import {
  4. getLastApiCompletionTimestamp,
  5. setLastApiCompletionTimestamp,
  6. } from '../bootstrap/state.js'
  7. import { STRUCTURED_OUTPUTS_BETA_HEADER } from '../constants/betas.js'
  8. import type { QuerySource } from '../constants/querySource.js'
  9. import {
  10. getAttributionHeader,
  11. getCLISyspromptPrefix,
  12. } from '../constants/system.js'
  13. import { logEvent } from '../services/analytics/index.js'
  14. import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from '../services/analytics/metadata.js'
  15. import { getAPIMetadata } from '../services/api/claude.js'
  16. import { getAnthropicClient } from '../services/api/client.js'
  17. import { getModelBetas, modelSupportsStructuredOutputs } from './betas.js'
  18. import { computeFingerprint } from './fingerprint.js'
  19. import { normalizeModelStringForAPI } from './model/model.js'
  // Stable (non-beta) SDK shapes used for request inputs in this module.
  type Tool = Anthropic.Tool
  type ToolChoice = Anthropic.ToolChoice
  type MessageParam = Anthropic.MessageParam
  type TextBlockParam = Anthropic.TextBlockParam

  // Shapes taken from the SDK's `Beta.Messages` namespace: the response type
  // returned by `sideQuery`, plus the structured-output and thinking parameters.
  type BetaMessage = Anthropic.Beta.Messages.BetaMessage
  type BetaJSONOutputFormat = Anthropic.Beta.Messages.BetaJSONOutputFormat
  type BetaThinkingConfigParam = Anthropic.Beta.Messages.BetaThinkingConfigParam
  /**
   * Options accepted by `sideQuery`.
   *
   * Required fields come first; everything after them is optional and is only
   * forwarded to the API when the caller supplies it (unless a default is noted).
   */
  export type SideQueryOptions = {
    // ---- Required -----------------------------------------------------------

    /** Model to query. Normalized with `normalizeModelStringForAPI` before it is sent. */
    model: string
    /** Conversation to send. Content blocks may carry `cache_control`. */
    messages: MessageParam[]
    /**
     * Label recorded as `querySource` on the `tengu_api_success` analytics event
     * (used for COGS joining against reporting.sampling_calls).
     */
    querySource: QuerySource

    // ---- Prompt shaping -----------------------------------------------------

    /**
     * Caller's system prompt, either a plain string or pre-built text blocks.
     *
     * It is appended after the attribution header and (unless skipped) the CLI
     * system prompt prefix. The attribution header always lives in its own
     * text block so that server-side parsing can extract the cc_entrypoint
     * value without picking up any system prompt content.
     */
    system?: string | TextBlockParam[]
    /**
     * When true, the CLI system prompt prefix is left out; the attribution
     * header (needed for OAuth) is still included. Intended for internal
     * classifiers that bring their own prompt.
     */
    skipSystemPromptPrefix?: boolean

    // ---- Tools and structured output ---------------------------------------

    /** Tools to expose: either standard `Tool[]` or `BetaToolUnion[]` for custom tool types. */
    tools?: Tool[] | BetaToolUnion[]
    /** Tool selection policy, e.g. `{ type: 'tool', name: 'x' }` to force a specific tool. */
    tool_choice?: ToolChoice
    /** JSON output format for structured responses; sent as `output_config.format`. */
    output_format?: BetaJSONOutputFormat

    // ---- Sampling -----------------------------------------------------------

    /** Upper bound on generated tokens. Defaults to 1024. */
    max_tokens?: number
    /** Sampling temperature; omitted from the request when not provided. */
    temperature?: number
    /**
     * A number enables thinking with that budget (capped at `max_tokens - 1`);
     * `false` sends `{ type: 'disabled' }`; leaving it unset sends no thinking
     * configuration at all.
     */
    thinking?: number | false
    /** Strings that end generation as soon as any one of them is emitted. */
    stop_sequences?: string[]

    // ---- Transport ----------------------------------------------------------

    /** Retry count handed to the API client. Defaults to 2. */
    maxRetries?: number
    /** Abort signal passed through to the request. */
    signal?: AbortSignal
  }
  /**
   * Returns the text used for fingerprinting: the text of the first `user`
   * message in `messages`.
   *
   * - String content is returned as-is.
   * - For block content, the first `text` block's text is returned.
   * - If there is no user message, or the first user message has no text
   *   block, the result is the empty string. Later user messages are never
   *   consulted.
   */
  function extractFirstUserMessageText(messages: MessageParam[]): string {
    for (const message of messages) {
      if (message.role !== 'user') continue

      const { content } = message
      if (typeof content === 'string') return content

      // Block-array content: only the first text block counts.
      for (const block of content) {
        if (block.type === 'text') return block.text
      }
      return ''
    }
    return ''
  }
  /**
   * Issues a single non-streaming "side query": an API call made outside the
   * main conversation loop.
   *
   * Prefer this over calling client.beta.messages.create() directly, so that
   * OAuth token validation gets the fingerprint attribution header it expects.
   *
   * What the wrapper takes care of:
   * - computing the fingerprint from the first user message and the build version
   * - injecting the attribution header as its own system text block
   * - prepending the CLI system prompt prefix (unless `skipSystemPromptPrefix`)
   * - selecting the model's betas, plus structured outputs when applicable
   * - attaching API metadata
   * - normalizing the model string for the API (strips the [1m] suffix)
   * - logging a `tengu_api_success` event and recording the completion time
   *
   * Nothing is caught here: if the client or the request rejects, the error
   * propagates to the caller, and neither the analytics event nor the
   * completion timestamp is written.
   *
   * @param opts - Request options; see `SideQueryOptions`.
   * @returns The message returned by the API.
   *
   * @example
   * // Permission explainer
   * await sideQuery({ querySource: 'permission_explainer', model, system: SYSTEM_PROMPT, messages, tools, tool_choice })
   *
   * @example
   * // Session search
   * await sideQuery({ querySource: 'session_search', model, system: SEARCH_PROMPT, messages })
   *
   * @example
   * // Model validation
   * await sideQuery({ querySource: 'model_validation', model, max_tokens: 1, messages: [{ role: 'user', content: 'Hi' }] })
   */
  export async function sideQuery(opts: SideQueryOptions): Promise<BetaMessage> {
    const {
      model,
      messages,
      system,
      tools,
      tool_choice: toolChoice,
      output_format: outputFormat,
      max_tokens: maxTokens = 1024,
      maxRetries = 2,
      signal,
      skipSystemPromptPrefix,
      temperature,
      thinking,
      stop_sequences: stopSequences,
      querySource,
    } = opts

    const client = await getAnthropicClient({
      maxRetries,
      model,
      source: 'side_query',
    })

    // Start from the model's own betas (copied so the push below cannot touch
    // the array returned by getModelBetas), then opt in to structured outputs
    // only when an output format was requested, the model supports it, and the
    // header is not already present.
    const betas = [...getModelBetas(model)]
    if (outputFormat && modelSupportsStructuredOutputs(model)) {
      if (!betas.includes(STRUCTURED_OUTPUTS_BETA_HEADER)) {
        betas.push(STRUCTURED_OUTPUTS_BETA_HEADER)
      }
    }

    // OAuth attribution: the fingerprint is derived from the first user
    // message's text together with the build version.
    const attributionHeader = getAttributionHeader(
      computeFingerprint(extractFirstUserMessageText(messages), MACRO.VERSION),
    )

    // The system prompt is always sent as an array of blocks so the attribution
    // header stays isolated in its own block (prevents server-side parsing from
    // including system content in cc_entrypoint).
    const systemBlocks: TextBlockParam[] = []
    if (attributionHeader) {
      systemBlocks.push({ type: 'text', text: attributionHeader })
    }
    if (!skipSystemPromptPrefix) {
      // Internal classifiers that bring their own prompt skip this prefix.
      systemBlocks.push({
        type: 'text',
        text: getCLISyspromptPrefix({
          isNonInteractive: false,
          hasAppendSystemPrompt: false,
        }),
      })
    }
    if (Array.isArray(system)) {
      systemBlocks.push(...system)
    } else if (system) {
      systemBlocks.push({ type: 'text', text: system })
    }

    // `false` explicitly disables thinking, a number enables it with a budget
    // capped at one token below max_tokens, and an unset value sends nothing.
    const thinkingConfig: BetaThinkingConfigParam | undefined =
      thinking === false
        ? { type: 'disabled' }
        : thinking === undefined
          ? undefined
          : {
              type: 'enabled',
              budget_tokens: Math.min(thinking, maxTokens - 1),
            }

    const normalizedModel = normalizeModelStringForAPI(model)
    const requestStartedAt = Date.now()
    // biome-ignore lint/plugin: this IS the wrapper that handles OAuth attribution
    const message = await client.beta.messages.create(
      {
        model: normalizedModel,
        max_tokens: maxTokens,
        system: systemBlocks,
        messages,
        ...(tools ? { tools } : {}),
        ...(toolChoice ? { tool_choice: toolChoice } : {}),
        ...(outputFormat ? { output_config: { format: outputFormat } } : {}),
        ...(temperature !== undefined ? { temperature } : {}),
        ...(stopSequences ? { stop_sequences: stopSequences } : {}),
        ...(thinkingConfig ? { thinking: thinkingConfig } : {}),
        ...(betas.length > 0 ? { betas } : {}),
        metadata: getAPIMetadata(),
      },
      { signal },
    )

    // `_request_id` is read through a cast; a null value is reported as undefined.
    const requestId =
      (message as { _request_id?: string | null })._request_id ?? undefined
    const completedAt = Date.now()
    const previousCompletion = getLastApiCompletionTimestamp()

    logEvent('tengu_api_success', {
      requestId:
        requestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      querySource:
        querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      model:
        normalizedModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      inputTokens: message.usage.input_tokens,
      outputTokens: message.usage.output_tokens,
      cachedInputTokens: message.usage.cache_read_input_tokens ?? 0,
      uncachedInputTokens: message.usage.cache_creation_input_tokens ?? 0,
      // Measured from just before create() is called, so it spans any retries.
      durationMsIncludingRetries: completedAt - requestStartedAt,
      timeSinceLastApiCallMs:
        previousCompletion === null
          ? undefined
          : completedAt - previousCompletion,
    })
    setLastApiCompletionTimestamp(completedAt)

    return message
  }