| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272 |
- import type { BetaContentBlock } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
- import type {
- ContentBlock,
- ContentBlockParam,
- } from '@anthropic-ai/sdk/resources/index.mjs'
- import { roughTokenCountEstimation as countTokens } from '../services/tokenEstimation.js'
- import type {
- AssistantMessage,
- Message,
- UserMessage,
- } from '../types/message.js'
- import { normalizeMessagesForAPI } from './messages.js'
- import { jsonStringify } from './slowOperations.js'
/**
 * Estimated token usage of a conversation, split by where the tokens came from.
 *
 * Every number is a token estimate except the values of `attachments`, which
 * are occurrence counts.
 */
type TokenStats = {
  /** Sum of the estimates of every string content and content block seen. */
  total: number

  /** Tokens of user text that does not contain `local-command-stdout`. */
  humanMessages: number
  /** Tokens of text carried by non-user (assistant) messages. */
  assistantMessages: number
  /** Tokens of user text that contains the `local-command-stdout` marker. */
  localCommandOutputs: number
  /** Tokens of block types that get no bucket of their own (images, thinking, ...). */
  other: number

  /** Tokens spent on `tool_use` blocks, keyed by tool name. */
  toolRequests: Map<string, number>
  /** Tokens spent on `tool_result` blocks, keyed by the tool that was called. */
  toolResults: Map<string, number>
  /** Number of attachment messages, keyed by attachment type. */
  attachments: Map<string, number>
  /**
   * Files whose Read result appears more than once, keyed by file path.
   * `count` is the number of reads; `tokens` estimates the cost of the repeats.
   */
  duplicateFileReads: Map<string, { count: number; tokens: number }>
}
/**
 * Produces a token breakdown for a list of conversation messages.
 *
 * Attachments are counted from the raw `messages`; everything else is measured
 * on the output of `normalizeMessagesForAPI(messages)`. String content is
 * estimated directly, array content is delegated block by block to
 * `processBlock`.
 *
 * @param messages Conversation messages to analyze.
 * @returns The accumulated statistics, including an estimate of tokens spent
 *   on reading the same file more than once.
 */
export function analyzeContext(messages: Message[]): TokenStats {
  // Bookkeeping shared with processBlock across all messages.
  const toolIdsToToolNames = new Map<string, string>()
  const readToolIdToFilePath = new Map<string, string>()
  const fileReadStats = new Map<
    string,
    { count: number; totalTokens: number }
  >()

  const stats: TokenStats = {
    toolRequests: new Map(),
    toolResults: new Map(),
    humanMessages: 0,
    assistantMessages: 0,
    localCommandOutputs: 0,
    other: 0,
    attachments: new Map(),
    duplicateFileReads: new Map(),
    total: 0,
  }

  // Pass 1: count attachment messages per attachment type.
  for (const rawMessage of messages) {
    if (rawMessage.type !== 'attachment') {
      continue
    }
    const attachmentType = rawMessage.attachment.type || 'unknown'
    const seenSoFar = stats.attachments.get(attachmentType) || 0
    stats.attachments.set(attachmentType, seenSoFar + 1)
  }

  // Pass 2: estimate tokens on the normalized messages.
  for (const normalized of normalizeMessagesForAPI(messages)) {
    const { content } = normalized.message

    if (typeof content !== 'string') {
      for (const block of content) {
        processBlock(
          block,
          normalized,
          stats,
          toolIdsToToolNames,
          readToolIdToFilePath,
          fileReadStats,
        )
      }
      continue
    }

    // Plain-string content: kept as a fallback path.
    const tokens = countTokens(content)
    stats.total += tokens
    if (normalized.type === 'user' && content.includes('local-command-stdout')) {
      stats.localCommandOutputs += tokens
    } else if (normalized.type === 'user') {
      stats.humanMessages += tokens
    } else {
      stats.assistantMessages += tokens
    }
  }

  // A file read N > 1 times is charged (N - 1) times its floored average size.
  for (const [path, { count, totalTokens }] of fileReadStats) {
    if (count > 1) {
      const tokensPerRead = Math.floor(totalTokens / count)
      stats.duplicateFileReads.set(path, {
        count,
        tokens: tokensPerRead * (count - 1),
      })
    }
  }

  return stats
}
/**
 * Estimates the tokens of one content block and files them into `stats`.
 *
 * The estimate is taken over the JSON serialization of the whole block and is
 * always added to `stats.total`. It is then added to exactly one bucket,
 * chosen by `block.type`; a block type not listed in the switch is counted in
 * the total only.
 *
 * @param block Content block to measure.
 * @param message Message that contains the block; its `type` decides between
 *   the human and assistant buckets for text.
 * @param stats Accumulator, mutated in place.
 * @param toolIds Tool-use id to tool name; written by `tool_use` blocks and
 *   read by `tool_result` blocks.
 * @param readToolPaths Tool-use id to `file_path` for `Read` tool requests.
 * @param fileReads Per-file-path read count and token total for `Read` results.
 */
function processBlock(
  block: ContentBlockParam | ContentBlock | BetaContentBlock,
  message: UserMessage | AssistantMessage,
  stats: TokenStats,
  toolIds: Map<string, string>,
  readToolPaths: Map<string, string>,
  fileReads: Map<string, { count: number; totalTokens: number }>,
): void {
  const tokens = countTokens(jsonStringify(block))
  stats.total += tokens

  switch (block.type) {
    case 'text': {
      // User text carrying the stdout marker is local command output.
      const isLocalCommandOutput =
        message.type === 'user' &&
        'text' in block &&
        block.text.includes('local-command-stdout')

      if (isLocalCommandOutput) {
        stats.localCommandOutputs += tokens
      } else if (message.type === 'user') {
        stats.humanMessages += tokens
      } else {
        stats.assistantMessages += tokens
      }
      break
    }

    case 'tool_use': {
      if (!('name' in block && 'id' in block)) {
        break
      }
      const requestedTool = block.name || 'unknown'
      increment(stats.toolRequests, requestedTool, tokens)
      // Remember the name so the matching tool_result can be attributed.
      toolIds.set(block.id, requestedTool)

      // Only Read requests contribute a file path.
      if (requestedTool !== 'Read' || !('input' in block)) {
        break
      }
      const { input } = block
      if (input && typeof input === 'object' && 'file_path' in input) {
        readToolPaths.set(
          block.id,
          String((input as Record<string, unknown>).file_path),
        )
      }
      break
    }

    case 'tool_result': {
      if (!('tool_use_id' in block)) {
        break
      }
      const originatingTool = toolIds.get(block.tool_use_id) || 'unknown'
      increment(stats.toolResults, originatingTool, tokens)

      // Accumulate per-file totals for results of Read requests.
      if (originatingTool !== 'Read') {
        break
      }
      const filePath = readToolPaths.get(block.tool_use_id)
      if (!filePath) {
        break
      }
      const previous = fileReads.get(filePath)
      fileReads.set(filePath, {
        count: (previous ? previous.count : 0) + 1,
        totalTokens: (previous ? previous.totalTokens : 0) + tokens,
      })
      break
    }

    // Block types without a dedicated bucket all land in `other`.
    case 'image':
    case 'server_tool_use':
    case 'web_search_tool_result':
    case 'search_result':
    case 'document':
    case 'thinking':
    case 'redacted_thinking':
    case 'code_execution_tool_result':
    case 'mcp_tool_use':
    case 'mcp_tool_result':
    case 'container_upload':
    case 'web_fetch_tool_result':
    case 'bash_code_execution_tool_result':
    case 'text_editor_code_execution_tool_result':
    case 'tool_search_tool_result':
    case 'compaction':
      stats.other += tokens
      break
  }
}
/**
 * Adds `value` to the number stored under `key`, starting from 0 when the key
 * is absent (or its current value is falsy).
 *
 * @param map Counter map, mutated in place.
 * @param key Entry to update.
 * @param value Amount to add.
 */
function increment(map: Map<string, number>, key: string, value: number): void {
  const previous = map.get(key)
  const base = previous ? previous : 0
  map.set(key, base + value)
}
/**
 * Flattens a `TokenStats` into a flat name-to-number record of metrics.
 *
 * Always emitted: the aggregate token counts, one `attachment_<type>_count`
 * per attachment type, one `tool_request_<tool>_tokens` /
 * `tool_result_<tool>_tokens` per tool, and the duplicate-read totals.
 * Percentages (rounded to whole numbers, relative to `stats.total`) are only
 * emitted when `stats.total` is greater than zero.
 *
 * @param stats Statistics to flatten; not modified.
 * @returns A new record of metric name to value.
 */
export function tokenStatsToStatsigMetrics(
  stats: TokenStats,
): Record<string, number> {
  const { total } = stats

  // Local helpers: sum an iterable, and express a part as a rounded percent.
  const sumOf = (values: Iterable<number>): number => {
    let running = 0
    for (const value of values) {
      running += value
    }
    return running
  }
  const percentOfTotal = (part: number): number =>
    Math.round((part / total) * 100)

  const metrics: Record<string, number> = {
    total_tokens: total,
    human_message_tokens: stats.humanMessages,
    assistant_message_tokens: stats.assistantMessages,
    local_command_output_tokens: stats.localCommandOutputs,
    other_tokens: stats.other,
  }

  for (const [type, count] of stats.attachments) {
    metrics[`attachment_${type}_count`] = count
  }
  for (const [tool, tokens] of stats.toolRequests) {
    metrics[`tool_request_${tool}_tokens`] = tokens
  }
  for (const [tool, tokens] of stats.toolResults) {
    metrics[`tool_result_${tool}_tokens`] = tokens
  }

  let duplicateTotal = 0
  for (const { tokens } of stats.duplicateFileReads.values()) {
    duplicateTotal += tokens
  }
  metrics.duplicate_read_tokens = duplicateTotal
  metrics.duplicate_read_file_count = stats.duplicateFileReads.size

  // Percentages are meaningless without a positive total, so skip them.
  if (total > 0) {
    metrics.human_message_percent = percentOfTotal(stats.humanMessages)
    metrics.assistant_message_percent = percentOfTotal(stats.assistantMessages)
    metrics.local_command_output_percent = percentOfTotal(
      stats.localCommandOutputs,
    )
    metrics.duplicate_read_percent = percentOfTotal(duplicateTotal)

    const toolRequestTotal = sumOf(stats.toolRequests.values())
    const toolResultTotal = sumOf(stats.toolResults.values())
    metrics.tool_request_percent = percentOfTotal(toolRequestTotal)
    metrics.tool_result_percent = percentOfTotal(toolResultTotal)

    // Per-tool share of the total, requests first, then results.
    for (const [tool, tokens] of stats.toolRequests) {
      metrics[`tool_request_${tool}_percent`] = percentOfTotal(tokens)
    }
    for (const [tool, tokens] of stats.toolResults) {
      metrics[`tool_result_${tool}_percent`] = percentOfTotal(tokens)
    }
  }

  return metrics
}
|