contextAnalysis.ts 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. import type { BetaContentBlock } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
  2. import type {
  3. ContentBlock,
  4. ContentBlockParam,
  5. } from '@anthropic-ai/sdk/resources/index.mjs'
  6. import { roughTokenCountEstimation as countTokens } from '../services/tokenEstimation.js'
  7. import type {
  8. AssistantMessage,
  9. Message,
  10. UserMessage,
  11. } from '../types/message.js'
  12. import { normalizeMessagesForAPI } from './messages.js'
  13. import { jsonStringify } from './slowOperations.js'
  /**
   * Token accounting for one conversation, broken down by where the tokens
   * came from. Produced by `analyzeContext` and flattened for reporting by
   * `tokenStatsToStatsigMetrics`.
   */
  type TokenStats = {
    /** Sum of every token count recorded, regardless of category. */
    total: number
    /**
     * Tokens of text content in `user` messages, excluding text that contains
     * the `local-command-stdout` marker.
     */
    humanMessages: number
    /** Tokens of text content in messages whose type is not `user`. */
    assistantMessages: number
    /** Tokens of `user` text that contains the `local-command-stdout` marker. */
    localCommandOutputs: number
    /**
     * Tokens of the block types that are recognised but not broken out
     * individually (images, thinking, server-side tool blocks, ...).
     */
    other: number
    /** Tokens spent on `tool_use` blocks, keyed by tool name. */
    toolRequests: Map<string, number>
    /**
     * Tokens spent on `tool_result` blocks, keyed by the name of the tool whose
     * `tool_use` id they answer (`'unknown'` when the id was never seen).
     */
    toolResults: Map<string, number>
    /** Number of attachment messages (a count, not tokens), keyed by attachment type. */
    attachments: Map<string, number>
    /**
     * Files read more than once via the `Read` tool, keyed by path. `count` is
     * the number of reads; `tokens` is the estimated cost of the repeat reads.
     */
    duplicateFileReads: Map<string, { count: number; tokens: number }>
  }
  /**
   * Builds a token breakdown for a conversation.
   *
   * Attachment messages are tallied by type from the raw `messages` list.
   * Token accounting then runs over the output of `normalizeMessagesForAPI`:
   * string content is counted directly, while block content is handed to
   * `processBlock` one block at a time. Finally, any file that was read more
   * than once is recorded in `duplicateFileReads`.
   *
   * @param messages Conversation messages to analyse.
   * @returns The populated statistics object.
   */
  export function analyzeContext(messages: Message[]): TokenStats {
    const result: TokenStats = {
      toolRequests: new Map(),
      toolResults: new Map(),
      humanMessages: 0,
      assistantMessages: 0,
      localCommandOutputs: 0,
      other: 0,
      attachments: new Map(),
      duplicateFileReads: new Map(),
      total: 0,
    }

    // Cross-block bookkeeping shared with processBlock.
    const toolNameById = new Map<string, string>()
    const readPathByToolId = new Map<string, string>()
    const readsByPath = new Map<string, { count: number; totalTokens: number }>()

    // Pass 1: count attachments per type on the un-normalized messages.
    for (const raw of messages) {
      if (raw.type !== 'attachment') {
        continue
      }
      const kind = raw.attachment.type || 'unknown'
      result.attachments.set(kind, (result.attachments.get(kind) || 0) + 1)
    }

    // Pass 2: token accounting over the API-normalized messages.
    for (const msg of normalizeMessagesForAPI(messages)) {
      const { content } = msg.message

      // Plain-string content: kept as a fallback path.
      if (typeof content === 'string') {
        const tokens = countTokens(content)
        result.total += tokens
        const fromUser = msg.type === 'user'
        if (fromUser && content.includes('local-command-stdout')) {
          result.localCommandOutputs += tokens
        } else if (fromUser) {
          result.humanMessages += tokens
        } else {
          result.assistantMessages += tokens
        }
        continue
      }

      for (const block of content) {
        processBlock(
          block,
          msg,
          result,
          toolNameById,
          readPathByToolId,
          readsByPath,
        )
      }
    }

    // Pass 3: every read after the first is charged at the average
    // (floored) per-read cost for that path.
    for (const [path, { count, totalTokens }] of readsByPath) {
      if (count > 1) {
        const perRead = Math.floor(totalTokens / count)
        result.duplicateFileReads.set(path, {
          count,
          tokens: perRead * (count - 1),
        })
      }
    }

    return result
  }
  /**
   * Attributes the token cost of a single content block to the matching
   * bucket of `stats`.
   *
   * The cost is `countTokens(jsonStringify(block))` and is always added to
   * `stats.total`. Block types with no `case` below contribute to the total
   * only.
   *
   * @param block Content block to account for.
   * @param message Message that owns the block; its `type` decides whether
   *   text counts as human or assistant tokens.
   * @param stats Accumulator, mutated in place.
   * @param toolIds Maps `tool_use` id to tool name. Written on `tool_use`,
   *   read on `tool_result`.
   * @param readToolPaths Maps the `tool_use` id of a `Read` call to the file
   *   path it requested.
   * @param fileReads Per-path read count and token total, updated when a
   *   `Read` result is seen.
   */
  function processBlock(
    block: ContentBlockParam | ContentBlock | BetaContentBlock,
    message: UserMessage | AssistantMessage,
    stats: TokenStats,
    toolIds: Map<string, string>,
    readToolPaths: Map<string, string>,
    fileReads: Map<string, { count: number; totalTokens: number }>,
  ): void {
    const tokens = countTokens(jsonStringify(block))
    stats.total += tokens
    switch (block.type) {
      case 'text':
        // User text carrying the local-command marker is tracked separately
        // from ordinary human input.
        if (
          message.type === 'user' &&
          'text' in block &&
          block.text.includes('local-command-stdout')
        ) {
          stats.localCommandOutputs += tokens
        } else {
          stats[
            message.type === 'user' ? 'humanMessages' : 'assistantMessages'
          ] += tokens
        }
        break
      case 'tool_use': {
        if ('name' in block && 'id' in block) {
          const toolName = block.name || 'unknown'
          increment(stats.toolRequests, toolName, tokens)
          // Remember the name so the later tool_result can be attributed.
          toolIds.set(block.id, toolName)
          // Track Read tool file paths
          if (
            toolName === 'Read' &&
            'input' in block &&
            block.input &&
            typeof block.input === 'object' &&
            'file_path' in block.input
          ) {
            const filePath = (block.input as Record<string, unknown>).file_path
            // Only a string can be a real path. Coercing other values would
            // produce keys like "undefined" or "[object Object]" and let
            // malformed inputs show up as duplicate reads of a bogus file.
            if (typeof filePath === 'string') {
              readToolPaths.set(block.id, filePath)
            }
          }
        }
        break
      }
      case 'tool_result': {
        if ('tool_use_id' in block) {
          const toolName = toolIds.get(block.tool_use_id) || 'unknown'
          increment(stats.toolResults, toolName, tokens)
          // Track file read tokens
          if (toolName === 'Read') {
            const path = readToolPaths.get(block.tool_use_id)
            if (path) {
              const current = fileReads.get(path) || { count: 0, totalTokens: 0 }
              fileReads.set(path, {
                count: current.count + 1,
                totalTokens: current.totalTokens + tokens,
              })
            }
          }
        }
        break
      }
      case 'image':
      case 'server_tool_use':
      case 'web_search_tool_result':
      case 'search_result':
      case 'document':
      case 'thinking':
      case 'redacted_thinking':
      case 'code_execution_tool_result':
      case 'mcp_tool_use':
      case 'mcp_tool_result':
      case 'container_upload':
      case 'web_fetch_tool_result':
      case 'bash_code_execution_tool_result':
      case 'text_editor_code_execution_tool_result':
      case 'tool_search_tool_result':
      case 'compaction':
        // Not broken out individually for now; lumped into `other`.
        stats['other'] += tokens
        break
    }
  }
  /**
   * Adds `value` to the number stored under `key`, starting from 0 when the
   * key is absent (or its current value is falsy).
   *
   * @param map Counter map, mutated in place.
   * @param key Entry to bump.
   * @param value Amount to add.
   */
  function increment(map: Map<string, number>, key: string, value: number): void {
    const previous = map.get(key)
    const base = previous ? previous : 0
    map.set(key, base + value)
  }
  /**
   * Flattens a `TokenStats` into a flat name-to-number metrics record.
   *
   * Always emitted: the absolute token totals, one `attachment_<type>_count`
   * per attachment type, one `tool_request_<tool>_tokens` and
   * `tool_result_<tool>_tokens` per tool, and the duplicate-read totals.
   * Percentages (rounded to whole numbers) are added only when
   * `stats.total` is greater than zero.
   *
   * @param stats Statistics to convert; not modified.
   * @returns A newly created metrics record.
   */
  export function tokenStatsToStatsigMetrics(
    stats: TokenStats,
  ): Record<string, number> {
    const { total } = stats
    const metrics: Record<string, number> = {
      total_tokens: total,
      human_message_tokens: stats.humanMessages,
      assistant_message_tokens: stats.assistantMessages,
      local_command_output_tokens: stats.localCommandOutputs,
      other_tokens: stats.other,
    }

    for (const [type, count] of stats.attachments) {
      metrics[`attachment_${type}_count`] = count
    }
    for (const [tool, tokens] of stats.toolRequests) {
      metrics[`tool_request_${tool}_tokens`] = tokens
    }
    for (const [tool, tokens] of stats.toolResults) {
      metrics[`tool_result_${tool}_tokens`] = tokens
    }

    let duplicateTotal = 0
    for (const entry of stats.duplicateFileReads.values()) {
      duplicateTotal += entry.tokens
    }
    metrics.duplicate_read_tokens = duplicateTotal
    metrics.duplicate_read_file_count = stats.duplicateFileReads.size

    // Without a positive total there is nothing to take a percentage of.
    if (!(total > 0)) {
      return metrics
    }

    const percentOfTotal = (part: number): number =>
      Math.round((part / total) * 100)

    metrics.human_message_percent = percentOfTotal(stats.humanMessages)
    metrics.assistant_message_percent = percentOfTotal(stats.assistantMessages)
    metrics.local_command_output_percent = percentOfTotal(
      stats.localCommandOutputs,
    )
    metrics.duplicate_read_percent = percentOfTotal(duplicateTotal)

    let toolRequestTotal = 0
    for (const tokens of stats.toolRequests.values()) {
      toolRequestTotal += tokens
    }
    let toolResultTotal = 0
    for (const tokens of stats.toolResults.values()) {
      toolResultTotal += tokens
    }
    metrics.tool_request_percent = percentOfTotal(toolRequestTotal)
    metrics.tool_result_percent = percentOfTotal(toolResultTotal)

    // Per-tool share of the total, requests first and then results.
    for (const [tool, tokens] of stats.toolRequests) {
      metrics[`tool_request_${tool}_percent`] = percentOfTotal(tokens)
    }
    for (const [tool, tokens] of stats.toolResults) {
      metrics[`tool_result_${tool}_percent`] = percentOfTotal(tokens)
    }

    return metrics
  }