metadata.ts 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973
  1. // biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
  2. /**
  3. * Shared event metadata enrichment for analytics systems
  4. *
  5. * This module provides a single source of truth for collecting and formatting
  6. * event metadata across all analytics systems (Datadog, 1P).
  7. */
  8. import { extname } from 'path'
  9. import memoize from 'lodash-es/memoize.js'
  10. import { env, getHostPlatformForAnalytics } from '../../utils/env.js'
  11. import { envDynamic } from '../../utils/envDynamic.js'
  12. import { getModelBetas } from '../../utils/betas.js'
  13. import { getMainLoopModel } from '../../utils/model/model.js'
  14. import {
  15. getSessionId,
  16. getIsInteractive,
  17. getKairosActive,
  18. getClientType,
  19. getParentSessionId as getParentSessionIdFromState,
  20. } from '../../bootstrap/state.js'
  21. import { isEnvTruthy } from '../../utils/envUtils.js'
  22. import { isOfficialMcpUrl } from '../mcp/officialRegistry.js'
  23. import { isClaudeAISubscriber, getSubscriptionType } from '../../utils/auth.js'
  24. import { getRepoRemoteHash } from '../../utils/git.js'
  25. import {
  26. getWslVersion,
  27. getLinuxDistroInfo,
  28. detectVcs,
  29. } from '../../utils/platform.js'
  30. import type { CoreUserData } from 'src/utils/user.js'
  31. import { getAgentContext } from '../../utils/agentContext.js'
  32. import type { EnvironmentMetadata } from '../../types/generated/events_mono/claude_code/v1/claude_code_internal_event.js'
  33. import type { PublicApiAuth } from '../../types/generated/events_mono/common/v1/auth.js'
  34. import { jsonStringify } from '../../utils/slowOperations.js'
  35. import {
  36. getAgentId,
  37. getParentSessionId as getTeammateParentSessionId,
  38. getTeamName,
  39. isTeammate,
  40. } from '../../utils/teammate.js'
  41. import { feature } from 'bun:bundle'
  /**
   * Marker type for verifying analytics metadata doesn't contain sensitive data.
   *
   * This type forces explicit verification that string values being logged
   * don't contain code snippets, file paths, or other sensitive information.
   *
   * The metadata is expected to be JSON-serializable.
   *
   * Usage: `myString as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS`
   *
   * The alias is `never`, so no value can be given this type without an explicit
   * cast. That is intentional: the cast at each call site is the written record
   * that a developer checked the value. Because `never` is assignable to every
   * type, a cast value can then be placed in any metadata field.
   */
  export type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS = never
  /**
   * Produces the tool name that is safe to attach to analytics events.
   *
   * Names of the form `mcp__<server>__<tool>` expose user-specific server
   * configuration (PII-medium), so every name with the `mcp__` prefix collapses
   * to the constant `'mcp_tool'`. Built-in tool names (Bash, Read, Write, etc.)
   * pass through unchanged.
   *
   * @param toolName - The tool name to sanitize
   * @returns `'mcp_tool'` for MCP tools, otherwise the original name
   */
  export function sanitizeToolNameForAnalytics(
    toolName: string,
  ): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS {
    const loggedName = toolName.startsWith('mcp__') ? 'mcp_tool' : toolName
    return loggedName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  }
  /**
   * Reports whether OTLP events may carry detailed tool names.
   *
   * When on, MCP server/tool names and Skill names are logged. The default is
   * off, to protect PII (user-specific server configurations).
   *
   * Enable with OTEL_LOG_TOOL_DETAILS=1
   *
   * @returns Whether `OTEL_LOG_TOOL_DETAILS` holds a truthy value per `isEnvTruthy`
   */
  export function isToolDetailsLoggingEnabled(): boolean {
    const flag = process.env.OTEL_LOG_TOOL_DETAILS
    return isEnvTruthy(flag)
  }
  /**
   * Decides whether analytics events may carry real MCP server/tool names.
   *
   * Per go/taxonomy, MCP names are medium PII. They are logged only for:
   * - Cowork (entrypoint=local-agent) — no ZDR concept, log all MCPs
   * - claude.ai-proxied connectors — always official (from claude.ai's list)
   * - Servers whose URL matches the official MCP registry — directory
   *   connectors added via `claude mcp add`, not customer-specific config
   *
   * Custom/user-configured MCPs stay sanitized (toolName='mcp_tool').
   *
   * @param mcpServerType - Transport/type of the MCP server, if known
   * @param mcpServerBaseUrl - Base URL of the MCP server, if it has one
   * @returns `true` when one of the three conditions above holds
   */
  export function isAnalyticsToolDetailsLoggingEnabled(
    mcpServerType: string | undefined,
    mcpServerBaseUrl: string | undefined,
  ): boolean {
    const isCowork = process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent'
    const isClaudeAiProxied = mcpServerType === 'claudeai-proxy'
    if (isCowork || isClaudeAiProxied) {
      return true
    }
    // Without a URL there is nothing to look up in the official registry.
    if (!mcpServerBaseUrl) {
      return false
    }
    return isOfficialMcpUrl(mcpServerBaseUrl) ? true : false
  }
  /**
   * Built-in first-party MCP servers whose names are fixed reserved strings,
   * not user-configured — so logging them is not PII. Checked in addition to
   * isAnalyticsToolDetailsLoggingEnabled's transport/URL gates, which a stdio
   * built-in would otherwise fail.
   *
   * Feature-gated so the set is empty when the feature is off: the name
   * reservation (main.tsx, config.ts addMcpServer) is itself feature-gated, so
   * a user-configured 'computer-use' is possible in builds without the feature.
   */
  /* eslint-disable @typescript-eslint/no-require-imports */
  const BUILTIN_MCP_SERVER_NAMES: ReadonlySet<string> = new Set(
    // The require() sits inside the feature-on branch, so the computerUse module
    // is only referenced when CHICAGO_MCP is enabled.
    // NOTE(review): presumably this lets the bundler drop the module from
    // builds without the feature — confirm against the bun:bundle docs.
    feature('CHICAGO_MCP')
      ? [
          (
            require('../../utils/computerUse/common.js') as typeof import('../../utils/computerUse/common.js')
          ).COMPUTER_USE_MCP_SERVER_NAME,
        ]
      : [],
  )
  /* eslint-enable @typescript-eslint/no-require-imports */
  /**
   * Spreadable helper for logEvent payloads. Consolidates the identical IIFE
   * pattern at each tengu_tool_use_* call site.
   *
   * @param toolName - Full tool name, e.g. `mcp__<server>__<tool>`
   * @param mcpServerType - Transport/type of the MCP server, if known
   * @param mcpServerBaseUrl - Base URL of the MCP server, if it has one
   * @returns `{mcpServerName, mcpToolName}` when `toolName` is an MCP tool and
   *   either its server is a built-in or the analytics gate passes; `{}` otherwise
   */
  export function mcpToolDetailsForAnalytics(
    toolName: string,
    mcpServerType: string | undefined,
    mcpServerBaseUrl: string | undefined,
  ): {
    mcpServerName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
    mcpToolName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  } {
    const parsed = extractMcpToolDetails(toolName)
    if (!parsed) {
      return {}
    }
    const { serverName, mcpToolName } = parsed
    // Built-in server names are checked first; the transport/URL gate is only
    // consulted for servers that are not built in.
    const mayLogNames =
      BUILTIN_MCP_SERVER_NAMES.has(serverName) ||
      isAnalyticsToolDetailsLoggingEnabled(mcpServerType, mcpServerBaseUrl)
    return mayLogNames ? { mcpServerName: serverName, mcpToolName } : {}
  }
  /**
   * Splits a full MCP tool name into its server and tool components.
   * MCP tool names follow the format: mcp__<server>__<tool>
   *
   * The server name is the segment between the first and second `__`;
   * everything after the second `__` is the tool name, so a tool name may
   * itself contain `__`.
   *
   * @param toolName - The full tool name (e.g., 'mcp__slack__read_channel')
   * @returns Object with `serverName` and `mcpToolName`, or undefined when the
   *   name lacks the `mcp__` prefix or either component is missing or empty
   */
  export function extractMcpToolDetails(toolName: string):
    | {
        serverName: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
        mcpToolName: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
      }
    | undefined {
    if (!toolName.startsWith('mcp__')) {
      return undefined
    }
    // Segments: ['mcp', <server>, ...<tool pieces>]
    const [, serverName, ...toolSegments] = toolName.split('__')
    if (toolSegments.length === 0) {
      return undefined
    }
    const mcpToolName = toolSegments.join('__')
    if (!serverName || !mcpToolName) {
      return undefined
    }
    return {
      serverName:
        serverName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      mcpToolName:
        mcpToolName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    }
  }
  /**
   * Reads the skill name out of a Skill tool invocation.
   *
   * @param toolName - The tool name; anything other than 'Skill' yields undefined
   * @param input - The tool input, expected to be an object with a string `skill`
   * @returns The `skill` string for a well-formed Skill call, undefined otherwise
   */
  export function extractSkillName(
    toolName: string,
    input: unknown,
  ): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
    const isSkillCall = toolName === 'Skill'
    const isObjectInput = typeof input === 'object' && input !== null
    if (!isSkillCall || !isObjectInput || !('skill' in input)) {
      return undefined
    }
    const candidate = (input as { skill: unknown }).skill
    if (typeof candidate !== 'string') {
      return undefined
    }
    return candidate as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  }
  /** Strings longer than this many characters get shortened. */
  const TOOL_INPUT_STRING_TRUNCATE_AT = 512
  /** Number of leading characters kept from a shortened string. */
  const TOOL_INPUT_STRING_TRUNCATE_TO = 128
  /** Upper bound on the serialized JSON length of a tool input. */
  const TOOL_INPUT_MAX_JSON_CHARS = 4 * 1024
  /** Maximum array items / object keys kept per collection. */
  const TOOL_INPUT_MAX_COLLECTION_ITEMS = 20
  /** Collections at this depth or deeper are replaced by '<nested>'. */
  const TOOL_INPUT_MAX_DEPTH = 2
  /**
   * Recursively bounds a tool-input value for telemetry.
   *
   * - Strings over TOOL_INPUT_STRING_TRUNCATE_AT chars keep their first
   *   TOOL_INPUT_STRING_TRUNCATE_TO chars plus a `…[N chars]` marker.
   * - Numbers, booleans, null and undefined pass through untouched.
   * - Any other value at depth >= TOOL_INPUT_MAX_DEPTH becomes '<nested>'.
   * - Arrays and objects keep at most TOOL_INPUT_MAX_COLLECTION_ITEMS entries,
   *   followed by a marker recording the original size.
   * - Object keys starting with '_' are dropped.
   * - Anything else (bigint, symbol, function) is converted with String().
   *
   * @param value - The value to bound
   * @param depth - Current nesting depth; callers start at 0
   * @returns A bounded copy (or the value itself for short primitives)
   */
  function truncateToolInputValue(value: unknown, depth = 0): unknown {
    switch (typeof value) {
      case 'string':
        return value.length > TOOL_INPUT_STRING_TRUNCATE_AT
          ? `${value.slice(0, TOOL_INPUT_STRING_TRUNCATE_TO)}…[${value.length} chars]`
          : value
      case 'number':
      case 'boolean':
      case 'undefined':
        return value
      default:
        break
    }
    if (value === null) {
      return value
    }
    if (depth >= TOOL_INPUT_MAX_DEPTH) {
      return '<nested>'
    }
    const childDepth = depth + 1
    if (Array.isArray(value)) {
      const head = value.slice(0, TOOL_INPUT_MAX_COLLECTION_ITEMS)
      const bounded = head.map(item => truncateToolInputValue(item, childDepth))
      if (value.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) {
        bounded.push(`…[${value.length} items]`)
      }
      return bounded
    }
    if (typeof value === 'object') {
      // Skip internal marker keys (e.g. _simulatedSedEdit re-introduced by
      // SedEditPermissionRequest) so they don't leak into telemetry.
      const visible = Object.entries(value as Record<string, unknown>).filter(
        ([key]) => !key.startsWith('_'),
      )
      const bounded: Record<string, unknown> = {}
      for (const [key, child] of visible.slice(
        0,
        TOOL_INPUT_MAX_COLLECTION_ITEMS,
      )) {
        bounded[key] = truncateToolInputValue(child, childDepth)
      }
      if (visible.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) {
        bounded['…'] = `${visible.length} keys`
      }
      return bounded
    }
    return String(value)
  }
  /**
   * Serialize a tool's input arguments for the OTel tool_result event.
   * Truncates long strings and deep nesting to keep the output bounded while
   * preserving forensically useful fields like file paths, URLs, and MCP args.
   *
   * @param input - The raw tool input
   * @returns The bounded JSON string, capped at TOOL_INPUT_MAX_JSON_CHARS
   *   characters plus a `…[truncated]` marker. Returns undefined when
   *   OTEL_LOG_TOOL_DETAILS is not enabled, or when the input has no JSON
   *   representation (e.g. `undefined`).
   */
  export function extractToolInputForTelemetry(
    input: unknown,
  ): string | undefined {
    if (!isToolDetailsLoggingEnabled()) {
      return undefined
    }
    const truncated = truncateToolInputValue(input)
    // Widened to unknown on purpose: JSON serialization of `undefined` yields
    // `undefined` rather than a string, and reading `.length` on that throws.
    // NOTE(review): assumes jsonStringify follows JSON.stringify here — confirm.
    const json: unknown = jsonStringify(truncated)
    if (typeof json !== 'string') {
      return undefined
    }
    if (json.length > TOOL_INPUT_MAX_JSON_CHARS) {
      return json.slice(0, TOOL_INPUT_MAX_JSON_CHARS) + '…[truncated]'
    }
    return json
  }
  /**
   * Longest file extension (in characters, without the dot) that is logged
   * verbatim. Anything longer is treated as potentially sensitive
   * (e.g., hash-based filenames like "key-hash-abcd-123-456") and reported
   * as 'other'.
   */
  const MAX_FILE_EXTENSION_LENGTH = 10
  /**
   * Derives a loggable, lower-cased file extension from a path.
   *
   * Relies on Node's path.extname, so dotfiles such as `.bashrc` and paths
   * without a dot have no extension, and a bare trailing dot is ignored.
   *
   * @param filePath - The file path to extract the extension from
   * @returns The extension without its leading dot, 'other' when it exceeds
   *   MAX_FILE_EXTENSION_LENGTH, or undefined if there is no extension
   */
  export function getFileExtensionForAnalytics(
    filePath: string,
  ): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
    const dotted = extname(filePath).toLowerCase()
    if (dotted === '' || dotted === '.') {
      return undefined
    }
    const bare = dotted.slice(1) // remove leading dot
    const loggable = bare.length > MAX_FILE_EXTENSION_LENGTH ? 'other' : bare
    return loggable as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  }
  /** Allow list of commands we extract file extensions from. */
  const FILE_COMMANDS = new Set([
    'rm',
    'mv',
    'cp',
    'touch',
    'mkdir',
    'chmod',
    'chown',
    'cat',
    'head',
    'tail',
    'sort',
    'stat',
    'diff',
    'wc',
    'grep',
    'rg',
    'sed',
  ])
  /** Regex to split bash commands on compound operators (&&, ||, ;, |). */
  const COMPOUND_OPERATOR_REGEX = /\s*(?:&&|\|\||[;|])\s*/
  /** Regex to split on whitespace. */
  const WHITESPACE_REGEX = /\s+/
  /**
   * Extracts file extensions from a bash command for analytics.
   * Best-effort: splits on operators and whitespace, extracts extensions
   * from non-flag args of allowed commands. No heavy shell parsing needed
   * because grep patterns and sed scripts rarely resemble file extensions.
   *
   * @param command - The raw bash command line
   * @param simulatedSedEditFilePath - Optional path of a simulated sed edit;
   *   its extension, if any, is listed first
   * @returns Comma-separated, de-duplicated extensions in first-seen order, or
   *   undefined when none were found
   */
  export function getFileExtensionsFromBashCommand(
    command: string,
    simulatedSedEditFilePath?: string,
  ): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
    if (!command.includes('.') && !simulatedSedEditFilePath) return undefined
    // A Set iterates in insertion order, so it gives both de-duplication and
    // the first-seen ordering of the output.
    const seen = new Set<string>()
    const record = (candidatePath: string): void => {
      const ext = getFileExtensionForAnalytics(candidatePath)
      if (ext) seen.add(ext)
    }
    if (simulatedSedEditFilePath) {
      record(simulatedSedEditFilePath)
    }
    for (const subcmd of command.split(COMPOUND_OPERATOR_REGEX)) {
      // Trim first: splitting " cat a.txt" on whitespace yields an empty first
      // token, which never matches the allow list and silently dropped the
      // whole sub-command.
      const tokens = subcmd.trim().split(WHITESPACE_REGEX)
      if (tokens.length < 2) continue
      const firstToken = tokens[0] ?? ''
      // Reduce "/usr/bin/cat" to "cat"; lastIndexOf returns -1 when there is no
      // slash, which makes slice(0) keep the whole token.
      const baseCmd = firstToken.slice(firstToken.lastIndexOf('/') + 1)
      if (!FILE_COMMANDS.has(baseCmd)) continue
      for (const arg of tokens.slice(1)) {
        if (arg.startsWith('-')) continue // flags are not file paths
        record(arg)
      }
    }
    if (seen.size === 0) return undefined
    return Array.from(seen).join(
      ',',
    ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  }
  /**
   * Environment context metadata.
   *
   * Produced by buildEnvContext(), which is memoized, so these values are
   * captured on the first call and reused for later events.
   */
  export type EnvContext = {
    // Host platform as reported by getHostPlatformForAnalytics().
    platform: string
    // CLAUDE_CODE_HOST_PLATFORM if set, otherwise the raw process.platform.
    platformRaw: string
    arch: string
    nodeVersion: string
    terminal: string | null
    // Comma-joined lists.
    packageManagers: string
    runtimes: string
    isRunningWithBun: boolean
    // Flags derived from the CI, CLAUBBIT and CLAUDE_CODE_REMOTE env vars.
    isCi: boolean
    isClaubbit: boolean
    isClaudeCodeRemote: boolean
    // True when CLAUDE_CODE_ENTRYPOINT === 'local-agent'.
    isLocalAgentMode: boolean
    isConductor: boolean
    // The optional fields below are only present when the corresponding
    // CLAUDE_CODE_* env var is non-empty.
    remoteEnvironmentType?: string
    coworkerType?: string
    claudeCodeContainerId?: string
    claudeCodeRemoteSessionId?: string
    tags?: string
    // Flags derived from the GITHUB_ACTIONS and CLAUDE_CODE_ACTION env vars.
    isGithubAction: boolean
    isClaudeCodeAction: boolean
    isClaudeAiAuth: boolean
    // Build identification from the MACRO build-time constants.
    version: string
    versionBase?: string
    buildTime: string
    deploymentEnvironment: string
    // GitHub Actions details; only populated when GITHUB_ACTIONS is truthy.
    githubEventName?: string
    githubActionsRunnerEnvironment?: string
    githubActionsRunnerOs?: string
    githubActionRef?: string
    wslVersion?: string
    // NOTE(review): these three are presumably supplied by spreading the result
    // of getLinuxDistroInfo() — confirm against its return type.
    linuxDistroId?: string
    linuxDistroVersion?: string
    linuxKernel?: string
    // Comma-joined detectVcs() results; omitted when none are detected.
    vcs?: string
  }
  /**
   * Process metrics included with all analytics events.
   *
   * Populated by buildProcessMetrics() from process.uptime(),
   * process.memoryUsage(), process.constrainedMemory() and process.cpuUsage().
   */
  export type ProcessMetrics = {
    uptime: number
    // Memory figures copied from process.memoryUsage().
    rss: number
    heapTotal: number
    heapUsed: number
    external: number
    arrayBuffers: number
    constrainedMemory: number | undefined
    // Cumulative CPU usage snapshot from process.cpuUsage().
    cpuUsage: NodeJS.CpuUsage
    // CPU time consumed since the previous sample as a percentage of elapsed
    // wall time; undefined for the first sample or when no time has elapsed.
    cpuPercent: number | undefined
  }
  /**
   * Core event metadata shared across all analytics systems.
   *
   * Assembled per event by getEventMetadata(). Optional fields are omitted
   * rather than set to undefined when their source value is absent.
   */
  export type EventMetadata = {
    model: string
    sessionId: string
    // process.env.USER_TYPE, or '' when unset.
    userType: string
    // Comma-joined beta names; omitted when empty.
    betas?: string
    envContext: EnvContext
    entrypoint?: string
    agentSdkVersion?: string
    // Stringified boolean ('true' / 'false').
    isInteractive: string
    clientType: string
    processMetrics?: ProcessMetrics
    // SWE_BENCH_* env vars, or '' when unset.
    sweBenchRunId: string
    sweBenchInstanceId: string
    sweBenchTaskId: string
    // Swarm/team agent identification for analytics attribution
    agentId?: string // CLAUDE_CODE_AGENT_ID (format: agentName@teamName) or subagent UUID
    parentSessionId?: string // CLAUDE_CODE_PARENT_SESSION_ID (team lead's session)
    agentType?: 'teammate' | 'subagent' | 'standalone' // Distinguishes swarm teammates, Agent tool subagents, and standalone agents
    teamName?: string // Team name for swarm agents (from env var or AsyncLocalStorage)
    subscriptionType?: string // OAuth subscription tier (max, pro, enterprise, team)
    rh?: string // Hashed repo remote URL (first 16 chars of SHA256), for joining with server-side data
    kairosActive?: true // KAIROS assistant mode active (ant-only; set in main.tsx after gate check)
    skillMode?: 'discovery' | 'coach' | 'discovery_and_coach' // Which skill surfacing mechanism(s) are gated on (ant-only; for BQ session segmentation)
    observerMode?: 'backseat' | 'skillcoach' | 'both' // Which observer classifiers are gated on (ant-only; for BQ cohort splits on tengu_backseat_* events)
  }
  /**
   * Options for enriching event metadata.
   *
   * `model` and `betas` are typed `unknown`, so getEventMetadata() validates
   * them before use: a truthy `model` is stringified, and `betas` is honoured
   * only when it is a string.
   */
  export type EnrichMetadataOptions = {
    // Model to use, falls back to getMainLoopModel() if not provided
    model?: unknown
    // Explicit betas string (already joined)
    betas?: unknown
    // Additional metadata to include (optional)
    additionalMetadata?: Record<string, unknown>
  }
  /**
   * Resolves who is emitting the event, for analytics attribution.
   *
   * Sources are tried in this order, and the first one that yields anything wins:
   * 1. AsyncLocalStorage agent context (subagents running in this process)
   * 2. Swarm helpers (teammates / standalone agents)
   * 3. Parent session ID from bootstrap state (e.g., plan mode -> implementation)
   *
   * @returns The identification fields that could be determined; `{}` if none
   */
  function getAgentIdentification(): {
    agentId?: string
    parentSessionId?: string
    agentType?: 'teammate' | 'subagent' | 'standalone'
    teamName?: string
  } {
    // 1. In-process agent context.
    const ctx = getAgentContext()
    if (ctx) {
      const base = {
        agentId: ctx.agentId,
        parentSessionId: ctx.parentSessionId,
        agentType: ctx.agentType,
      }
      // Only teammate contexts carry a team name.
      return ctx.agentType === 'teammate'
        ? { ...base, teamName: ctx.teamName }
        : base
    }

    // 2. Swarm helpers.
    const swarmAgentId = getAgentId()
    const swarmParentSessionId = getTeammateParentSessionId()
    const swarmTeamName = getTeamName()
    // An agent ID without teammate status means a standalone agent.
    let swarmAgentType: 'teammate' | 'standalone' | undefined
    if (isTeammate()) {
      swarmAgentType = 'teammate'
    } else if (swarmAgentId) {
      swarmAgentType = 'standalone'
    }
    const swarm: ReturnType<typeof getAgentIdentification> = {}
    if (swarmAgentId) swarm.agentId = swarmAgentId
    if (swarmAgentType) swarm.agentType = swarmAgentType
    if (swarmParentSessionId) swarm.parentSessionId = swarmParentSessionId
    if (swarmTeamName) swarm.teamName = swarmTeamName
    if (Object.keys(swarm).length > 0) {
      return swarm
    }

    // 3. Parent session recorded in bootstrap state.
    const inheritedParentSessionId = getParentSessionIdFromState()
    return inheritedParentSessionId
      ? { parentSessionId: inheritedParentSessionId }
      : {}
  }
  /**
   * Memoized base of the build version: `major.minor.patch` plus an optional
   * lower-case pre-release tag.
   * "2.0.36-dev.20251107.t174150.sha2709699" → "2.0.36-dev"
   *
   * @returns The base version, or undefined when MACRO.VERSION does not start
   *   with a `d.d.d` triple
   */
  const getVersionBase = memoize((): string | undefined => {
    const versionBasePattern = /^\d+\.\d+\.\d+(?:-[a-z]+)?/
    return versionBasePattern.exec(MACRO.VERSION)?.[0]
  })
  /**
   * Builds the environment context object.
   *
   * Memoized: the returned promise is cached by lodash `memoize`, so the
   * environment is probed once and every later call receives the same promise.
   * Values that can change after the first event must therefore be read
   * outside this function.
   *
   * @returns Promise resolving to the EnvContext snapshot
   */
  const buildEnvContext = memoize(async (): Promise<EnvContext> => {
    // The four async probes are independent, so run them concurrently.
    const [packageManagers, runtimes, linuxDistroInfo, vcs] = await Promise.all([
      env.getPackageManagers(),
      env.getRuntimes(),
      getLinuxDistroInfo(),
      detectVcs(),
    ])
    return {
      platform: getHostPlatformForAnalytics(),
      // Raw process.platform so freebsd/openbsd/aix/sunos are visible in BQ.
      // getHostPlatformForAnalytics() buckets those into 'linux'; here we want
      // the truth. CLAUDE_CODE_HOST_PLATFORM still overrides for container/remote.
      platformRaw: process.env.CLAUDE_CODE_HOST_PLATFORM || process.platform,
      arch: env.arch,
      nodeVersion: env.nodeVersion,
      terminal: envDynamic.terminal,
      packageManagers: packageManagers.join(','),
      runtimes: runtimes.join(','),
      isRunningWithBun: env.isRunningWithBun(),
      isCi: isEnvTruthy(process.env.CI),
      isClaubbit: isEnvTruthy(process.env.CLAUBBIT),
      isClaudeCodeRemote: isEnvTruthy(process.env.CLAUDE_CODE_REMOTE),
      isLocalAgentMode: process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent',
      isConductor: env.isConductor(),
      // `...(value && {...})` adds the key only when the env var is non-empty;
      // spreading undefined or '' contributes nothing.
      ...(process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE && {
        remoteEnvironmentType: process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE,
      }),
      // Gated by feature flag to prevent leaking "coworkerType" string in external builds
      ...(feature('COWORKER_TYPE_TELEMETRY')
        ? process.env.CLAUDE_CODE_COWORKER_TYPE
          ? { coworkerType: process.env.CLAUDE_CODE_COWORKER_TYPE }
          : {}
        : {}),
      ...(process.env.CLAUDE_CODE_CONTAINER_ID && {
        claudeCodeContainerId: process.env.CLAUDE_CODE_CONTAINER_ID,
      }),
      ...(process.env.CLAUDE_CODE_REMOTE_SESSION_ID && {
        claudeCodeRemoteSessionId: process.env.CLAUDE_CODE_REMOTE_SESSION_ID,
      }),
      ...(process.env.CLAUDE_CODE_TAGS && {
        tags: process.env.CLAUDE_CODE_TAGS,
      }),
      isGithubAction: isEnvTruthy(process.env.GITHUB_ACTIONS),
      isClaudeCodeAction: isEnvTruthy(process.env.CLAUDE_CODE_ACTION),
      isClaudeAiAuth: isClaudeAISubscriber(),
      version: MACRO.VERSION,
      versionBase: getVersionBase(),
      buildTime: MACRO.BUILD_TIME,
      deploymentEnvironment: env.detectDeploymentEnvironment(),
      // GitHub Actions details are only collected when running inside Actions.
      ...(isEnvTruthy(process.env.GITHUB_ACTIONS) && {
        githubEventName: process.env.GITHUB_EVENT_NAME,
        githubActionsRunnerEnvironment: process.env.RUNNER_ENVIRONMENT,
        githubActionsRunnerOs: process.env.RUNNER_OS,
        // The ref is whatever follows 'claude-code-action/' in the action path.
        githubActionRef: process.env.GITHUB_ACTION_PATH?.includes(
          'claude-code-action/',
        )
          ? process.env.GITHUB_ACTION_PATH.split('claude-code-action/')[1]
          : undefined,
      }),
      ...(getWslVersion() && { wslVersion: getWslVersion() }),
      ...(linuxDistroInfo ?? {}),
      ...(vcs.length > 0 ? { vcs: vcs.join(',') } : {}),
    }
  })
  // --
  // CPU% delta tracking — inherently process-global, same pattern as logBatch/flushTimer in datadog.ts
  /** CPU usage snapshot from the previous buildProcessMetrics() call, if any. */
  let prevCpuUsage: NodeJS.CpuUsage | null = null
  /** Date.now() timestamp of the previous buildProcessMetrics() call, if any. */
  let prevWallTimeMs: number | null = null
  /**
   * Builds process metrics object for all users.
   *
   * `cpuPercent` is CPU time (user + system) consumed since the previous call
   * as a percentage of the wall time elapsed since then. It is undefined on
   * the first call and when no wall time has elapsed.
   *
   * Side effect: each call stores the current CPU/wall-time sample in
   * module-level state for the next call's delta.
   *
   * @returns The metrics snapshot, or undefined if collecting them throws
   */
  function buildProcessMetrics(): ProcessMetrics | undefined {
    try {
      const mem = process.memoryUsage()
      const cpu = process.cpuUsage()
      const now = Date.now()
      let cpuPercent: number | undefined
      // Explicit null checks: these are "no previous sample" sentinels, not
      // truthiness tests on the numeric values.
      if (prevCpuUsage !== null && prevWallTimeMs !== null) {
        const wallDeltaMs = now - prevWallTimeMs
        if (wallDeltaMs > 0) {
          const userDeltaUs = cpu.user - prevCpuUsage.user
          const systemDeltaUs = cpu.system - prevCpuUsage.system
          // cpuUsage() reports microseconds; scale wall time from ms to match.
          cpuPercent =
            ((userDeltaUs + systemDeltaUs) / (wallDeltaMs * 1000)) * 100
        }
      }
      prevCpuUsage = cpu
      prevWallTimeMs = now
      // process.constrainedMemory is missing on some runtimes. Calling it
      // unguarded throws, and the catch below would then discard every other
      // metric too, so degrade to undefined for this one field instead.
      /* eslint-disable eslint-plugin-n/no-unsupported-features/node-builtins */
      const constrainedMemory =
        typeof process.constrainedMemory === 'function'
          ? process.constrainedMemory()
          : undefined
      /* eslint-enable eslint-plugin-n/no-unsupported-features/node-builtins */
      return {
        uptime: process.uptime(),
        rss: mem.rss,
        heapTotal: mem.heapTotal,
        heapUsed: mem.heapUsed,
        external: mem.external,
        arrayBuffers: mem.arrayBuffers,
        constrainedMemory,
        cpuUsage: cpu,
        cpuPercent,
      }
    } catch {
      // Metrics are best-effort: never let collection break event logging.
      return undefined
    }
  }
  /**
   * Get core event metadata shared across all analytics systems.
   *
   * This function collects environment, runtime, and context information
   * that should be included with all analytics events.
   *
   * @param options - Configuration options. `options.model` overrides the main
   *   loop model when truthy; `options.betas` overrides the model's betas when
   *   it is a string.
   * @returns Promise resolving to enriched metadata object
   */
  export async function getEventMetadata(
    options: EnrichMetadataOptions = {},
  ): Promise<EventMetadata> {
    const model = options.model ? String(options.model) : getMainLoopModel()
    const betas =
      typeof options.betas === 'string'
        ? options.betas
        : getModelBetas(model).join(',')
    const [envContext, repoRemoteHash] = await Promise.all([
      buildEnvContext(),
      getRepoRemoteHash(),
    ])
    const processMetrics = buildProcessMetrics()
    // Read once so the value can be narrowed instead of calling the getter a
    // second time and forcing the result with a non-null assertion.
    const subscriptionType = getSubscriptionType()
    const metadata: EventMetadata = {
      model,
      sessionId: getSessionId(),
      userType: process.env.USER_TYPE || '',
      ...(betas.length > 0 ? { betas } : {}),
      envContext,
      ...(process.env.CLAUDE_CODE_ENTRYPOINT && {
        entrypoint: process.env.CLAUDE_CODE_ENTRYPOINT,
      }),
      ...(process.env.CLAUDE_AGENT_SDK_VERSION && {
        agentSdkVersion: process.env.CLAUDE_AGENT_SDK_VERSION,
      }),
      isInteractive: String(getIsInteractive()),
      clientType: getClientType(),
      ...(processMetrics && { processMetrics }),
      sweBenchRunId: process.env.SWE_BENCH_RUN_ID || '',
      sweBenchInstanceId: process.env.SWE_BENCH_INSTANCE_ID || '',
      sweBenchTaskId: process.env.SWE_BENCH_TASK_ID || '',
      // Swarm/team agent identification
      // Priority: AsyncLocalStorage context (subagents) > env vars (swarm teammates)
      ...getAgentIdentification(),
      // Subscription tier for DAU-by-tier analytics
      ...(subscriptionType && { subscriptionType }),
      // Assistant mode tag — lives outside memoized buildEnvContext() because
      // setKairosActive() runs at main.tsx:~1648, after the first event may
      // have already fired and memoized the env. Read fresh per-event instead.
      ...(feature('KAIROS') && getKairosActive()
        ? { kairosActive: true as const }
        : {}),
      // Repo remote hash for joining with server-side repo bundle data
      ...(repoRemoteHash && { rh: repoRemoteHash }),
    }
    return metadata
  }
  /**
   * Core event metadata for 1P event logging (snake_case format).
   *
   * Field names mirror EventMetadata's camelCase fields.
   * NOTE(review): `is_interactive` is a boolean here while
   * EventMetadata.isInteractive is a string — presumably converted in
   * to1PEventFormat; confirm.
   */
  export type FirstPartyEventLoggingCoreMetadata = {
    session_id: string
    model: string
    user_type: string
    betas?: string
    entrypoint?: string
    agent_sdk_version?: string
    is_interactive: boolean
    client_type: string
    swe_bench_run_id?: string
    swe_bench_instance_id?: string
    swe_bench_task_id?: string
    // Swarm/team agent identification
    agent_id?: string
    parent_session_id?: string
    agent_type?: 'teammate' | 'subagent' | 'standalone'
    team_name?: string
  }
  /**
   * Complete event logging metadata format for 1P events.
   *
   * This is the return type of to1PEventFormat().
   */
  export type FirstPartyEventLoggingMetadata = {
    // Environment metadata, typed by the proto-generated EnvironmentMetadata.
    env: EnvironmentMetadata
    // NOTE(review): presumably the serialized process metrics — confirm against
    // to1PEventFormat.
    process?: string
    // auth is a top-level field on ClaudeCodeInternalEvent (proto PublicApiAuth).
    // account_id is intentionally omitted — only UUID fields are populated client-side.
    auth?: PublicApiAuth
    // core fields correspond to the top level of ClaudeCodeInternalEvent.
    // They get directly exported to their individual columns in the BigQuery tables
    core: FirstPartyEventLoggingCoreMetadata
    // additional fields are populated in the additional_metadata field of the
    // ClaudeCodeInternalEvent proto. Includes but is not limited to information
    // that differs by event type.
    additional: Record<string, unknown>
  }
  /**
   * Build the snake_case environment section of a 1P event.
   *
   * IMPORTANT: the result is typed as the proto-generated EnvironmentMetadata so
   * that adding a field here that the proto doesn't define is a compile error.
   * The generated toJSON() serializer silently drops unknown keys — a
   * hand-written parallel type previously let #11318, #13924, #19448, and
   * coworker_type all ship fields that never reached BQ.
   * Adding a field? Update the monorepo proto first (go/cc-logging):
   *   event_schemas/.../claude_code/v1/claude_code_internal_event.proto
   * then run `bun run generate:proto` here.
   *
   * @param envContext - camelCase environment context taken from the event metadata
   * @param githubActionsMetadata - optional GitHub Actions identifiers from the
   *   user metadata; placed inside env rather than at the top level of
   *   ClaudeCodeInternalEvent
   * @returns Environment metadata with snake_case field names
   */
  function buildFirstPartyEnv(
    envContext: EventMetadata['envContext'],
    githubActionsMetadata: CoreUserData['githubActionsMetadata'],
  ): EnvironmentMetadata {
    const env: EnvironmentMetadata = {
      platform: envContext.platform,
      platform_raw: envContext.platformRaw,
      arch: envContext.arch,
      node_version: envContext.nodeVersion,
      terminal: envContext.terminal || 'unknown',
      package_managers: envContext.packageManagers,
      runtimes: envContext.runtimes,
      is_running_with_bun: envContext.isRunningWithBun,
      is_ci: envContext.isCi,
      is_claubbit: envContext.isClaubbit,
      is_claude_code_remote: envContext.isClaudeCodeRemote,
      is_local_agent_mode: envContext.isLocalAgentMode,
      is_conductor: envContext.isConductor,
      is_github_action: envContext.isGithubAction,
      is_claude_code_action: envContext.isClaudeCodeAction,
      is_claude_ai_auth: envContext.isClaudeAiAuth,
      version: envContext.version,
      build_time: envContext.buildTime,
      deployment_environment: envContext.deploymentEnvironment,
    }

    // Optional env fields are only emitted when the source value is truthy.
    if (envContext.remoteEnvironmentType) {
      env.remote_environment_type = envContext.remoteEnvironmentType
    }
    if (feature('COWORKER_TYPE_TELEMETRY') && envContext.coworkerType) {
      env.coworker_type = envContext.coworkerType
    }
    if (envContext.claudeCodeContainerId) {
      env.claude_code_container_id = envContext.claudeCodeContainerId
    }
    if (envContext.claudeCodeRemoteSessionId) {
      env.claude_code_remote_session_id = envContext.claudeCodeRemoteSessionId
    }
    if (envContext.tags) {
      // tags is a comma-separated string: split it, trim each entry, and drop
      // entries that are empty after trimming.
      env.tags = envContext.tags
        .split(',')
        .map(t => t.trim())
        .filter(Boolean)
    }
    if (envContext.githubEventName) {
      env.github_event_name = envContext.githubEventName
    }
    if (envContext.githubActionsRunnerEnvironment) {
      env.github_actions_runner_environment =
        envContext.githubActionsRunnerEnvironment
    }
    if (envContext.githubActionsRunnerOs) {
      env.github_actions_runner_os = envContext.githubActionsRunnerOs
    }
    if (envContext.githubActionRef) {
      env.github_action_ref = envContext.githubActionRef
    }
    if (envContext.wslVersion) {
      env.wsl_version = envContext.wslVersion
    }
    if (envContext.linuxDistroId) {
      env.linux_distro_id = envContext.linuxDistroId
    }
    if (envContext.linuxDistroVersion) {
      env.linux_distro_version = envContext.linuxDistroVersion
    }
    if (envContext.linuxKernel) {
      env.linux_kernel = envContext.linuxKernel
    }
    if (envContext.vcs) {
      env.vcs = envContext.vcs
    }
    if (envContext.versionBase) {
      env.version_base = envContext.versionBase
    }

    // Convert camelCase GitHubActionsMetadata to snake_case for 1P API.
    // Note: github_actions_metadata is placed inside env (EnvironmentMetadata)
    // rather than at the top level of ClaudeCodeInternalEvent.
    if (githubActionsMetadata) {
      env.github_actions_metadata = {
        actor_id: githubActionsMetadata.actorId,
        repository_id: githubActionsMetadata.repositoryId,
        repository_owner_id: githubActionsMetadata.repositoryOwnerId,
      }
    }

    return env
  }

  /**
   * Build the snake_case core section of a 1P event from the event metadata.
   *
   * @param metadata - Event metadata carrying the camelCase core fields
   * @returns Core metadata; optional fields are set only when truthy
   */
  function buildFirstPartyCore(
    metadata: EventMetadata,
  ): FirstPartyEventLoggingCoreMetadata {
    const core: FirstPartyEventLoggingCoreMetadata = {
      session_id: metadata.sessionId,
      model: metadata.model,
      user_type: metadata.userType,
      // The source value is a string; only the exact string 'true' maps to true.
      is_interactive: metadata.isInteractive === 'true',
      client_type: metadata.clientType,
    }

    if (metadata.betas) {
      core.betas = metadata.betas
    }
    if (metadata.entrypoint) {
      core.entrypoint = metadata.entrypoint
    }
    if (metadata.agentSdkVersion) {
      core.agent_sdk_version = metadata.agentSdkVersion
    }
    if (metadata.sweBenchRunId) {
      core.swe_bench_run_id = metadata.sweBenchRunId
    }
    if (metadata.sweBenchInstanceId) {
      core.swe_bench_instance_id = metadata.sweBenchInstanceId
    }
    if (metadata.sweBenchTaskId) {
      core.swe_bench_task_id = metadata.sweBenchTaskId
    }

    // Swarm/team agent identification
    if (metadata.agentId) {
      core.agent_id = metadata.agentId
    }
    if (metadata.parentSessionId) {
      core.parent_session_id = metadata.parentSessionId
    }
    if (metadata.agentType) {
      core.agent_type = metadata.agentType
    }
    if (metadata.teamName) {
      core.team_name = metadata.teamName
    }

    return core
  }

  /**
   * Build the auth section of a 1P event.
   *
   * Only UUID fields are populated client-side (account_id is intentionally
   * omitted).
   *
   * @param userMetadata - User metadata carrying account/organization UUIDs
   * @returns The auth object, or undefined when neither UUID is present
   */
  function buildFirstPartyAuth(
    userMetadata: CoreUserData,
  ): PublicApiAuth | undefined {
    if (!userMetadata.accountUuid && !userMetadata.organizationUuid) {
      return undefined
    }
    return {
      account_uuid: userMetadata.accountUuid,
      organization_uuid: userMetadata.organizationUuid,
    }
  }

  /**
   * Convert metadata to 1P event logging format (snake_case fields).
   *
   * The /api/event_logging/batch endpoint expects snake_case field names
   * for environment and core metadata.
   *
   * @param metadata - Event metadata (env context, process metrics, core fields
   *   and a few per-event tags such as rh / kairosActive / skillMode / observerMode)
   * @param userMetadata - User metadata; supplies the auth UUIDs and the
   *   optional GitHub Actions metadata. Based on src/utils/user.ts getUser(),
   *   but with fields present in other parts of ClaudeCodeInternalEvent
   *   deduplicated.
   * @param additionalMetadata - Additional metadata to include; spread last
   *   into `additional`, so its keys override the built-in ones on collision
   * @returns Metadata formatted for 1P event logging
   */
  export function to1PEventFormat(
    metadata: EventMetadata,
    userMetadata: CoreUserData,
    additionalMetadata: Record<string, unknown> = {},
  ): FirstPartyEventLoggingMetadata {
    const {
      envContext,
      processMetrics,
      rh,
      kairosActive,
      skillMode,
      observerMode,
    } = metadata

    const env = buildFirstPartyEnv(
      envContext,
      userMetadata.githubActionsMetadata,
    )
    const core = buildFirstPartyCore(metadata)
    const auth = buildFirstPartyAuth(userMetadata)

    return {
      env,
      // Process metrics travel as a base64-encoded JSON string.
      ...(processMetrics && {
        process: Buffer.from(jsonStringify(processMetrics)).toString('base64'),
      }),
      ...(auth && { auth }),
      core,
      additional: {
        ...(rh && { rh }),
        ...(kairosActive && { is_assistant_mode: true }),
        ...(skillMode && { skill_mode: skillMode }),
        ...(observerMode && { observer_mode: observerMode }),
        ...additionalMetadata,
      },
    }
  }