commitAttribution.ts 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961
  1. import { createHash, randomUUID, type UUID } from 'crypto'
  2. import { stat } from 'fs/promises'
  3. import { isAbsolute, join, relative, sep } from 'path'
  4. import { getOriginalCwd, getSessionId } from '../bootstrap/state.js'
  5. import type {
  6. AttributionSnapshotMessage,
  7. FileAttributionState,
  8. } from '../types/logs.js'
  9. import { getCwd } from './cwd.js'
  10. import { logForDebugging } from './debug.js'
  11. import { execFileNoThrowWithCwd } from './execFileNoThrow.js'
  12. import { getFsImplementation } from './fsOperations.js'
  13. import { isGeneratedFile } from './generatedFiles.js'
  14. import { getRemoteUrlForDir, resolveGitDir } from './git/gitFilesystem.js'
  15. import { findGitRoot, gitExe } from './git.js'
  16. import { logError } from './log.js'
  17. import { getCanonicalName, type ModelName } from './model/model.js'
  18. import { sequential } from './sequential.js'
  /**
   * Allowlist of repos whose commit trailers may carry internal model names.
   * Every repo is listed in both its SSH (`github.com:org/repo`) and its
   * HTTPS (`github.com/org/repo`) form, SSH first.
   *
   * NOTE: This is deliberately a per-repo allowlist rather than an org-wide
   * check. The anthropics and anthropic-experimental orgs contain PUBLIC repos
   * (e.g. anthropics/claude-code, anthropic-experimental/sandbox-runtime), and
   * undercover mode must stay ON in those to prevent codename leaks.
   * Only add repos here that are confirmed PRIVATE.
   */
  const INTERNAL_MODEL_REPOS = [
    'claude-cli-internal',
    'anthropic',
    'apps',
    'casino',
    'dbt',
    'dotfiles',
    'terraform-config',
    'hex-export',
    'feedback-v2',
    'labs',
    'argo-rollouts',
    'starling-configs',
    'ts-tools',
    'ts-capsules',
    'feldspar-testing',
    'trellis',
    'claude-for-hiring',
    'forge-web',
    'infra-manifests',
    'mycro_manifests',
    'mycro_configs',
    'mobile-apps',
  ].flatMap(repo => [
    `github.com:anthropics/${repo}`,
    `github.com/anthropics/${repo}`,
  ])
  /**
   * Resolve the directory that attribution paths are measured against.
   *
   * Starts from getCwd() (which respects agent worktree overrides via
   * AsyncLocalStorage) and walks up to the git root so that a `cd subdir`
   * does not change the answer. When no git root is found, the original
   * working directory is used instead.
   */
  export function getAttributionRepoRoot(): string {
    const gitRoot = findGitRoot(getCwd())
    return gitRoot ?? getOriginalCwd()
  }
  /**
   * Process-wide cache of the repo classification; filled in once by
   * isInternalModelRepo().
   *   'internal' - remote matches the INTERNAL_MODEL_REPOS allowlist
   *   'external' - has a remote that is not on the allowlist (public/open-source repo)
   *   'none'     - no remote URL (not a git repo, or no remote configured)
   *   null       - the classification has not been computed yet
   */
  let repoClassCache: 'internal' | 'external' | 'none' | null = null
  /**
   * Read the cached repo classification without doing any I/O.
   *
   * @returns 'internal', 'external' or 'none' once the async check has
   *   completed; null while it has not run yet.
   */
  export function getRepoClassCached(): 'internal' | 'external' | 'none' | null {
    return repoClassCache
  }
  /**
   * Synchronous view of the isInternalModelRepo() result.
   *
   * @returns true only when the cached classification is 'internal'. Before
   *   the async check has run this is false, which is the safe default
   *   (do not leak internal names).
   */
  export function isInternalModelRepoCached(): boolean {
    return repoClassCache === 'internal'
  }
  /**
   * Decide whether a remote URL points at exactly one of the allowlisted repos.
   *
   * A plain substring test is not enough: 'github.com/anthropics/anthropic'
   * is a prefix of 'github.com/anthropics/anthropic-sdk-python', so a
   * substring match would treat other repos in the org as internal. The URL
   * is therefore stripped of trailing slashes and a trailing '.git', and the
   * allowlist entry must be the END of what remains, preceded by either the
   * start of the string, a '/' (scheme separator) or an '@' (userinfo).
   */
  function isAllowlistedRemote(remoteUrl: string): boolean {
    const normalized = remoteUrl
      .trim()
      .replace(/\/+$/, '')
      .replace(/\.git$/, '')
    return INTERNAL_MODEL_REPOS.some(repo => {
      if (!normalized.endsWith(repo)) {
        return false
      }
      // charAt() yields '' for index -1, i.e. when the URL is exactly the entry.
      const precedingChar = normalized.charAt(
        normalized.length - repo.length - 1,
      )
      return (
        precedingChar === '' || precedingChar === '/' || precedingChar === '@'
      )
    })
  }

  /**
   * Check whether the current repo is on the allowlist for internal model names.
   *
   * The answer is stored in repoClassCache, so the remote URL is looked up at
   * most once per process; later calls return the cached classification.
   *
   * @returns true when the repo's remote URL matches an INTERNAL_MODEL_REPOS
   *   entry; false when it does not match or when there is no remote URL.
   */
  export const isInternalModelRepo = sequential(async (): Promise<boolean> => {
    if (repoClassCache !== null) {
      return repoClassCache === 'internal'
    }

    const cwd = getAttributionRepoRoot()
    const remoteUrl = await getRemoteUrlForDir(cwd)

    if (!remoteUrl) {
      repoClassCache = 'none'
      return false
    }

    const isInternal = isAllowlistedRemote(remoteUrl)
    repoClassCache = isInternal ? 'internal' : 'external'
    return isInternal
  })
  /**
   * Rewrite a surface key so that its model part uses the public model name.
   *
   * The key is split at its LAST '/' (e.g. "cli/opus-4-5-fast" becomes
   * surface "cli" and model "opus-4-5-fast"); the model part is passed through
   * sanitizeModelName() and the key is reassembled. A key without any '/' is
   * returned unchanged.
   */
  export function sanitizeSurfaceKey(surfaceKey: string): string {
    const separatorAt = surfaceKey.lastIndexOf('/')
    if (separatorAt === -1) {
      return surfaceKey
    }

    const surfacePart = surfaceKey.slice(0, separatorAt)
    const publicModel = sanitizeModelName(surfaceKey.slice(separatorAt + 1))
    return `${surfacePart}/${publicModel}`
  }
  // @[MODEL LAUNCH]: Add a mapping for the new model ID so git commit trailers show the public name.
  /**
   * Map a (possibly internal) model name onto its public equivalent.
   *
   * The name is matched by substring against known model families; the first
   * family that matches decides the result.
   *
   * @returns The public family name, or the generic 'claude' for names that
   *   match no known family.
   */
  export function sanitizeModelName(shortName: string): string {
    // Most specific first: 'opus-4' is a substring of 'opus-4-6', so a bare
    // family marker must come after its point releases.
    const publicNameByFamily: ReadonlyArray<readonly [string, string]> = [
      ['opus-4-6', 'claude-opus-4-6'],
      ['opus-4-5', 'claude-opus-4-5'],
      ['opus-4-1', 'claude-opus-4-1'],
      ['opus-4', 'claude-opus-4'],
      ['sonnet-4-6', 'claude-sonnet-4-6'],
      ['sonnet-4-5', 'claude-sonnet-4-5'],
      ['sonnet-4', 'claude-sonnet-4'],
      ['sonnet-3-7', 'claude-sonnet-3-7'],
      ['haiku-4-5', 'claude-haiku-4-5'],
      ['haiku-3-5', 'claude-haiku-3-5'],
    ]

    for (const [family, publicName] of publicNameByFamily) {
      if (shortName.includes(family)) {
        return publicName
      }
    }

    // Unknown models get a generic name
    return 'claude'
  }
  /**
   * Per-session bookkeeping of Claude's contributions to files, plus the
   * interaction counters that are snapshotted alongside it.
   */
  export type AttributionState = {
    /** Tracked file states, keyed by the normalized (relative) file path. */
    fileStates: Map<string, FileAttributionState>
    /** Session baseline per path, used for net change calculation. */
    sessionBaselines: Map<string, { contentHash: string; mtime: number }>
    /** Surface from which the edits were made. */
    surface: string
    /** HEAD SHA at session start (for detecting external commits); null when unset. */
    startingHeadSha: string | null
    /** Total prompts in the session (for steer count calculation). */
    promptCount: number
    /** Prompt count at the last commit (to derive steers for the current commit). */
    promptCountAtLastCommit: number
    /** Number of permission prompts shown. */
    permissionPromptCount: number
    /** Permission prompt count at the last commit. */
    permissionPromptCountAtLastCommit: number
    /** Number of ESC presses (user cancelled a permission prompt). */
    escapeCount: number
    /** ESC press count at the last commit. */
    escapeCountAtLastCommit: number
  }
  /**
   * Commit-level totals of Claude's versus human contribution.
   */
  export type AttributionSummary = {
    /** Claude's share of all attributed characters, as a rounded percentage. */
    claudePercent: number
    /** Characters attributed to Claude across the commit. */
    claudeChars: number
    /** Characters attributed to humans across the commit. */
    humanChars: number
    /** Surfaces of the sessions that contributed to the commit. */
    surfaces: string[]
  }
  /**
   * Attribution figures for a single file, as stored in git notes.
   */
  export type FileAttribution = {
    /** Characters in this file attributed to Claude. */
    claudeChars: number
    /** Characters in this file attributed to humans. */
    humanChars: number
    /** Claude's share of this file's attributed characters, as a rounded percentage. */
    percent: number
    /** Surface the file's edits are attributed to. */
    surface: string
  }
  /**
   * Complete attribution payload serialized as JSON into git notes.
   */
  export type AttributionData = {
    /** Schema version of the payload. */
    version: 1
    /** Commit-level totals. */
    summary: AttributionSummary
    /** Per-file attribution, keyed by file path. */
    files: Record<string, FileAttribution>
    /** Claude's characters per surface, with each surface's share of the total. */
    surfaceBreakdown: Record<string, { claudeChars: number; percent: number }>
    /** Staged files that were skipped because they are generated. */
    excludedGenerated: string[]
    /** IDs of the sessions the attribution was computed for. */
    sessions: string[]
  }
  /**
   * Determine the client surface for this process.
   *
   * @returns The value of the CLAUDE_CODE_ENTRYPOINT environment variable, or
   *   'cli' when that variable is not set.
   */
  export function getClientSurface(): string {
    const { CLAUDE_CODE_ENTRYPOINT: entrypoint } = process.env
    return entrypoint ?? 'cli'
  }
  /**
   * Combine a surface and a model into a single surface key.
   *
   * @param surface - Client surface (e.g. "cli").
   * @param model - Model whose canonical name becomes the second part.
   * @returns A key of the form "surface/model" (e.g. "cli/claude-sonnet").
   */
  export function buildSurfaceKey(surface: string, model: ModelName): string {
    const canonicalModel = getCanonicalName(model)
    return `${surface}/${canonicalModel}`
  }
  /**
   * Hash file content with SHA-256.
   *
   * @returns The digest as a lowercase hex string.
   */
  export function computeContentHash(content: string): string {
    const hasher = createHash('sha256')
    hasher.update(content)
    return hasher.digest('hex')
  }
  /**
   * Turn a file path into the key used for attribution tracking: a path
   * relative to the attribution repo root, with forward slashes.
   *
   * - A path that is not absolute is returned unchanged.
   * - An absolute path is compared against the repo root after resolving
   *   symlinks on both sides (e.g. /tmp -> /private/tmp on macOS); if that
   *   fails to place the file inside the root, the unresolved paths are
   *   compared as a fallback.
   * - An absolute path outside the repo root is returned unchanged.
   */
  export function normalizeFilePath(filePath: string): string {
    const fs = getFsImplementation()
    const repoRoot = getAttributionRepoRoot()

    if (!isAbsolute(filePath)) {
      return filePath
    }

    // Resolve symlinks, keeping the input when resolution fails (for example
    // because the file does not exist yet).
    const realpathOrSelf = (target: string): string => {
      try {
        return fs.realpathSync(target)
      } catch {
        return target
      }
    }

    // Relative path from base to target when target is base itself or lies
    // beneath it, otherwise null. Separators are normalized to forward
    // slashes so keys match git diff output on Windows.
    const relativeIfInside = (base: string, target: string): string | null => {
      const isInside = target.startsWith(base + sep) || target === base
      return isInside ? relative(base, target).replaceAll(sep, '/') : null
    }

    const realPath = realpathOrSelf(filePath)
    const realRoot = realpathOrSelf(repoRoot)

    return (
      relativeIfInside(realRoot, realPath) ??
      relativeIfInside(repoRoot, filePath) ??
      filePath
    )
  }
  /**
   * Produce an absolute path for a tracked file.
   *
   * @returns The input itself when it is already absolute; otherwise the
   *   input joined onto the attribution repo root.
   */
  export function expandFilePath(filePath: string): string {
    return isAbsolute(filePath)
      ? filePath
      : join(getAttributionRepoRoot(), filePath)
  }
  /**
   * Build the attribution state a brand-new session starts with: no tracked
   * files, no baselines, no starting HEAD, all counters at zero, and the
   * surface taken from getClientSurface().
   */
  export function createEmptyAttributionState(): AttributionState {
    const emptyState: AttributionState = {
      fileStates: new Map(),
      sessionBaselines: new Map(),
      surface: getClientSurface(),
      startingHeadSha: null,
      promptCount: 0,
      promptCountAtLastCommit: 0,
      permissionPromptCount: 0,
      permissionPromptCountAtLastCommit: 0,
      escapeCount: 0,
      escapeCountAtLastCommit: 0,
    }
    return emptyState
  }
  /**
   * Work out the file state to store after Claude changed a file.
   *
   * The size of the edit is added to whatever contribution is already
   * recorded for the file's normalized path in `existingFileStates`.
   *
   * @param existingFileStates - Currently tracked states, read but not modified.
   * @param filePath - Path of the changed file (absolute or relative).
   * @param oldContent - Content before the change ('' for a new file).
   * @param newContent - Content after the change ('' for a full deletion).
   * @param mtime - Modification time to record on the new state.
   * @returns The state to store, or null if computing it threw (the error is logged).
   */
  function computeFileModificationState(
    existingFileStates: Map<string, FileAttributionState>,
    filePath: string,
    oldContent: string,
    newContent: string,
    mtime: number,
  ): FileAttributionState | null {
    const stateKey = normalizeFilePath(filePath)

    try {
      const oldLen = oldContent.length
      const newLen = newContent.length
      let editedChars: number

      if (oldContent === '') {
        // New file: everything written counts.
        editedChars = newLen
      } else if (newContent === '') {
        // Full deletion: everything removed counts.
        editedChars = oldLen
      } else {
        // Measure the region that actually changed by trimming the common
        // prefix and suffix. Unlike Math.abs(newLen - oldLen), this also
        // counts same-length replacements (e.g. "Esc" -> "esc").
        const shorterLen = Math.min(oldLen, newLen)

        let commonPrefix = 0
        for (; commonPrefix < shorterLen; commonPrefix++) {
          if (oldContent[commonPrefix] !== newContent[commonPrefix]) {
            break
          }
        }

        // The suffix may not overlap the prefix in the shorter string.
        const suffixLimit = shorterLen - commonPrefix
        let commonSuffix = 0
        for (; commonSuffix < suffixLimit; commonSuffix++) {
          if (
            oldContent[oldLen - 1 - commonSuffix] !==
            newContent[newLen - 1 - commonSuffix]
          ) {
            break
          }
        }

        editedChars = Math.max(
          oldLen - commonPrefix - commonSuffix,
          newLen - commonPrefix - commonSuffix,
        )
      }

      // Accumulate on top of any contribution already tracked for this path
      const previousChars =
        existingFileStates.get(stateKey)?.claudeContribution ?? 0

      return {
        contentHash: computeContentHash(newContent),
        claudeContribution: previousChars + editedChars,
        mtime,
      }
    } catch (error) {
      logError(error as Error)
      return null
    }
  }
  /**
   * Look up a file's modification time in milliseconds (mtimeMs).
   *
   * This is async so callers can compute the value up front, before entering
   * a synchronous setAppState callback.
   *
   * @returns The file's mtimeMs, or Date.now() when the file cannot be
   *   stat'ed (for example because it does not exist).
   */
  export async function getFileMtime(filePath: string): Promise<number> {
    const absolutePath = expandFilePath(normalizeFilePath(filePath))
    try {
      const { mtimeMs } = await stat(absolutePath)
      return mtimeMs
    } catch {
      return Date.now()
    }
  }
  /**
   * Record a modification Claude made to a file (called after the Edit/Write
   * tool completes).
   *
   * The input state is not mutated; a new state with a copied fileStates map
   * is returned.
   *
   * @param state - Current attribution state.
   * @param filePath - Path of the modified file.
   * @param oldContent - Content before the modification.
   * @param newContent - Content after the modification.
   * @param _userModified - Not used by this function.
   * @param mtime - Modification time to record; defaults to Date.now().
   * @returns The updated state, or `state` itself when the change could not
   *   be computed.
   */
  export function trackFileModification(
    state: AttributionState,
    filePath: string,
    oldContent: string,
    newContent: string,
    _userModified: boolean,
    mtime: number = Date.now(),
  ): AttributionState {
    const stateKey = normalizeFilePath(filePath)
    const updatedFileState = computeFileModificationState(
      state.fileStates,
      filePath,
      oldContent,
      newContent,
      mtime,
    )

    if (updatedFileState === null) {
      return state
    }

    const fileStates = new Map(state.fileStates)
    fileStates.set(stateKey, updatedFileState)

    logForDebugging(
      `Attribution: Tracked ${updatedFileState.claudeContribution} chars for ${stateKey}`,
    )

    return { ...state, fileStates }
  }
  /**
   * Record a file Claude created through a mechanism that is not tracked
   * directly (e.g. a bash command).
   *
   * @param state - Current attribution state.
   * @param filePath - Path of the created file.
   * @param content - Content of the new file.
   * @param mtime - Modification time to record; defaults to Date.now().
   * @returns The updated attribution state.
   */
  export function trackFileCreation(
    state: AttributionState,
    filePath: string,
    content: string,
    mtime: number = Date.now(),
  ): AttributionState {
    // Creating a file is the same as modifying it from empty content.
    const previousContent = ''
    return trackFileModification(
      state,
      filePath,
      previousContent,
      content,
      false,
      mtime,
    )
  }
  /**
   * Record a file Claude deleted through a mechanism that is not tracked
   * directly (e.g. a bash rm command).
   *
   * The length of the deleted content is added to the file's existing
   * contribution, and the stored content hash becomes '' to mark the deletion.
   * The input state is not mutated.
   *
   * @param state - Current attribution state.
   * @param filePath - Path of the deleted file.
   * @param oldContent - Content the file had before it was deleted.
   * @returns A new state containing the updated entry for the file.
   */
  export function trackFileDeletion(
    state: AttributionState,
    filePath: string,
    oldContent: string,
  ): AttributionState {
    const stateKey = normalizeFilePath(filePath)
    const previousChars = state.fileStates.get(stateKey)?.claudeContribution ?? 0
    const deletedChars = oldContent.length

    const deletionState: FileAttributionState = {
      contentHash: '', // Empty hash for deleted files
      claudeContribution: previousChars + deletedChars,
      mtime: Date.now(),
    }

    const fileStates = new Map(state.fileStates)
    fileStates.set(stateKey, deletionState)

    logForDebugging(
      `Attribution: Tracked deletion of ${stateKey} (${deletedChars} chars removed, total contribution: ${deletionState.claudeContribution})`,
    )

    return { ...state, fileStates }
  }
  436. // --
  /**
   * Record many file changes at once.
   *
   * The fileStates map is copied a single time and that copy is updated for
   * every change, instead of copying the map per file. This avoids the O(n²)
   * cost when processing large git diffs (e.g. jj operations that touch
   * hundreds of thousands of files).
   *
   * @param state - Current attribution state (not mutated).
   * @param changes - Changes to record; an entry without `mtime` gets Date.now().
   * @returns A new state whose fileStates reflect all changes that could be computed.
   */
  export function trackBulkFileChanges(
    state: AttributionState,
    changes: ReadonlyArray<{
      path: string
      type: 'modified' | 'created' | 'deleted'
      oldContent: string
      newContent: string
      mtime?: number
    }>,
  ): AttributionState {
    // The one and only copy; every change below writes into it.
    const fileStates = new Map(state.fileStates)

    for (const change of changes) {
      const mtime = change.mtime ?? Date.now()

      if (change.type !== 'deleted') {
        // Created or modified: later changes to the same path build on the
        // entries already written into the working copy.
        const updatedFileState = computeFileModificationState(
          fileStates,
          change.path,
          change.oldContent,
          change.newContent,
          mtime,
        )
        if (updatedFileState) {
          const stateKey = normalizeFilePath(change.path)
          fileStates.set(stateKey, updatedFileState)
          logForDebugging(
            `Attribution: Tracked ${updatedFileState.claudeContribution} chars for ${stateKey}`,
          )
        }
        continue
      }

      // Deleted: count the removed content and mark the entry with an empty hash.
      const stateKey = normalizeFilePath(change.path)
      const previousChars = fileStates.get(stateKey)?.claudeContribution ?? 0
      const deletedChars = change.oldContent.length
      const totalChars = previousChars + deletedChars
      fileStates.set(stateKey, {
        contentHash: '',
        claudeContribution: totalChars,
        mtime,
      })
      logForDebugging(
        `Attribution: Tracked deletion of ${stateKey} (${deletedChars} chars removed, total contribution: ${totalChars})`,
      )
    }

    return { ...state, fileStates }
  }
  /**
   * Coerce a string-keyed collection into a Map.
   * Attribution state that went through serialization may carry plain objects
   * where live state carries Maps, so both shapes (and a missing value) are
   * accepted.
   */
  function toStringKeyedMap<V>(
    value: Map<string, V> | Record<string, V> | null | undefined,
  ): Map<string, V> {
    if (value instanceof Map) {
      return value
    }
    const entries: [string, V][] = Object.entries(value ?? {})
    return new Map(entries)
  }

  /**
   * Calculate the attribution payload for the currently staged files.
   *
   * The given session states are merged first: for baselines the first state
   * that has a path wins, and for file states the Claude contributions of all
   * states are summed. Each staged file is then classified:
   * - generated files are listed in `excludedGenerated` and not counted;
   * - staged deletions count as Claude's when the file is tracked, otherwise
   *   as human (git diff estimate, with a minimum of 100);
   * - tracked files count entirely as Claude's tracked contribution;
   * - untracked files with a baseline count as human (git diff estimate,
   *   falling back to the file size);
   * - untracked files without a baseline count as human with the file size;
   * - files that cannot be stat'ed are skipped.
   *
   * @param states - Attribution states to merge. May be empty, in which case
   *   every file is attributed to the current client surface.
   * @param stagedFiles - Staged paths, relative to the attribution repo root.
   * @returns Attribution data with per-file figures, totals and surface breakdown.
   */
  export async function calculateCommitAttribution(
    states: AttributionState[],
    stagedFiles: string[],
  ): Promise<AttributionData> {
    const cwd = getAttributionRepoRoot()
    const sessionId = getSessionId()

    const files: Record<string, FileAttribution> = {}
    const excludedGenerated: string[] = []
    const surfaces = new Set<string>()
    const surfaceCounts: Record<string, number> = {}
    let totalClaudeChars = 0
    let totalHumanChars = 0

    // Merge file states from all sessions
    const mergedFileStates = new Map<string, FileAttributionState>()
    const mergedBaselines = new Map<
      string,
      { contentHash: string; mtime: number }
    >()

    for (const state of states) {
      surfaces.add(state.surface)

      // Merge baselines (earliest baseline wins)
      for (const [path, baseline] of toStringKeyedMap(state.sessionBaselines)) {
        if (!mergedBaselines.has(path)) {
          mergedBaselines.set(path, baseline)
        }
      }

      // Merge file states (accumulate contributions; other fields come from
      // the later state)
      for (const [path, fileState] of toStringKeyedMap(state.fileStates)) {
        const existing = mergedFileStates.get(path)
        if (existing) {
          mergedFileStates.set(path, {
            ...fileState,
            claudeContribution:
              existing.claudeContribution + fileState.claudeContribution,
          })
        } else {
          mergedFileStates.set(path, fileState)
        }
      }
    }

    // Every file is attributed to the first state's surface. With no states
    // at all, fall back to the current client surface instead of throwing on
    // an undefined first element.
    const fileSurface = states[0]?.surface ?? getClientSurface()

    // Process files in parallel
    const fileResults = await Promise.all(
      stagedFiles.map(async file => {
        // Skip generated files
        if (isGeneratedFile(file)) {
          return { type: 'generated' as const, file }
        }

        const absPath = join(cwd, file)
        const fileState = mergedFileStates.get(file)
        const baseline = mergedBaselines.get(file)

        let claudeChars = 0
        let humanChars = 0

        const deleted = await isFileDeleted(file)

        if (deleted) {
          if (fileState) {
            // Claude deleted this file (tracked deletion)
            claudeChars = fileState.claudeContribution
            humanChars = 0
          } else {
            // Human deleted this file (untracked deletion): use the diff size,
            // with a minimum attribution so the deletion is never counted as 0
            const diffSize = await getGitDiffSize(file)
            humanChars = diffSize > 0 ? diffSize : 100
          }
        } else {
          try {
            // Only need file size, not content - stat() avoids loading GB-scale
            // build artifacts into memory when they appear in the working tree.
            // stats.size (bytes) is an adequate proxy for char count here.
            const stats = await stat(absPath)

            if (fileState) {
              // We have tracked modifications for this file
              claudeChars = fileState.claudeContribution
              humanChars = 0
            } else if (baseline) {
              // File was modified but not tracked - human modification
              const diffSize = await getGitDiffSize(file)
              humanChars = diffSize > 0 ? diffSize : stats.size
            } else {
              // New file not created by Claude
              humanChars = stats.size
            }
          } catch {
            // File doesn't exist or stat failed - skip it
            return null
          }
        }

        // Ensure non-negative values
        claudeChars = Math.max(0, claudeChars)
        humanChars = Math.max(0, humanChars)

        const total = claudeChars + humanChars
        const percent = total > 0 ? Math.round((claudeChars / total) * 100) : 0

        return {
          type: 'file' as const,
          file,
          claudeChars,
          humanChars,
          percent,
          surface: fileSurface,
        }
      }),
    )

    // Aggregate results
    for (const result of fileResults) {
      if (!result) continue

      if (result.type === 'generated') {
        excludedGenerated.push(result.file)
        continue
      }

      files[result.file] = {
        claudeChars: result.claudeChars,
        humanChars: result.humanChars,
        percent: result.percent,
        surface: result.surface,
      }

      totalClaudeChars += result.claudeChars
      totalHumanChars += result.humanChars

      surfaceCounts[result.surface] =
        (surfaceCounts[result.surface] ?? 0) + result.claudeChars
    }

    const totalChars = totalClaudeChars + totalHumanChars
    const claudePercent =
      totalChars > 0 ? Math.round((totalClaudeChars / totalChars) * 100) : 0

    // Calculate surface breakdown (percentage of total content per surface)
    const surfaceBreakdown: Record<
      string,
      { claudeChars: number; percent: number }
    > = {}
    for (const [surface, chars] of Object.entries(surfaceCounts)) {
      // Calculate what percentage of TOTAL content this surface contributed
      const percent = totalChars > 0 ? Math.round((chars / totalChars) * 100) : 0
      surfaceBreakdown[surface] = { claudeChars: chars, percent }
    }

    return {
      version: 1,
      summary: {
        claudePercent,
        claudeChars: totalClaudeChars,
        humanChars: totalHumanChars,
        surfaces: Array.from(surfaces),
      },
      files,
      surfaceBreakdown,
      excludedGenerated,
      sessions: [sessionId],
    }
  }
  /**
   * Estimate the size of a file's staged change, in characters.
   *
   * Git reports staged changes per line, so the result is an approximation:
   * (added lines + deleted lines) × 40, assuming ~40 characters per line.
   *
   * The machine-readable `--numstat` format is parsed rather than the
   * `--stat` summary line, because that summary is human-readable text that
   * git translates and would not parse under a non-English locale.
   *
   * @param filePath - Path of the file, as a pathspec relative to the repo root.
   * @returns The estimated number of changed characters, or 0 when git fails,
   *   prints nothing, or reports no line counts (numstat prints '-' for
   *   binary files).
   */
  export async function getGitDiffSize(filePath: string): Promise<number> {
    // Line-based changes are converted using an average line length.
    const approxCharsPerLine = 40
    const cwd = getAttributionRepoRoot()

    try {
      const result = await execFileNoThrowWithCwd(
        gitExe(),
        ['diff', '--cached', '--numstat', '--', filePath],
        { cwd, timeout: 5000 },
      )

      if (result.code !== 0 || !result.stdout) {
        return 0
      }

      // Each line has the form "<added>\t<deleted>\t<path>"
      let changedLines = 0
      for (const line of result.stdout.split('\n')) {
        const counts = /^(\d+)\t(\d+)\t/.exec(line)
        if (counts) {
          changedLines += parseInt(counts[1]!, 10) + parseInt(counts[2]!, 10)
        }
      }

      return changedLines * approxCharsPerLine
    } catch {
      return 0
    }
  }
  /**
   * Tell whether the staged change for a file is a deletion.
   *
   * Runs `git diff --cached --name-status` for the path and looks for the
   * "D\t<filename>" status line.
   *
   * @returns true when the output starts with the deletion status; false
   *   otherwise, including when git fails or prints nothing.
   */
  export async function isFileDeleted(filePath: string): Promise<boolean> {
    const repoRoot = getAttributionRepoRoot()

    try {
      const { code, stdout } = await execFileNoThrowWithCwd(
        gitExe(),
        ['diff', '--cached', '--name-status', '--', filePath],
        { cwd: repoRoot, timeout: 5000 },
      )

      if (code !== 0 || !stdout) {
        return false
      }
      return stdout.trim().startsWith('D\t')
    } catch {
      // Errors are treated as "not deleted"
      return false
    }
  }
  /**
   * List the files that are currently staged in git.
   *
   * `-z` makes git print each path verbatim, terminated by a NUL byte.
   * Without it, paths containing non-ASCII or special characters are printed
   * C-quoted (the default `core.quotePath` behavior) and would match neither
   * the tracked attribution keys nor the files on disk.
   *
   * @returns Staged paths relative to the repo root; an empty array when
   *   nothing is staged or git fails (thrown errors are logged).
   */
  export async function getStagedFiles(): Promise<string[]> {
    const cwd = getAttributionRepoRoot()

    try {
      const result = await execFileNoThrowWithCwd(
        gitExe(),
        ['diff', '--cached', '--name-only', '-z'],
        { cwd, timeout: 5000 },
      )

      if (result.code === 0 && result.stdout) {
        // The output ends with a NUL, so drop the empty trailing entry
        return result.stdout.split('\0').filter(Boolean)
      }
    } catch (error) {
      logError(error as Error)
    }

    return []
  }
  744. // formatAttributionTrailer moved to attributionTrailer.ts for tree-shaking
  745. // (contains excluded strings that should not be in external builds)
  /**
   * Detect whether the repository is in the middle of a multi-step git
   * operation (rebase, merge, cherry-pick or bisect).
   *
   * Each operation is recognized by a marker entry inside the git directory.
   *
   * @returns true when at least one marker exists; false when none exists or
   *   the git directory cannot be resolved.
   */
  export async function isGitTransientState(): Promise<boolean> {
    const gitDir = await resolveGitDir(getAttributionRepoRoot())
    if (!gitDir) {
      return false
    }

    // A marker counts as present when stat() on it succeeds.
    const markerExists = async (marker: string): Promise<boolean> => {
      try {
        await stat(join(gitDir, marker))
        return true
      } catch {
        return false
      }
    }

    const markers = [
      'rebase-merge',
      'rebase-apply',
      'MERGE_HEAD',
      'CHERRY_PICK_HEAD',
      'BISECT_LOG',
    ]

    const presence = await Promise.all(markers.map(marker => markerExists(marker)))
    return presence.includes(true)
  }
  /**
   * Serialize attribution state into a snapshot message for persistence.
   *
   * The fileStates map is flattened into a plain object, and the surface and
   * all counters are copied as-is. `sessionBaselines` and `startingHeadSha`
   * are not part of the snapshot.
   *
   * @param state - Attribution state to serialize.
   * @param messageId - ID to assign to the snapshot message.
   */
  export function stateToSnapshotMessage(
    state: AttributionState,
    messageId: UUID,
  ): AttributionSnapshotMessage {
    const fileStates: Record<string, FileAttributionState> = Object.fromEntries(
      state.fileStates,
    )

    const {
      surface,
      promptCount,
      promptCountAtLastCommit,
      permissionPromptCount,
      permissionPromptCountAtLastCommit,
      escapeCount,
      escapeCountAtLastCommit,
    } = state

    return {
      type: 'attribution-snapshot',
      messageId,
      surface,
      fileStates,
      promptCount,
      promptCountAtLastCommit,
      permissionPromptCount,
      permissionPromptCountAtLastCommit,
      escapeCount,
      escapeCountAtLastCommit,
    }
  }
  /**
   * Rebuild attribution state from persisted snapshot messages.
   *
   * Only the LAST snapshot is used. Snapshots are full-state dumps (see
   * stateToSnapshotMessage), not deltas, so the most recent one already holds
   * the latest count for every path. Summing counts across snapshots would
   * make them grow quadratically on restore (837 snapshots × 280 files →
   * 1.15 quadrillion "chars" tracked for a 5KB file over a 5-day session).
   *
   * @param snapshots - Snapshots in chronological order.
   * @returns The restored state, or an empty state when there are no
   *   snapshots. Counters missing from the snapshot are restored as 0.
   */
  export function restoreAttributionStateFromSnapshots(
    snapshots: AttributionSnapshotMessage[],
  ): AttributionState {
    const restored = createEmptyAttributionState()

    const latest = snapshots[snapshots.length - 1]
    if (!latest) {
      return restored
    }

    restored.surface = latest.surface
    Object.entries(latest.fileStates).forEach(([path, fileState]) => {
      restored.fileStates.set(path, fileState)
    })

    // Counters come from the same (most recent) snapshot
    restored.promptCount = latest.promptCount ?? 0
    restored.promptCountAtLastCommit = latest.promptCountAtLastCommit ?? 0
    restored.permissionPromptCount = latest.permissionPromptCount ?? 0
    restored.permissionPromptCountAtLastCommit =
      latest.permissionPromptCountAtLastCommit ?? 0
    restored.escapeCount = latest.escapeCount ?? 0
    restored.escapeCountAtLastCommit = latest.escapeCountAtLastCommit ?? 0

    return restored
  }
  /**
   * Restore attribution state from logged snapshots when a session resumes
   * and hand it to the caller.
   *
   * @param attributionSnapshots - Snapshots read from the log.
   * @param onUpdateState - Invoked once, synchronously, with the restored state.
   */
  export function attributionRestoreStateFromLog(
    attributionSnapshots: AttributionSnapshotMessage[],
    onUpdateState: (newState: AttributionState) => void,
  ): void {
    onUpdateState(restoreAttributionStateFromSnapshots(attributionSnapshots))
  }
  /**
   * Bump the prompt counter by one and persist the result as a snapshot.
   * Saving a snapshot here is what lets the prompt count survive compaction.
   *
   * @param attribution - Current attribution state (not mutated)
   * @param saveSnapshot - Receives the snapshot of the new state; the caller
   *   decides how to persist it (allows async handling by caller)
   * @returns New attribution state with promptCount incremented by one
   */
  export function incrementPromptCount(
    attribution: AttributionState,
    saveSnapshot: (snapshot: AttributionSnapshotMessage) => void,
  ): AttributionState {
    const bumped: AttributionState = {
      ...attribution,
      promptCount: attribution.promptCount + 1,
    }
    saveSnapshot(stateToSnapshotMessage(bumped, randomUUID()))
    return bumped
  }