attribution.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. import { feature } from 'bun:bundle'
  2. import { stat } from 'fs/promises'
  3. import { getClientType } from '../bootstrap/state.js'
  4. import {
  5. getRemoteSessionUrl,
  6. isRemoteSessionLocal,
  7. PRODUCT_URL,
  8. } from '../constants/product.js'
  9. import { TERMINAL_OUTPUT_TAGS } from '../constants/xml.js'
  10. import type { AppState } from '../state/AppState.js'
  11. import { FILE_EDIT_TOOL_NAME } from '../tools/FileEditTool/constants.js'
  12. import { FILE_READ_TOOL_NAME } from '../tools/FileReadTool/prompt.js'
  13. import { FILE_WRITE_TOOL_NAME } from '../tools/FileWriteTool/prompt.js'
  14. import { GLOB_TOOL_NAME } from '../tools/GlobTool/prompt.js'
  15. import { GREP_TOOL_NAME } from '../tools/GrepTool/prompt.js'
  16. import type { Entry } from '../types/logs.js'
  17. import {
  18. type AttributionData,
  19. calculateCommitAttribution,
  20. isInternalModelRepo,
  21. isInternalModelRepoCached,
  22. sanitizeModelName,
  23. } from './commitAttribution.js'
  24. import { logForDebugging } from './debug.js'
  25. import { parseJSONL } from './json.js'
  26. import { logError } from './log.js'
  27. import {
  28. getCanonicalName,
  29. getMainLoopModel,
  30. getPublicModelDisplayName,
  31. getPublicModelName,
  32. } from './model/model.js'
  33. import { isMemoryFileAccess } from './sessionFileAccessHooks.js'
  34. import { getTranscriptPath } from './sessionStorage.js'
  35. import { readTranscriptForLoad } from './sessionStoragePortable.js'
  36. import { getInitialSettings } from './settings/settings.js'
  37. import { isUndercover } from './undercover.js'
  38. export type AttributionTexts = {
  39. commit: string
  40. pr: string
  41. }
  42. /**
  43. * Returns attribution text for commits and PRs based on user settings.
  44. * Handles:
  45. * - Dynamic model name via getPublicModelName()
  46. * - Custom attribution settings (settings.attribution.commit/pr)
  47. * - Backward compatibility with deprecated includeCoAuthoredBy setting
  48. * - Remote mode: returns session URL for attribution
  49. */
  50. export function getAttributionTexts(): AttributionTexts {
  51. if (process.env.USER_TYPE === 'ant' && isUndercover()) {
  52. return { commit: '', pr: '' }
  53. }
  54. if (getClientType() === 'remote') {
  55. const remoteSessionId = process.env.CLAUDE_CODE_REMOTE_SESSION_ID
  56. if (remoteSessionId) {
  57. const ingressUrl = process.env.SESSION_INGRESS_URL
  58. // Skip for local dev - URLs won't persist
  59. if (!isRemoteSessionLocal(remoteSessionId, ingressUrl)) {
  60. const sessionUrl = getRemoteSessionUrl(remoteSessionId, ingressUrl)
  61. return { commit: sessionUrl, pr: sessionUrl }
  62. }
  63. }
  64. return { commit: '', pr: '' }
  65. }
  66. // @[MODEL LAUNCH]: Update the hardcoded fallback model name below (guards against codename leaks).
  67. // For internal repos, use the real model name. For external repos,
  68. // fall back to "Claude Opus 4.6" for unrecognized models to avoid leaking codenames.
  69. const model = getMainLoopModel()
  70. const isKnownPublicModel = getPublicModelDisplayName(model) !== null
  71. const modelName =
  72. isInternalModelRepoCached() || isKnownPublicModel
  73. ? getPublicModelName(model)
  74. : 'Claude Opus 4.6'
  75. const defaultAttribution = `🤖 Generated with [Claude Code](${PRODUCT_URL})`
  76. const defaultCommit = `Co-Authored-By: ${modelName} <noreply@anthropic.com>`
  77. const settings = getInitialSettings()
  78. // New attribution setting takes precedence over deprecated includeCoAuthoredBy
  79. if (settings.attribution) {
  80. return {
  81. commit: settings.attribution.commit ?? defaultCommit,
  82. pr: settings.attribution.pr ?? defaultAttribution,
  83. }
  84. }
  85. // Backward compatibility: deprecated includeCoAuthoredBy setting
  86. if (settings.includeCoAuthoredBy === false) {
  87. return { commit: '', pr: '' }
  88. }
  89. return { commit: defaultCommit, pr: defaultAttribution }
  90. }
  91. /**
  92. * Check if a message content string is terminal output rather than a user prompt.
  93. * Terminal output includes bash input/output tags and caveat messages about local commands.
  94. */
  95. function isTerminalOutput(content: string): boolean {
  96. for (const tag of TERMINAL_OUTPUT_TAGS) {
  97. if (content.includes(`<${tag}>`)) {
  98. return true
  99. }
  100. }
  101. return false
  102. }
  103. /**
  104. * Count user messages with visible text content in a list of non-sidechain messages.
  105. * Excludes tool_result blocks, terminal output, and empty messages.
  106. *
  107. * Callers should pass messages already filtered to exclude sidechain messages.
  108. */
  109. export function countUserPromptsInMessages(
  110. messages: ReadonlyArray<{ type: string; message?: { content?: unknown } }>,
  111. ): number {
  112. let count = 0
  113. for (const message of messages) {
  114. if (message.type !== 'user') {
  115. continue
  116. }
  117. const content = message.message?.content
  118. if (!content) {
  119. continue
  120. }
  121. let hasUserText = false
  122. if (typeof content === 'string') {
  123. if (isTerminalOutput(content)) {
  124. continue
  125. }
  126. hasUserText = content.trim().length > 0
  127. } else if (Array.isArray(content)) {
  128. hasUserText = content.some(block => {
  129. if (!block || typeof block !== 'object' || !('type' in block)) {
  130. return false
  131. }
  132. return (
  133. (block.type === 'text' &&
  134. typeof block.text === 'string' &&
  135. !isTerminalOutput(block.text)) ||
  136. block.type === 'image' ||
  137. block.type === 'document'
  138. )
  139. })
  140. }
  141. if (hasUserText) {
  142. count++
  143. }
  144. }
  145. return count
  146. }
  147. /**
  148. * Count non-sidechain user messages in transcript entries.
  149. * Used to calculate the number of "steers" (user prompts - 1).
  150. *
  151. * Counts user messages that contain actual user-typed text,
  152. * excluding tool_result blocks, sidechain messages, and terminal output.
  153. */
  154. function countUserPromptsFromEntries(entries: ReadonlyArray<Entry>): number {
  155. const nonSidechain = entries.filter(
  156. entry =>
  157. entry.type === 'user' && !('isSidechain' in entry && entry.isSidechain),
  158. )
  159. return countUserPromptsInMessages(nonSidechain)
  160. }
  161. /**
  162. * Get full attribution data from the provided AppState's attribution state.
  163. * Uses ALL tracked files from the attribution state (not just staged files)
  164. * because for PR attribution, files may not be staged yet.
  165. * Returns null if no attribution data is available.
  166. */
  167. async function getPRAttributionData(
  168. appState: AppState,
  169. ): Promise<AttributionData | null> {
  170. const attribution = appState.attribution
  171. if (!attribution) {
  172. return null
  173. }
  174. // Handle both Map and plain object (in case of serialization)
  175. const fileStates = attribution.fileStates
  176. const isMap = fileStates instanceof Map
  177. const trackedFiles = isMap
  178. ? Array.from(fileStates.keys())
  179. : Object.keys(fileStates)
  180. if (trackedFiles.length === 0) {
  181. return null
  182. }
  183. try {
  184. return await calculateCommitAttribution([attribution], trackedFiles)
  185. } catch (error) {
  186. logError(error as Error)
  187. return null
  188. }
  189. }
  190. const MEMORY_ACCESS_TOOL_NAMES = new Set([
  191. FILE_READ_TOOL_NAME,
  192. GREP_TOOL_NAME,
  193. GLOB_TOOL_NAME,
  194. FILE_EDIT_TOOL_NAME,
  195. FILE_WRITE_TOOL_NAME,
  196. ])
  197. /**
  198. * Count memory file accesses in transcript entries.
  199. * Uses the same detection conditions as the PostToolUse session file access hooks.
  200. */
  201. function countMemoryFileAccessFromEntries(
  202. entries: ReadonlyArray<Entry>,
  203. ): number {
  204. let count = 0
  205. for (const entry of entries) {
  206. if (entry.type !== 'assistant') continue
  207. const content = entry.message?.content
  208. if (!Array.isArray(content)) continue
  209. for (const block of content) {
  210. if (
  211. block.type !== 'tool_use' ||
  212. !MEMORY_ACCESS_TOOL_NAMES.has(block.name)
  213. )
  214. continue
  215. if (isMemoryFileAccess(block.name, block.input)) count++
  216. }
  217. }
  218. return count
  219. }
  220. /**
  221. * Read session transcript entries and compute prompt count and memory access
  222. * count. Pre-compact entries are skipped — the N-shot count and memory-access
  223. * count should reflect only the current conversation arc, not accumulated
  224. * prompts from before a compaction boundary.
  225. */
  226. async function getTranscriptStats(): Promise<{
  227. promptCount: number
  228. memoryAccessCount: number
  229. }> {
  230. try {
  231. const filePath = getTranscriptPath()
  232. const fileSize = (await stat(filePath)).size
  233. // Fused reader: attr-snap lines (84% of a long session by bytes) are
  234. // skipped at the fd level so peak scales with output, not file size. The
  235. // one surviving attr-snap at EOF is a no-op for the count functions
  236. // (neither checks type === 'attribution-snapshot'). When the last
  237. // boundary has preservedSegment the reader returns full (no truncate);
  238. // the findLastIndex below still slices to post-boundary.
  239. const scan = await readTranscriptForLoad(filePath, fileSize)
  240. const buf = scan.postBoundaryBuf
  241. const entries = parseJSONL<Entry>(buf)
  242. const lastBoundaryIdx = entries.findLastIndex(
  243. e =>
  244. e.type === 'system' &&
  245. 'subtype' in e &&
  246. e.subtype === 'compact_boundary',
  247. )
  248. const postBoundary =
  249. lastBoundaryIdx >= 0 ? entries.slice(lastBoundaryIdx + 1) : entries
  250. return {
  251. promptCount: countUserPromptsFromEntries(postBoundary),
  252. memoryAccessCount: countMemoryFileAccessFromEntries(postBoundary),
  253. }
  254. } catch {
  255. return { promptCount: 0, memoryAccessCount: 0 }
  256. }
  257. }
  258. /**
  259. * Get enhanced PR attribution text with Claude contribution stats.
  260. *
  261. * Format: "🤖 Generated with Claude Code (93% 3-shotted by claude-opus-4-5)"
  262. *
  263. * Rules:
  264. * - Shows Claude contribution percentage from commit attribution
  265. * - Shows N-shotted where N is the prompt count (1-shotted, 2-shotted, etc.)
  266. * - Shows short model name (e.g., claude-opus-4-5)
  267. * - Returns default attribution if stats can't be computed
  268. *
  269. * @param getAppState Function to get the current AppState (from command context)
  270. */
  271. export async function getEnhancedPRAttribution(
  272. getAppState: () => AppState,
  273. ): Promise<string> {
  274. if (process.env.USER_TYPE === 'ant' && isUndercover()) {
  275. return ''
  276. }
  277. if (getClientType() === 'remote') {
  278. const remoteSessionId = process.env.CLAUDE_CODE_REMOTE_SESSION_ID
  279. if (remoteSessionId) {
  280. const ingressUrl = process.env.SESSION_INGRESS_URL
  281. // Skip for local dev - URLs won't persist
  282. if (!isRemoteSessionLocal(remoteSessionId, ingressUrl)) {
  283. return getRemoteSessionUrl(remoteSessionId, ingressUrl)
  284. }
  285. }
  286. return ''
  287. }
  288. const settings = getInitialSettings()
  289. // If user has custom PR attribution, use that
  290. if (settings.attribution?.pr) {
  291. return settings.attribution.pr
  292. }
  293. // Backward compatibility: deprecated includeCoAuthoredBy setting
  294. if (settings.includeCoAuthoredBy === false) {
  295. return ''
  296. }
  297. const defaultAttribution = `🤖 Generated with [Claude Code](${PRODUCT_URL})`
  298. // Get AppState first
  299. const appState = getAppState()
  300. logForDebugging(
  301. `PR Attribution: appState.attribution exists: ${!!appState.attribution}`,
  302. )
  303. if (appState.attribution) {
  304. const fileStates = appState.attribution.fileStates
  305. const isMap = fileStates instanceof Map
  306. const fileCount = isMap ? fileStates.size : Object.keys(fileStates).length
  307. logForDebugging(`PR Attribution: fileStates count: ${fileCount}`)
  308. }
  309. // Get attribution stats (transcript is read once for both prompt count and memory access)
  310. const [attributionData, { promptCount, memoryAccessCount }, isInternal] =
  311. await Promise.all([
  312. getPRAttributionData(appState),
  313. getTranscriptStats(),
  314. isInternalModelRepo(),
  315. ])
  316. const claudePercent = attributionData?.summary.claudePercent ?? 0
  317. logForDebugging(
  318. `PR Attribution: claudePercent: ${claudePercent}, promptCount: ${promptCount}, memoryAccessCount: ${memoryAccessCount}`,
  319. )
  320. // Get short model name, sanitized for non-internal repos
  321. const rawModelName = getCanonicalName(getMainLoopModel())
  322. const shortModelName = isInternal
  323. ? rawModelName
  324. : sanitizeModelName(rawModelName)
  325. // If no attribution data, return default
  326. if (claudePercent === 0 && promptCount === 0 && memoryAccessCount === 0) {
  327. logForDebugging('PR Attribution: returning default (no data)')
  328. return defaultAttribution
  329. }
  330. // Build the enhanced attribution: "🤖 Generated with Claude Code (93% 3-shotted by claude-opus-4-5, 2 memories recalled)"
  331. const memSuffix =
  332. memoryAccessCount > 0
  333. ? `, ${memoryAccessCount} ${memoryAccessCount === 1 ? 'memory' : 'memories'} recalled`
  334. : ''
  335. const summary = `🤖 Generated with [Claude Code](${PRODUCT_URL}) (${claudePercent}% ${promptCount}-shotted by ${shortModelName}${memSuffix})`
  336. // Append trailer lines for squash-merge survival. Only for allowlisted repos
  337. // (INTERNAL_MODEL_REPOS) and only in builds with COMMIT_ATTRIBUTION enabled —
  338. // attributionTrailer.ts contains excluded strings, so reach it via dynamic
  339. // import behind feature(). When the repo is configured with
  340. // squash_merge_commit_message=PR_BODY (cli, apps), the PR body becomes the
  341. // squash commit body verbatim — trailer lines at the end become proper git
  342. // trailers on the squash commit.
  343. if (feature('COMMIT_ATTRIBUTION') && isInternal && attributionData) {
  344. const { buildPRTrailers } = await import('./attributionTrailer.js')
  345. const trailers = buildPRTrailers(attributionData, appState.attribution)
  346. const result = `${summary}\n\n${trailers.join('\n')}`
  347. logForDebugging(`PR Attribution: returning with trailers: ${result}`)
  348. return result
  349. }
  350. logForDebugging(`PR Attribution: returning summary: ${summary}`)
  351. return summary
  352. }