mcpOutputStorage.ts 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. import { writeFile } from 'fs/promises'
  2. import { join } from 'path'
  3. import {
  4. type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  5. logEvent,
  6. } from '../services/analytics/index.js'
  7. import type { MCPResultType } from '../services/mcp/client.js'
  8. import { toError } from './errors.js'
  9. import { formatFileSize } from './format.js'
  10. import { logError } from './log.js'
  11. import { ensureToolResultsDir, getToolResultsDir } from './toolResultStorage.js'
  12. /**
  13. * Generates a format description string based on the MCP result type and schema.
  14. */
  15. export function getFormatDescription(
  16. type: MCPResultType,
  17. schema?: unknown,
  18. ): string {
  19. switch (type) {
  20. case 'toolResult':
  21. return 'Plain text'
  22. case 'structuredContent':
  23. return schema ? `JSON with schema: ${schema}` : 'JSON'
  24. case 'contentArray':
  25. return schema ? `JSON array with schema: ${schema}` : 'JSON array'
  26. }
  27. }
  28. /**
  29. * Generates instruction text for Claude to read from a saved output file.
  30. *
  31. * @param rawOutputPath - Path to the saved output file
  32. * @param contentLength - Length of the content in characters
  33. * @param formatDescription - Description of the content format
  34. * @param maxReadLength - Optional max chars for Read tool (for Bash output context)
  35. * @returns Instruction text to include in the tool result
  36. */
  37. export function getLargeOutputInstructions(
  38. rawOutputPath: string,
  39. contentLength: number,
  40. formatDescription: string,
  41. maxReadLength?: number,
  42. ): string {
  43. const baseInstructions =
  44. `Error: result (${contentLength.toLocaleString()} characters) exceeds maximum allowed tokens. Output has been saved to ${rawOutputPath}.\n` +
  45. `Format: ${formatDescription}\n` +
  46. `Use offset and limit parameters to read specific portions of the file, search within it for specific content, and jq to make structured queries.\n` +
  47. `REQUIREMENTS FOR SUMMARIZATION/ANALYSIS/REVIEW:\n` +
  48. `- You MUST read the content from the file at ${rawOutputPath} in sequential chunks until 100% of the content has been read.\n`
  49. const truncationWarning = maxReadLength
  50. ? `- If you receive truncation warnings when reading the file ("[N lines truncated]"), reduce the chunk size until you have read 100% of the content without truncation ***DO NOT PROCEED UNTIL YOU HAVE DONE THIS***. Bash output is limited to ${maxReadLength.toLocaleString()} chars.\n`
  51. : `- If you receive truncation warnings when reading the file, reduce the chunk size until you have read 100% of the content without truncation.\n`
  52. const completionRequirement = `- Before producing ANY summary or analysis, you MUST explicitly describe what portion of the content you have read. ***If you did not read the entire content, you MUST explicitly state this.***\n`
  53. return baseInstructions + truncationWarning + completionRequirement
  54. }
  55. /**
  56. * Map a mime type to a file extension. Conservative: known types get their
  57. * proper extension; unknown types get 'bin'. The extension matters because
  58. * the Read tool dispatches on it (PDFs, images, etc. need the right ext).
  59. */
  60. export function extensionForMimeType(mimeType: string | undefined): string {
  61. if (!mimeType) return 'bin'
  62. // Strip any charset/boundary parameter
  63. const mt = (mimeType.split(';')[0] ?? '').trim().toLowerCase()
  64. switch (mt) {
  65. case 'application/pdf':
  66. return 'pdf'
  67. case 'application/json':
  68. return 'json'
  69. case 'text/csv':
  70. return 'csv'
  71. case 'text/plain':
  72. return 'txt'
  73. case 'text/html':
  74. return 'html'
  75. case 'text/markdown':
  76. return 'md'
  77. case 'application/zip':
  78. return 'zip'
  79. case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
  80. return 'docx'
  81. case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
  82. return 'xlsx'
  83. case 'application/vnd.openxmlformats-officedocument.presentationml.presentation':
  84. return 'pptx'
  85. case 'application/msword':
  86. return 'doc'
  87. case 'application/vnd.ms-excel':
  88. return 'xls'
  89. case 'audio/mpeg':
  90. return 'mp3'
  91. case 'audio/wav':
  92. return 'wav'
  93. case 'audio/ogg':
  94. return 'ogg'
  95. case 'video/mp4':
  96. return 'mp4'
  97. case 'video/webm':
  98. return 'webm'
  99. case 'image/png':
  100. return 'png'
  101. case 'image/jpeg':
  102. return 'jpg'
  103. case 'image/gif':
  104. return 'gif'
  105. case 'image/webp':
  106. return 'webp'
  107. case 'image/svg+xml':
  108. return 'svg'
  109. default:
  110. return 'bin'
  111. }
  112. }
  113. /**
  114. * Heuristic for whether a content-type header indicates binary content that
  115. * should be saved to disk rather than put into the model context.
  116. * Text-ish types (text/*, json, xml, form data) are treated as non-binary.
  117. */
  118. export function isBinaryContentType(contentType: string): boolean {
  119. if (!contentType) return false
  120. const mt = (contentType.split(';')[0] ?? '').trim().toLowerCase()
  121. if (mt.startsWith('text/')) return false
  122. // Structured text formats delivered with an application/ type. Use suffix
  123. // or exact match rather than substring so 'openxmlformats' (docx/xlsx) stays binary.
  124. if (mt.endsWith('+json') || mt === 'application/json') return false
  125. if (mt.endsWith('+xml') || mt === 'application/xml') return false
  126. if (mt.startsWith('application/javascript')) return false
  127. if (mt === 'application/x-www-form-urlencoded') return false
  128. return true
  129. }
  130. export type PersistBinaryResult =
  131. | { filepath: string; size: number; ext: string }
  132. | { error: string }
  133. /**
  134. * Write raw binary bytes to the tool-results directory with a mime-derived
  135. * extension. Unlike persistToolResult (which stringifies), this writes the
  136. * bytes as-is so the resulting file can be opened with native tools (Read
  137. * for PDFs, pandas for xlsx, etc.).
  138. */
  139. export async function persistBinaryContent(
  140. bytes: Buffer,
  141. mimeType: string | undefined,
  142. persistId: string,
  143. ): Promise<PersistBinaryResult> {
  144. await ensureToolResultsDir()
  145. const ext = extensionForMimeType(mimeType)
  146. const filepath = join(getToolResultsDir(), `${persistId}.${ext}`)
  147. try {
  148. await writeFile(filepath, bytes)
  149. } catch (error) {
  150. const err = toError(error)
  151. logError(err)
  152. return { error: err.message }
  153. }
  154. // mime type and extension are safe fixed-vocabulary strings (not paths/code)
  155. logEvent('tengu_binary_content_persisted', {
  156. mimeType: (mimeType ??
  157. 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  158. sizeBytes: bytes.length,
  159. ext: ext as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  160. })
  161. return { filepath, size: bytes.length, ext }
  162. }
  163. /**
  164. * Build a short message telling Claude where binary content was saved.
  165. * Just states the path — no prescriptive hint, since what the model can
  166. * actually do with the file depends on provider/tooling.
  167. */
  168. export function getBinaryBlobSavedMessage(
  169. filepath: string,
  170. mimeType: string | undefined,
  171. size: number,
  172. sourceDescription: string,
  173. ): string {
  174. const mt = mimeType || 'unknown type'
  175. return `${sourceDescription}Binary content (${mt}, ${formatFileSize(size)}) saved to ${filepath}`
  176. }