| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307 |
- import type { LogOption, SerializedMessage } from '../types/logs.js'
- import { count } from './array.js'
- import { logForDebugging } from './debug.js'
- import { getLogDisplayTitle, logError } from './log.js'
- import { getSmallFastModel } from './model/model.js'
- import { isLiteLog, loadFullLog } from './sessionStorage.js'
- import { sideQuery } from './sideQuery.js'
- import { jsonParse } from './slowOperations.js'
// Limits that bound how much session data is extracted and sent to the model.

/** Maximum number of transcript characters kept per session. */
const MAX_TRANSCRIPT_CHARS = 2_000

/** Maximum number of messages scanned per session (split between start and end). */
const MAX_MESSAGES_TO_SCAN = 100

/** Maximum number of sessions included in a single search request. */
const MAX_SESSIONS_TO_SEARCH = 100
/**
 * System prompt for the session-search side query.
 *
 * It describes the per-session fields that appear in the user message, ranks
 * the fields by priority (tags first), asks the model to err on the side of
 * inclusion, and requires a bare JSON object of the form
 * `{"relevant_indices": [...]}` as the only output.
 */
const SESSION_SEARCH_SYSTEM_PROMPT = `Your goal is to find relevant sessions based on a user's search query.
You will be given a list of sessions with their metadata and a search query. Identify which sessions are most relevant to the query.
Each session may include:
- Title (display name or custom title)
- Tag (user-assigned category, shown as [tag: name] - users tag sessions with /tag command to categorize them)
- Branch (git branch name, shown as [branch: name])
- Summary (AI-generated summary)
- First message (beginning of the conversation)
- Transcript (excerpt of conversation content)
IMPORTANT: Tags are user-assigned labels that indicate the session's topic or category. If the query matches a tag exactly or partially, those sessions should be highly prioritized.
For each session, consider (in order of priority):
1. Exact tag matches (highest priority - user explicitly categorized this session)
2. Partial tag matches or tag-related terms
3. Title matches (custom titles or first message content)
4. Branch name matches
5. Summary and transcript content matches
6. Semantic similarity and related concepts
CRITICAL: Be VERY inclusive in your matching. Include sessions that:
- Contain the query term anywhere in any field
- Are semantically related to the query (e.g., "testing" matches sessions about "tests", "unit tests", "QA", etc.)
- Discuss topics that could be related to the query
- Have transcripts that mention the concept even in passing
When in doubt, INCLUDE the session. It's better to return too many results than too few. The user can easily scan through results, but missing relevant sessions is frustrating.
Return sessions ordered by relevance (most relevant first). If truly no sessions have ANY connection to the query, return an empty array - but this should be rare.
Respond with ONLY the JSON object, no markdown formatting:
{"relevant_indices": [2, 5, 0]}`
/**
 * Shape of the JSON object the model is asked to return.
 */
interface AgenticSearchResult {
  /** Positions in the numbered session list, most relevant first. */
  relevant_indices: number[]
}
/**
 * Extracts searchable text content from a message.
 *
 * Only `user` and `assistant` messages contribute text. String content is
 * returned as-is; array content is reduced to its string entries and to the
 * `text` field of entries that have one, joined with single spaces.
 *
 * @param message - The serialized message to read.
 * @returns The message text, or `''` when there is nothing searchable.
 */
function extractMessageText(message: SerializedMessage): string {
  if (message.type !== 'user' && message.type !== 'assistant') {
    return ''
  }

  const content = 'message' in message ? message.message?.content : undefined
  if (!content) return ''
  if (typeof content === 'string') return content
  if (!Array.isArray(content)) return ''

  return content
    .map(block => {
      if (typeof block === 'string') return block
      // Messages are deserialized data: a null or primitive entry would make
      // the `in` operator throw, so skip anything that is not an object.
      if (block === null || typeof block !== 'object') return ''
      if ('text' in block && typeof block.text === 'string') return block.text
      return ''
    })
    .filter(Boolean)
    .join(' ')
}
/**
 * Extracts a truncated transcript from session messages.
 *
 * When there are more than MAX_MESSAGES_TO_SCAN messages, only the first and
 * last halves of that budget are scanned. The extracted text is joined,
 * whitespace-normalized, and cut to at most MAX_TRANSCRIPT_CHARS characters
 * followed by an ellipsis.
 *
 * @param messages - The session's messages in order.
 * @returns The transcript excerpt, or `''` when no text could be extracted.
 */
function extractTranscript(messages: SerializedMessage[]): string {
  if (messages.length === 0) return ''

  // Take messages from start and end to get context
  const half = MAX_MESSAGES_TO_SCAN / 2
  const messagesToScan =
    messages.length <= MAX_MESSAGES_TO_SCAN
      ? messages
      : [...messages.slice(0, half), ...messages.slice(-half)]

  const text = messagesToScan
    .map(extractMessageText)
    .filter(Boolean)
    .join(' ')
    .replace(/\s+/g, ' ')
    .trim()

  if (text.length <= MAX_TRANSCRIPT_CHARS) return text

  // String indices count UTF-16 code units. If the last kept unit is a high
  // surrogate, the cut would split an astral character (e.g. an emoji) and
  // leave an ill-formed lone surrogate, so drop that unit as well.
  let end = MAX_TRANSCRIPT_CHARS
  const lastKeptUnit = text.charCodeAt(end - 1)
  if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) {
    end -= 1
  }
  return text.slice(0, end) + '…'
}
/**
 * Reports whether any searchable field of a log contains the query.
 *
 * The display title and the metadata fields are checked first; the transcript
 * is extracted only when none of them match, because it is the most expensive
 * check.
 *
 * @param log - The session log to inspect.
 * @param queryLower - The query, already lower-cased by the caller.
 * @returns `true` as soon as one field contains the query.
 */
function logContainsQuery(log: LogOption, queryLower: string): boolean {
  const fieldMatches = (field: string | null | undefined): boolean =>
    field?.toLowerCase().includes(queryLower) ?? false

  // Display title (may be derived from the custom title or first prompt)
  if (getLogDisplayTitle(log).toLowerCase().includes(queryLower)) {
    return true
  }

  // Custom title, tag, branch, summary, first prompt
  const metadataFields = [
    log.customTitle,
    log.tag,
    log.gitBranch,
    log.summary,
    log.firstPrompt,
  ]
  if (metadataFields.some(field => fieldMatches(field))) {
    return true
  }

  // Transcript last: it has to be extracted from the messages
  const { messages } = log
  if (!messages || messages.length === 0) {
    return false
  }
  return extractTranscript(messages).toLowerCase().includes(queryLower)
}
/**
 * Splits `logs` into those that contain the query in a searchable field and
 * those that do not, preserving input order within each group. Each log is
 * tested exactly once because the check may extract and scan a transcript.
 */
function partitionLogsByQuery(
  logs: LogOption[],
  queryLower: string,
): { matching: LogOption[]; nonMatching: LogOption[] } {
  const matching: LogOption[] = []
  const nonMatching: LogOption[] = []
  for (const log of logs) {
    if (logContainsQuery(log, queryLower)) {
      matching.push(log)
    } else {
      nonMatching.push(log)
    }
  }
  return { matching, nonMatching }
}

/**
 * Renders one session as a single entry of the prompt's session list:
 * its index, display title, and whichever metadata fields are present.
 */
function formatSessionForPrompt(log: LogOption, index: number): string {
  const parts: string[] = [`${index}:`]

  // Title (display title, may be custom or from first prompt)
  const displayTitle = getLogDisplayTitle(log)
  parts.push(displayTitle)

  // Custom title if different from display title
  if (log.customTitle && log.customTitle !== displayTitle) {
    parts.push(`[custom title: ${log.customTitle}]`)
  }
  if (log.tag) {
    parts.push(`[tag: ${log.tag}]`)
  }
  if (log.gitBranch) {
    parts.push(`[branch: ${log.gitBranch}]`)
  }
  if (log.summary) {
    parts.push(`- Summary: ${log.summary}`)
  }
  // First prompt content (truncated)
  if (log.firstPrompt && log.firstPrompt !== 'No prompt') {
    parts.push(`- First message: ${log.firstPrompt.slice(0, 300)}`)
  }
  // Transcript excerpt (if messages are available)
  if (log.messages && log.messages.length > 0) {
    const transcript = extractTranscript(log.messages)
    if (transcript) {
      parts.push(`- Transcript: ${transcript}`)
    }
  }

  return parts.join(' ')
}

/**
 * Parses the model's reply into a list of usable session indices.
 *
 * Only integer indices within `[0, sessionCount)` are kept, in the order the
 * model returned them, and repeated indices are dropped so a session cannot
 * appear twice in the results. Throws if the embedded JSON cannot be parsed.
 */
function parseRelevantIndices(
  responseText: string,
  sessionCount: number,
): number[] {
  const jsonMatch = responseText.match(/\{[\s\S]*\}/)
  if (!jsonMatch) {
    logForDebugging('Could not find JSON in agentic search response')
    return []
  }

  const result: AgenticSearchResult = jsonParse(jsonMatch[0])
  const rawIndices: unknown = result?.relevant_indices
  if (!Array.isArray(rawIndices)) {
    return []
  }

  // A Set keeps first-seen order, so relevance ordering is preserved.
  const uniqueIndices = new Set<number>()
  for (const value of rawIndices) {
    if (
      typeof value === 'number' &&
      Number.isInteger(value) &&
      value >= 0 &&
      value < sessionCount
    ) {
      uniqueIndices.add(value)
    }
  }
  return [...uniqueIndices]
}

/**
 * Performs an agentic search using Claude to find relevant sessions
 * based on semantic understanding of the query.
 *
 * Sessions that literally contain the query are sent first; remaining slots
 * (up to MAX_SESSIONS_TO_SEARCH) are filled with non-matching sessions in
 * their original order. Lite logs are loaded in full so transcript excerpts
 * can be included in the prompt.
 *
 * @param query - The user's search text.
 * @param logs - Candidate sessions to search.
 * @param signal - Optional abort signal forwarded to the model request.
 * @returns The sessions the model judged relevant, most relevant first.
 *   Returns `[]` for a blank query, an empty `logs`, an unusable response,
 *   or any error during the request (errors are logged, not thrown).
 */
export async function agenticSessionSearch(
  query: string,
  logs: LogOption[],
  signal?: AbortSignal,
): Promise<LogOption[]> {
  const trimmedQuery = query.trim()
  if (!trimmedQuery || logs.length === 0) {
    return []
  }

  // Pre-filter: find sessions that contain the query term.
  // This ensures we search relevant sessions, not just recent ones.
  // The trimmed query is used so stray whitespace cannot defeat the match.
  const { matching: matchingLogs, nonMatching: nonMatchingLogs } =
    partitionLogsByQuery(logs, trimmedQuery.toLowerCase())

  // Matching logs first, then non-matching logs for context, capped at
  // MAX_SESSIONS_TO_SEARCH in total.
  const logsToSearch = [...matchingLogs, ...nonMatchingLogs].slice(
    0,
    MAX_SESSIONS_TO_SEARCH,
  )

  const hasMessages = (log: LogOption): boolean =>
    (log.messages?.length ?? 0) > 0

  // Debug: log what data we have
  logForDebugging(
    `Agentic search: ${logsToSearch.length}/${logs.length} logs, query="${query}", ` +
      `matching: ${matchingLogs.length}, with messages: ${count(logsToSearch, hasMessages)}`,
  )

  // Load full logs for lite logs to get transcript content
  const logsWithTranscripts = await Promise.all(
    logsToSearch.map(async log => {
      if (!isLiteLog(log)) {
        return log
      }
      try {
        return await loadFullLog(log)
      } catch (error) {
        logError(error as Error)
        // If loading fails, use the lite log (no transcript)
        return log
      }
    }),
  )

  logForDebugging(
    `Agentic search: loaded ${count(logsWithTranscripts, hasMessages)}/${logsToSearch.length} logs with transcripts`,
  )

  // Build session list for the prompt with all searchable metadata
  const sessionList = logsWithTranscripts
    .map((log, index) => formatSessionForPrompt(log, index))
    .join('\n')

  const userMessage = `Sessions:
${sessionList}
Search query: "${query}"
Find the sessions that are most relevant to this query.`

  // Debug: log first part of the session list
  logForDebugging(
    `Agentic search prompt (first 500 chars): ${userMessage.slice(0, 500)}...`,
  )

  try {
    const model = getSmallFastModel()
    logForDebugging(`Agentic search using model: ${model}`)

    const response = await sideQuery({
      model,
      system: SESSION_SEARCH_SYSTEM_PROMPT,
      messages: [{ role: 'user', content: userMessage }],
      signal,
      querySource: 'session_search',
    })

    // Extract the text content from the response
    const textContent = response.content.find(block => block.type === 'text')
    if (!textContent || textContent.type !== 'text') {
      logForDebugging('No text content in agentic search response')
      return []
    }

    // Debug: log the response
    logForDebugging(`Agentic search response: ${textContent.text}`)

    // Map indices back to logs (indices are relative to logsWithTranscripts)
    const relevantLogs = parseRelevantIndices(
      textContent.text,
      logsWithTranscripts.length,
    ).map(index => logsWithTranscripts[index]!)

    logForDebugging(
      `Agentic search found ${relevantLogs.length} relevant sessions`,
    )
    return relevantLogs
  } catch (error) {
    logError(error as Error)
    logForDebugging(`Agentic search error: ${error}`)
    return []
  }
}
|