classifier.ts - CmdCode Source

📄 classifier.ts • 9367 bytes
/**
 * 意图识别 - 分类器（关键词 + 向量语义双路融合）
 * Phase 1: 基于关键词 + 模式匹配
 * Phase 2: 向量语义相似度辅助（处理口语化/模糊表达）
 */
import { INTENT_PATTERNS, extractParams, type IntentType, type IntentPattern } from './patterns'

/**
 * Result of classifying a single user input.
 */
export interface IntentResult {
  /** Intent category assigned to the input (`'unknown'` when nothing matched well enough). */
  type: IntentType
  /** Confidence of the classification. */
  confidence: number // confidence in the range 0-1
  /** Parameters extracted from the input via `extractParams`. */
  params: Record<string, any>
  /** Tool names suggested for this intent; absent when no suggestion is registered. */
  suggestedTools?: string[]
  /** Whether the request is flagged for the fast path. */
  fastPath: boolean
  /** Human-readable explanation of how the result was reached. */
  reason: string
  /** Seed sentences returned by the vector search (optional, for debugging). */
  vectorMatches?: string[]
}

/**
 * Suggested tool names per intent type.
 * Declared as `Partial`, so intent types without an entry here yield
 * `undefined` when looked up.
 */
const TOOL_SUGGESTIONS: Partial<Record<IntentType, string[]>> = {
  code: ['file_write', 'bash_run'],
  debug: ['grep_search', 'file_read', 'bash_run'],
  file: ['file_read', 'file_write', 'file_edit', 'list_dir'],
  search: ['grep_search', 'file_read'],
  test: ['bash_run', 'file_write'],
  config: ['file_read', 'file_write', 'bash_run'],
  skill: ['bash_run', 'file_read'],
}

/**
 * Scores how well `text` matches a single intent pattern (keywords + regexes).
 *
 * Scoring rules:
 * - every keyword contained in the text (case-insensitive) adds 5 points,
 *   10 when the text starts with the keyword, 20 when the text equals it;
 * - every regular expression that matches the original (non-lowercased) text
 *   adds 15 points.
 *
 * @param text - User input to score.
 * @param pattern - Intent pattern supplying `keywords` and an optional `regex` list.
 * @returns The accumulated score; 0 means nothing matched.
 */
function calculateScore(text: string, pattern: IntentPattern): number {
  const lowerText = text.toLowerCase()
  let score = 0

  // Keyword matching
  for (const keyword of pattern.keywords) {
    const lowerKeyword = keyword.toLowerCase()
    if (!lowerText.includes(lowerKeyword)) {
      continue
    }
    // An exact match outweighs a prefix match, which outweighs a plain occurrence.
    if (lowerText === lowerKeyword) {
      score += 20
    } else if (lowerText.startsWith(lowerKeyword)) {
      score += 10
    } else {
      score += 5
    }
  }

  // Regex matching
  for (const regex of pattern.regex ?? []) {
    // `test()` on a global/sticky regex resumes from `lastIndex`, and the
    // pattern objects are shared across calls. Reset it before testing so the
    // same input always scores the same, and afterwards so no state leaks to
    // other users of the pattern. This is a no-op for regexes without g/y flags.
    regex.lastIndex = 0
    const matched = regex.test(text)
    regex.lastIndex = 0
    if (matched) {
      score += 15
    }
  }

  return score
}

/**
 * Keyword-based classification (synchronous fast path).
 *
 * Scores the trimmed input against every entry of `INTENT_PATTERNS` and keeps
 * the first pattern that reaches the strictly highest positive score.
 * Confidence is `(score / 20) * (priority / 100)`, capped at 1.
 *
 * @param input - Raw user input.
 * @returns The winning intent type (`'unknown'` when nothing scored), its
 *   confidence, the raw score, and the matched pattern (`null` when none).
 */
function classifyByKeywords(input: string): { type: IntentType; confidence: number; score: number; pattern: IntentPattern | null } {
  const text = input.trim()
  if (text.length === 0) {
    return { type: 'unknown', confidence: 0, score: 0, pattern: null }
  }

  // Fold over the patterns; a later pattern only replaces the current leader
  // when its score is strictly greater, so ties go to the earlier pattern.
  const leader = INTENT_PATTERNS.reduce<{ pattern: IntentPattern | null; score: number }>(
    (current, candidate) => {
      const candidateScore = calculateScore(text, candidate)
      return candidateScore > current.score
        ? { pattern: candidate, score: candidateScore }
        : current
    },
    { pattern: null, score: 0 },
  )

  const confidence =
    leader.pattern && leader.score > 0
      ? Math.min(1, (leader.score / 20) * (leader.pattern.priority / 100))
      : 0

  return {
    type: leader.pattern?.type || 'unknown',
    confidence,
    score: leader.score,
    pattern: leader.pattern,
  }
}

/**
 * Vector-semantic classification (asynchronous; performs a vector search).
 *
 * Queries the `__intent_seeds__` collection for up to 5 seed sentences similar
 * to the input, classifies each returned seed with the keyword classifier, and
 * lets the seeds vote for an intent type, weighted by their similarity.
 *
 * @param input - User input to classify.
 * @returns The winning intent type (possibly `'unknown'` when no seed could be
 *   classified), a confidence derived from the highest similarity, and the
 *   matched seed texts; `null` when the search returns nothing or fails.
 */
async function classifyByVector(input: string): Promise<{
  type: IntentType
  confidence: number
  matches: string[]
} | null> {
  // Smaller distance means more similar. `1 - distance` only lands in [0, 1]
  // when the backend reports distances in that range, so clamp it: otherwise a
  // distance above 1 would produce negative vote weights and confidence.
  const toSimilarity = (distance: number): number => Math.min(1, Math.max(0, 1 - distance))

  try {
    // Lazy import to avoid a circular dependency.
    const { searchVectors } = await import('../memory/vectorSearch')

    const results = await searchVectors(input, '__intent_seeds__', 5)
    if (results.length === 0) {
      return null
    }

    const maxSimilarity = Math.max(...results.map(r => toSimilarity(r.distance)))

    // Infer each seed's intent via keyword classification and accumulate
    // similarity-weighted votes per intent type.
    const typeScores = new Map<IntentType, number>()
    for (const result of results) {
      const seedType = classifyByKeywords(result.content).type
      if (seedType !== 'unknown') {
        typeScores.set(seedType, (typeScores.get(seedType) ?? 0) + toSimilarity(result.distance))
      }
    }

    // Pick the intent type with the highest accumulated vote.
    let bestType: IntentType = 'unknown'
    let bestScore = 0
    for (const [type, score] of typeScores) {
      if (score > bestScore) {
        bestScore = score
        bestType = type
      }
    }

    // Confidence is driven by the best similarity: strong matches are scaled
    // by 0.9 and capped at 0.8, weaker ones are scaled by 0.6.
    const confidence = maxSimilarity > 0.6 ? Math.min(0.8, maxSimilarity * 0.9) : maxSimilarity * 0.6

    return {
      type: bestType,
      confidence,
      matches: results.map(r => r.content),
    }
  } catch {
    // Deliberate best-effort: a failing vector search must not break the main flow.
    return null
  }
}

/**
 * Main classification entry point (synchronous, kept for backward compatibility).
 *
 * Uses keyword matching only; suitable for callers that do not need the
 * vector search. A keyword result is accepted when its confidence is at least
 * 0.3 or its raw score is at least 10; otherwise the input is reported as
 * `'unknown'`.
 *
 * @param input - Raw user input.
 * @returns The classification result.
 */
export function classifyIntent(input: string): IntentResult {
  const text = input.trim()
  if (text === '') {
    return { type: 'unknown', confidence: 0, params: {}, fastPath: true, reason: '空输入' }
  }

  const { type, confidence, score, pattern } = classifyByKeywords(text)

  // Guard: keyword evidence too weak, so report unknown (the async variant
  // can supplement this with a vector search).
  const strongEnough = confidence >= 0.3 || score >= 10
  if (!strongEnough) {
    return {
      type: 'unknown',
      confidence: 0,
      params: extractParams(text),
      fastPath: true,
      reason: '关键词匹配置信度不足',
    }
  }

  return {
    type,
    confidence,
    params: extractParams(text),
    suggestedTools: TOOL_SUGGESTIONS[type],
    fastPath: pattern?.fastPath ?? true,
    reason: `关键词匹配: ${type}，分数 ${score}`,
  }
}

/**
 * Asynchronous classification combining keyword matching with vector search.
 *
 * The keyword result is returned immediately when it is strong (confidence
 * at least 0.5 or score at least 15), which skips the vector search. Otherwise
 * the vector classifier is consulted and wins when it yields a known type with
 * a higher confidence than the keyword result.
 *
 * @param input - Raw user input.
 * @returns The classification result.
 */
export async function classifyIntentAsync(input: string): Promise<IntentResult> {
  const text = input.trim()
  if (text.length === 0) {
    return { type: 'unknown', confidence: 0, params: {}, fastPath: true, reason: '空输入' }
  }

  // 1. Keyword classification (fast).
  const kw = classifyByKeywords(text)

  // Builds the keyword-based result; used by both the early exit and the fallback.
  const keywordResult = (): IntentResult => ({
    type: kw.type,
    confidence: kw.confidence,
    params: extractParams(text),
    suggestedTools: TOOL_SUGGESTIONS[kw.type],
    fastPath: kw.pattern?.fastPath ?? true,
    reason: `关键词匹配: ${kw.type}，分数 ${kw.score}`,
  })

  // 2. Strong keyword evidence: return right away and skip the vector search.
  if (kw.confidence >= 0.5 || kw.score >= 15) {
    return keywordResult()
  }

  // 3. Medium or low keyword confidence: try the vector search.
  const vec = await classifyByVector(text)
  if (vec !== null && vec.type !== 'unknown' && vec.confidence > kw.confidence) {
    const { type: vecType, confidence: vecConfidence, matches } = vec
    const matchedPattern = INTENT_PATTERNS.find(candidate => candidate.type === vecType)
    return {
      type: vecType,
      confidence: vecConfidence,
      params: extractParams(text),
      suggestedTools: TOOL_SUGGESTIONS[vecType],
      fastPath: matchedPattern?.fastPath ?? false,
      reason: `向量语义匹配: ${vecType}，相似度 ${vecConfidence.toFixed(2)}`,
      vectorMatches: matches,
    }
  }

  // 4. Keyword result is better, or the vector search produced nothing usable.
  if (kw.score > 0) {
    return keywordResult()
  }

  // 5. Neither classifier matched.
  return {
    type: 'unknown',
    confidence: 0,
    params: extractParams(text),
    fastPath: true,
    reason: '未匹配到明确意图',
  }
}

/**
 * Classifies a batch of inputs (keyword matching only), e.g. for analysing
 * message history.
 *
 * @param inputs - Raw user inputs.
 * @returns One result per input, in the same order.
 */
export function classifyBatch(inputs: string[]): IntentResult[] {
  // Explicit single-argument wrapper so `map`'s index/array arguments are
  // never forwarded to the classifier.
  const classifyOne = (text: string): IntentResult => classifyIntent(text)
  return inputs.map(classifyOne)
}

/**
 * Returns the display label (emoji + Chinese description) for an intent type.
 *
 * @param type - Intent type to describe.
 * @returns The label for `type`, or the "unknown" label when none is registered.
 */
export function getIntentLabel(type: IntentType): string {
  const fallbackLabel = '❓ 未知'
  const labelByType: Record<IntentType, string> = {
    unknown: fallbackLabel,
    chat: '💬 闲聊问答',
    explain: '📖 解释说明',
    code: '💻 代码生成',
    debug: '🔧 调试修复',
    refactor: '♻️ 重构优化',
    test: '🧪 测试相关',
    file: '📁 文件操作',
    search: '🔍 搜索查询',
    config: '⚙️ 配置相关',
    skill: '🎯 技能执行',
  }

  const label = labelByType[type]
  return label ? label : fallbackLabel
}

/**
 * Returns a human-readable description of the fast-path decision.
 *
 * @param fastPath - Whether the request was flagged for the fast path.
 * @param type - Intent type of the request.
 * @returns A generic "needs an execution plan" message when `fastPath` is
 *   false; otherwise the reason registered for `type`, or a generic
 *   fast-path message when none is registered.
 */
export function getFastPathReason(fastPath: boolean, type: IntentType): string {
  // Non-fast-path requests share one message regardless of intent type.
  if (!fastPath) {
    return '复杂任务，需要执行计划'
  }

  const reasonByType: Record<IntentType, string> = {
    unknown: '未知任务，需要进一步确认',
    chat: '闲聊问答，直接回复',
    explain: '解释说明类任务，简单直接回复',
    code: '简单代码生成，直接生成',
    debug: '调试任务，需要详细分析',
    refactor: '重构任务，需要详细规划',
    test: '测试任务，需要详细规划',
    file: '文件操作，需要验证路径',
    search: '搜索任务，需要验证查询',
    config: '配置任务，需要验证环境',
    skill: '技能执行，直接调用技能处理器',
  }

  const reason = reasonByType[type]
  return reason ? reason : '快速通道'
}