import re

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Patterns used by clean_traceback, compiled once at import time.
_TRACEBACK_HEADER_RE = re.compile(
    r'^Traceback \(most recent call last\):[\n]+', re.MULTILINE
)
_FILE_LINE_RE = re.compile(
    r'^ *File \"(.*?)\", line (\d+)(, in .*?)?$', re.MULTILINE
)
# Lazy quantifier: each "<... at 0x...>" repr is replaced on its own. A greedy
# ".*" would swallow all text between the first "<" and the last address on
# the line when a line contains more than one object repr.
_MEMORY_ADDRESS_RE = re.compile(r'<.*? at 0x[0-9a-fA-F]+>')
_BLANK_LINES_RE = re.compile(r'\n\s*\n+')


def _shorten_file_line(match):
    """Rewrite one 'File "...", line N, in f' line to use the bare filename."""
    full_path = match.group(1)
    line_num = match.group(2)
    function_part = match.group(3) or ''
    # Drop directories for both POSIX ('/') and Windows ('\\') style paths.
    filename = full_path.split('/')[-1].split('\\')[-1]
    return f' File "{filename}", line {line_num}{function_part}'


def clean_traceback(traceback_string: str) -> str:
    """Normalize a traceback so machine-specific details do not affect matching.

    Steps, in order:
      1. Remove carriage returns.
      2. Remove "Traceback (most recent call last):" header lines.
      3. Reduce every 'File "<path>", line N' entry to the bare filename.
      4. Replace object reprs containing a memory address
         ("<... at 0x...>") with "<...>".
      5. Collapse blank lines and strip surrounding whitespace.

    Args:
        traceback_string: Raw traceback text.

    Returns:
        The cleaned traceback text.
    """
    cleaned = traceback_string.replace('\r', '')
    cleaned = _TRACEBACK_HEADER_RE.sub('', cleaned)
    cleaned = _FILE_LINE_RE.sub(_shorten_file_line, cleaned)
    cleaned = _MEMORY_ADDRESS_RE.sub('<...>', cleaned)
    return _BLANK_LINES_RE.sub('\n', cleaned).strip()


def split_traceback_layers(traceback_string: str):
    """Split a traceback into its non-empty lines, each stripped of whitespace.

    Args:
        traceback_string: Traceback text (typically already cleaned).

    Returns:
        A list of stripped, non-empty lines in their original order.
    """
    stripped_lines = (
        line.strip() for line in traceback_string.strip().split('\n')
    )
    return [line for line in stripped_lines if line]


def rebuild_index(error_memory):
    """Build a fresh inner-product FAISS index from the stored vectors.

    Args:
        error_memory: Mapping of db_id to an item dict; each item must have a
            "vector" entry. Iteration order of the mapping defines the row
            order in the index.

    Returns:
        A tuple ``(index, id_to_index, index_to_id)``. ``index`` is a
        ``faiss.IndexFlatIP`` holding all vectors as float32, or ``None`` when
        ``error_memory`` is empty. The two dicts map db_id -> row position and
        row position -> db_id respectively.
    """
    vectors = []
    id_to_index = {}
    index_to_id = {}
    for position, (db_id, item) in enumerate(error_memory.items()):
        vectors.append(item["vector"])
        id_to_index[db_id] = position
        index_to_id[position] = db_id

    if not vectors:
        return None, id_to_index, index_to_id

    matrix = np.array(vectors).astype("float32")
    index = faiss.IndexFlatIP(matrix.shape[1])
    index.add(matrix)
    return index, id_to_index, index_to_id


# Layered similarity computation
def compute_layered_similarity_sco(user_vecs, db_vectors):
    """Score how well the user's traceback layers match a stored traceback.

    For every user layer, the best dot-product similarity against any stored
    layer is taken; these per-layer maxima are combined with normalized,
    log-spaced weights that increase from the first layer to the last.

    Args:
        user_vecs: np.ndarray of shape (L1, D), one vector per user layer.
        db_vectors: np.ndarray of shape (L2, D), one vector per stored layer.

    Returns:
        The weighted similarity score. Returns 0.0 when either input has no
        layers, since there is nothing to match.
    """
    # len() rather than truthiness: truth-testing a NumPy array is ambiguous.
    # Without this guard an empty db_vectors makes np.max raise ValueError.
    if len(user_vecs) == 0 or len(db_vectors) == 0:
        return 0.0

    # Per-layer weights, e.g. [1, 2.15, ..., 10], normalized to sum to 1.
    layer_weights = np.logspace(0, 1, len(user_vecs))
    layer_weights /= np.sum(layer_weights)

    weighted_score = 0.0
    for weight, u_vec in zip(layer_weights, user_vecs):
        # Dot similarity between this user layer and every stored layer.
        sims = np.dot(db_vectors, u_vec)
        # Keep only the best-matching stored layer.
        weighted_score += float(np.max(sims)) * weight
    return weighted_score


# Global state
# {db_id: {"error": str, "vector": np.array, "index": FAISS index, ...}}
error_memory = {}
id_to_index = {}
index_to_id = {}
current_index = 0
aggregate_index = faiss.IndexFlatIP(384)  # all-MiniLM-L6-v2 output dimension