Not using faiss

Pan Qiancheng 2025-04-22 19:38:19 +08:00
parent b6f7ddbcfd
commit f3965e9237
1 changed file with 29 additions and 9 deletions


@@ -23,6 +23,14 @@ error_db = [
{
"error": """Traceback (most recent call last):\n File \"cnn_success.py\", line 1, in <module>\n import torch\nModuleNotFoundError: No module named 'torch'\n""",
"solution": "找不到模块torch, 请检查是否已安装 PyTorch。"
},
{
"error": """Traceback (most recent call last):\n File \"rnn_error.py\", line 40, in <module>\n loss = criterion(output, label.unsqueeze(0))\n File \"module.py\", line 1553, in _wrapped_call_impl\n return self._call_impl(*args, **kwargs)\n File \"module.py\", line 1562, in _call_impl\n return forward_call(*args, **kwargs)\n File \"loss.py\", line 1188, in forward\n return F.cross_entropy(input, target, weight=self.weight,\n File \"functional.py\", line 3104, in cross_entropy\n return cross_entropy_loss(input, target, ...)\nValueError: Expected input batch_size (2) to match target batch_size (1).\n""",
"solution": "检查模型输出和标签的形状是否匹配。确保它们在 batch_size 维度上是一致的。"
},
{
"error": """Syntax error in command""",
"solution": "检查代码中的语法错误。可能是缺少括号、引号或其他语法问题。"
}
]
@@ -63,20 +71,32 @@ for entry in error_db:
})
# Error message to query
user_error = """Traceback (most recent call last):\n File \"train_model.py\", line 72, in <module>\n loss = loss_function(predictions, ground_truth)\n File \"core.py\", line 442, in _call_function\n return self._execute(*args, **kwargs)\n File \"core.py\", line 450, in _execute\n return forward_execution(*args, **kwargs)\n File \"loss_functions.py\", line 205, in forward\n return F.cross_entropy(input, target, weight=self.weight,\n File \"functional.py\", line 3104, in cross_entropy\n return cross_entropy_loss(input, target, ...)\nIndexError: Target 15 is out of bounds.\n"""
# Exactly the same
user_error = """Traceback (most recent call last):\n File \"image_error.py\", line 55, in <module>\n loss = criterion(outputs, labels)\n File \"module.py\", line 1553, in _wrapped_call_impl\n return self._call_impl(*args, **kwargs)\n File \"module.py\", line 1562, in _call_impl\n return forward_call(*args, **kwargs)\n File \"loss.py\", line 1188, in forward\n return F.cross_entropy(input, target, weight=self.weight,\n File \"functional.py\", line 3104, in cross_entropy\n return cross_entropy_loss(input, target, ...)\nIndexError: Target 15 is out of bounds.\n"""
# Completely different
# user_error = """Traceback (most recent call last):\n File \"D:\\Develop\\Projects\\bash-hook\\matches\\perf_2.py\", line 94, in <module>\n D, I = index.search(query_vec, k=1)\n File \"C:\\Users\\qcqcqc\\.conda\\envs\\sentence_transformers\\lib\\site-packages\\faiss\\class_wrappers.py\", line 329, in replacement_search\n assert d == self.d\nAssertionError\n"""
# Lower layers differ
# user_error = """Traceback (most recent call last):\n File \"image_error.py\", line 55, in <module>\n loss = criterion(outputs, labels)\n File \"module.py\", line 1553, in _wrapped_call_impl\n return self._call_impl(*args, **kwargs)\n File \"module.py\", line 1570, in _call_impl\n return forward_call(*args, **kwargs)\n File \"loss.py\", line 1200, in forward\n return F.mse_loss(input, target, reduction=self.reduction)\n File \"functional.py\", line 2301, in mse_loss\n return mean_squared_error_loss(input, target, ...)\nValueError: The size of input tensor must match the size of target tensor.\n"""
# Top layers differ
# user_error = """Traceback (most recent call last):\n File \"train_model.py\", line 72, in <module>\n loss = loss_function(predictions, ground_truth)\n File \"core.py\", line 442, in _call_function\n return self._execute(*args, **kwargs)\n File \"core.py\", line 450, in _execute\n return forward_execution(*args, **kwargs)\n File \"loss_functions.py\", line 205, in forward\n return F.cross_entropy(input, target, weight=self.weight,\n File \"functional.py\", line 3104, in cross_entropy\n return cross_entropy_loss(input, target, ...)\nIndexError: Target 15 is out of bounds.\n"""
# No error message
# user_error = "success."
cleaned_user = clean_traceback(user_error)
user_layers = split_traceback_layers(cleaned_user)
user_vectors = model.encode(user_layers)
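# Hedged illustration, not part of this commit: the matching code below assumes
# user_vectors is a 2-D numpy array with one row per traceback layer, and its
# np.dot scores equal cosine similarity only when the embeddings are unit-length
# (sentence-transformers does not normalize by default). The names below are
# illustrative only.
layer_norms = np.linalg.norm(user_vectors, axis=-1)
print("user layers:", len(user_layers), "| embedding shape:", user_vectors.shape,
      "| layer norms:", np.round(layer_norms, 3))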
# Compute the per-layer matching score (average of maximum matches)
# def compute_layered_similarity(user_vecs, db_vecs):
# score = 0.0
# for u_vec in user_vecs:
# sims = np.dot(db_vecs, u_vec)
# score += np.max(sims)
# return score / len(user_vecs) if len(user_vecs) > 0 else 0.0
def compute_layered_similarity(user_vecs, db_vecs, layer_weights=None):
def compute_layered_similarity_avg(user_vecs, db_vecs):
score = 0.0
for u_vec in user_vecs:
sims = np.dot(db_vecs, u_vec)
score += np.max(sims)
return score / len(user_vecs) if len(user_vecs) > 0 else 0.0
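# Hedged usage illustration, not part of this commit: with unit-length rows,
# np.dot(db_vecs, u_vec) is the cosine similarity between one user layer and every
# DB layer; taking the max per user layer and averaging rewards DB entries in which
# each user layer has at least one close counterpart. The demo arrays are made up.
demo_user = np.array([[1.0, 0.0], [0.0, 1.0]])  # two toy user-layer vectors
demo_db = np.array([[1.0, 0.0], [0.6, 0.8]])    # two toy DB-layer vectors
print(compute_layered_similarity_avg(demo_user, demo_db))  # (1.0 + 0.8) / 2 = 0.9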
def compute_layered_similarity_sco(user_vecs, db_vecs, layer_weights=None):
score = 0.0
weighted_score = 0.0
@@ -98,7 +118,7 @@ def compute_layered_similarity(user_vecs, db_vecs, layer_weights=None):
best_match = None
best_score = -1
for db_entry in layered_error_db:
score = compute_layered_similarity(user_vectors, db_entry["vectors"])
score = compute_layered_similarity_sco(user_vectors, db_entry["vectors"])
if score > best_score:
best_score = score
best_match = db_entry
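# Hedged follow-up sketch, not part of this commit: for queries like the
# "completely different" or "no error message" cases above, the best score can still
# be the global maximum even when nothing truly matches, so one plausible guard is a
# minimum-similarity cutoff before reporting a solution. The 0.6 threshold and the
# assumption that each layered_error_db entry keeps a "solution" field are
# illustrative, not something this diff shows.
SIMILARITY_THRESHOLD = 0.6
if best_match is not None and best_score >= SIMILARITY_THRESHOLD:
    print("Matched solution:", best_match.get("solution", "<no solution recorded>"))
    print("Similarity score:", round(float(best_score), 4))
else:
    print("No sufficiently similar error found; best score:", round(float(best_score), 4))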