Spaces:
Running
Running
Update docSim.py
Browse files
docSim.py
CHANGED
|
@@ -20,25 +20,11 @@ def semantic_similarity(text1, text2):
|
|
| 20 |
return float(util.cos_sim(emb1, emb2))
|
| 21 |
|
| 22 |
def calcDocSims(file):
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
#
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
# 2. 替换 JS 风格 true/false/null 为 Python 能识别的形式
|
| 29 |
-
cleaned = cleaned.replace("true", "true").replace("false", "false").replace("null", "null")
|
| 30 |
-
data = json.loads(cleaned)
|
| 31 |
-
|
| 32 |
-
pattern = r"核心痛点[::\s]*([\s\S]*?)优化措施[::\s]*"
|
| 33 |
-
res1 = re.search(pattern, data['file'][0][0]['text'], flags=re.S)
|
| 34 |
-
res1 = res1.group(1).strip()
|
| 35 |
-
res1 = re.sub(r"-?\s*核心教学痛点\d*[::]\s*", "", res1)
|
| 36 |
-
|
| 37 |
-
res2 = re.search(pattern, data['file'][1][0]['text'], flags=re.S)
|
| 38 |
-
res2 = res2.group(1).strip()
|
| 39 |
-
res2 = re.sub(r"-?\s*核心教学痛点\d*[::]\s*", "", res2)
|
| 40 |
-
sim = semantic_similarity(res1, res2)
|
| 41 |
-
return 1-sim, res1, res2
|
| 42 |
|
| 43 |
if __name__ == '__main__':
|
| 44 |
s = """
|
|
|
|
| 20 |
return float(util.cos_sim(emb1, emb2))
|
| 21 |
|
| 22 |
def calcDocSims(file):
    """Return the semantic distance between the first two documents in ``file``.

    Every ``text=..., error=`` segment is extracted from the input with a
    regex, the first two segments are reduced by ``extract_core_painpoints``,
    and the two results are compared with ``semantic_similarity``.

    Args:
        file: The payload to scan. NOTE(review): presumably a string (or an
            object whose ``str()`` form) containing ``text=..., error=``
            segments -- confirm against the caller.

    Returns:
        A tuple ``(1 - similarity, first, second)`` where ``first`` and
        ``second`` are the values ``extract_core_painpoints`` produced for
        the first two segments.

    Raises:
        IndexError: If fewer than two ``text=..., error=`` segments are found
            (same exception type as the former out-of-range list access).
    """
    # Scan the argument itself. The previous code read a module-level ``s``
    # that is only assigned under ``if __name__ == '__main__'``, so any
    # imported call failed with NameError and ignored its input.
    raw = file if isinstance(file, str) else str(file)
    texts = re.findall(r'text=(.*?),\s*error=', raw, flags=re.DOTALL)
    if len(texts) < 2:
        raise IndexError(
            f"expected at least two 'text=..., error=' segments, found {len(texts)}"
        )

    # Only the first two documents are compared; later ones are ignored.
    first, second = (extract_core_painpoints(t) for t in texts[:2])
    sim = semantic_similarity(first, second)
    return 1 - sim, first, second
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
if __name__ == '__main__':
|
| 30 |
s = """
|