Spaces:
Sleeping
Sleeping
File size: 3,435 Bytes
8496edd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import json
import re
import pypandoc
# Sample Markdown document: one heading, a sentence with inline emphasis,
# and a two-item bullet list. The value starts and ends with a newline.
markdown_text = (
    "\n"
    "# My Document\n"
    "Some **bold** text here, and some *italic* text there.\n"
    "- Bullet point 1\n"
    "- Bullet point 2\n"
)
def markdown_to_latex(markdown_text):
    """Convert a Markdown string to LaTeX through ``pypandoc``.

    Args:
        markdown_text: Markdown source to convert.

    Returns:
        Whatever ``pypandoc.convert_text`` returns for LaTeX output.
    """
    # 'md' is the input format name handed to pypandoc; 'latex' is the target.
    return pypandoc.convert_text(markdown_text, format='md', to='latex')
def markdown_to_json_method(markdown_text):
    """Parse a Markdown outline into a nested list of method-class nodes.

    Lines are stripped before being classified:

    * A line starting with ``#`` is a heading. It becomes a node
      ``{"method_class": title, "children": [], "description": text}`` that
      is nested under the closest preceding heading with fewer ``#``
      characters (or placed at the top level when there is none).
    * The non-blank lines directly after a heading, up to the next blank
      line, heading, or list item, are joined with single spaces and stored
      as that heading's ``description``.
    * A line starting with ``-`` is a list item. It becomes a leaf
      ``{"method": name, "description": text}``, split at the first ``": "``
      (``description`` is ``""`` when there is no ``": "``), and is appended
      to the children of the most recent heading, or to the top level when
      no heading has been seen yet.
    * Every other line is ignored.

    Args:
        markdown_text: Markdown source text.

    Returns:
        The list of top-level entries (heading nodes and any list items that
        appeared before the first heading).
    """
    root = {"method_class": "root", "children": []}
    # Stack of currently open headings, innermost last. The root entry has
    # level 0, which is below every real heading level (>= 1), so it is never
    # popped and stack[-1] always exists.
    stack = [{"node": root, "level": 0}]

    lines = markdown_text.strip().split('\n')
    i = 0
    while i < len(lines):
        line = lines[i].strip()
        i += 1
        if not line:
            continue

        if line.startswith('#'):
            match = re.match(r'^(#+)\s*(.*?)$', line)
            if not match:
                continue
            hashes, method_class = match.groups()
            current_level = len(hashes)
            new_node = {"method_class": method_class, "children": [], "description": ""}

            # Close every open heading at the same or a deeper level so that
            # the top of the stack is the parent of the new heading.
            while stack[-1]["level"] >= current_level:
                stack.pop()
            stack[-1]["node"]["children"].append(new_node)
            stack.append({"node": new_node, "level": current_level})

            # Collect the description paragraph that directly follows.
            description_lines = []
            while i < len(lines):
                candidate = lines[i].strip()
                if not candidate or candidate.startswith(('#', '-')):
                    break
                description_lines.append(candidate)
                i += 1
            new_node["description"] = " ".join(description_lines)
            # `i` already points at the first unconsumed line, so it must not
            # be rewound: stepping back would revisit a consumed line and,
            # for a heading without a description, the heading itself.

        elif line.startswith('-'):
            # "name: text" -> ("name", "text"); without ": " the whole body is
            # the name and the description stays empty.
            method, _, description = line[1:].strip().partition(': ')
            stack[-1]["node"]["children"].append(
                {"method": method.strip(), "description": description.strip()}
            )

    return root["children"]
if __name__ == "__main__":
    # Script entry point: read the Markdown file at the path below, parse it
    # into the nested method structure, and print that structure as indented
    # JSON with non-ASCII characters left unescaped.
    with open("../data/actor_data/docs/method_en_v1.md", "r", encoding="utf-8") as md_file:
        markdown_text = md_file.read()

    result = markdown_to_json_method(markdown_text)
    print(json.dumps(result, ensure_ascii=False, indent=2))
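# SECURITY: a hard-coded API key was redacted from this comment. Revoke the exposed key and load credentials from the environment or a secret store instead.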