Spaces:
Sleeping
Sleeping
| import json | |
| import re | |
| import pypandoc | |
| # A sample Markdown string | |
| markdown_text = """ | |
| # My Document | |
| Some **bold** text here, and some *italic* text there. | |
| - Bullet point 1 | |
| - Bullet point 2 | |
| """ | |
| def markdown_to_latex(markdown_text): | |
| # Convert Markdown string to LaTeX | |
| latex_text = pypandoc.convert_text(markdown_text, to='latex', format='md') | |
| return latex_text | |
| def markdown_to_json_method(markdown_text): | |
| # 初始化根节点和层级堆栈,初始层级设为 0,以便支持一级标题 | |
| root = {"method_class": "root", "children": []} | |
| stack = [{"node": root, "level": 0}] # 用堆栈跟踪层级关系 | |
| lines = markdown_text.strip().split('\n') | |
| i = 0 | |
| while i < len(lines): | |
| line = lines[i].strip() | |
| i += 1 | |
| if not line: | |
| continue | |
| # 匹配标题 | |
| if line.startswith('#'): | |
| match = re.match(r'^(#+)\s*(.*?)$', line) | |
| if not match: | |
| continue | |
| hashes, method_class = match.groups() | |
| current_level = len(hashes) | |
| # 创建新节点 | |
| new_node = {"method_class": method_class, "children": [], "description": ""} | |
| # 寻找合适的父节点 | |
| while stack and stack[-1]["level"] >= current_level: | |
| stack.pop() | |
| # 如果没有找到合适的父节点,则将 new_node 加入到 root 下 | |
| if stack: | |
| parent = stack[-1]["node"] | |
| else: | |
| parent = root | |
| parent["children"].append(new_node) | |
| # 更新堆栈 | |
| stack.append({"node": new_node, "level": current_level}) | |
| # 查找紧随标题后的描述文本 | |
| description_lines = [] | |
| while i < len(lines) and lines[i].strip() and not lines[i].strip().startswith('#') and not lines[i].strip().startswith('-'): | |
| description_lines.append(lines[i].strip()) | |
| i += 1 | |
| if description_lines: | |
| new_node["description"] = " ".join(description_lines) | |
| # 回退一行,因为下一行可能是列表项或新标题 | |
| if i < len(lines): | |
| i -= 1 | |
| # 匹配列表项 | |
| elif line.startswith('-'): | |
| item = {} | |
| if ': ' in line: | |
| method, description = line[1:].strip().split(': ', 1) | |
| description = description | |
| item = {"method": method.strip(), "description": description.strip()} | |
| else: | |
| item = {"method": line[1:].strip(), "description": ""} | |
| # 添加到当前层级的子节点;若无标题节点,则直接添加到 root | |
| if stack: | |
| current_node = stack[-1]["node"] | |
| current_node.setdefault("children", []).append(item) | |
| else: | |
| root.setdefault("children", []).append(item) | |
| # 返回所有解析到的顶级标题节点 | |
| return root["children"] | |
| if __name__ == "__main__": | |
| with open("../data/actor_data/docs/method_en_v1.md", "r", encoding="utf-8") as f: | |
| markdown_text = f.read() | |
| result = markdown_to_json_method(markdown_text) | |
| print(json.dumps(result, indent=2, ensure_ascii=False)) | |
| # AIzaSyCfcnYh7jBDnjP7kex7HEj4rpUpHRxvM_0 |