|
|
import random |
|
|
from llm.llm import LLM |
|
|
from collections import Counter, defaultdict |
|
|
from prompt.template import DECOMPOSE_PRINCIPLE_PROMPT |
|
|
|
|
|
from utils.utils import read_json_file, write_json_file |
|
|
|
|
|
|
|
|
def read_problem_papers(problem_name):
    """Collect the ``info`` entries of every paper belonging to a problem.

    Loads the relative path ``../data/paper_info_dataset.json`` via
    ``read_json_file`` and scans the list stored under its ``'data'`` key.

    Args:
        problem_name: Prefix matched against each record's ``'paper'`` string.

    Returns:
        A list holding the ``'info'`` value of each record whose ``'paper'``
        field starts with ``problem_name``, in dataset order.
    """
    records = read_json_file('../data/paper_info_dataset.json')['data']
    return [
        record['info']
        for record in records
        if record['paper'].startswith(problem_name)
    ]
|
|
|
|
|
|
|
|
def _sample_papers_per_problem(data, min_problem='2014', per_problem=3):
    """Group paper infos by problem and cap each problem at ``per_problem``.

    The problem identifier is the part of ``paper['paper']`` before the first
    ``'/'``. Records are kept only when that identifier compares ``>=``
    ``min_problem`` as a plain string (presumably identifiers start with a
    four-digit year -- confirm against the dataset).
    """
    by_problem = defaultdict(list)
    for paper in data:
        problem = paper['paper'].split('/')[0]
        if problem >= min_problem:
            by_problem[problem].append(paper['info'])

    # Only problems with more than ``per_problem`` papers are down-sampled;
    # smaller groups are kept as they are.
    return {
        problem: random.sample(infos, per_problem) if len(infos) > per_problem else infos
        for problem, infos in by_problem.items()
    }


def _group_by_type_and_tasknum(problem_papers):
    """Regroup sampled papers by ``(problem_type, number_of_tasks)``.

    The problem type is the second ``'_'``-separated field of the problem
    identifier. Identifiers without an underscore carry no type and are
    skipped instead of raising ``IndexError``.
    """
    by_type = defaultdict(list)
    for problem, infos in problem_papers.items():
        fields = problem.split('_')
        if len(fields) < 2:
            continue
        by_type[fields[1]].extend(infos)

    # Two passes on purpose: group keys are created type by type, which fixes
    # the order in which the caller later samples and queries the model.
    grouped = defaultdict(list)
    for problem_type, infos in by_type.items():
        for info in infos:
            grouped[(problem_type, len(info['tasks']))].append(info)
    return grouped


def generate_decompose_prompt(data):
    """Ask the LLM for decomposition principles per problem type and task count.

    Pipeline:
      1. Keep papers whose problem identifier compares ``>= '2014'`` and
         sample at most 3 papers per problem.
      2. Regroup the sampled papers by ``(problem_type, len(paper['tasks']))``.
      3. For every group with problem type ``A``-``F`` and 3, 4 or 5 tasks,
         sample at most 6 papers, render their subtasks with
         ``task_decompose``, fill ``DECOMPOSE_PRINCIPLE_PROMPT`` and send it
         to the model.

    Args:
        data: Iterable of records, each with a ``'paper'`` string and an
            ``'info'`` dict that contains a ``'tasks'`` list.

    Returns:
        A ``defaultdict(dict)`` mapping ``problem_type -> {tasknum: answer}``.
        It is empty when no group qualifies.

    Note:
        Sampling uses the global ``random`` state, so results vary between
        runs unless the caller seeds it.
    """
    allowed_tasknums = {3, 4, 5}
    allowed_types = {'A', 'B', 'C', 'D', 'E', 'F'}
    examples_per_prompt = 6

    grouped = _group_by_type_and_tasknum(_sample_papers_per_problem(data))

    llm = None
    result = defaultdict(dict)
    for (problem_type, tasknum), papers in grouped.items():
        if tasknum not in allowed_tasknums or problem_type not in allowed_types:
            continue

        # Create the client only once a prompt is actually needed, so input
        # without any qualifying group never touches the model backend.
        if llm is None:
            llm = LLM('chatgpt-4o-latest')

        print(f"Problem Type: {problem_type}, Task Number: {tasknum}, size: {len(papers)}")
        selected_papers = random.sample(papers, min(len(papers), examples_per_prompt))
        examples = '---'.join(task_decompose(paper) for paper in selected_papers)
        prompt = DECOMPOSE_PRINCIPLE_PROMPT.format(examples=examples, tasknum=tasknum)
        result[problem_type][tasknum] = llm.generate(prompt)

    return result
|
|
|
|
|
|
|
|
def task_decompose(paper):
    """Render a paper's tasks as a numbered bullet list.

    Args:
        paper: Dict with a ``'tasks'`` list; each task is a dict holding a
            ``'task_description'`` value.

    Returns:
        One line per task in the form ``- Subtask <i>: <description>``,
        numbered from 1 and joined with newlines. An empty task list yields
        an empty string.
    """
    return '\n'.join(
        f"- Subtask {index}: {task['task_description']}"
        for index, task in enumerate(paper['tasks'], start=1)
    )
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: load the paper dataset, generate the decomposition
    # principles from its 'data' list, and write them out as JSON.
    input_path = '../data/actor_data/input/paper_info_dataset.json'
    output_path = '../data/actor_data/input/decompose_prompt.json'

    dataset = read_json_file(input_path)
    write_json_file(output_path, generate_decompose_prompt(dataset['data']))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|