|
|
import streamlit as st |
|
|
import arxiv |
|
|
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
|
|
|
|
|
|
# Number of arXiv papers fetched per query.
NUM_PAPERS = 5

# Abstract summarizer: distilled BART fine-tuned for summarization.
SUMMARIZER_MODEL = "sshleifer/distilbart-cnn-12-6"

# Instruction-tuned seq2seq model used to write the review sections.
LLM_MODEL = "MBZUAI/LaMini-Flan-T5-783M"


@st.cache_resource
def _load_models():
    """Load the HF summarizer pipeline and the seq2seq LLM exactly once.

    Streamlit re-executes this whole script on every user interaction;
    without `st.cache_resource` the model weights would be re-downloaded /
    re-loaded on each rerun. Caching keeps one shared copy per server
    process.

    Returns:
        Tuple of (summarization pipeline, tokenizer, seq2seq model).
    """
    summarizer = pipeline("summarization", model=SUMMARIZER_MODEL)
    tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
    model = AutoModelForSeq2SeqLM.from_pretrained(LLM_MODEL)
    return summarizer, tokenizer, model


# Keep the original module-level names so the rest of the file is unchanged.
summarizer, tokenizer, model = _load_models()
|
|
|
|
|
# Prompt templates for each section of the generated literature review.
# Every template exposes a single "{summaries}" placeholder, which is
# filled with the bullet list built by fetch_and_summarize_papers().
# NOTE: the Introduction prompt previously hard-coded "AI in healthcare";
# it is now topic-neutral since the topic is user-supplied.
SECTION_PROMPTS = {
    "Introduction": """Write a detailed academic introduction (~300 words) for a literature review on the given research topic, based on the following paper summaries.

Paper summaries:
{summaries}
""",
    "Key Approaches and Findings": """Describe the most important technical approaches, methodologies, and key findings.

Paper summaries:
{summaries}
""",
    "Comparative Analysis": """Write a comparative analysis of the research papers summarized below.

Paper summaries:
{summaries}
""",
    "Gaps and Future Directions": """Write a discussion about the gaps and future directions.

Paper summaries:
{summaries}
""",
}
|
|
|
|
|
def fetch_and_summarize_papers(topic):
    """Search arXiv for papers on *topic* and summarize each abstract.

    Args:
        topic: Free-text query forwarded to the arXiv search API.

    Returns:
        Tuple ``(summaries_text, results, bib_entries)`` where
        ``summaries_text`` is a newline-joined bullet list of per-paper
        summaries (or the sentinel string "⚠️ No papers found." when the
        search returns nothing), ``results`` is the list of raw arxiv
        result objects, and ``bib_entries`` is a list of numbered
        bibliography lines.
    """
    search = arxiv.Search(query=topic, max_results=NUM_PAPERS, sort_by=arxiv.SortCriterion.Relevance)
    # Search.results() is deprecated in the arxiv package; Client().results()
    # is the supported entry point and behaves identically here.
    results = list(arxiv.Client().results(search))
    if not results:
        return "⚠️ No papers found.", [], []

    summaries = []
    bib_entries = []

    for i, result in enumerate(results):
        abstract = result.summary.strip().replace("\n", " ")
        try:
            summary = summarizer(abstract, max_length=150, min_length=60, do_sample=False)[0]['summary_text']
        except Exception:
            # Best-effort fallback: keep the (truncated) raw abstract if the
            # summarizer fails, e.g. on inputs the model cannot handle.
            # A bare `except:` would also swallow KeyboardInterrupt/SystemExit.
            summary = abstract[:300]
        summaries.append(f"- {summary}")

        authors = ", ".join(a.name for a in result.authors)
        year = result.published.year
        title = result.title.strip()
        url = result.entry_id
        bib_entries.append(f"{i+1}. {authors} ({year}). *{title}*. arXiv. {url}")

    return "\n".join(summaries), results, bib_entries
|
|
|
|
|
def generate_section(prompt_text):
    """Generate one review section from *prompt_text* with the seq2seq LLM.

    The prompt is truncated to a 1024-token input window and decoded
    greedily, so output is deterministic for a given prompt.

    Args:
        prompt_text: Fully formatted prompt string.

    Returns:
        The generated section text with special tokens stripped.
    """
    inputs = tokenizer(prompt_text, return_tensors="pt", truncation=True, max_length=1024)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        # Greedy decoding. The original also passed temperature=0.7, which is
        # ignored when do_sample=False and only triggers a transformers
        # warning — removed with no behavior change.
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
|
|
|
|
|
def generate_lit_review(topic):
    """Assemble a full literature review (all sections plus bibliography).

    Args:
        topic: User-supplied research topic.

    Returns:
        Markdown string with one "### <section>" block per prompt template
        followed by a "## Bibliography" list, or the fetcher's warning
        string when no papers were found.
    """
    summaries_text, _, bib_entries = fetch_and_summarize_papers(topic)
    # The fetcher signals "no results" with a ⚠️-prefixed sentinel string.
    if summaries_text.startswith("⚠️"):
        return summaries_text

    rendered_sections = []
    for heading, template in SECTION_PROMPTS.items():
        body = generate_section(template.format(summaries=summaries_text))
        rendered_sections.append(f"### {heading}\n{body}\n\n")

    bibliography = "## Bibliography\n" + "\n".join(bib_entries)
    return "".join(rendered_sections) + bibliography
|
|
|
|
|
|
|
|
# --- Streamlit UI -----------------------------------------------------------
st.title("📚 AI Researcher - Literature Review Generator")

topic = st.text_input("Enter a research topic")

# The button must be rendered unconditionally (before the topic check) so it
# always appears in the UI; generation only runs when a topic was entered.
clicked = st.button("Generate Review")
if clicked and topic:
    with st.spinner("Fetching and generating literature review..."):
        review_markdown = generate_lit_review(topic)
    st.markdown(review_markdown)
|
|
|