File size: 3,186 Bytes
3684bac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import streamlit as st
import arxiv
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

# --- CONFIG ---
NUM_PAPERS = 5  # number of arXiv results fetched per query
SUMMARIZER_MODEL = "sshleifer/distilbart-cnn-12-6"
LLM_MODEL = "MBZUAI/LaMini-Flan-T5-783M"


@st.cache_resource(show_spinner="Loading models (first run only)...")
def _load_models():
    """Load and cache the summarization pipeline and the seq2seq LLM.

    Streamlit re-executes this script top-to-bottom on every widget
    interaction; without ``st.cache_resource`` the (large) model weights
    would be re-downloaded/re-instantiated on each rerun. The decorator
    guarantees a single load per process.

    Returns:
        tuple: (summarization pipeline, LLM tokenizer, LLM model).
    """
    _summarizer = pipeline("summarization", model=SUMMARIZER_MODEL)
    _tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
    _model = AutoModelForSeq2SeqLM.from_pretrained(LLM_MODEL)
    return _summarizer, _tokenizer, _model


# Module-level names preserved so the functions below keep working unchanged.
summarizer, tokenizer, model = _load_models()

# Section heading -> prompt template for the review generator.
# Each template carries a "{summaries}" placeholder that is filled with the
# bullet list of per-paper summaries before being sent to the LLM.
# NOTE: the template text is runtime behavior (it shapes model output) — edit
# with care.
SECTION_PROMPTS = {
    "Introduction": """Write a detailed academic introduction (~300 words) for a literature review on the use of AI in healthcare, based on the following paper summaries.

Paper summaries:
{summaries}
""",
    "Key Approaches and Findings": """Describe the most important technical approaches, methodologies, and key findings.

Paper summaries:
{summaries}
""",
    "Comparative Analysis": """Write a comparative analysis of the research papers summarized below.

Paper summaries:
{summaries}
""",
    "Gaps and Future Directions": """Write a discussion about the gaps and future directions.

Paper summaries:
{summaries}
"""
}

def fetch_and_summarize_papers(topic):
    """Fetch up to NUM_PAPERS arXiv papers for *topic* and summarize each abstract.

    Args:
        topic: Free-text search query passed to the arXiv API.

    Returns:
        tuple:
            - str: newline-joined "- <summary>" bullets, or a "⚠️ ..."
              sentinel message when no papers were found (callers check
              this prefix),
            - list: the raw arxiv result objects,
            - list[str]: numbered bibliography entries.
    """
    search = arxiv.Search(query=topic, max_results=NUM_PAPERS, sort_by=arxiv.SortCriterion.Relevance)
    results = list(search.results())
    if not results:
        return "⚠️ No papers found.", [], []

    summaries = []
    bib_entries = []

    for idx, result in enumerate(results, start=1):
        abstract = result.summary.strip().replace("\n", " ")
        try:
            summary = summarizer(abstract, max_length=150, min_length=60, do_sample=False)[0]['summary_text']
        except Exception:
            # Best-effort fallback: summarization can fail on unusual input
            # (e.g. very short abstracts); use a truncated abstract instead.
            # Narrowed from a bare `except:` so Ctrl-C / SystemExit still work.
            summary = abstract[:300]
        summaries.append(f"- {summary}")

        authors = ", ".join(a.name for a in result.authors)
        year = result.published.year
        title = result.title.strip()
        url = result.entry_id
        bib_entries.append(f"{idx}. {authors} ({year}). *{title}*. arXiv. {url}")

    return "\n".join(summaries), results, bib_entries

def generate_section(prompt_text):
    """Generate one review section from *prompt_text* with the cached LLM.

    Uses greedy decoding (do_sample=False) for reproducible output. The
    original code also passed ``temperature=0.7``, which is ignored under
    greedy decoding and makes recent transformers versions emit a
    "sampling flag without do_sample=True" warning — removed.

    Args:
        prompt_text: Full prompt (template + summaries); truncated to the
            model's 1024-token input window.

    Returns:
        str: The decoded, stripped generation.
    """
    inputs = tokenizer(prompt_text, return_tensors="pt", truncation=True, max_length=1024)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

def generate_lit_review(topic):
    """Build a full markdown literature review for *topic*.

    Fetches and summarizes papers, generates one LLM-written section per
    entry in SECTION_PROMPTS, and appends a numbered bibliography.

    Args:
        topic: Research topic to search arXiv for.

    Returns:
        str: Markdown review, or the "⚠️ ..." message when nothing was found.
    """
    summaries_text, _, bib_entries = fetch_and_summarize_papers(topic)
    # fetch_and_summarize_papers signals "no results" via a ⚠️-prefixed string.
    if summaries_text.startswith("⚠️"):
        return summaries_text

    parts = []
    for heading, template in SECTION_PROMPTS.items():
        body = generate_section(template.format(summaries=summaries_text))
        parts.append(f"### {heading}\n{body}\n\n")

    parts.append("## Bibliography\n" + "\n".join(bib_entries))
    return "".join(parts)

# --- STREAMLIT UI ---
# Script tail: Streamlit re-runs this on every interaction. The button
# returns True only on the click's rerun, and the `and topic` guard skips
# generation when the text input is empty.
st.title("📚 AI Researcher - Literature Review Generator")
topic = st.text_input("Enter a research topic")

if st.button("Generate Review") and topic:
    with st.spinner("Fetching and generating literature review..."):
        result = generate_lit_review(topic)
        st.markdown(result)