# ai-researcher / app.py
# (Hugging Face Space page residue — uploader: gunner2k25, commit 3684bac,
#  "Upload 3 files" — kept as a comment so the file remains valid Python.)
import streamlit as st
import arxiv
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
# --- CONFIG ---
NUM_PAPERS = 5  # number of arXiv results fetched per query
SUMMARIZER_MODEL = "sshleifer/distilbart-cnn-12-6"
LLM_MODEL = "MBZUAI/LaMini-Flan-T5-783M"


@st.cache_resource(show_spinner=False)
def _load_models():
    """Load the summarization pipeline and the seq2seq LLM once per process.

    Streamlit re-executes the entire script on every user interaction;
    without st.cache_resource these three heavyweight objects would be
    re-loaded (and potentially re-downloaded) on each rerun.
    """
    summ = pipeline("summarization", model=SUMMARIZER_MODEL)
    tok = AutoTokenizer.from_pretrained(LLM_MODEL)
    llm = AutoModelForSeq2SeqLM.from_pretrained(LLM_MODEL)
    return summ, tok, llm


# Keep the original module-level names so the rest of the file is unchanged.
summarizer, tokenizer, model = _load_models()
# Prompt templates for each section of the generated review, iterated in
# insertion order by generate_lit_review(). Each template contains exactly
# one {summaries} placeholder, filled with the bulleted paper summaries.
# NOTE(review): the Introduction prompt hard-codes "AI in healthcare" even
# though the app accepts an arbitrary topic — confirm whether intentional.
SECTION_PROMPTS = {
"Introduction": """Write a detailed academic introduction (~300 words) for a literature review on the use of AI in healthcare, based on the following paper summaries.
Paper summaries:
{summaries}
""",
"Key Approaches and Findings": """Describe the most important technical approaches, methodologies, and key findings.
Paper summaries:
{summaries}
""",
"Comparative Analysis": """Write a comparative analysis of the research papers summarized below.
Paper summaries:
{summaries}
""",
"Gaps and Future Directions": """Write a discussion about the gaps and future directions.
Paper summaries:
{summaries}
"""
}
def fetch_and_summarize_papers(topic):
    """Fetch the top arXiv papers for *topic* and summarize their abstracts.

    Parameters
    ----------
    topic : str
        Free-text query passed to the arXiv search API.

    Returns
    -------
    tuple[str, list, list]
        (newline-joined "- summary" bullets, raw arxiv result objects,
        numbered bibliography strings). When the search yields nothing,
        the first element is the sentinel "⚠️ No papers found." and both
        lists are empty — generate_lit_review checks for that sentinel.
    """
    search = arxiv.Search(
        query=topic,
        max_results=NUM_PAPERS,
        sort_by=arxiv.SortCriterion.Relevance,
    )
    results = list(search.results())
    if not results:
        return "⚠️ No papers found.", [], []

    summaries = []
    bib_entries = []
    for i, result in enumerate(results):
        abstract = result.summary.strip().replace("\n", " ")
        try:
            summary = summarizer(
                abstract, max_length=150, min_length=60, do_sample=False
            )[0]['summary_text']
        except Exception:
            # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
            # are not swallowed. Best-effort fallback: truncated raw abstract.
            summary = abstract[:300]
        summaries.append(f"- {summary}")

        authors = ", ".join(a.name for a in result.authors)
        year = result.published.year
        title = result.title.strip()
        url = result.entry_id
        bib_entries.append(f"{i+1}. {authors} ({year}). *{title}*. arXiv. {url}")

    return "\n".join(summaries), results, bib_entries
def generate_section(prompt_text):
    """Generate one literature-review section from *prompt_text*.

    The prompt is truncated to the model's 1024-token input window and
    decoded greedily, so output is deterministic for a given prompt.
    """
    inputs = tokenizer(prompt_text, return_tensors="pt", truncation=True, max_length=1024)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        # Greedy decoding. The original also passed temperature=0.7, which
        # transformers ignores (and warns about) when do_sample=False, so
        # the flag is dropped — generated text is unchanged.
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
def generate_lit_review(topic):
    """Assemble the complete literature review for *topic*.

    Fetches and summarizes papers, generates every section listed in
    SECTION_PROMPTS, and appends a numbered bibliography. Returns the
    "no papers" sentinel string unchanged when the search is empty.
    """
    summaries_text, _, bib_entries = fetch_and_summarize_papers(topic)
    if summaries_text.startswith("⚠️"):
        # Propagate the "no papers found" sentinel straight to the UI.
        return summaries_text

    parts = []
    for heading, template in SECTION_PROMPTS.items():
        body = generate_section(template.format(summaries=summaries_text))
        parts.append(f"### {heading}\n{body}\n\n")
    parts.append("## Bibliography\n" + "\n".join(bib_entries))
    return "".join(parts)
# --- STREAMLIT UI ---
st.title("📚 AI Researcher - Literature Review Generator")
topic = st.text_input("Enter a research topic")

# The button is rendered unconditionally (exactly as the original's
# short-circuit `st.button(...) and topic` evaluated it first); the review
# only runs when it was clicked and a topic was entered.
clicked = st.button("Generate Review")
if clicked and topic:
    with st.spinner("Fetching and generating literature review..."):
        result = generate_lit_review(topic)
        st.markdown(result)