"""Docling structured-extraction demo for Hugging Face Spaces.

Extracts structured data from documents (PDF/images) using a JSON template.
When an Ollama server is reachable on localhost, ALL VLM inference (picture
descriptions and template extraction) is offloaded to it; otherwise the
local (Zero) GPU models are used.
"""

import base64
import io
import json
import logging
import re
import urllib.error
import urllib.request
from pathlib import Path

import gradio as gr
import spaces  # Hugging Face Spaces Zero GPU support
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import (
    PdfPipelineOptions,
    granite_picture_description,
)
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.document_extractor import DocumentExtractor
from PIL import Image

# Try new preset-based API first (docling >= 2.72), fall back to legacy.
try:
    from docling.datamodel.pipeline_options import PictureDescriptionVlmEngineOptions
    from docling.datamodel.vlm_engine_options import ApiVlmEngineOptions, VlmEngineType

    _HAS_VLM_ENGINE = True
except ImportError:
    _HAS_VLM_ENGINE = False
    try:
        from docling.datamodel.pipeline_options import PictureDescriptionApiOptions
    except ImportError:
        # Neither remote-VLM API surface is importable.  Bind a sentinel so
        # get_converter_with_vision() can fall back to the local model instead
        # of raising NameError when Ollama happens to be reachable.
        PictureDescriptionApiOptions = None

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

OLLAMA_BASE_URL = "http://127.0.0.1:11434"
OLLAMA_API_URL = f"{OLLAMA_BASE_URL}/v1/chat/completions"
OLLAMA_MODEL = "ibm/granite3.3-vision:2b"

# File extensions treated as raster images; anything else is assumed PDF.
_IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp")

# Single prompt used for every picture-description request (was duplicated
# three times across the VLM configuration branches).
_DESCRIPTION_PROMPT = "Describe the image in as much detail as possible."


# Initialize the extractor (will be moved to GPU when decorated function is called)
def get_extractor():
    """Initialize extractor - called within GPU context"""
    return DocumentExtractor(allowed_formats=[InputFormat.IMAGE, InputFormat.PDF])


def is_ollama_available(url: str = OLLAMA_BASE_URL, timeout: int = 3) -> bool:
    """Check if Ollama is running and reachable on localhost."""
    try:
        req = urllib.request.Request(url, method="GET")
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return resp.status == 200
    except (urllib.error.URLError, OSError):
        return False


# Check Ollama availability at startup
if is_ollama_available():
    logger.info(
        "Ollama is running on %s — remote VLM will be used for ALL VLM inference "
        "(picture descriptions AND template extraction)",
        OLLAMA_BASE_URL,
    )
else:
    logger.info(
        "Ollama not found on %s — will use local GPU for VLM inference",
        OLLAMA_BASE_URL,
    )


def _render_pdf_pages(pdf_source) -> list:
    """Render every page of a PDF (path or file-like) to RGB PIL images.

    Uses pypdfium2 (a docling dependency).  scale=2.0 gives enough detail for
    VLM extraction.  The PdfDocument is closed in all cases; the returned PIL
    images are independent copies.
    """
    import pypdfium2 as pdfium

    pdf = pdfium.PdfDocument(pdf_source)
    try:
        return [
            pdf[page_idx].render(scale=2.0).to_pil().convert("RGB")
            for page_idx in range(len(pdf))
        ]
    finally:
        pdf.close()


def get_document_images(source: str) -> list:
    """Convert a document source (file path or URL) to a list of PIL images.

    Handles image files directly and renders PDF pages via pypdfium2
    (a docling dependency). For URLs, downloads the file first.
    """
    if source.startswith(("http://", "https://")):
        # Download the file
        req = urllib.request.Request(source)
        with urllib.request.urlopen(req, timeout=60) as resp:
            tmp_data = resp.read()
            content_type = resp.headers.get("Content-Type", "")
        # Determine type from URL extension or Content-Type header
        lower_url = source.lower()
        if lower_url.endswith(_IMAGE_EXTS) or "image" in content_type:
            return [Image.open(io.BytesIO(tmp_data)).convert("RGB")]
        # Assume PDF
        return _render_pdf_pages(io.BytesIO(tmp_data))

    source_path = Path(source)
    if source_path.suffix.lower() in _IMAGE_EXTS:
        return [Image.open(source_path).convert("RGB")]
    # Assume PDF
    return _render_pdf_pages(str(source_path))


def _image_to_base64(img: Image.Image) -> str:
    """Encode a PIL Image as a base64 PNG string (no ``data:`` URL prefix —
    the caller prepends ``data:image/png;base64,`` itself)."""
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    return base64.b64encode(buf.getvalue()).decode("utf-8")


def _parse_vlm_json(content: str) -> dict:
    """Best-effort parse of a VLM reply.

    Tries raw JSON first, then JSON inside a fenced ```json``` block, and
    finally wraps the unparseable text as {"raw_response": ...}.
    """
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        pass
    # Try to extract JSON from a markdown code block
    match = re.search(r"```(?:json)?\s*\n?(.*?)\n?```", content, re.DOTALL)
    if match:
        try:
            return json.loads(match.group(1))
        except json.JSONDecodeError:
            pass
    return {"raw_response": content}


def extract_with_ollama(source: str, template: dict) -> dict:
    """Extract structured data by sending document images + template to Ollama.

    Uses Ollama's OpenAI-compatible /v1/chat/completions endpoint with vision
    support. This offloads ALL VLM inference to Ollama so the local GPU is
    not needed.

    Returns a dict in the same shape as the DocumentExtractor output:
    {"pages": [{"page_no": int, "extracted_data": ..., "raw_text": str,
    "errors": []}]}
    """
    images = get_document_images(source)
    logger.info(
        "Extracting with Ollama (%s): %d page(s), template keys: %s",
        OLLAMA_MODEL,
        len(images),
        list(template.keys()),
    )
    template_str = json.dumps(template, indent=2)
    prompt = (
        "Extract the following structured information from this document image. "
        "Return ONLY valid JSON matching this exact template structure:\n"
        f"{template_str}\n\n"
        "Rules:\n"
        "- Fill in the actual values found in the document\n"
        "- Use null for fields not found in the document\n"
        "- Return ONLY the JSON object, no explanation or markdown fences"
    )

    output = {"pages": []}
    for page_no, img in enumerate(images, start=1):
        img_b64 = _image_to_base64(img)
        payload = json.dumps(
            {
                "model": OLLAMA_MODEL,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/png;base64,{img_b64}",
                                },
                            },
                            {
                                "type": "text",
                                "text": prompt,
                            },
                        ],
                    }
                ],
                "temperature": 0.0,
                "max_tokens": 4096,
            }
        ).encode("utf-8")
        req = urllib.request.Request(
            OLLAMA_API_URL,
            data=payload,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        try:
            with urllib.request.urlopen(req, timeout=120) as resp:
                response_data = json.loads(resp.read().decode("utf-8"))
            content = response_data["choices"][0]["message"]["content"]
            output["pages"].append(
                {
                    "page_no": page_no,
                    "extracted_data": _parse_vlm_json(content),
                    "raw_text": content,
                    "errors": [],
                }
            )
            logger.info("Page %d extracted successfully via Ollama", page_no)
        except Exception as e:
            # A failed page is recorded, not fatal — remaining pages continue.
            logger.error("Ollama extraction failed for page %d: %s", page_no, e)
            output["pages"].append(
                {
                    "page_no": page_no,
                    "extracted_data": None,
                    "raw_text": "",
                    "errors": [str(e)],
                }
            )
    return output


def get_converter_with_vision():
    """Initialize converter with vision.

    Checks if Ollama is running on localhost:11434. If available, uses the
    remote Ollama VLM for picture descriptions (no local GPU required).
    Otherwise falls back to the local integrated granite_picture_description.
    """
    pipeline_options = PdfPipelineOptions()
    pipeline_options.do_picture_description = True
    pipeline_options.images_scale = 2.0
    pipeline_options.generate_picture_images = True

    remote_options = None
    if is_ollama_available():
        logger.info("Ollama detected on localhost:11434 — using remote VLM")
        if _HAS_VLM_ENGINE:
            # New preset-based API (docling >= 2.72)
            remote_options = PictureDescriptionVlmEngineOptions.from_preset(
                "granite_vision",
                engine_options=ApiVlmEngineOptions(
                    runtime_type=VlmEngineType.API_OLLAMA,
                    timeout=90,
                ),
            )
            remote_options.prompt = _DESCRIPTION_PROMPT
        elif PictureDescriptionApiOptions is not None:
            # Legacy API — use the module-level endpoint/model constants
            # (previously hard-coded duplicates of OLLAMA_API_URL/OLLAMA_MODEL)
            remote_options = PictureDescriptionApiOptions(
                url=OLLAMA_API_URL,
                params={"model": OLLAMA_MODEL},
                prompt=_DESCRIPTION_PROMPT,
                timeout=90,
            )
        else:
            # Neither remote API class could be imported; previously this path
            # raised NameError.  Fall back to the local model instead.
            logger.warning(
                "No remote picture-description API available in this docling "
                "version — falling back to local VLM"
            )

    if remote_options is not None:
        pipeline_options.picture_description_options = remote_options
        pipeline_options.enable_remote_services = True
    else:
        logger.info("Ollama not available — falling back to local VLM")
        pipeline_options.picture_description_options = granite_picture_description
        pipeline_options.picture_description_options.prompt = _DESCRIPTION_PROMPT

    return DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(
                pipeline_options=pipeline_options,
            )
        }
    )


def _convert_full_document(source: str) -> str:
    """Full-document conversion path (no template): Markdown + picture
    descriptions, returned as a JSON string."""
    converter = get_converter_with_vision()
    try:
        result = converter.convert(source)
        doc = result.document

        # Create a simplified output with Markdown and picture descriptions
        simplified_output = {
            "markdown": doc.export_to_markdown(),
            "pictures": [],
        }

        # Extract picture descriptions if available
        if hasattr(doc, "pictures"):
            for i, pic in enumerate(doc.pictures):
                descriptions = [
                    ann.text
                    for ann in getattr(pic, "annotations", [])
                    if hasattr(ann, "text")
                ]
                if descriptions:
                    simplified_output["pictures"].append(
                        {"index": i, "descriptions": descriptions}
                    )

        return json.dumps(simplified_output, indent=2)
    except Exception as e:
        return json.dumps({"error": f"Conversion failed: {str(e)}"}, indent=2)


@spaces.GPU(duration=60)  # Allocate GPU for up to 60 seconds
def process_extraction(file_input, url_input, template_json):
    """
    Process document extraction with the provided template.
    Uses Hugging Face Spaces Zero GPU feature.

    Args:
        file_input: Uploaded file (PDF or image)
        url_input: URL to a document
        template_json: JSON string defining the extraction template

    Returns:
        JSON string with extracted data
    """
    try:
        # Determine the source (uploaded file wins over URL)
        if file_input is not None:
            source = file_input.name
        elif url_input and url_input.strip():
            source = url_input.strip()
        else:
            return json.dumps(
                {"error": "Please provide either a file or a URL"}, indent=2
            )

        # If no template is provided, use the converter with vision
        if not template_json or not template_json.strip():
            return _convert_full_document(source)

        # Parse the template JSON
        try:
            template = json.loads(template_json)
        except json.JSONDecodeError as e:
            return json.dumps({"error": f"Invalid JSON template: {str(e)}"}, indent=2)

        # Use Ollama for extraction when available (no local GPU needed for VLM)
        if is_ollama_available():
            logger.info("Using Ollama for template extraction (remote VLM)")
            return json.dumps(extract_with_ollama(source, template), indent=2)

        # Fall back to local DocumentExtractor (uses local GPU)
        logger.info("Using local DocumentExtractor (local GPU)")
        extractor = get_extractor()
        result = extractor.extract(source=source, template=template)

        # Format the output in the same page-wise shape as extract_with_ollama
        output = {
            "pages": [
                {
                    "page_no": page.page_no,
                    "extracted_data": page.extracted_data,
                    "raw_text": page.raw_text,
                    "errors": page.errors if page.errors else [],
                }
                for page in result.pages
            ]
        }
        return json.dumps(output, indent=2)
    except Exception as e:
        return json.dumps({"error": f"Extraction failed: {str(e)}"}, indent=2)


# Default template example
default_template = json.dumps(
    {"bill_no": "string", "total": "float", "date": "string"}, indent=2
)

# Create Gradio interface
with gr.Blocks(title="Docling Structured Extraction") as demo:
    gr.Markdown(
        """
        # 📄 Docling Structured Extraction Demo

        Extract structured data from documents (PDF/Images) using AI-powered extraction.

        **Note:** This feature is currently in beta.

        ### How to use:
        1. Upload a file OR provide a URL to a document
        2. Define your extraction template in JSON format (or leave empty for
           full document conversion with picture descriptions)
        3. Click "Extract" to get structured data or full document JSON

        🚀 **Powered by Hugging Face Spaces Zero GPU**
        """
    )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Input Source")
            file_input = gr.File(label="Upload File (PDF or Image)")
            url_input = gr.Textbox(
                label="Or Enter Document URL",
                placeholder="https://example.com/document.pdf",
                lines=1,
            )

            gr.Markdown("### Extraction Template")
            gr.Markdown(
                """
                Define the structure of data you want to extract.
                Use JSON format with field names and types:
                - `"string"` for text fields
                - `"float"` for numbers with decimals
                - `"int"` for whole numbers
                """
            )
            template_input = gr.Code(
                label="JSON Template",
                value=default_template,
                language="json",
                lines=15,
            )

            extract_btn = gr.Button("Extract", variant="primary", size="lg")

        with gr.Column():
            gr.Markdown("### Extracted Data")
            output_json = gr.Code(label="Result (JSON)", language="json", lines=25)

    # Examples section
    gr.Markdown("### Examples")
    gr.Examples(
        examples=[
            [
                None,
                "https://upload.wikimedia.org/wikipedia/commons/9/9f/Swiss_QR-Bill_example.jpg",
                json.dumps({"bill_no": "string", "total": "float"}, indent=2),
            ],
            [
                None,
                "https://upload.wikimedia.org/wikipedia/commons/9/9f/Swiss_QR-Bill_example.jpg",
                json.dumps(
                    {
                        "bill_no": "string",
                        "total": "float",
                        "sender_name": "string",
                        "receiver_name": "string",
                        "postal_code": "string",
                    },
                    indent=2,
                ),
            ],
        ],
        inputs=[file_input, url_input, template_input],
        label="Try these examples",
    )

    # Connect the extraction function
    extract_btn.click(
        fn=process_extraction,
        inputs=[file_input, url_input, template_input],
        outputs=output_json,
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()