import base64
import os
import sys
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Union

import fitz
import openai
import requests


# Shared client for the OpenAI-compatible endpoint used by every OCR request.
# NOTE(review): "sk-" and "http://ip:port/v1" look like placeholders that must
# be replaced with a real key and server address before running - confirm.
client = openai.OpenAI(api_key="sk-", base_url="http://ip:port/v1")

# Model identifier sent with each chat-completion request.
model = "winninghealth/DocLoom"

# Instruction text sent alongside each page image: asks for a markdown
# transcription with LaTeX equations, HTML tables and labelled figures.
# NOTE(review): "LateX" is spelled this way in the prompt; presumably it must
# match the wording the model expects - confirm before "correcting" it.
build_no_anchoring_v4_yaml_prompt = "Attached is one page of a document that you must process. Just return the plain text representation of this document as if you were reading it naturally. Convert equations to LateX and tables to HTML.\nIf there are any figures or charts, label them with the following markdown syntax ![Alt text describing the contents of the figure](page_startx_starty_width_height.png)\nReturn your output as markdown."


def render_pdf_to_base64png(doc, page_num, target_longest_image_dim: int = 2048):
    """Render one page of an open PDF to a base64-encoded PNG string.

    The page is scaled uniformly so that its longer side comes out at
    roughly ``target_longest_image_dim`` pixels.

    Args:
        doc: An open PyMuPDF document (supports ``len()`` and 0-based
            indexing that yields page objects).
        page_num: 1-based page number.
        target_longest_image_dim: Desired size, in pixels, of the longer
            page side. Must be positive.

    Returns:
        The rendered PNG bytes, base64-encoded and decoded to ``str``.

    Raises:
        IndexError: If ``page_num`` is not in ``1..len(doc)``.
        ValueError: If ``target_longest_image_dim`` is not positive or the
            page has an empty rectangle.
    """
    page_count = len(doc)
    # Reject out-of-range numbers explicitly: page_num == 0 would otherwise
    # become index -1 and silently render the *last* page.
    if not 1 <= page_num <= page_count:
        raise IndexError(
            f"page_num must be between 1 and {page_count}, got {page_num}"
        )
    if target_longest_image_dim <= 0:
        raise ValueError(
            f"target_longest_image_dim must be positive, got {target_longest_image_dim}"
        )

    page = doc[page_num - 1]  # PyMuPDF uses 0-based indexing
    rect = page.rect
    longest_dim = max(rect.width, rect.height)
    if longest_dim <= 0:
        raise ValueError(f"page {page_num} has an empty page rectangle")

    # Zoom factor that maps the longer page side onto the target dimension.
    zoom = target_longest_image_dim / longest_dim

    # Render the page with the same zoom on both axes to keep the aspect ratio.
    pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))

    # Serialize the pixmap as PNG and base64-encode it for embedding in a data URL.
    img_bytes = pix.tobytes("png")
    return base64.b64encode(img_bytes).decode("utf-8")


def get_image_base64_from_url(image_url, timeout=30):
    """Download ``image_url`` and return its body as a base64 string.

    Args:
        image_url: URL to fetch with an HTTP GET.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Without a timeout a stalled server would
            block the caller indefinitely.

    Returns:
        The response body, base64-encoded and decoded to ``str``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(image_url, timeout=timeout)
    response.raise_for_status()
    return base64.b64encode(response.content).decode("utf-8")


def ocr_page_with_nanonets_s(img_base64) -> str:
    """Send one page image to the OCR model and return its text output.

    The image is embedded as a PNG data URL in a single user message,
    followed by the module-level prompt, and sent through the module-level
    ``client`` to ``model``.

    Args:
        img_base64: Base64-encoded PNG image of the page.

    Returns:
        The content of the first completion choice, or an empty string when
        the model returned no content.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{img_base64}"},
                    },
                    {
                        "type": "text",
                        "text": build_no_anchoring_v4_yaml_prompt,
                    },
                ],
            }
        ],
        temperature=0.0,  # deterministic decoding
        max_tokens=15000,  # max 16192
    )
    content = response.choices[0].message.content
    # The message content is optional in the API response; normalise a
    # missing value to "" so callers can always write the result to a file.
    return content if content is not None else ""


# Serializes every access to the shared PyMuPDF document from worker threads.
# PyMuPDF documents that it does not support concurrent use from multiple
# threads, so only one page may be rendered at a time.
_RENDER_LOCK = threading.Lock()


def process_page(doc, page_num, page_count):
    """Render one page and OCR it, returning ``(page_num, content)``.

    Rendering runs under a module-level lock because the same ``doc`` is
    shared by all worker threads; the OCR request runs outside the lock so
    network calls for different pages still overlap.

    Args:
        doc: Open PyMuPDF document shared between workers.
        page_num: 1-based number of the page to process.
        page_count: Total number of pages. Unused here; kept so existing
            callers keep working.

    Returns:
        A ``(page_num, content)`` tuple where ``content`` is the OCR output.
    """
    with _RENDER_LOCK:
        img_base64 = render_pdf_to_base64png(doc, page_num, target_longest_image_dim=1288)
    content = ocr_page_with_nanonets_s(img_base64)
    return page_num, content


# Process all pages concurrently and save the combined result as markdown.
if len(sys.argv) < 2:
    print("Usage: python DocLoom_test.py <pdf_file_path>")
    sys.exit(1)

file_path = sys.argv[1]
# Swap only the final extension. str.replace(".pdf", ".md") rewrote every
# ".pdf" in the path and, when none matched (e.g. "scan.PDF"), left the output
# path equal to the input path so the input file was truncated on write.
output_path = os.path.splitext(file_path)[0] + ".md"
if os.path.abspath(output_path) == os.path.abspath(file_path):
    print(f"Refusing to overwrite the input file: {file_path}")
    sys.exit(1)

# Open PDF once for all operations
doc = fitz.open(file_path)
try:
    page_count = len(doc)

    print(f"Total pages: {page_count}")
    print("Starting OCR processing...\n")

    completed_pages = 0
    page_contents = {}

    # The output is opened before any OCR work so an unwritable path fails
    # early; the text itself is written only after every page has finished.
    with open(output_path, "w", encoding="utf-8") as f:
        with ThreadPoolExecutor(max_workers=8) as executor:
            futures = {
                executor.submit(process_page, doc, page_num, page_count): page_num
                for page_num in range(1, page_count + 1)
            }

            try:
                for future in as_completed(futures):
                    # result() re-raises any exception from the worker.
                    page_num, content = future.result()
                    page_contents[page_num] = content
                    completed_pages += 1

                    # Display progress
                    progress = (completed_pages / page_count) * 100
                    print(f"Progress: {completed_pages}/{page_count} pages ({progress:.1f}%)")
            except BaseException:
                # Fail fast: drop pages that have not started yet instead of
                # OCR-ing the rest of the document before the error surfaces.
                for pending in futures:
                    pending.cancel()
                raise

        # Pages complete out of order; emit them in page order. They are
        # concatenated as-is, with no separator inserted between pages.
        f.writelines(page_contents[i] for i in range(1, page_count + 1))
finally:
    # Release the document even when a page fails.
    doc.close()

print(f"\nDone! Output saved to: {output_path}")