| from __future__ import annotations |
|
|
| from datetime import datetime |
| from typing import Tuple |
| import re |
|
|
| from src.export_utils import ( |
| SUBTITLE_FORMATS, |
| SUMMARY_FORMATS, |
| TRANSCRIPT_FORMATS, |
| export_plain_text, |
| ) |
|
|
| from ..models.export import SummaryExportRequest, TranscriptExportRequest |
|
|
|
|
def _sanitize_filename(title: str) -> str:
    """Return a fragment of *title* that is safe to embed in a file name.

    The characters ``< > : " / \\ | ? *`` are removed outright; every other
    character that is not a word character, ``-``, ``_`` or ``.`` becomes
    ``_``. Runs of underscores are collapsed, and the result is limited to
    50 characters with no leading or trailing underscore.

    Args:
        title: Free-form title; may be empty.

    Returns:
        The sanitized fragment, or ``""`` when nothing usable remains.
    """
    if not title:
        return ""

    # Remove the characters listed above instead of replacing them.
    cleaned = re.sub(r'[<>:"/\\|?*]', '', title)
    # Anything else outside [\w\-_.] (whitespace, punctuation, ...) becomes "_".
    cleaned = re.sub(r'[^\w\-_.]', '_', cleaned)
    # Collapse separator runs and trim them from both ends.
    cleaned = re.sub(r'_+', '_', cleaned).strip('_')

    # Cutting at 50 characters can land right after a separator; strip again
    # so the fragment never ends with a dangling underscore.
    return cleaned[:50].rstrip('_')
|
|
|
|
def _build_utterance_tuples(payload: TranscriptExportRequest):
    """Flatten ``payload.utterances`` into the tuple lists used for export.

    Returns:
        A pair ``(plain, with_speakers)``. ``plain`` holds one
        ``(start, end, text)`` tuple per utterance. ``with_speakers`` holds
        ``(start, end, text, speaker)`` tuples, with a missing speaker
        replaced by ``0``; it is ``None`` when no utterance has a speaker.
    """
    items = payload.utterances
    plain = [(item.start, item.end, item.text) for item in items]

    # Without any speaker information there is nothing extra to build.
    if all(item.speaker is None for item in items):
        return plain, None

    with_speakers = [
        (item.start, item.end, item.text, 0 if item.speaker is None else item.speaker)
        for item in items
    ]
    return plain, with_speakers
|
|
|
|
def generate_transcript_export(payload: TranscriptExportRequest) -> Tuple[str, str, str]:
    """Render a transcript in the requested format and build its file name.

    Args:
        payload: Export request. This function reads ``format``,
            ``utterances`` (via ``_build_utterance_tuples``), ``title`` and,
            for the "Plain Text" format, ``include_timestamps``.

    Returns:
        ``(content, filename, mime_type)``. The file name has the form
        ``transcript[_<sanitized title>]_<YYYYmmdd_HHMMSS><extension>``, where
        the extension and MIME type come from the selected format entry.

    Raises:
        ValueError: If ``payload.format`` is in neither ``SUBTITLE_FORMATS``
            nor ``TRANSCRIPT_FORMATS``.
    """
    utterances, utterances_with_speakers = _build_utterance_tuples(payload)

    if payload.format in SUBTITLE_FORMATS:
        fmt = SUBTITLE_FORMATS[payload.format]
        content = fmt["function"](utterances, utterances_with_speakers)
    elif payload.format in TRANSCRIPT_FORMATS:
        fmt = TRANSCRIPT_FORMATS[payload.format]
        if payload.format == "Plain Text":
            # The only call here that forwards the timestamp toggle, so the
            # exporter is invoked directly instead of through the registry.
            content = export_plain_text(
                utterances,
                utterances_with_speakers,
                include_timestamps=payload.include_timestamps,
            )
        else:
            content = fmt["function"](utterances, utterances_with_speakers)
    else:
        raise ValueError(f"Unsupported transcript export format: {payload.format}")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # A title made only of stripped characters sanitizes to ""; gate on the
    # sanitized value so the name never becomes "transcript__<timestamp>".
    safe_title = _sanitize_filename(payload.title) if payload.title else ""
    title_part = f"_{safe_title}" if safe_title else ""
    filename = f"transcript{title_part}_{timestamp}{fmt['extension']}"
    return content, filename, fmt["mime_type"]
|
|
|
|
def generate_summary_export(payload: SummaryExportRequest) -> Tuple[str, str, str]:
    """Render a summary in the requested format and build its file name.

    Args:
        payload: Export request. This function reads ``format``, ``summary``,
            ``metadata`` and ``title``.

    Returns:
        ``(content, filename, mime_type)``. The file name has the form
        ``summary[_<sanitized title>]_<YYYYmmdd_HHMMSS><extension>``, where
        the extension and MIME type come from the selected format entry.

    Raises:
        ValueError: If ``payload.format`` is not a key of ``SUMMARY_FORMATS``.
    """
    if payload.format not in SUMMARY_FORMATS:
        raise ValueError(f"Unsupported summary export format: {payload.format}")

    fmt = SUMMARY_FORMATS[payload.format]
    content = fmt["function"](payload.summary, payload.metadata)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # A title made only of stripped characters sanitizes to ""; gate on the
    # sanitized value so the name never becomes "summary__<timestamp>".
    safe_title = _sanitize_filename(payload.title) if payload.title else ""
    title_part = f"_{safe_title}" if safe_title else ""
    filename = f"summary{title_part}_{timestamp}{fmt['extension']}"
    return content, filename, fmt["mime_type"]
|
|