import re
import random
import nltk
import numpy as np
from typing import List, Dict, Optional
import time
from collections import Counter
import statistics

from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import wordnet

# Advanced imports with fallbacks
def safe_import_with_fallback(module_name, component=None):
    """Safe import with fallback handling"""
    try:
        if component:
            module = __import__(module_name, fromlist=[component])
            return getattr(module, component), True
        else:
            return __import__(module_name), True
    except ImportError:
        return None, False
    except Exception:
        return None, False

# Load advanced models
print("🚀 Loading Universal AI Text Humanizer...")
SentenceTransformer, SENTENCE_TRANSFORMERS_AVAILABLE = safe_import_with_fallback('sentence_transformers', 'SentenceTransformer')
pipeline, TRANSFORMERS_AVAILABLE = safe_import_with_fallback('transformers', 'pipeline')

try:
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity as sklearn_cosine_similarity
    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False

try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False

class UniversalAITextHumanizer:
    """
    Universal AI Text Humanizer for All Business Use Cases
    Based on QuillBot and Walter Writes AI research
    Simplified interface with only Natural/Conversational modes
    """
    
    def __init__(self, enable_gpu=True):
        print("🌍 Initializing Universal AI Text Humanizer...")
        print("🎯 Designed for E-commerce, Marketing, SEO & All Business Needs")
        
        self.enable_gpu = enable_gpu and TORCH_AVAILABLE
        
        # Initialize models and databases
        self._load_models()
        self._initialize_universal_patterns()
        
        print("✅ Universal AI Text Humanizer ready for all use cases!")
        self._print_status()
    
    def _load_models(self):
        """Load AI models with graceful fallbacks"""
        self.similarity_model = None
        self.paraphraser = None
        
        # Load sentence transformer for quality control
        if SENTENCE_TRANSFORMERS_AVAILABLE:
            try:
                device = 'cuda' if self.enable_gpu and TORCH_AVAILABLE and torch.cuda.is_available() else 'cpu'
                self.similarity_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
                print("✅ Advanced similarity model loaded")
            except Exception as e:
                print(f"⚠️ Similarity model unavailable: {e}")
        
        # Load paraphrasing model
        if TRANSFORMERS_AVAILABLE:
            try:
                device = 0 if self.enable_gpu and TORCH_AVAILABLE and torch.cuda.is_available() else -1
                self.paraphraser = pipeline(
                    "text2text-generation",
                    model="google/flan-t5-small",
                    device=device,
                    max_length=256
                )
                print("✅ AI paraphrasing model loaded")
            except Exception as e:
                print(f"⚠️ Paraphrasing model unavailable: {e}")
        
        # Fallback similarity using TF-IDF
        if SKLEARN_AVAILABLE:
            self.tfidf_vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2), max_features=5000)
        else:
            self.tfidf_vectorizer = None
    
    def _initialize_universal_patterns(self):
        """Initialize patterns based on QuillBot & Walter Writes research"""
        
        # Universal word replacements (business-friendly)
        self.word_replacements = {
            # Formal business terms -> Natural alternatives
            "utilize": "use", "demonstrate": "show", "facilitate": "help", "implement": "set up",
            "consequently": "so", "furthermore": "also", "moreover": "plus", "nevertheless": "but",
            "subsequently": "then", "accordingly": "therefore", "regarding": "about", "concerning": "about",
            "approximately": "about", "endeavor": "try", "commence": "start", "terminate": "end",
            "obtain": "get", "purchase": "buy", "examine": "check", "analyze": "look at",
            "construct": "build", "establish": "create", "methodology": "method", "systematic": "organized",
            "comprehensive": "complete", "significant": "important", "substantial": "large", "optimal": "best",
            "sufficient": "enough", "adequate": "good", "exceptional": "great", "fundamental": "basic",
            "essential": "key", "crucial": "important", "paramount": "very important", "imperative": "must",
            "mandatory": "required", "optimization": "improvement", "enhancement": "upgrade",
            "implementation": "setup", "utilization": "use", "evaluation": "review", "assessment": "check",
            "validation": "proof", "verification": "confirmation", "consolidation": "combining", 
            "integration": "merging", "transformation": "change", "modification": "change"
        }
        
        # AI-specific phrases to replace (QuillBot research)
        self.ai_phrase_replacements = {
            "it is important to note that": "notably", "it should be emphasized that": "importantly",
            "it is worth mentioning that": "by the way", "it is crucial to understand that": "remember",
            "from a practical standpoint": "practically", "in terms of implementation": "when implementing",
            "with respect to the aforementioned": "about this", "as previously mentioned": "as noted",
            "in light of this": "because of this", "it is imperative to understand": "you should know",
            "one must consider": "consider", "it is evident that": "clearly", "it can be observed that": "we can see",
            "upon careful consideration": "after thinking", "in the final analysis": "ultimately"
        }
        
        # Professional contractions (universal appeal)
        self.contractions = {
            "do not": "don't", "does not": "doesn't", "did not": "didn't", "will not": "won't",
            "would not": "wouldn't", "should not": "shouldn't", "could not": "couldn't", "cannot": "can't",
            "is not": "isn't", "are not": "aren't", "was not": "wasn't", "were not": "weren't",
            "have not": "haven't", "has not": "hasn't", "had not": "hadn't", "I am": "I'm",
            "you are": "you're", "he is": "he's", "she is": "she's", "it is": "it's",
            "we are": "we're", "they are": "they're", "I have": "I've", "you have": "you've",
            "we have": "we've", "they have": "they've", "I will": "I'll", "you will": "you'll",
            "we will": "we'll", "they will": "they'll"
        }
        
        # Natural transition words (Walter Writes research)
        self.natural_transitions = [
            "Also", "Plus", "And", "Then", "So", "But", "However", "Still", "Now", "Well",
            "Actually", "Besides", "Additionally", "What's more", "On top of that", "Beyond that"
        ]
    
    def preserve_structure(self, original: str, processed: str) -> str:
        """Preserve original text structure (paragraphs, formatting)"""
        # Split by double newlines (paragraphs)
        original_paragraphs = re.split(r'\n\s*\n', original)
        if len(original_paragraphs) <= 1:
            return processed
        
        # Split processed text into sentences
        processed_sentences = sent_tokenize(processed)
        
        # Try to maintain paragraph structure
        result_paragraphs = []
        sentence_idx = 0
        
        for para in original_paragraphs:
            para_sentences = sent_tokenize(para)
            para_sentence_count = len(para_sentences)
            
            if sentence_idx + para_sentence_count <= len(processed_sentences):
                para_processed = ' '.join(processed_sentences[sentence_idx:sentence_idx + para_sentence_count])
                result_paragraphs.append(para_processed)
                sentence_idx += para_sentence_count
            else:
                # Add remaining sentences to this paragraph
                remaining = ' '.join(processed_sentences[sentence_idx:])
                if remaining:
                    result_paragraphs.append(remaining)
                break
        
        return '\n\n'.join(result_paragraphs)
    
    def apply_word_replacements(self, text: str, intensity: float = 0.7) -> str:
        """Apply universal word replacements"""
        words = word_tokenize(text)
        modified_words = []
        
        for word in words:
            word_clean = word.lower().strip('.,!?;:"')
            
            if word_clean in self.word_replacements and random.random() < intensity:
                replacement = self.word_replacements[word_clean]
                # Preserve case
                if word.isupper():
                    replacement = replacement.upper()
                elif word.istitle():
                    replacement = replacement.title()
                modified_words.append(replacement)
            else:
                modified_words.append(word)
        
        # Reconstruct with proper spacing
        result = ""
        for i, word in enumerate(modified_words):
            if i > 0 and word not in ".,!?;:\"')":
                result += " "
            result += word
        
        return result
    
    def apply_contractions(self, text: str, style: str, intensity: float = 0.6) -> str:
        """Apply contractions based on style"""
        if style == "natural" and intensity < 0.5:
            intensity *= 0.7  # Less aggressive for natural style
        
        for formal, contracted in self.contractions.items():
            if random.random() < intensity:
                pattern = r'\b' + re.escape(formal) + r'\b'
                text = re.sub(pattern, contracted, text, flags=re.IGNORECASE)
        
        return text
    
    def replace_ai_phrases(self, text: str, intensity: float = 0.8) -> str:
        """Replace AI-specific phrases"""
        for ai_phrase, replacement in self.ai_phrase_replacements.items():
            if ai_phrase in text.lower():
                if random.random() < intensity:
                    # Preserve case
                    if ai_phrase[0].isupper() or text.find(ai_phrase.title()) != -1:
                        replacement = replacement.capitalize()
                    
                    text = text.replace(ai_phrase, replacement)
                    text = text.replace(ai_phrase.title(), replacement.title())
        
        return text
    
    def vary_sentence_structure(self, text: str, style: str, intensity: float = 0.4) -> str:
        """Add sentence variety based on style"""
        sentences = sent_tokenize(text)
        varied_sentences = []
        
        for sentence in sentences:
            if len(sentence.split()) > 8 and random.random() < intensity:
                # Add natural transitions occasionally
                if style == "conversational" and random.random() < 0.3:
                    transition = random.choice(self.natural_transitions)
                    sentence = transition + ", " + sentence.lower()
                
                # Split long sentences occasionally (Walter Writes technique)
                elif len(sentence.split()) > 15 and random.random() < 0.2:
                    words = sentence.split()
                    mid_point = len(words) // 2
                    # Find a natural break point
                    for i in range(mid_point-2, mid_point+3):
                        if i < len(words) and words[i].lower() in ['and', 'but', 'so', 'because']:
                            first_part = ' '.join(words[:i]) + '.'
                            second_part = ' '.join(words[i+1:])
                            if second_part:
                                second_part = second_part[0].upper() + second_part[1:]
                                varied_sentences.extend([first_part, second_part])
                                continue
            
            varied_sentences.append(sentence)
        
        return ' '.join(varied_sentences)
    
    def apply_advanced_paraphrasing(self, text: str, style: str, intensity: float = 0.3) -> str:
        """Apply AI paraphrasing if available"""
        if not self.paraphraser or intensity < 0.6:
            return text
        
        sentences = sent_tokenize(text)
        paraphrased_sentences = []
        
        for sentence in sentences:
            if len(sentence.split()) > 10 and random.random() < intensity * 0.4:
                try:
                    # Style-specific prompts
                    if style == "conversational":
                        prompt = f"Make this more conversational and natural: {sentence}"
                    else:
                        prompt = f"Rewrite this naturally: {sentence}"
                    
                    result = self.paraphraser(
                        prompt,
                        max_length=min(150, len(sentence) + 30),
                        min_length=max(10, len(sentence) // 2),
                        temperature=0.7,
                        do_sample=True
                    )
                    
                    paraphrased = result[0]['generated_text'].replace(prompt, '').strip().strip('"\'')
                    
                    # Quality check
                    if (paraphrased and len(paraphrased) > 5 and 
                        len(paraphrased) < len(sentence) * 1.8 and
                        not paraphrased.lower().startswith(('sorry', 'i cannot'))):
                        paraphrased_sentences.append(paraphrased)
                    else:
                        paraphrased_sentences.append(sentence)
                except Exception:
                    paraphrased_sentences.append(sentence)
            else:
                paraphrased_sentences.append(sentence)
        
        return ' '.join(paraphrased_sentences)
    
    def calculate_similarity(self, text1: str, text2: str) -> float:
        """Calculate semantic similarity"""
        if self.similarity_model:
            try:
                embeddings1 = self.similarity_model.encode([text1])
                embeddings2 = self.similarity_model.encode([text2])
                similarity = np.dot(embeddings1[0], embeddings2[0]) / (
                    np.linalg.norm(embeddings1[0]) * np.linalg.norm(embeddings2[0])
                )
                return float(similarity)
            except Exception:
                pass
        
        # Fallback to TF-IDF
        if self.tfidf_vectorizer and SKLEARN_AVAILABLE:
            try:
                tfidf_matrix = self.tfidf_vectorizer.fit_transform([text1, text2])
                similarity = sklearn_cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
                return float(similarity)
            except Exception:
                pass
        
        # Basic word overlap fallback
        words1 = set(word_tokenize(text1.lower()))
        words2 = set(word_tokenize(text2.lower()))
        if not words1 or not words2:
            return 1.0 if text1 == text2 else 0.0
        
        intersection = words1.intersection(words2)
        union = words1.union(words2)
        return len(intersection) / len(union) if union else 1.0
    
    def humanize_text_universal(self, 
                               text: str, 
                               style: str = "natural",
                               intensity: float = 0.7) -> Dict:
        """
        Universal text humanization for all business use cases
        
        Args:
            text: Input text to humanize
            style: 'natural' or 'conversational'
            intensity: Transformation intensity (0.0 to 1.0)
        
        Returns:
            Dictionary with results and metrics
        """
        if not text.strip():
            return {
                "original_text": text,
                "humanized_text": text,
                "similarity_score": 1.0,
                "changes_made": [],
                "processing_time_ms": 0.0,
                "style": style,
                "intensity": intensity,
                "structure_preserved": True
            }
        
        start_time = time.time()
        original_text = text
        humanized_text = text
        changes_made = []
        
        # Phase 1: Replace AI-specific phrases
        if intensity > 0.2:
            before = humanized_text
            humanized_text = self.replace_ai_phrases(humanized_text, intensity * 0.9)
            if humanized_text != before:
                changes_made.append("Removed AI phrases")
        
        # Phase 2: Universal word replacements
        if intensity > 0.3:
            before = humanized_text
            humanized_text = self.apply_word_replacements(humanized_text, intensity * 0.8)
            if humanized_text != before:
                changes_made.append("Improved word choice")
        
        # Phase 3: Add contractions
        if intensity > 0.4:
            before = humanized_text
            humanized_text = self.apply_contractions(humanized_text, style, intensity * 0.7)
            if humanized_text != before:
                changes_made.append("Added natural contractions")
        
        # Phase 4: Vary sentence structure
        if intensity > 0.5:
            before = humanized_text
            humanized_text = self.vary_sentence_structure(humanized_text, style, intensity * 0.4)
            if humanized_text != before:
                changes_made.append("Improved sentence flow")
        
        # Phase 5: Advanced paraphrasing (if available and high intensity)
        if intensity > 0.7 and self.paraphraser:
            before = humanized_text
            humanized_text = self.apply_advanced_paraphrasing(humanized_text, style, intensity)
            if humanized_text != before:
                changes_made.append("Enhanced with AI paraphrasing")
        
        # Phase 6: Preserve structure
        humanized_text = self.preserve_structure(original_text, humanized_text)
        
        # Calculate quality metrics
        similarity_score = self.calculate_similarity(original_text, humanized_text)
        processing_time = (time.time() - start_time) * 1000
        
        # Quality control - revert if too different
        if similarity_score < 0.7:
            print(f"⚠️ Similarity too low ({similarity_score:.3f}), reverting changes")
            humanized_text = original_text
            similarity_score = 1.0
            changes_made = ["Reverted - maintained original meaning"]
        
        return {
            "original_text": original_text,
            "humanized_text": humanized_text,
            "similarity_score": similarity_score,
            "changes_made": changes_made,
            "processing_time_ms": processing_time,
            "style": style,
            "intensity": intensity,
            "structure_preserved": True,
            "word_count_original": len(original_text.split()),
            "word_count_humanized": len(humanized_text.split()),
            "character_count_original": len(original_text),
            "character_count_humanized": len(humanized_text)
        }
    
    def _print_status(self):
        """Print current status"""
        print("\n📊 UNIVERSAL AI TEXT HUMANIZER STATUS:")
        print("-" * 45)
        print(f"🧠 Advanced Similarity: {'✅' if self.similarity_model else '❌'}")
        print(f"🤖 AI Paraphrasing: {'✅' if self.paraphraser else '❌'}")
        print(f"📊 TF-IDF Fallback: {'✅' if self.tfidf_vectorizer else '❌'}")
        print(f"🚀 GPU Acceleration: {'✅' if self.enable_gpu else '❌'}")
        print(f"🌍 Universal Patterns: ✅ LOADED")
        print(f"📝 Word Replacements: ✅ {len(self.word_replacements)} mappings")
        print(f"🔤 AI Phrase Detection: ✅ {len(self.ai_phrase_replacements)} patterns")
        print(f"💬 Contractions: ✅ {len(self.contractions)} patterns")
        print(f"🏗️ Structure Preservation: ✅ ENABLED")
        
        # Calculate feature completeness
        features = [
            bool(self.similarity_model),
            bool(self.paraphraser),
            bool(self.tfidf_vectorizer),
            True,  # Universal patterns
            True,  # Structure preservation
            True   # Quality control
        ]
        completeness = (sum(features) / len(features)) * 100
        print(f"🎯 System Completeness: {completeness:.1f}%")
        
        if completeness >= 80:
            print("🎉 READY FOR ALL BUSINESS USE CASES!")
        elif completeness >= 60:
            print("✅ Core features ready - some advanced features may be limited")
        else:
            print("⚠️ Basic mode - install additional dependencies for full features")

# Test function
if __name__ == "__main__":
    humanizer = UniversalAITextHumanizer()
    
    # Test cases for different business scenarios
    test_cases = [
        {
            "name": "E-commerce Product Description",
            "text": "Furthermore, this product demonstrates exceptional quality and utilizes advanced materials to ensure optimal performance. Subsequently, customers will experience significant improvements in their daily activities.",
            "style": "natural"
        },
        {
            "name": "Marketing Copy",
            "text": "Moreover, our comprehensive solution facilitates unprecedented optimization of business processes. Therefore, organizations should implement our platform to obtain optimal results.",
            "style": "conversational"
        },
        {
            "name": "SEO Blog Content",
            "text": "It is important to note that search engine optimization requires systematic approaches. Subsequently, websites must utilize comprehensive strategies to enhance their visibility.",
            "style": "natural"
        }
    ]
    
    print(f"\n🧪 TESTING UNIVERSAL HUMANIZER")
    print("=" * 40)
    
    for i, test_case in enumerate(test_cases, 1):
        print(f"\n🔬 Test {i}: {test_case['name']}")
        print("-" * 50)
        print(f"📝 Original: {test_case['text']}")
        
        result = humanizer.humanize_text_universal(
            text=test_case['text'],
            style=test_case['style'],
            intensity=0.7
        )
        
        print(f"✨ Humanized: {result['humanized_text']}")
        print(f"📊 Similarity: {result['similarity_score']:.3f}")
        print(f"⚡ Processing: {result['processing_time_ms']:.1f}ms")
        print(f"🔧 Changes: {', '.join(result['changes_made'])}")
    
    print(f"\n🎉 Universal testing completed!")
    print(f"🌍 Ready for E-commerce, Marketing, SEO & All Business Use Cases!")